1235474Sbz/*- 2235474Sbz * Copyright (c) 2007, Myricom Inc. 3235474Sbz * Copyright (c) 2008, Intel Corporation. 4235944Sbz * Copyright (c) 2012 The FreeBSD Foundation 5235474Sbz * All rights reserved. 6235474Sbz * 7235944Sbz * Portions of this software were developed by Bjoern Zeeb 8235944Sbz * under sponsorship from the FreeBSD Foundation. 9235944Sbz * 10235474Sbz * Redistribution and use in source and binary forms, with or without 11235474Sbz * modification, are permitted provided that the following conditions 12235474Sbz * are met: 13235474Sbz * 1. Redistributions of source code must retain the above copyright 14235474Sbz * notice, this list of conditions and the following disclaimer. 15235474Sbz * 2. Redistributions in binary form must reproduce the above copyright 16235474Sbz * notice, this list of conditions and the following disclaimer in the 17235474Sbz * documentation and/or other materials provided with the distribution. 18235474Sbz * 19235474Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20235474Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21235474Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22235474Sbz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23235474Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24235474Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25235474Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26235474Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27235474Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28235474Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29235474Sbz * SUCH DAMAGE. 30235474Sbz */ 31179737Sjfv 32235944Sbz#include <sys/cdefs.h> 33235944Sbz__FBSDID("$FreeBSD$"); 34235944Sbz 35235944Sbz#include "opt_inet.h" 36235944Sbz#include "opt_inet6.h" 37235944Sbz 38179737Sjfv#include <sys/param.h> 39179737Sjfv#include <sys/systm.h> 40179737Sjfv#include <sys/mbuf.h> 41179737Sjfv#include <sys/kernel.h> 42179737Sjfv#include <sys/socket.h> 43179737Sjfv 44179737Sjfv#include <net/if.h> 45235944Sbz#include <net/if_var.h> 46179737Sjfv#include <net/ethernet.h> 47236394Sbz#include <net/vnet.h> 48179737Sjfv 49179737Sjfv#include <netinet/in_systm.h> 50179737Sjfv#include <netinet/in.h> 51235944Sbz#include <netinet/ip6.h> 52179737Sjfv#include <netinet/ip.h> 53235981Sbz#include <netinet/ip_var.h> 54179737Sjfv#include <netinet/tcp.h> 55179737Sjfv#include <netinet/tcp_lro.h> 56179737Sjfv 57235981Sbz#include <netinet6/ip6_var.h> 58235981Sbz 59179737Sjfv#include <machine/in_cksum.h> 60179737Sjfv 61235944Sbz#ifndef LRO_ENTRIES 62235944Sbz#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ 63235944Sbz#endif 64179737Sjfv 65235944Sbz#define TCP_LRO_UPDATE_CSUM 1 66235944Sbz#ifndef TCP_LRO_UPDATE_CSUM 67235944Sbz#define TCP_LRO_INVALID_CSUM 0x0000 68235944Sbz#endif 69179737Sjfv 70179737Sjfvint 71235944Sbztcp_lro_init(struct lro_ctrl *lc) 72179737Sjfv{ 73235944Sbz struct lro_entry *le; 74235944Sbz int error, i; 75179737Sjfv 76235944Sbz lc->lro_bad_csum = 0; 77235944Sbz lc->lro_queued = 0; 78235944Sbz lc->lro_flushed = 0; 79235944Sbz lc->lro_cnt = 0; 80235944Sbz SLIST_INIT(&lc->lro_free); 81235944Sbz SLIST_INIT(&lc->lro_active); 82179737Sjfv 83235944Sbz error = 0; 84179737Sjfv for (i = 0; i < LRO_ENTRIES; i++) { 85235944Sbz le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF, 86235944Sbz M_NOWAIT | M_ZERO); 87235944Sbz if (le == NULL) { 88179737Sjfv if (i == 0) 89179737Sjfv error = ENOMEM; 90179737Sjfv break; 91179737Sjfv } 92235944Sbz lc->lro_cnt = i + 1; 93235944Sbz SLIST_INSERT_HEAD(&lc->lro_free, le, next); 94179737Sjfv } 95179737Sjfv 96179737Sjfv return (error); 97179737Sjfv} 98179737Sjfv 99179737Sjfvvoid 100235944Sbztcp_lro_free(struct lro_ctrl *lc) 101179737Sjfv{ 102235944Sbz struct lro_entry *le; 103179737Sjfv 104235944Sbz while (!SLIST_EMPTY(&lc->lro_free)) { 105235944Sbz le = SLIST_FIRST(&lc->lro_free); 106235944Sbz SLIST_REMOVE_HEAD(&lc->lro_free, next); 107235944Sbz free(le, M_DEVBUF); 108179737Sjfv } 109179737Sjfv} 110179737Sjfv 111235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 112235944Sbzstatic uint16_t 113235944Sbztcp_lro_csum_th(struct tcphdr *th) 114235944Sbz{ 115235944Sbz uint32_t ch; 116235944Sbz uint16_t *p, l; 117235944Sbz 118235944Sbz ch = th->th_sum = 0x0000; 119235944Sbz l = th->th_off; 120235944Sbz p = (uint16_t *)th; 121235944Sbz while (l > 0) { 122235944Sbz ch += *p; 123235944Sbz p++; 124235944Sbz ch += *p; 125235944Sbz p++; 126235944Sbz l--; 127235944Sbz } 128235944Sbz while (ch > 0xffff) 129235944Sbz ch = (ch >> 16) + (ch & 0xffff); 130235944Sbz 131235944Sbz return (ch & 0xffff); 132235944Sbz} 133235944Sbz 134235944Sbzstatic uint16_t 135235944Sbztcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th, 136235944Sbz uint16_t tcp_data_len, uint16_t csum) 137235944Sbz{ 138235944Sbz uint32_t c; 139235944Sbz uint16_t cs; 140235944Sbz 141235944Sbz c = csum; 142235944Sbz 143235944Sbz /* Remove length from checksum. */ 144235944Sbz switch (le->eh_type) { 145235944Sbz#ifdef INET6 146235944Sbz case ETHERTYPE_IPV6: 147235944Sbz { 148235944Sbz struct ip6_hdr *ip6; 149235944Sbz 150235944Sbz ip6 = (struct ip6_hdr *)l3hdr; 151235944Sbz if (le->append_cnt == 0) 152235944Sbz cs = ip6->ip6_plen; 153235944Sbz else { 154235944Sbz uint32_t cx; 155235944Sbz 156235944Sbz cx = ntohs(ip6->ip6_plen); 157235944Sbz cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0); 158235944Sbz } 159235944Sbz break; 160235944Sbz } 161235944Sbz#endif 162235944Sbz#ifdef INET 163235944Sbz case ETHERTYPE_IP: 164235944Sbz { 165235944Sbz struct ip *ip4; 166235944Sbz 167235944Sbz ip4 = (struct ip *)l3hdr; 168235944Sbz if (le->append_cnt == 0) 169235944Sbz cs = ip4->ip_len; 170235944Sbz else { 171235944Sbz cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4), 172235944Sbz IPPROTO_TCP); 173235944Sbz cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr, 174235944Sbz htons(cs)); 175235944Sbz } 176235944Sbz break; 177235944Sbz } 178235944Sbz#endif 179235944Sbz default: 180235944Sbz cs = 0; /* Keep compiler happy. */ 181235944Sbz } 182235944Sbz 183235944Sbz cs = ~cs; 184235944Sbz c += cs; 185235944Sbz 186235944Sbz /* Remove TCP header csum. */ 187235944Sbz cs = ~tcp_lro_csum_th(th); 188235944Sbz c += cs; 189235944Sbz while (c > 0xffff) 190235944Sbz c = (c >> 16) + (c & 0xffff); 191235944Sbz 192235944Sbz return (c & 0xffff); 193235944Sbz} 194235944Sbz#endif 195235944Sbz 196179737Sjfvvoid 197255010Snptcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout) 198255010Snp{ 199255010Snp struct lro_entry *le, *le_tmp; 200255010Snp struct timeval tv; 201255010Snp 202255010Snp if (SLIST_EMPTY(&lc->lro_active)) 203255010Snp return; 204255010Snp 205255010Snp getmicrotime(&tv); 206255010Snp timevalsub(&tv, timeout); 207255010Snp SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) { 208255010Snp if (timevalcmp(&tv, &le->mtime, >=)) { 209255010Snp SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 210255010Snp tcp_lro_flush(lc, le); 211255010Snp } 212255010Snp } 213255010Snp} 214255010Snp 215255010Snpvoid 216235944Sbztcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) 217179737Sjfv{ 218179737Sjfv 219235944Sbz if (le->append_cnt > 0) { 220235944Sbz struct tcphdr *th; 221235944Sbz uint16_t p_len; 222179737Sjfv 223235944Sbz p_len = htons(le->p_len); 224235944Sbz switch (le->eh_type) { 225235944Sbz#ifdef INET6 226235944Sbz case ETHERTYPE_IPV6: 227235944Sbz { 228235944Sbz struct ip6_hdr *ip6; 229179737Sjfv 230235944Sbz ip6 = le->le_ip6; 231235944Sbz ip6->ip6_plen = p_len; 232235944Sbz th = (struct tcphdr *)(ip6 + 1); 233235944Sbz le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 234235944Sbz CSUM_PSEUDO_HDR; 235235944Sbz le->p_len += ETHER_HDR_LEN + sizeof(*ip6); 236235944Sbz break; 237235944Sbz } 238235944Sbz#endif 239235944Sbz#ifdef INET 240235944Sbz case ETHERTYPE_IP: 241235944Sbz { 242235944Sbz struct ip *ip4; 243235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 244235944Sbz uint32_t cl; 245235944Sbz uint16_t c; 246235944Sbz#endif 247179737Sjfv 248235944Sbz ip4 = le->le_ip4; 249235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 250235944Sbz /* Fix IP header checksum for new length. */ 251235944Sbz c = ~ip4->ip_sum; 252235944Sbz cl = c; 253235944Sbz c = ~ip4->ip_len; 254235944Sbz cl += c + p_len; 255235944Sbz while (cl > 0xffff) 256235944Sbz cl = (cl >> 16) + (cl & 0xffff); 257235944Sbz c = cl; 258235944Sbz ip4->ip_sum = ~c; 259235944Sbz#else 260235944Sbz ip4->ip_sum = TCP_LRO_INVALID_CSUM; 261235944Sbz#endif 262235944Sbz ip4->ip_len = p_len; 263235944Sbz th = (struct tcphdr *)(ip4 + 1); 264235944Sbz le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 265235944Sbz CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; 266235944Sbz le->p_len += ETHER_HDR_LEN; 267235944Sbz break; 268179737Sjfv } 269235944Sbz#endif 270235944Sbz default: 271235944Sbz th = NULL; /* Keep compiler happy. */ 272235944Sbz } 273235944Sbz le->m_head->m_pkthdr.csum_data = 0xffff; 274235944Sbz le->m_head->m_pkthdr.len = le->p_len; 275235944Sbz 276235944Sbz /* Incorporate the latest ACK into the TCP header. */ 277235944Sbz th->th_ack = le->ack_seq; 278235944Sbz th->th_win = le->window; 279235944Sbz /* Incorporate latest timestamp into the TCP header. */ 280235944Sbz if (le->timestamp != 0) { 281235944Sbz uint32_t *ts_ptr; 282235944Sbz 283235944Sbz ts_ptr = (uint32_t *)(th + 1); 284235944Sbz ts_ptr[1] = htonl(le->tsval); 285235944Sbz ts_ptr[2] = le->tsecr; 286235944Sbz } 287235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 288235944Sbz /* Update the TCP header checksum. */ 289235944Sbz le->ulp_csum += p_len; 290235944Sbz le->ulp_csum += tcp_lro_csum_th(th); 291235944Sbz while (le->ulp_csum > 0xffff) 292235944Sbz le->ulp_csum = (le->ulp_csum >> 16) + 293235944Sbz (le->ulp_csum & 0xffff); 294235944Sbz th->th_sum = (le->ulp_csum & 0xffff); 295235944Sbz th->th_sum = ~th->th_sum; 296235944Sbz#else 297235944Sbz th->th_sum = TCP_LRO_INVALID_CSUM; 298235944Sbz#endif 299179737Sjfv } 300235944Sbz 301235944Sbz (*lc->ifp->if_input)(lc->ifp, le->m_head); 302235944Sbz lc->lro_queued += le->append_cnt + 1; 303235944Sbz lc->lro_flushed++; 304235944Sbz bzero(le, sizeof(*le)); 305235944Sbz SLIST_INSERT_HEAD(&lc->lro_free, le, next); 306179737Sjfv} 307179737Sjfv 308235944Sbz#ifdef INET6 309235944Sbzstatic int 310235944Sbztcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, 311235944Sbz struct tcphdr **th) 312179737Sjfv{ 313179737Sjfv 314235944Sbz /* XXX-BZ we should check the flow-label. */ 315179737Sjfv 316235944Sbz /* XXX-BZ We do not yet support ext. hdrs. */ 317235944Sbz if (ip6->ip6_nxt != IPPROTO_TCP) 318235944Sbz return (TCP_LRO_NOT_SUPPORTED); 319179737Sjfv 320235944Sbz /* Find the TCP header. */ 321235944Sbz *th = (struct tcphdr *)(ip6 + 1); 322179737Sjfv 323235944Sbz return (0); 324235944Sbz} 325235944Sbz#endif 326235944Sbz 327235944Sbz#ifdef INET 328235944Sbzstatic int 329235944Sbztcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4, 330235944Sbz struct tcphdr **th) 331235944Sbz{ 332235944Sbz int csum_flags; 333235944Sbz uint16_t csum; 334235944Sbz 335235944Sbz if (ip4->ip_p != IPPROTO_TCP) 336235944Sbz return (TCP_LRO_NOT_SUPPORTED); 337235944Sbz 338235944Sbz /* Ensure there are no options. */ 339235944Sbz if ((ip4->ip_hl << 2) != sizeof (*ip4)) 340235944Sbz return (TCP_LRO_CANNOT); 341235944Sbz 342235944Sbz /* .. and the packet is not fragmented. */ 343235944Sbz if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) 344235944Sbz return (TCP_LRO_CANNOT); 345235944Sbz 346235944Sbz /* Legacy IP has a header checksum that needs to be correct. */ 347235944Sbz csum_flags = m->m_pkthdr.csum_flags; 348182089Skmacy if (csum_flags & CSUM_IP_CHECKED) { 349182089Skmacy if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { 350235944Sbz lc->lro_bad_csum++; 351235944Sbz return (TCP_LRO_CANNOT); 352182089Skmacy } 353182089Skmacy } else { 354235944Sbz csum = in_cksum_hdr(ip4); 355247104Sgallatin if (__predict_false((csum) != 0)) { 356235944Sbz lc->lro_bad_csum++; 357235944Sbz return (TCP_LRO_CANNOT); 358182089Skmacy } 359179737Sjfv } 360179737Sjfv 361235944Sbz /* Find the TCP header (we assured there are no IP options). */ 362235944Sbz *th = (struct tcphdr *)(ip4 + 1); 363179737Sjfv 364235944Sbz return (0); 365235944Sbz} 366235944Sbz#endif 367179737Sjfv 368235944Sbzint 369235944Sbztcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) 370235944Sbz{ 371235944Sbz struct lro_entry *le; 372235944Sbz struct ether_header *eh; 373235944Sbz#ifdef INET6 374235944Sbz struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ 375235944Sbz#endif 376235944Sbz#ifdef INET 377235944Sbz struct ip *ip4 = NULL; /* Keep compiler happy. */ 378235944Sbz#endif 379235944Sbz struct tcphdr *th; 380235944Sbz void *l3hdr = NULL; /* Keep compiler happy. */ 381235944Sbz uint32_t *ts_ptr; 382235944Sbz tcp_seq seq; 383235944Sbz int error, ip_len, l; 384235944Sbz uint16_t eh_type, tcp_data_len; 385179737Sjfv 386235944Sbz /* We expect a contiguous header [eh, ip, tcp]. */ 387235944Sbz 388235944Sbz eh = mtod(m, struct ether_header *); 389235944Sbz eh_type = ntohs(eh->ether_type); 390235944Sbz switch (eh_type) { 391235944Sbz#ifdef INET6 392235944Sbz case ETHERTYPE_IPV6: 393236394Sbz { 394236394Sbz CURVNET_SET(lc->ifp->if_vnet); 395235981Sbz if (V_ip6_forwarding != 0) { 396235981Sbz /* XXX-BZ stats but changing lro_ctrl is a problem. */ 397236394Sbz CURVNET_RESTORE(); 398235981Sbz return (TCP_LRO_CANNOT); 399235981Sbz } 400236394Sbz CURVNET_RESTORE(); 401235944Sbz l3hdr = ip6 = (struct ip6_hdr *)(eh + 1); 402235944Sbz error = tcp_lro_rx_ipv6(lc, m, ip6, &th); 403235944Sbz if (error != 0) 404235944Sbz return (error); 405235944Sbz tcp_data_len = ntohs(ip6->ip6_plen); 406235944Sbz ip_len = sizeof(*ip6) + tcp_data_len; 407235944Sbz break; 408236394Sbz } 409235944Sbz#endif 410235944Sbz#ifdef INET 411235944Sbz case ETHERTYPE_IP: 412236394Sbz { 413236394Sbz CURVNET_SET(lc->ifp->if_vnet); 414235981Sbz if (V_ipforwarding != 0) { 415235981Sbz /* XXX-BZ stats but changing lro_ctrl is a problem. */ 416236394Sbz CURVNET_RESTORE(); 417235981Sbz return (TCP_LRO_CANNOT); 418235981Sbz } 419236394Sbz CURVNET_RESTORE(); 420235944Sbz l3hdr = ip4 = (struct ip *)(eh + 1); 421235944Sbz error = tcp_lro_rx_ipv4(lc, m, ip4, &th); 422235944Sbz if (error != 0) 423235944Sbz return (error); 424235944Sbz ip_len = ntohs(ip4->ip_len); 425235944Sbz tcp_data_len = ip_len - sizeof(*ip4); 426235944Sbz break; 427236394Sbz } 428235944Sbz#endif 429235944Sbz /* XXX-BZ what happens in case of VLAN(s)? */ 430235944Sbz default: 431235944Sbz return (TCP_LRO_NOT_SUPPORTED); 432179737Sjfv } 433179737Sjfv 434235944Sbz /* 435235944Sbz * If the frame is padded beyond the end of the IP packet, then we must 436235944Sbz * trim the extra bytes off. 437235944Sbz */ 438235944Sbz l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len); 439235944Sbz if (l != 0) { 440235944Sbz if (l < 0) 441235944Sbz /* Truncated packet. */ 442235944Sbz return (TCP_LRO_CANNOT); 443179737Sjfv 444235944Sbz m_adj(m, -l); 445235944Sbz } 446235944Sbz 447235944Sbz /* 448235944Sbz * Check TCP header constraints. 449179737Sjfv */ 450235944Sbz /* Ensure no bits set besides ACK or PSH. */ 451235944Sbz if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) 452235944Sbz return (TCP_LRO_CANNOT); 453235944Sbz 454235944Sbz /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */ 455235944Sbz /* XXX-BZ Ideally we'd flush on PUSH? */ 456235944Sbz 457235944Sbz /* 458235944Sbz * Check for timestamps. 459235944Sbz * Since the only option we handle are timestamps, we only have to 460235944Sbz * handle the simple case of aligned timestamps. 461235944Sbz */ 462235944Sbz l = (th->th_off << 2); 463235944Sbz tcp_data_len -= l; 464235944Sbz l -= sizeof(*th); 465235944Sbz ts_ptr = (uint32_t *)(th + 1); 466235944Sbz if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || 467235944Sbz (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 468235944Sbz TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) 469235944Sbz return (TCP_LRO_CANNOT); 470235944Sbz 471235944Sbz /* If the driver did not pass in the checksum, set it now. */ 472235944Sbz if (csum == 0x0000) 473235944Sbz csum = th->th_sum; 474235944Sbz 475235944Sbz seq = ntohl(th->th_seq); 476235944Sbz 477235944Sbz /* Try to find a matching previous segment. */ 478235944Sbz SLIST_FOREACH(le, &lc->lro_active, next) { 479235944Sbz if (le->eh_type != eh_type) 480235944Sbz continue; 481235944Sbz if (le->source_port != th->th_sport || 482235944Sbz le->dest_port != th->th_dport) 483235944Sbz continue; 484235944Sbz switch (eh_type) { 485235944Sbz#ifdef INET6 486235944Sbz case ETHERTYPE_IPV6: 487235944Sbz if (bcmp(&le->source_ip6, &ip6->ip6_src, 488235944Sbz sizeof(struct in6_addr)) != 0 || 489235944Sbz bcmp(&le->dest_ip6, &ip6->ip6_dst, 490235944Sbz sizeof(struct in6_addr)) != 0) 491235944Sbz continue; 492235944Sbz break; 493235944Sbz#endif 494235944Sbz#ifdef INET 495235944Sbz case ETHERTYPE_IP: 496235944Sbz if (le->source_ip4 != ip4->ip_src.s_addr || 497235944Sbz le->dest_ip4 != ip4->ip_dst.s_addr) 498235944Sbz continue; 499235944Sbz break; 500235944Sbz#endif 501179737Sjfv } 502179737Sjfv 503235944Sbz /* Flush now if appending will result in overflow. */ 504235944Sbz if (le->p_len > (65535 - tcp_data_len)) { 505235944Sbz SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 506235944Sbz tcp_lro_flush(lc, le); 507235944Sbz break; 508235944Sbz } 509179737Sjfv 510235944Sbz /* Try to append the new segment. */ 511235944Sbz if (__predict_false(seq != le->next_seq || 512235944Sbz (tcp_data_len == 0 && le->ack_seq == th->th_ack))) { 513235944Sbz /* Out of order packet or duplicate ACK. */ 514235944Sbz SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 515235944Sbz tcp_lro_flush(lc, le); 516235944Sbz return (TCP_LRO_CANNOT); 517235944Sbz } 518179737Sjfv 519235944Sbz if (l != 0) { 520235944Sbz uint32_t tsval = ntohl(*(ts_ptr + 1)); 521235944Sbz /* Make sure timestamp values are increasing. */ 522235944Sbz /* XXX-BZ flip and use TSTMP_GEQ macro for this? */ 523235944Sbz if (__predict_false(le->tsval > tsval || 524235944Sbz *(ts_ptr + 2) == 0)) 525235944Sbz return (TCP_LRO_CANNOT); 526235944Sbz le->tsval = tsval; 527235944Sbz le->tsecr = *(ts_ptr + 2); 528235944Sbz } 529223797Scperciva 530235944Sbz le->next_seq += tcp_data_len; 531235944Sbz le->ack_seq = th->th_ack; 532235944Sbz le->window = th->th_win; 533235944Sbz le->append_cnt++; 534179737Sjfv 535235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 536235944Sbz le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th, 537235944Sbz tcp_data_len, ~csum); 538235944Sbz#endif 539179737Sjfv 540235944Sbz if (tcp_data_len == 0) { 541235944Sbz m_freem(m); 542235944Sbz return (0); 543235944Sbz } 544179737Sjfv 545235944Sbz le->p_len += tcp_data_len; 546179737Sjfv 547235944Sbz /* 548235944Sbz * Adjust the mbuf so that m_data points to the first byte of 549235944Sbz * the ULP payload. Adjust the mbuf to avoid complications and 550235944Sbz * append new segment to existing mbuf chain. 551235944Sbz */ 552235944Sbz m_adj(m, m->m_pkthdr.len - tcp_data_len); 553235944Sbz m->m_flags &= ~M_PKTHDR; 554179737Sjfv 555235944Sbz le->m_tail->m_next = m; 556235944Sbz le->m_tail = m_last(m); 557235944Sbz 558235944Sbz /* 559235944Sbz * If a possible next full length packet would cause an 560235944Sbz * overflow, pro-actively flush now. 561235944Sbz */ 562235944Sbz if (le->p_len > (65535 - lc->ifp->if_mtu)) { 563235944Sbz SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 564235944Sbz tcp_lro_flush(lc, le); 565255010Snp } else 566255010Snp getmicrotime(&le->mtime); 567235944Sbz 568235944Sbz return (0); 569179737Sjfv } 570179737Sjfv 571235944Sbz /* Try to find an empty slot. */ 572235944Sbz if (SLIST_EMPTY(&lc->lro_free)) 573235944Sbz return (TCP_LRO_CANNOT); 574179737Sjfv 575235944Sbz /* Start a new segment chain. */ 576235944Sbz le = SLIST_FIRST(&lc->lro_free); 577235944Sbz SLIST_REMOVE_HEAD(&lc->lro_free, next); 578235944Sbz SLIST_INSERT_HEAD(&lc->lro_active, le, next); 579255010Snp getmicrotime(&le->mtime); 580179737Sjfv 581235944Sbz /* Start filling in details. */ 582235944Sbz switch (eh_type) { 583235944Sbz#ifdef INET6 584235944Sbz case ETHERTYPE_IPV6: 585235944Sbz le->le_ip6 = ip6; 586235944Sbz le->source_ip6 = ip6->ip6_src; 587235944Sbz le->dest_ip6 = ip6->ip6_dst; 588235944Sbz le->eh_type = eh_type; 589235944Sbz le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6); 590235944Sbz break; 591235944Sbz#endif 592235944Sbz#ifdef INET 593235944Sbz case ETHERTYPE_IP: 594235944Sbz le->le_ip4 = ip4; 595235944Sbz le->source_ip4 = ip4->ip_src.s_addr; 596235944Sbz le->dest_ip4 = ip4->ip_dst.s_addr; 597235944Sbz le->eh_type = eh_type; 598235944Sbz le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN; 599235944Sbz break; 600235944Sbz#endif 601235944Sbz } 602235944Sbz le->source_port = th->th_sport; 603235944Sbz le->dest_port = th->th_dport; 604235944Sbz 605235944Sbz le->next_seq = seq + tcp_data_len; 606235944Sbz le->ack_seq = th->th_ack; 607235944Sbz le->window = th->th_win; 608235944Sbz if (l != 0) { 609235944Sbz le->timestamp = 1; 610235944Sbz le->tsval = ntohl(*(ts_ptr + 1)); 611235944Sbz le->tsecr = *(ts_ptr + 2); 612235944Sbz } 613235944Sbz 614235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 615235944Sbz /* 616235944Sbz * Do not touch the csum of the first packet. However save the 617235944Sbz * "adjusted" checksum of just the source and destination addresses, 618235944Sbz * the next header and the TCP payload. The length and TCP header 619235944Sbz * parts may change, so we remove those from the saved checksum and 620235944Sbz * re-add with final values on tcp_lro_flush() if needed. 621179737Sjfv */ 622235944Sbz KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n", 623235944Sbz __func__, le, le->ulp_csum)); 624235944Sbz 625235944Sbz le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, 626235944Sbz ~csum); 627235944Sbz th->th_sum = csum; /* Restore checksum on first packet. */ 628235944Sbz#endif 629235944Sbz 630235944Sbz le->m_head = m; 631235944Sbz le->m_tail = m_last(m); 632235944Sbz 633235944Sbz return (0); 634179737Sjfv} 635235944Sbz 636235944Sbz/* end */ 637