1290650Shselasky/*- 2290650Shselasky * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 3290650Shselasky * 4290650Shselasky * Redistribution and use in source and binary forms, with or without 5290650Shselasky * modification, are permitted provided that the following conditions 6290650Shselasky * are met: 7290650Shselasky * 1. Redistributions of source code must retain the above copyright 8290650Shselasky * notice, this list of conditions and the following disclaimer. 9290650Shselasky * 2. Redistributions in binary form must reproduce the above copyright 10290650Shselasky * notice, this list of conditions and the following disclaimer in the 11290650Shselasky * documentation and/or other materials provided with the distribution. 12290650Shselasky * 13290650Shselasky * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14290650Shselasky * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15290650Shselasky * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16290650Shselasky * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17290650Shselasky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18290650Shselasky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19290650Shselasky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20290650Shselasky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21290650Shselasky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22290650Shselasky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23290650Shselasky * SUCH DAMAGE. 24290650Shselasky */ 25290650Shselasky 26290650Shselasky#include <sys/cdefs.h> 27290650Shselasky__FBSDID("$FreeBSD: releng/10.3/sys/dev/mlx5/mlx5_en/tcp_tlro.c 291184 2015-11-23 09:32:32Z hselasky $"); 28290650Shselasky 29290650Shselasky#include "opt_inet.h" 30290650Shselasky#include "opt_inet6.h" 31290650Shselasky 32290650Shselasky#include <sys/param.h> 33290650Shselasky#include <sys/libkern.h> 34290650Shselasky#include <sys/mbuf.h> 35290650Shselasky#include <sys/lock.h> 36290650Shselasky#include <sys/mutex.h> 37290650Shselasky#include <sys/sysctl.h> 38290650Shselasky#include <sys/malloc.h> 39290650Shselasky#include <sys/kernel.h> 40290650Shselasky#include <sys/endian.h> 41290650Shselasky#include <sys/socket.h> 42290650Shselasky#include <sys/sockopt.h> 43290650Shselasky#include <sys/smp.h> 44290650Shselasky 45290650Shselasky#include <net/if.h> 46290650Shselasky#include <net/if_var.h> 47290650Shselasky#include <net/ethernet.h> 48290650Shselasky 49290650Shselasky#if defined(INET) || defined(INET6) 50290650Shselasky#include <netinet/in.h> 51290650Shselasky#endif 52290650Shselasky 53290650Shselasky#ifdef INET 54290650Shselasky#include <netinet/ip.h> 55290650Shselasky#endif 56290650Shselasky 57290650Shselasky#ifdef INET6 58290650Shselasky#include <netinet/ip6.h> 59290650Shselasky#endif 60290650Shselasky 61290650Shselasky#include <netinet/tcp_var.h> 62290650Shselasky 63290650Shselasky#include "tcp_tlro.h" 64290650Shselasky 65290650Shselasky#ifndef M_HASHTYPE_LRO_TCP 66290650Shselasky#ifndef KLD_MODULE 67290650Shselasky#warning "M_HASHTYPE_LRO_TCP is not defined" 68290650Shselasky#endif 69290650Shselasky#define M_HASHTYPE_LRO_TCP 254 70290650Shselasky#endif 71290650Shselasky 72290650Shselaskystatic SYSCTL_NODE(_net_inet_tcp, OID_AUTO, tlro, 73290650Shselasky CTLFLAG_RW, 0, "TCP turbo LRO parameters"); 74290650Shselasky 75291184Shselaskystatic MALLOC_DEFINE(M_TLRO, "TLRO", "Turbo LRO"); 76290650Shselasky 77290650Shselaskystatic int tlro_min_rate = 20; /* Hz */ 78290650Shselasky 79290650ShselaskySYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, min_rate, CTLFLAG_RWTUN, 80290650Shselasky &tlro_min_rate, 0, "Minimum serving rate in Hz"); 81290650Shselasky 82290650Shselaskystatic int tlro_max_packet = IP_MAXPACKET; 83290650Shselasky 84290650ShselaskySYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, max_packet, CTLFLAG_RWTUN, 85290650Shselasky &tlro_max_packet, 0, "Maximum packet size in bytes"); 86290650Shselasky 87290650Shselaskytypedef struct { 88290650Shselasky uint32_t value; 89290650Shselasky} __packed uint32_p_t; 90290650Shselasky 91290650Shselaskystatic uint16_t 92290650Shselaskytcp_tlro_csum(const uint32_p_t *p, size_t l) 93290650Shselasky{ 94290650Shselasky const uint32_p_t *pend = p + (l / 4); 95290650Shselasky uint64_t cs; 96290650Shselasky 97290650Shselasky for (cs = 0; p != pend; p++) 98290650Shselasky cs += le32toh(p->value); 99290650Shselasky while (cs > 0xffff) 100290650Shselasky cs = (cs >> 16) + (cs & 0xffff); 101290650Shselasky return (cs); 102290650Shselasky} 103290650Shselasky 104290650Shselaskystatic void * 105290650Shselaskytcp_tlro_get_header(const struct mbuf *m, const u_int off, 106290650Shselasky const u_int len) 107290650Shselasky{ 108290650Shselasky if (m->m_len < (off + len)) 109290650Shselasky return (NULL); 110290650Shselasky return (mtod(m, char *) + off); 111290650Shselasky} 112290650Shselasky 113290650Shselaskystatic uint8_t 114290650Shselaskytcp_tlro_info_save_timestamp(struct tlro_mbuf_data *pinfo) 115290650Shselasky{ 116290650Shselasky struct tcphdr *tcp = pinfo->tcp; 117290650Shselasky uint32_t *ts_ptr; 118290650Shselasky 119290650Shselasky if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2)) 120290650Shselasky return (0); 121290650Shselasky 122290650Shselasky ts_ptr = (uint32_t *)(tcp + 1); 123290650Shselasky if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 124290650Shselasky (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) 125290650Shselasky return (0); 126290650Shselasky 127291184Shselasky /* Save timestamps */ 128290650Shselasky pinfo->tcp_ts = ts_ptr[1]; 129290650Shselasky pinfo->tcp_ts_reply = ts_ptr[2]; 130290650Shselasky return (1); 131290650Shselasky} 132290650Shselasky 133290650Shselaskystatic void 134290650Shselaskytcp_tlro_info_restore_timestamp(struct tlro_mbuf_data *pinfoa, 135290650Shselasky struct tlro_mbuf_data *pinfob) 136290650Shselasky{ 137290650Shselasky struct tcphdr *tcp = pinfoa->tcp; 138290650Shselasky uint32_t *ts_ptr; 139290650Shselasky 140290650Shselasky if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2)) 141290650Shselasky return; 142290650Shselasky 143290650Shselasky ts_ptr = (uint32_t *)(tcp + 1); 144290650Shselasky if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 145290650Shselasky (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) 146290650Shselasky return; 147290650Shselasky 148291184Shselasky /* Restore timestamps */ 149290650Shselasky ts_ptr[1] = pinfob->tcp_ts; 150290650Shselasky ts_ptr[2] = pinfob->tcp_ts_reply; 151290650Shselasky} 152290650Shselasky 153290650Shselaskystatic void 154290650Shselaskytcp_tlro_extract_header(struct tlro_mbuf_data *pinfo, struct mbuf *m, int seq) 155290650Shselasky{ 156290650Shselasky uint8_t *phdr = (uint8_t *)pinfo->buf; 157290650Shselasky struct ether_header *eh; 158290650Shselasky struct ether_vlan_header *vlan; 159290650Shselasky#ifdef INET 160290650Shselasky struct ip *ip; 161290650Shselasky#endif 162290650Shselasky#ifdef INET6 163290650Shselasky struct ip6_hdr *ip6; 164290650Shselasky#endif 165290650Shselasky struct tcphdr *tcp; 166290650Shselasky uint16_t etype; 167290650Shselasky int diff; 168290650Shselasky int off; 169290650Shselasky 170291184Shselasky /* Fill in information */ 171290650Shselasky pinfo->head = m; 172290650Shselasky pinfo->last_tick = ticks; 173290650Shselasky pinfo->sequence = seq; 174290650Shselasky pinfo->pprev = &m_last(m)->m_next; 175290650Shselasky 176290650Shselasky off = sizeof(*eh); 177290650Shselasky if (m->m_len < off) 178290650Shselasky goto error; 179290650Shselasky eh = tcp_tlro_get_header(m, 0, sizeof(*eh)); 180290650Shselasky if (eh == NULL) 181290650Shselasky goto error; 182290650Shselasky memcpy(phdr, &eh->ether_dhost, ETHER_ADDR_LEN); 183290650Shselasky phdr += ETHER_ADDR_LEN; 184290650Shselasky memcpy(phdr, &eh->ether_type, sizeof(eh->ether_type)); 185290650Shselasky phdr += sizeof(eh->ether_type); 186290650Shselasky etype = ntohs(eh->ether_type); 187290650Shselasky 188290650Shselasky if (etype == ETHERTYPE_VLAN) { 189290650Shselasky vlan = tcp_tlro_get_header(m, off, sizeof(*vlan)); 190290650Shselasky if (vlan == NULL) 191290650Shselasky goto error; 192290650Shselasky memcpy(phdr, &vlan->evl_tag, sizeof(vlan->evl_tag) + 193290650Shselasky sizeof(vlan->evl_proto)); 194290650Shselasky phdr += sizeof(vlan->evl_tag) + sizeof(vlan->evl_proto); 195290650Shselasky etype = ntohs(vlan->evl_proto); 196290650Shselasky off += sizeof(*vlan) - sizeof(*eh); 197290650Shselasky } 198290650Shselasky switch (etype) { 199290650Shselasky#ifdef INET 200290650Shselasky case ETHERTYPE_IP: 201290650Shselasky /* 202290650Shselasky * Cannot LRO: 203290650Shselasky * - Non-IP packets 204290650Shselasky * - Fragmented packets 205290650Shselasky * - Packets with IPv4 options 206290650Shselasky * - Non-TCP packets 207290650Shselasky */ 208290650Shselasky ip = tcp_tlro_get_header(m, off, sizeof(*ip)); 209290650Shselasky if (ip == NULL || 210290650Shselasky (ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 || 211290650Shselasky (ip->ip_p != IPPROTO_TCP) || 212290650Shselasky (ip->ip_hl << 2) != sizeof(*ip)) 213290650Shselasky goto error; 214290650Shselasky 215290650Shselasky /* Legacy IP has a header checksum that needs to be correct */ 216290650Shselasky if (!(m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)) { 217290650Shselasky /* Verify IP header */ 218290650Shselasky if (tcp_tlro_csum((uint32_p_t *)ip, sizeof(*ip)) != 0xFFFF) 219290650Shselasky m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; 220290650Shselasky else 221290650Shselasky m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | 222290650Shselasky CSUM_IP_VALID; 223290650Shselasky } 224290650Shselasky /* Only accept valid checksums */ 225290650Shselasky if (!(m->m_pkthdr.csum_flags & CSUM_IP_VALID) || 226290650Shselasky !(m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) 227290650Shselasky goto error; 228290650Shselasky memcpy(phdr, &ip->ip_src, sizeof(ip->ip_src) + 229290650Shselasky sizeof(ip->ip_dst)); 230290650Shselasky phdr += sizeof(ip->ip_src) + sizeof(ip->ip_dst); 231290650Shselasky if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) 232290650Shselasky pinfo->ip_len = m->m_pkthdr.len - off; 233290650Shselasky else 234290650Shselasky pinfo->ip_len = ntohs(ip->ip_len); 235290650Shselasky pinfo->ip_hdrlen = sizeof(*ip); 236290650Shselasky pinfo->ip.v4 = ip; 237290650Shselasky pinfo->ip_version = 4; 238290650Shselasky off += sizeof(*ip); 239290650Shselasky break; 240290650Shselasky#endif 241290650Shselasky#ifdef INET6 242290650Shselasky case ETHERTYPE_IPV6: 243290650Shselasky /* 244290650Shselasky * Cannot LRO: 245290650Shselasky * - Non-IP packets 246290650Shselasky * - Packets with IPv6 options 247290650Shselasky * - Non-TCP packets 248290650Shselasky */ 249290650Shselasky ip6 = tcp_tlro_get_header(m, off, sizeof(*ip6)); 250290650Shselasky if (ip6 == NULL || ip6->ip6_nxt != IPPROTO_TCP) 251290650Shselasky goto error; 252290650Shselasky if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) 253290650Shselasky goto error; 254290650Shselasky memcpy(phdr, &ip6->ip6_src, sizeof(struct in6_addr) + 255290650Shselasky sizeof(struct in6_addr)); 256290650Shselasky phdr += sizeof(struct in6_addr) + sizeof(struct in6_addr); 257290650Shselasky if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) 258290650Shselasky pinfo->ip_len = m->m_pkthdr.len - off; 259290650Shselasky else 260290650Shselasky pinfo->ip_len = ntohs(ip6->ip6_plen) + sizeof(*ip6); 261290650Shselasky pinfo->ip_hdrlen = sizeof(*ip6); 262290650Shselasky pinfo->ip.v6 = ip6; 263290650Shselasky pinfo->ip_version = 6; 264290650Shselasky off += sizeof(*ip6); 265290650Shselasky break; 266290650Shselasky#endif 267290650Shselasky default: 268290650Shselasky goto error; 269290650Shselasky } 270290650Shselasky tcp = tcp_tlro_get_header(m, off, sizeof(*tcp)); 271290650Shselasky if (tcp == NULL) 272290650Shselasky goto error; 273290650Shselasky memcpy(phdr, &tcp->th_sport, sizeof(tcp->th_sport) + 274290650Shselasky sizeof(tcp->th_dport)); 275290650Shselasky phdr += sizeof(tcp->th_sport) + 276290650Shselasky sizeof(tcp->th_dport); 277291184Shselasky /* Store TCP header length */ 278290650Shselasky *phdr++ = tcp->th_off; 279290650Shselasky if (tcp->th_off < (sizeof(*tcp) >> 2)) 280290650Shselasky goto error; 281290650Shselasky 282291184Shselasky /* Compute offset to data payload */ 283290650Shselasky pinfo->tcp_len = (tcp->th_off << 2); 284290650Shselasky off += pinfo->tcp_len; 285290650Shselasky 286291184Shselasky /* Store more info */ 287290650Shselasky pinfo->data_off = off; 288290650Shselasky pinfo->tcp = tcp; 289290650Shselasky 290291184Shselasky /* Try to save timestamp, if any */ 291290650Shselasky *phdr++ = tcp_tlro_info_save_timestamp(pinfo); 292290650Shselasky 293291184Shselasky /* Verify offset and IP/TCP length */ 294290650Shselasky if (off > m->m_pkthdr.len || 295290650Shselasky pinfo->ip_len < pinfo->tcp_len) 296290650Shselasky goto error; 297290650Shselasky 298291184Shselasky /* Compute data payload length */ 299290650Shselasky pinfo->data_len = (pinfo->ip_len - pinfo->tcp_len - pinfo->ip_hdrlen); 300290650Shselasky 301291184Shselasky /* Trim any padded data */ 302290650Shselasky diff = (m->m_pkthdr.len - off) - pinfo->data_len; 303290650Shselasky if (diff != 0) { 304290650Shselasky if (diff < 0) 305290650Shselasky goto error; 306290650Shselasky else 307290650Shselasky m_adj(m, -diff); 308290650Shselasky } 309291184Shselasky /* Compute header length */ 310290650Shselasky pinfo->buf_length = phdr - (uint8_t *)pinfo->buf; 311291184Shselasky /* Zero-pad rest of buffer */ 312290650Shselasky memset(phdr, 0, TLRO_MAX_HEADER - pinfo->buf_length); 313290650Shselasky return; 314290650Shselaskyerror: 315290650Shselasky pinfo->buf_length = 0; 316290650Shselasky} 317290650Shselasky 318290650Shselaskystatic int 319290650Shselaskytcp_tlro_cmp64(const uint64_t *pa, const uint64_t *pb) 320290650Shselasky{ 321290650Shselasky int64_t diff = 0; 322290650Shselasky unsigned x; 323290650Shselasky 324290650Shselasky for (x = 0; x != TLRO_MAX_HEADER / 8; x++) { 325290650Shselasky /* 326290650Shselasky * NOTE: Endianness does not matter in this 327290650Shselasky * comparisation: 328290650Shselasky */ 329290650Shselasky diff = pa[x] - pb[x]; 330290650Shselasky if (diff != 0) 331290650Shselasky goto done; 332290650Shselasky } 333290650Shselaskydone: 334290650Shselasky if (diff < 0) 335290650Shselasky return (-1); 336290650Shselasky else if (diff > 0) 337290650Shselasky return (1); 338290650Shselasky return (0); 339290650Shselasky} 340290650Shselasky 341290650Shselaskystatic int 342290650Shselaskytcp_tlro_compare_header(const void *_ppa, const void *_ppb) 343290650Shselasky{ 344290650Shselasky const struct tlro_mbuf_ptr *ppa = _ppa; 345290650Shselasky const struct tlro_mbuf_ptr *ppb = _ppb; 346290650Shselasky struct tlro_mbuf_data *pinfoa = ppa->data; 347290650Shselasky struct tlro_mbuf_data *pinfob = ppb->data; 348290650Shselasky int ret; 349290650Shselasky 350290650Shselasky ret = (pinfoa->head == NULL) - (pinfob->head == NULL); 351290650Shselasky if (ret != 0) 352290650Shselasky goto done; 353290650Shselasky 354290650Shselasky ret = pinfoa->buf_length - pinfob->buf_length; 355290650Shselasky if (ret != 0) 356290650Shselasky goto done; 357290650Shselasky if (pinfoa->buf_length != 0) { 358290650Shselasky ret = tcp_tlro_cmp64(pinfoa->buf, pinfob->buf); 359290650Shselasky if (ret != 0) 360290650Shselasky goto done; 361290650Shselasky ret = ntohl(pinfoa->tcp->th_seq) - ntohl(pinfob->tcp->th_seq); 362290650Shselasky if (ret != 0) 363290650Shselasky goto done; 364290650Shselasky ret = ntohl(pinfoa->tcp->th_ack) - ntohl(pinfob->tcp->th_ack); 365290650Shselasky if (ret != 0) 366290650Shselasky goto done; 367290650Shselasky ret = pinfoa->sequence - pinfob->sequence; 368290650Shselasky if (ret != 0) 369290650Shselasky goto done; 370290650Shselasky } 371290650Shselaskydone: 372290650Shselasky return (ret); 373290650Shselasky} 374290650Shselasky 375290650Shselaskystatic void 376290650Shselaskytcp_tlro_sort(struct tlro_ctrl *tlro) 377290650Shselasky{ 378290650Shselasky if (tlro->curr == 0) 379290650Shselasky return; 380290650Shselasky 381290650Shselasky qsort(tlro->mbuf, tlro->curr, sizeof(struct tlro_mbuf_ptr), 382290650Shselasky &tcp_tlro_compare_header); 383290650Shselasky} 384290650Shselasky 385290650Shselaskystatic int 386290650Shselaskytcp_tlro_get_ticks(void) 387290650Shselasky{ 388290650Shselasky int to = tlro_min_rate; 389290650Shselasky 390290650Shselasky if (to < 1) 391290650Shselasky to = 1; 392290650Shselasky to = hz / to; 393290650Shselasky if (to < 1) 394290650Shselasky to = 1; 395290650Shselasky return (to); 396290650Shselasky} 397290650Shselasky 398290650Shselaskystatic void 399290650Shselaskytcp_tlro_combine(struct tlro_ctrl *tlro, int force) 400290650Shselasky{ 401290650Shselasky struct tlro_mbuf_data *pinfoa; 402290650Shselasky struct tlro_mbuf_data *pinfob; 403290650Shselasky uint32_t cs; 404290650Shselasky int curr_ticks = ticks; 405290650Shselasky int ticks_limit = tcp_tlro_get_ticks(); 406290650Shselasky unsigned x; 407290650Shselasky unsigned y; 408290650Shselasky unsigned z; 409290650Shselasky int temp; 410290650Shselasky 411290650Shselasky if (tlro->curr == 0) 412290650Shselasky return; 413290650Shselasky 414290650Shselasky for (y = 0; y != tlro->curr;) { 415290650Shselasky struct mbuf *m; 416290650Shselasky 417290650Shselasky pinfoa = tlro->mbuf[y].data; 418290650Shselasky for (x = y + 1; x != tlro->curr; x++) { 419290650Shselasky pinfob = tlro->mbuf[x].data; 420290650Shselasky if (pinfoa->buf_length != pinfob->buf_length || 421290650Shselasky tcp_tlro_cmp64(pinfoa->buf, pinfob->buf) != 0) 422290650Shselasky break; 423290650Shselasky } 424290650Shselasky if (pinfoa->buf_length == 0) { 425291184Shselasky /* Forward traffic which cannot be combined */ 426290650Shselasky for (z = y; z != x; z++) { 427291184Shselasky /* Just forward packets */ 428290650Shselasky pinfob = tlro->mbuf[z].data; 429290650Shselasky 430290650Shselasky m = pinfob->head; 431290650Shselasky 432291184Shselasky /* Reset info structure */ 433290650Shselasky pinfob->head = NULL; 434290650Shselasky pinfob->buf_length = 0; 435290650Shselasky 436291184Shselasky /* Do stats */ 437290650Shselasky tlro->lro_flushed++; 438290650Shselasky 439291184Shselasky /* Input packet to network layer */ 440290650Shselasky (*tlro->ifp->if_input) (tlro->ifp, m); 441290650Shselasky } 442290650Shselasky y = z; 443290650Shselasky continue; 444290650Shselasky } 445290650Shselasky 446291184Shselasky /* Compute current checksum subtracted some header parts */ 447290650Shselasky temp = (pinfoa->ip_len - pinfoa->ip_hdrlen); 448290650Shselasky cs = ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) + 449290650Shselasky tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len); 450290650Shselasky 451291184Shselasky /* Append all fragments into one block */ 452290650Shselasky for (z = y + 1; z != x; z++) { 453290650Shselasky 454290650Shselasky pinfob = tlro->mbuf[z].data; 455290650Shselasky 456291184Shselasky /* Check for command packets */ 457290650Shselasky if ((pinfoa->tcp->th_flags & ~(TH_ACK | TH_PUSH)) || 458290650Shselasky (pinfob->tcp->th_flags & ~(TH_ACK | TH_PUSH))) 459290650Shselasky break; 460290650Shselasky 461291184Shselasky /* Check if there is enough space */ 462290650Shselasky if ((pinfoa->ip_len + pinfob->data_len) > tlro_max_packet) 463290650Shselasky break; 464290650Shselasky 465291184Shselasky /* Try to append the new segment */ 466290650Shselasky temp = ntohl(pinfoa->tcp->th_seq) + pinfoa->data_len; 467290650Shselasky if (temp != (int)ntohl(pinfob->tcp->th_seq)) 468290650Shselasky break; 469290650Shselasky 470290650Shselasky temp = pinfob->ip_len - pinfob->ip_hdrlen; 471290650Shselasky cs += ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) + 472290650Shselasky tcp_tlro_csum((uint32_p_t *)pinfob->tcp, pinfob->tcp_len); 473291184Shselasky /* Remove fields which appear twice */ 474290650Shselasky cs += (IPPROTO_TCP << 8); 475290650Shselasky if (pinfob->ip_version == 4) { 476290650Shselasky cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_src, 4); 477290650Shselasky cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_dst, 4); 478290650Shselasky } else { 479290650Shselasky cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_src, 16); 480290650Shselasky cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_dst, 16); 481290650Shselasky } 482291184Shselasky /* Remainder computation */ 483290650Shselasky while (cs > 0xffff) 484290650Shselasky cs = (cs >> 16) + (cs & 0xffff); 485290650Shselasky 486291184Shselasky /* Update window and ack sequence number */ 487290650Shselasky pinfoa->tcp->th_ack = pinfob->tcp->th_ack; 488290650Shselasky pinfoa->tcp->th_win = pinfob->tcp->th_win; 489290650Shselasky 490291184Shselasky /* Check if we should restore the timestamp */ 491290650Shselasky tcp_tlro_info_restore_timestamp(pinfoa, pinfob); 492290650Shselasky 493291184Shselasky /* Accumulate TCP flags */ 494290650Shselasky pinfoa->tcp->th_flags |= pinfob->tcp->th_flags; 495290650Shselasky 496290650Shselasky /* update lengths */ 497290650Shselasky pinfoa->ip_len += pinfob->data_len; 498290650Shselasky pinfoa->data_len += pinfob->data_len; 499290650Shselasky 500291184Shselasky /* Clear mbuf pointer - packet is accumulated */ 501290650Shselasky m = pinfob->head; 502290650Shselasky 503291184Shselasky /* Reset info structure */ 504290650Shselasky pinfob->head = NULL; 505290650Shselasky pinfob->buf_length = 0; 506290650Shselasky 507291184Shselasky /* Append data to mbuf [y] */ 508290650Shselasky m_adj(m, pinfob->data_off); 509291184Shselasky /* Delete mbuf tags, if any */ 510290650Shselasky m_tag_delete_chain(m, NULL); 511291184Shselasky /* Clear packet header flag */ 512290650Shselasky m->m_flags &= ~M_PKTHDR; 513290650Shselasky 514291184Shselasky /* Concat mbuf(s) to end of list */ 515290650Shselasky pinfoa->pprev[0] = m; 516290650Shselasky m = m_last(m); 517290650Shselasky pinfoa->pprev = &m->m_next; 518290650Shselasky pinfoa->head->m_pkthdr.len += pinfob->data_len; 519290650Shselasky } 520291184Shselasky /* Compute new TCP header checksum */ 521290650Shselasky pinfoa->tcp->th_sum = 0; 522290650Shselasky 523290650Shselasky temp = pinfoa->ip_len - pinfoa->ip_hdrlen; 524290650Shselasky cs = (cs ^ 0xFFFF) + 525290650Shselasky tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len) + 526290650Shselasky ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8); 527290650Shselasky 528291184Shselasky /* Remainder computation */ 529290650Shselasky while (cs > 0xffff) 530290650Shselasky cs = (cs >> 16) + (cs & 0xffff); 531290650Shselasky 532291184Shselasky /* Update new checksum */ 533290650Shselasky pinfoa->tcp->th_sum = ~htole16(cs); 534290650Shselasky 535291184Shselasky /* Update IP length, if any */ 536290650Shselasky if (pinfoa->ip_version == 4) { 537290650Shselasky if (pinfoa->ip_len > IP_MAXPACKET) { 538290650Shselasky M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP); 539290650Shselasky pinfoa->ip.v4->ip_len = htons(IP_MAXPACKET); 540290650Shselasky } else { 541290650Shselasky pinfoa->ip.v4->ip_len = htons(pinfoa->ip_len); 542290650Shselasky } 543290650Shselasky } else { 544290650Shselasky if (pinfoa->ip_len > (IP_MAXPACKET + sizeof(*pinfoa->ip.v6))) { 545290650Shselasky M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP); 546290650Shselasky pinfoa->ip.v6->ip6_plen = htons(IP_MAXPACKET); 547290650Shselasky } else { 548290650Shselasky temp = pinfoa->ip_len - sizeof(*pinfoa->ip.v6); 549290650Shselasky pinfoa->ip.v6->ip6_plen = htons(temp); 550290650Shselasky } 551290650Shselasky } 552290650Shselasky 553290650Shselasky temp = curr_ticks - pinfoa->last_tick; 554291184Shselasky /* Check if packet should be forwarded */ 555290650Shselasky if (force != 0 || z != x || temp >= ticks_limit || 556290650Shselasky pinfoa->data_len == 0) { 557290650Shselasky 558291184Shselasky /* Compute new IPv4 header checksum */ 559290650Shselasky if (pinfoa->ip_version == 4) { 560290650Shselasky pinfoa->ip.v4->ip_sum = 0; 561290650Shselasky cs = tcp_tlro_csum((uint32_p_t *)pinfoa->ip.v4, 562290650Shselasky sizeof(*pinfoa->ip.v4)); 563290650Shselasky pinfoa->ip.v4->ip_sum = ~htole16(cs); 564290650Shselasky } 565291184Shselasky /* Forward packet */ 566290650Shselasky m = pinfoa->head; 567290650Shselasky 568291184Shselasky /* Reset info structure */ 569290650Shselasky pinfoa->head = NULL; 570290650Shselasky pinfoa->buf_length = 0; 571290650Shselasky 572291184Shselasky /* Do stats */ 573290650Shselasky tlro->lro_flushed++; 574290650Shselasky 575291184Shselasky /* Input packet to network layer */ 576290650Shselasky (*tlro->ifp->if_input) (tlro->ifp, m); 577290650Shselasky } 578290650Shselasky y = z; 579290650Shselasky } 580290650Shselasky 581291184Shselasky /* Cleanup all NULL heads */ 582290650Shselasky for (y = 0; y != tlro->curr; y++) { 583290650Shselasky if (tlro->mbuf[y].data->head == NULL) { 584290650Shselasky for (z = y + 1; z != tlro->curr; z++) { 585290650Shselasky struct tlro_mbuf_ptr ptemp; 586290650Shselasky if (tlro->mbuf[z].data->head == NULL) 587290650Shselasky continue; 588290650Shselasky ptemp = tlro->mbuf[y]; 589290650Shselasky tlro->mbuf[y] = tlro->mbuf[z]; 590290650Shselasky tlro->mbuf[z] = ptemp; 591290650Shselasky y++; 592290650Shselasky } 593290650Shselasky break; 594290650Shselasky } 595290650Shselasky } 596290650Shselasky tlro->curr = y; 597290650Shselasky} 598290650Shselasky 599290650Shselaskystatic void 600290650Shselaskytcp_tlro_cleanup(struct tlro_ctrl *tlro) 601290650Shselasky{ 602290650Shselasky while (tlro->curr != 0 && 603290650Shselasky tlro->mbuf[tlro->curr - 1].data->head == NULL) 604290650Shselasky tlro->curr--; 605290650Shselasky} 606290650Shselasky 607290650Shselaskyvoid 608290650Shselaskytcp_tlro_flush(struct tlro_ctrl *tlro, int force) 609290650Shselasky{ 610290650Shselasky if (tlro->curr == 0) 611290650Shselasky return; 612290650Shselasky 613290650Shselasky tcp_tlro_sort(tlro); 614290650Shselasky tcp_tlro_cleanup(tlro); 615290650Shselasky tcp_tlro_combine(tlro, force); 616290650Shselasky} 617290650Shselasky 618290650Shselaskyint 619290650Shselaskytcp_tlro_init(struct tlro_ctrl *tlro, struct ifnet *ifp, 620290650Shselasky int max_mbufs) 621290650Shselasky{ 622290650Shselasky ssize_t size; 623290650Shselasky uint32_t x; 624290650Shselasky 625291184Shselasky /* Set zero defaults */ 626290650Shselasky memset(tlro, 0, sizeof(*tlro)); 627290650Shselasky 628291184Shselasky /* Compute size needed for data */ 629290650Shselasky size = (sizeof(struct tlro_mbuf_ptr) * max_mbufs) + 630290650Shselasky (sizeof(struct tlro_mbuf_data) * max_mbufs); 631290650Shselasky 632291184Shselasky /* Range check */ 633290650Shselasky if (max_mbufs <= 0 || size <= 0 || ifp == NULL) 634290650Shselasky return (EINVAL); 635290650Shselasky 636291184Shselasky /* Setup tlro control structure */ 637290650Shselasky tlro->mbuf = malloc(size, M_TLRO, M_WAITOK | M_ZERO); 638290650Shselasky tlro->max = max_mbufs; 639290650Shselasky tlro->ifp = ifp; 640290650Shselasky 641291184Shselasky /* Setup pointer array */ 642290650Shselasky for (x = 0; x != tlro->max; x++) { 643290650Shselasky tlro->mbuf[x].data = ((struct tlro_mbuf_data *) 644290650Shselasky &tlro->mbuf[max_mbufs]) + x; 645290650Shselasky } 646290650Shselasky return (0); 647290650Shselasky} 648290650Shselasky 649290650Shselaskyvoid 650290650Shselaskytcp_tlro_free(struct tlro_ctrl *tlro) 651290650Shselasky{ 652290650Shselasky struct tlro_mbuf_data *pinfo; 653290650Shselasky struct mbuf *m; 654290650Shselasky uint32_t y; 655290650Shselasky 656291184Shselasky /* Check if not setup */ 657290650Shselasky if (tlro->mbuf == NULL) 658290650Shselasky return; 659291184Shselasky /* Free MBUF array and any leftover MBUFs */ 660290650Shselasky for (y = 0; y != tlro->max; y++) { 661290650Shselasky 662290650Shselasky pinfo = tlro->mbuf[y].data; 663290650Shselasky 664290650Shselasky m = pinfo->head; 665290650Shselasky 666291184Shselasky /* Reset info structure */ 667290650Shselasky pinfo->head = NULL; 668290650Shselasky pinfo->buf_length = 0; 669290650Shselasky 670290650Shselasky m_freem(m); 671290650Shselasky } 672290650Shselasky free(tlro->mbuf, M_TLRO); 673291184Shselasky /* Reset buffer */ 674290650Shselasky memset(tlro, 0, sizeof(*tlro)); 675290650Shselasky} 676290650Shselasky 677290650Shselaskyvoid 678290650Shselaskytcp_tlro_rx(struct tlro_ctrl *tlro, struct mbuf *m) 679290650Shselasky{ 680290650Shselasky if (m->m_len > 0 && tlro->curr < tlro->max) { 681290650Shselasky /* do stats */ 682290650Shselasky tlro->lro_queued++; 683290650Shselasky 684290650Shselasky /* extract header */ 685290650Shselasky tcp_tlro_extract_header(tlro->mbuf[tlro->curr++].data, 686290650Shselasky m, tlro->sequence++); 687290650Shselasky } else if (tlro->ifp != NULL) { 688290650Shselasky /* do stats */ 689290650Shselasky tlro->lro_flushed++; 690290650Shselasky 691290650Shselasky /* input packet to network layer */ 692290650Shselasky (*tlro->ifp->if_input) (tlro->ifp, m); 693290650Shselasky } else { 694290650Shselasky /* packet drop */ 695290650Shselasky m_freem(m); 696290650Shselasky } 697290650Shselasky} 698