tcp_tlro.c revision 291184
/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2565668Skris
2665668Skris#include <sys/cdefs.h>
2765668Skris__FBSDID("$FreeBSD: stable/10/sys/dev/mlx5/mlx5_en/tcp_tlro.c 291184 2015-11-23 09:32:32Z hselasky $");
2865668Skris
2965668Skris#include "opt_inet.h"
3065668Skris#include "opt_inet6.h"
3165668Skris
3265668Skris#include <sys/param.h>
3365668Skris#include <sys/libkern.h>
3465668Skris#include <sys/mbuf.h>
3565668Skris#include <sys/lock.h>
3665668Skris#include <sys/mutex.h>
3765668Skris#include <sys/sysctl.h>
3865668Skris#include <sys/malloc.h>
3965668Skris#include <sys/kernel.h>
4065668Skris#include <sys/endian.h>
4157429Smarkm#include <sys/socket.h>
4257429Smarkm#include <sys/sockopt.h>
4357429Smarkm#include <sys/smp.h>
4457429Smarkm
45162856Sdes#include <net/if.h>
46162856Sdes#include <net/if_var.h>
47162856Sdes#include <net/ethernet.h>
48162856Sdes
49162856Sdes#if defined(INET) || defined(INET6)
50162856Sdes#include <netinet/in.h>
51162856Sdes#endif
52162856Sdes
53162856Sdes#ifdef INET
54162856Sdes#include <netinet/ip.h>
55162856Sdes#endif
56162856Sdes
57204917Sdes#ifdef INET6
58162856Sdes#include <netinet/ip6.h>
59162856Sdes#endif
60162856Sdes
61162856Sdes#include <netinet/tcp_var.h>
62162856Sdes
63162856Sdes#include "tcp_tlro.h"
64162856Sdes
/*
 * Fallback definition: when nothing included above provides
 * M_HASHTYPE_LRO_TCP, use the value 254 and, unless building as a
 * KLD module, emit a compile-time warning about it.
 */
#if !defined(M_HASHTYPE_LRO_TCP)
#if !defined(KLD_MODULE)
#warning "M_HASHTYPE_LRO_TCP is not defined"
#endif
#define	M_HASHTYPE_LRO_TCP 254
#endif
7157429Smarkm
7276262Sgreenstatic SYSCTL_NODE(_net_inet_tcp, OID_AUTO, tlro,
7376262Sgreen    CTLFLAG_RW, 0, "TCP turbo LRO parameters");
74162856Sdes
7557429Smarkmstatic MALLOC_DEFINE(M_TLRO, "TLRO", "Turbo LRO");
7657429Smarkm
7776262Sgreenstatic int tlro_min_rate = 20;		/* Hz */
7865668Skris
7965668SkrisSYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, min_rate, CTLFLAG_RWTUN,
8092559Sdes    &tlro_min_rate, 0, "Minimum serving rate in Hz");
8165668Skris
8292559Sdesstatic int tlro_max_packet = IP_MAXPACKET;
8357429Smarkm
8457429SmarkmSYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, max_packet, CTLFLAG_RWTUN,
8557429Smarkm    &tlro_max_packet, 0, "Maximum packet size in bytes");
8657429Smarkm
8757429Smarkmtypedef struct {
8892559Sdes	uint32_t value;
8957429Smarkm} __packed uint32_p_t;
9057429Smarkm
9157429Smarkmstatic uint16_t
9292559Sdestcp_tlro_csum(const uint32_p_t *p, size_t l)
9357429Smarkm{
94137019Sdes	const uint32_p_t *pend = p + (l / 4);
9557429Smarkm	uint64_t cs;
9657429Smarkm
9757429Smarkm	for (cs = 0; p != pend; p++)
9892559Sdes		cs += le32toh(p->value);
9957429Smarkm	while (cs > 0xffff)
10076262Sgreen		cs = (cs >> 16) + (cs & 0xffff);
10157429Smarkm	return (cs);
10257429Smarkm}
10392559Sdes
10457429Smarkmstatic void *
10557429Smarkmtcp_tlro_get_header(const struct mbuf *m, const u_int off,
10657429Smarkm    const u_int len)
10757429Smarkm{
10857429Smarkm	if (m->m_len < (off + len))
10957429Smarkm		return (NULL);
11057429Smarkm	return (mtod(m, char *) + off);
11157429Smarkm}
11260573Skris
11360573Skrisstatic uint8_t
11460573Skristcp_tlro_info_save_timestamp(struct tlro_mbuf_data *pinfo)
11557429Smarkm{
11657429Smarkm	struct tcphdr *tcp = pinfo->tcp;
117162856Sdes	uint32_t *ts_ptr;
118215116Sdes
11992559Sdes	if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
120162856Sdes		return (0);
121215116Sdes
122162856Sdes	ts_ptr = (uint32_t *)(tcp + 1);
123162856Sdes	if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
12457429Smarkm	    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
125162856Sdes		return (0);
126162856Sdes
127162856Sdes	/* Save timestamps */
128162856Sdes	pinfo->tcp_ts = ts_ptr[1];
12957429Smarkm	pinfo->tcp_ts_reply = ts_ptr[2];
13057429Smarkm	return (1);
13157429Smarkm}
13257429Smarkm
13357429Smarkmstatic void
13457429Smarkmtcp_tlro_info_restore_timestamp(struct tlro_mbuf_data *pinfoa,
13557429Smarkm    struct tlro_mbuf_data *pinfob)
13657429Smarkm{
13792559Sdes	struct tcphdr *tcp = pinfoa->tcp;
13892559Sdes	uint32_t *ts_ptr;
13992559Sdes
14092559Sdes	if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
14192559Sdes		return;
142149753Sdes
143149753Sdes	ts_ptr = (uint32_t *)(tcp + 1);
144149753Sdes	if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
14592559Sdes	    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
14692559Sdes		return;
14792559Sdes
14892559Sdes	/* Restore timestamps */
14992559Sdes	ts_ptr[1] = pinfob->tcp_ts;
15092559Sdes	ts_ptr[2] = pinfob->tcp_ts_reply;
15192559Sdes}
15292559Sdes
15392559Sdesstatic void
15492559Sdestcp_tlro_extract_header(struct tlro_mbuf_data *pinfo, struct mbuf *m, int seq)
15592559Sdes{
156162856Sdes	uint8_t *phdr = (uint8_t *)pinfo->buf;
15792559Sdes	struct ether_header *eh;
15892559Sdes	struct ether_vlan_header *vlan;
15992559Sdes#ifdef INET
16092559Sdes	struct ip *ip;
16192559Sdes#endif
16292559Sdes#ifdef INET6
16392559Sdes	struct ip6_hdr *ip6;
16476262Sgreen#endif
16598941Sdes	struct tcphdr *tcp;
16676262Sgreen	uint16_t etype;
16792559Sdes	int diff;
16892559Sdes	int off;
16976262Sgreen
170181111Sdes	/* Fill in information */
171181111Sdes	pinfo->head = m;
172181111Sdes	pinfo->last_tick = ticks;
173181111Sdes	pinfo->sequence = seq;
174224638Sbrooks	pinfo->pprev = &m_last(m)->m_next;
175224638Sbrooks
176224638Sbrooks	off = sizeof(*eh);
177224638Sbrooks	if (m->m_len < off)
178224638Sbrooks		goto error;
17992559Sdes	eh = tcp_tlro_get_header(m, 0, sizeof(*eh));
18057429Smarkm	if (eh == NULL)
18160573Skris		goto error;
182157019Sdes	memcpy(phdr, &eh->ether_dhost, ETHER_ADDR_LEN);
18360573Skris	phdr += ETHER_ADDR_LEN;
18460573Skris	memcpy(phdr, &eh->ether_type, sizeof(eh->ether_type));
18592559Sdes	phdr += sizeof(eh->ether_type);
186137019Sdes	etype = ntohs(eh->ether_type);
187157019Sdes
18860573Skris	if (etype == ETHERTYPE_VLAN) {
18960573Skris		vlan = tcp_tlro_get_header(m, off, sizeof(*vlan));
19092559Sdes		if (vlan == NULL)
19192559Sdes			goto error;
192157019Sdes		memcpy(phdr, &vlan->evl_tag, sizeof(vlan->evl_tag) +
19360573Skris		    sizeof(vlan->evl_proto));
19460573Skris		phdr += sizeof(vlan->evl_tag) + sizeof(vlan->evl_proto);
19560573Skris		etype = ntohs(vlan->evl_proto);
19660573Skris		off += sizeof(*vlan) - sizeof(*eh);
19760573Skris	}
19857429Smarkm	switch (etype) {
199157019Sdes#ifdef INET
200157019Sdes	case ETHERTYPE_IP:
201157019Sdes		/*
202157019Sdes		 * Cannot LRO:
203157019Sdes		 * - Non-IP packets
204157019Sdes		 * - Fragmented packets
205157019Sdes		 * - Packets with IPv4 options
206157019Sdes		 * - Non-TCP packets
207157019Sdes		 */
208157019Sdes		ip = tcp_tlro_get_header(m, off, sizeof(*ip));
209157019Sdes		if (ip == NULL ||
210162856Sdes		    (ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 ||
211157019Sdes		    (ip->ip_p != IPPROTO_TCP) ||
212157019Sdes		    (ip->ip_hl << 2) != sizeof(*ip))
213157019Sdes			goto error;
214157019Sdes
215157019Sdes		/* Legacy IP has a header checksum that needs to be correct */
216157019Sdes		if (!(m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)) {
217157019Sdes			/* Verify IP header */
218157019Sdes			if (tcp_tlro_csum((uint32_p_t *)ip, sizeof(*ip)) != 0xFFFF)
219157019Sdes				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
220157019Sdes			else
221157019Sdes				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
222157019Sdes				    CSUM_IP_VALID;
223157019Sdes		}
224157019Sdes		/* Only accept valid checksums */
225157019Sdes		if (!(m->m_pkthdr.csum_flags & CSUM_IP_VALID) ||
22660573Skris		    !(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
22760573Skris			goto error;
22860573Skris		memcpy(phdr, &ip->ip_src, sizeof(ip->ip_src) +
22992559Sdes		    sizeof(ip->ip_dst));
23069587Sgreen		phdr += sizeof(ip->ip_src) + sizeof(ip->ip_dst);
231181111Sdes		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
23260573Skris			pinfo->ip_len = m->m_pkthdr.len - off;
23360573Skris		else
23476262Sgreen			pinfo->ip_len = ntohs(ip->ip_len);
23576262Sgreen		pinfo->ip_hdrlen = sizeof(*ip);
23676262Sgreen		pinfo->ip.v4 = ip;
23776262Sgreen		pinfo->ip_version = 4;
238204917Sdes		off += sizeof(*ip);
239204917Sdes		break;
240204917Sdes#endif
241204917Sdes#ifdef INET6
242204917Sdes	case ETHERTYPE_IPV6:
243204917Sdes		/*
24460573Skris		 * Cannot LRO:
24560573Skris		 * - Non-IP packets
24660573Skris		 * - Packets with IPv6 options
24760573Skris		 * - Non-TCP packets
24860573Skris		 */
24960573Skris		ip6 = tcp_tlro_get_header(m, off, sizeof(*ip6));
25069587Sgreen		if (ip6 == NULL || ip6->ip6_nxt != IPPROTO_TCP)
251181111Sdes			goto error;
252124207Sdes		if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
253181111Sdes			goto error;
25474500Sgreen		memcpy(phdr, &ip6->ip6_src, sizeof(struct in6_addr) +
25569587Sgreen		    sizeof(struct in6_addr));
25669587Sgreen		phdr += sizeof(struct in6_addr) + sizeof(struct in6_addr);
25769587Sgreen		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
25869587Sgreen			pinfo->ip_len = m->m_pkthdr.len - off;
25969587Sgreen		else
26069587Sgreen			pinfo->ip_len = ntohs(ip6->ip6_plen) + sizeof(*ip6);
26169587Sgreen		pinfo->ip_hdrlen = sizeof(*ip6);
26269587Sgreen		pinfo->ip.v6 = ip6;
26369587Sgreen		pinfo->ip_version = 6;
26460573Skris		off += sizeof(*ip6);
26560573Skris		break;
26660573Skris#endif
26757429Smarkm	default:
26857429Smarkm		goto error;
26957429Smarkm	}
27092559Sdes	tcp = tcp_tlro_get_header(m, off, sizeof(*tcp));
27160573Skris	if (tcp == NULL)
27299063Sdes		goto error;
27357429Smarkm	memcpy(phdr, &tcp->th_sport, sizeof(tcp->th_sport) +
274137019Sdes	    sizeof(tcp->th_dport));
275137019Sdes	phdr += sizeof(tcp->th_sport) +
27657429Smarkm	    sizeof(tcp->th_dport);
27757429Smarkm	/* Store TCP header length */
27857429Smarkm	*phdr++ = tcp->th_off;
27957429Smarkm	if (tcp->th_off < (sizeof(*tcp) >> 2))
28057429Smarkm		goto error;
281162856Sdes
28257429Smarkm	/* Compute offset to data payload */
28392559Sdes	pinfo->tcp_len = (tcp->th_off << 2);
28457429Smarkm	off += pinfo->tcp_len;
28557429Smarkm
28657429Smarkm	/* Store more info */
28792559Sdes	pinfo->data_off = off;
28857429Smarkm	pinfo->tcp = tcp;
289137019Sdes
29057429Smarkm	/* Try to save timestamp, if any */
29157429Smarkm	*phdr++ = tcp_tlro_info_save_timestamp(pinfo);
292137019Sdes
29357429Smarkm	/* Verify offset and IP/TCP length */
29457429Smarkm	if (off > m->m_pkthdr.len ||
29599063Sdes	    pinfo->ip_len < pinfo->tcp_len)
29699063Sdes		goto error;
29799063Sdes
298162856Sdes	/* Compute data payload length */
299162856Sdes	pinfo->data_len = (pinfo->ip_len - pinfo->tcp_len - pinfo->ip_hdrlen);
300120489Sjoe
30169587Sgreen	/* Trim any padded data */
30257429Smarkm	diff = (m->m_pkthdr.len - off) - pinfo->data_len;
30392559Sdes	if (diff != 0) {
30457429Smarkm		if (diff < 0)
30592559Sdes			goto error;
306162856Sdes		else
30757429Smarkm			m_adj(m, -diff);
30857429Smarkm	}
30960573Skris	/* Compute header length */
310192595Sdes	pinfo->buf_length = phdr - (uint8_t *)pinfo->buf;
31192559Sdes	/* Zero-pad rest of buffer */
31292559Sdes	memset(phdr, 0, TLRO_MAX_HEADER - pinfo->buf_length);
31392559Sdes	return;
314181111Sdeserror:
31557429Smarkm	pinfo->buf_length = 0;
31657429Smarkm}
31760573Skris
318224638Sbrooksstatic int
31960573Skristcp_tlro_cmp64(const uint64_t *pa, const uint64_t *pb)
32060573Skris{
32160573Skris	int64_t diff = 0;
32260573Skris	unsigned x;
32357429Smarkm
324124207Sdes	for (x = 0; x != TLRO_MAX_HEADER / 8; x++) {
32560573Skris		/*
32660573Skris		 * NOTE: Endianness does not matter in this
32792559Sdes		 * comparisation:
32892559Sdes		 */
32992559Sdes		diff = pa[x] - pb[x];
330157019Sdes		if (diff != 0)
331181111Sdes			goto done;
332181111Sdes	}
33365668Skrisdone:
334157019Sdes	if (diff < 0)
335181111Sdes		return (-1);
336181111Sdes	else if (diff > 0)
337204917Sdes		return (1);
338204917Sdes	return (0);
339204917Sdes}
340215116Sdes
341204917Sdesstatic int
342181111Sdestcp_tlro_compare_header(const void *_ppa, const void *_ppb)
34357429Smarkm{
34492559Sdes	const struct tlro_mbuf_ptr *ppa = _ppa;
34557429Smarkm	const struct tlro_mbuf_ptr *ppb = _ppb;
34692559Sdes	struct tlro_mbuf_data *pinfoa = ppa->data;
34792559Sdes	struct tlro_mbuf_data *pinfob = ppb->data;
34892559Sdes	int ret;
34992559Sdes
350137019Sdes	ret = (pinfoa->head == NULL) - (pinfob->head == NULL);
351137019Sdes	if (ret != 0)
35292559Sdes		goto done;
35392559Sdes
35492559Sdes	ret = pinfoa->buf_length - pinfob->buf_length;
35592559Sdes	if (ret != 0)
35692559Sdes		goto done;
35792559Sdes	if (pinfoa->buf_length != 0) {
35892559Sdes		ret = tcp_tlro_cmp64(pinfoa->buf, pinfob->buf);
35992559Sdes		if (ret != 0)
36092559Sdes			goto done;
36192559Sdes		ret = ntohl(pinfoa->tcp->th_seq) - ntohl(pinfob->tcp->th_seq);
36292559Sdes		if (ret != 0)
36392559Sdes			goto done;
36492559Sdes		ret = ntohl(pinfoa->tcp->th_ack) - ntohl(pinfob->tcp->th_ack);
36560573Skris		if (ret != 0)
36692559Sdes			goto done;
36760573Skris		ret = pinfoa->sequence - pinfob->sequence;
36892559Sdes		if (ret != 0)
36992559Sdes			goto done;
37092559Sdes	}
37192559Sdesdone:
37292559Sdes	return (ret);
37392559Sdes}
37492559Sdes
37592559Sdesstatic void
37692559Sdestcp_tlro_sort(struct tlro_ctrl *tlro)
37760573Skris{
37857429Smarkm	if (tlro->curr == 0)
37960573Skris		return;
38092559Sdes
38160573Skris	qsort(tlro->mbuf, tlro->curr, sizeof(struct tlro_mbuf_ptr),
38257429Smarkm	    &tcp_tlro_compare_header);
38392559Sdes}
38492559Sdes
38592559Sdesstatic int
38692559Sdestcp_tlro_get_ticks(void)
38760573Skris{
38857429Smarkm	int to = tlro_min_rate;
38960573Skris
39060573Skris	if (to < 1)
39192559Sdes		to = 1;
39260573Skris	to = hz / to;
39392559Sdes	if (to < 1)
394137019Sdes		to = 1;
395181111Sdes	return (to);
39676262Sgreen}
39792559Sdes
39892559Sdesstatic void
39992559Sdestcp_tlro_combine(struct tlro_ctrl *tlro, int force)
400137019Sdes{
40192559Sdes	struct tlro_mbuf_data *pinfoa;
40292559Sdes	struct tlro_mbuf_data *pinfob;
40392559Sdes	uint32_t cs;
404124207Sdes	int curr_ticks = ticks;
40576262Sgreen	int ticks_limit = tcp_tlro_get_ticks();
40676262Sgreen	unsigned x;
40760573Skris	unsigned y;
40860573Skris	unsigned z;
40960573Skris	int temp;
41060573Skris
41160573Skris	if (tlro->curr == 0)
41260573Skris		return;
41360573Skris
41460573Skris	for (y = 0; y != tlro->curr;) {
41560573Skris		struct mbuf *m;
41660573Skris
417192595Sdes		pinfoa = tlro->mbuf[y].data;
418192595Sdes		for (x = y + 1; x != tlro->curr; x++) {
419192595Sdes			pinfob = tlro->mbuf[x].data;
420192595Sdes			if (pinfoa->buf_length != pinfob->buf_length ||
421181111Sdes			    tcp_tlro_cmp64(pinfoa->buf, pinfob->buf) != 0)
422181111Sdes				break;
423181111Sdes		}
424181111Sdes		if (pinfoa->buf_length == 0) {
425181111Sdes			/* Forward traffic which cannot be combined */
426181111Sdes			for (z = y; z != x; z++) {
427181111Sdes				/* Just forward packets */
428181111Sdes				pinfob = tlro->mbuf[z].data;
429181111Sdes
43092559Sdes				m = pinfob->head;
43192559Sdes
43257429Smarkm				/* Reset info structure */
43357429Smarkm				pinfob->head = NULL;
43492559Sdes				pinfob->buf_length = 0;
43592559Sdes
43692559Sdes				/* Do stats */
437137019Sdes				tlro->lro_flushed++;
43892559Sdes
43992559Sdes				/* Input packet to network layer */
44092559Sdes				(*tlro->ifp->if_input) (tlro->ifp, m);
44192559Sdes			}
44292559Sdes			y = z;
44392559Sdes			continue;
44457429Smarkm		}
44592559Sdes
44692559Sdes		/* Compute current checksum subtracted some header parts */
44792559Sdes		temp = (pinfoa->ip_len - pinfoa->ip_hdrlen);
44892559Sdes		cs = ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
44992559Sdes		    tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len);
45092559Sdes
451137019Sdes		/* Append all fragments into one block */
45292559Sdes		for (z = y + 1; z != x; z++) {
45392559Sdes
45492559Sdes			pinfob = tlro->mbuf[z].data;
45592559Sdes
45692559Sdes			/* Check for command packets */
45792559Sdes			if ((pinfoa->tcp->th_flags & ~(TH_ACK | TH_PUSH)) ||
45892559Sdes			    (pinfob->tcp->th_flags & ~(TH_ACK | TH_PUSH)))
45992559Sdes				break;
46092559Sdes
46192559Sdes			/* Check if there is enough space */
46292559Sdes			if ((pinfoa->ip_len + pinfob->data_len) > tlro_max_packet)
46392559Sdes				break;
464137019Sdes
46592559Sdes			/* Try to append the new segment */
46692559Sdes			temp = ntohl(pinfoa->tcp->th_seq) + pinfoa->data_len;
46792559Sdes			if (temp != (int)ntohl(pinfob->tcp->th_seq))
46892559Sdes				break;
46992559Sdes
47092559Sdes			temp = pinfob->ip_len - pinfob->ip_hdrlen;
47192559Sdes			cs += ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
47292559Sdes			    tcp_tlro_csum((uint32_p_t *)pinfob->tcp, pinfob->tcp_len);
47392559Sdes			/* Remove fields which appear twice */
47492559Sdes			cs += (IPPROTO_TCP << 8);
47592559Sdes			if (pinfob->ip_version == 4) {
47692559Sdes				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_src, 4);
47792559Sdes				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_dst, 4);
47892559Sdes			} else {
47992559Sdes				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_src, 16);
48092559Sdes				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_dst, 16);
48192559Sdes			}
48292559Sdes			/* Remainder computation */
48392559Sdes			while (cs > 0xffff)
48492559Sdes				cs = (cs >> 16) + (cs & 0xffff);
48592559Sdes
48692559Sdes			/* Update window and ack sequence number */
48792559Sdes			pinfoa->tcp->th_ack = pinfob->tcp->th_ack;
48892559Sdes			pinfoa->tcp->th_win = pinfob->tcp->th_win;
48992559Sdes
49092559Sdes			/* Check if we should restore the timestamp */
49192559Sdes			tcp_tlro_info_restore_timestamp(pinfoa, pinfob);
49292559Sdes
49392559Sdes			/* Accumulate TCP flags */
49492559Sdes			pinfoa->tcp->th_flags |= pinfob->tcp->th_flags;
49592559Sdes
49692559Sdes			/* update lengths */
49792559Sdes			pinfoa->ip_len += pinfob->data_len;
49892559Sdes			pinfoa->data_len += pinfob->data_len;
499113911Sdes
50092559Sdes			/* Clear mbuf pointer - packet is accumulated */
50192559Sdes			m = pinfob->head;
50292559Sdes
50392559Sdes			/* Reset info structure */
50492559Sdes			pinfob->head = NULL;
505124207Sdes			pinfob->buf_length = 0;
50692559Sdes
50792559Sdes			/* Append data to mbuf [y] */
50892559Sdes			m_adj(m, pinfob->data_off);
50992559Sdes			/* Delete mbuf tags, if any */
51092559Sdes			m_tag_delete_chain(m, NULL);
51192559Sdes			/* Clear packet header flag */
51292559Sdes			m->m_flags &= ~M_PKTHDR;
51392559Sdes
51492559Sdes			/* Concat mbuf(s) to end of list */
51592559Sdes			pinfoa->pprev[0] = m;
51692559Sdes			m = m_last(m);
51792559Sdes			pinfoa->pprev = &m->m_next;
51892559Sdes			pinfoa->head->m_pkthdr.len += pinfob->data_len;
519137019Sdes		}
52092559Sdes		/* Compute new TCP header checksum */
52192559Sdes		pinfoa->tcp->th_sum = 0;
52292559Sdes
52392559Sdes		temp = pinfoa->ip_len - pinfoa->ip_hdrlen;
52492559Sdes		cs = (cs ^ 0xFFFF) +
52592559Sdes		    tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len) +
52692559Sdes		    ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8);
52792559Sdes
52892559Sdes		/* Remainder computation */
52992559Sdes		while (cs > 0xffff)
530204917Sdes			cs = (cs >> 16) + (cs & 0xffff);
53192559Sdes
53292559Sdes		/* Update new checksum */
53392559Sdes		pinfoa->tcp->th_sum = ~htole16(cs);
53492559Sdes
53592559Sdes		/* Update IP length, if any */
53692559Sdes		if (pinfoa->ip_version == 4) {
53792559Sdes			if (pinfoa->ip_len > IP_MAXPACKET) {
53892559Sdes				M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
53992559Sdes				pinfoa->ip.v4->ip_len = htons(IP_MAXPACKET);
54092559Sdes			} else {
54192559Sdes				pinfoa->ip.v4->ip_len = htons(pinfoa->ip_len);
54292559Sdes			}
54392559Sdes		} else {
544204917Sdes			if (pinfoa->ip_len > (IP_MAXPACKET + sizeof(*pinfoa->ip.v6))) {
54592559Sdes				M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
54692559Sdes				pinfoa->ip.v6->ip6_plen = htons(IP_MAXPACKET);
54792559Sdes			} else {
54892559Sdes				temp = pinfoa->ip_len - sizeof(*pinfoa->ip.v6);
54992559Sdes				pinfoa->ip.v6->ip6_plen = htons(temp);
55092559Sdes			}
55192559Sdes		}
55292559Sdes
55392559Sdes		temp = curr_ticks - pinfoa->last_tick;
55492559Sdes		/* Check if packet should be forwarded */
55592559Sdes		if (force != 0 || z != x || temp >= ticks_limit ||
55692559Sdes		    pinfoa->data_len == 0) {
55792559Sdes
55892559Sdes			/* Compute new IPv4 header checksum */
55992559Sdes			if (pinfoa->ip_version == 4) {
56092559Sdes				pinfoa->ip.v4->ip_sum = 0;
56192559Sdes				cs = tcp_tlro_csum((uint32_p_t *)pinfoa->ip.v4,
56292559Sdes				    sizeof(*pinfoa->ip.v4));
563137019Sdes				pinfoa->ip.v4->ip_sum = ~htole16(cs);
56492559Sdes			}
56592559Sdes			/* Forward packet */
56692559Sdes			m = pinfoa->head;
56792559Sdes
568137019Sdes			/* Reset info structure */
56992559Sdes			pinfoa->head = NULL;
57092559Sdes			pinfoa->buf_length = 0;
57192559Sdes
57292559Sdes			/* Do stats */
57392559Sdes			tlro->lro_flushed++;
57492559Sdes
57592559Sdes			/* Input packet to network layer */
576204917Sdes			(*tlro->ifp->if_input) (tlro->ifp, m);
577204917Sdes		}
57892559Sdes		y = z;
57992559Sdes	}
58092559Sdes
58192559Sdes	/* Cleanup all NULL heads */
58292559Sdes	for (y = 0; y != tlro->curr; y++) {
58392559Sdes		if (tlro->mbuf[y].data->head == NULL) {
58492559Sdes			for (z = y + 1; z != tlro->curr; z++) {
58592559Sdes				struct tlro_mbuf_ptr ptemp;
58692559Sdes				if (tlro->mbuf[z].data->head == NULL)
58792559Sdes					continue;
58892559Sdes				ptemp = tlro->mbuf[y];
58992559Sdes				tlro->mbuf[y] = tlro->mbuf[z];
59092559Sdes				tlro->mbuf[z] = ptemp;
59192559Sdes				y++;
59292559Sdes			}
59392559Sdes			break;
59492559Sdes		}
59592559Sdes	}
59692559Sdes	tlro->curr = y;
59792559Sdes}
59892559Sdes
59992559Sdesstatic void
60092559Sdestcp_tlro_cleanup(struct tlro_ctrl *tlro)
60192559Sdes{
60292559Sdes	while (tlro->curr != 0 &&
60392559Sdes	    tlro->mbuf[tlro->curr - 1].data->head == NULL)
60492559Sdes		tlro->curr--;
60592559Sdes}
60692559Sdes
60792559Sdesvoid
60892559Sdestcp_tlro_flush(struct tlro_ctrl *tlro, int force)
60992559Sdes{
61092559Sdes	if (tlro->curr == 0)
61192559Sdes		return;
612137019Sdes
61392559Sdes	tcp_tlro_sort(tlro);
61492559Sdes	tcp_tlro_cleanup(tlro);
61592559Sdes	tcp_tlro_combine(tlro, force);
61692559Sdes}
61792559Sdes
61892559Sdesint
61992559Sdestcp_tlro_init(struct tlro_ctrl *tlro, struct ifnet *ifp,
62092559Sdes    int max_mbufs)
62192559Sdes{
62292559Sdes	ssize_t size;
62392559Sdes	uint32_t x;
62492559Sdes
62592559Sdes	/* Set zero defaults */
62692559Sdes	memset(tlro, 0, sizeof(*tlro));
62792559Sdes
628204917Sdes	/* Compute size needed for data */
629204917Sdes	size = (sizeof(struct tlro_mbuf_ptr) * max_mbufs) +
63092559Sdes	    (sizeof(struct tlro_mbuf_data) * max_mbufs);
63192559Sdes
63292559Sdes	/* Range check */
63392559Sdes	if (max_mbufs <= 0 || size <= 0 || ifp == NULL)
63492559Sdes		return (EINVAL);
63592559Sdes
63692559Sdes	/* Setup tlro control structure */
63792559Sdes	tlro->mbuf = malloc(size, M_TLRO, M_WAITOK | M_ZERO);
63892559Sdes	tlro->max = max_mbufs;
639137019Sdes	tlro->ifp = ifp;
640204917Sdes
64192559Sdes	/* Setup pointer array */
64292559Sdes	for (x = 0; x != tlro->max; x++) {
64392559Sdes		tlro->mbuf[x].data = ((struct tlro_mbuf_data *)
64492559Sdes		    &tlro->mbuf[max_mbufs]) + x;
645204917Sdes	}
64692559Sdes	return (0);
64792559Sdes}
64892559Sdes
64992559Sdesvoid
65092559Sdestcp_tlro_free(struct tlro_ctrl *tlro)
65192559Sdes{
65292559Sdes	struct tlro_mbuf_data *pinfo;
65392559Sdes	struct mbuf *m;
65492559Sdes	uint32_t y;
65592559Sdes
65692559Sdes	/* Check if not setup */
65792559Sdes	if (tlro->mbuf == NULL)
65892559Sdes		return;
65992559Sdes	/* Free MBUF array and any leftover MBUFs */
66092559Sdes	for (y = 0; y != tlro->max; y++) {
66192559Sdes
66292559Sdes		pinfo = tlro->mbuf[y].data;
663106130Sdes
66492559Sdes		m = pinfo->head;
665124207Sdes
66692559Sdes		/* Reset info structure */
66792559Sdes		pinfo->head = NULL;
668113911Sdes		pinfo->buf_length = 0;
66992559Sdes
67092559Sdes		m_freem(m);
67192559Sdes	}
67292559Sdes	free(tlro->mbuf, M_TLRO);
67392559Sdes	/* Reset buffer */
67492559Sdes	memset(tlro, 0, sizeof(*tlro));
67592559Sdes}
67692559Sdes
67792559Sdesvoid
678113911Sdestcp_tlro_rx(struct tlro_ctrl *tlro, struct mbuf *m)
67992559Sdes{
680113911Sdes	if (m->m_len > 0 && tlro->curr < tlro->max) {
681106130Sdes		/* do stats */
68292559Sdes		tlro->lro_queued++;
683124207Sdes
68492559Sdes		/* extract header */
68592559Sdes		tcp_tlro_extract_header(tlro->mbuf[tlro->curr++].data,
686137019Sdes		    m, tlro->sequence++);
68792559Sdes	} else if (tlro->ifp != NULL) {
68892559Sdes		/* do stats */
68992559Sdes		tlro->lro_flushed++;
69092559Sdes
69192559Sdes		/* input packet to network layer */
692162856Sdes		(*tlro->ifp->if_input) (tlro->ifp, m);
69392559Sdes	} else {
694181111Sdes		/* packet drop */
695181111Sdes		m_freem(m);
69692559Sdes	}
697181111Sdes}
698181111Sdes