1290650Shselasky/*-
2290650Shselasky * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3290650Shselasky *
4290650Shselasky * Redistribution and use in source and binary forms, with or without
5290650Shselasky * modification, are permitted provided that the following conditions
6290650Shselasky * are met:
7290650Shselasky * 1. Redistributions of source code must retain the above copyright
8290650Shselasky *    notice, this list of conditions and the following disclaimer.
9290650Shselasky * 2. Redistributions in binary form must reproduce the above copyright
10290650Shselasky *    notice, this list of conditions and the following disclaimer in the
11290650Shselasky *    documentation and/or other materials provided with the distribution.
12290650Shselasky *
13290650Shselasky * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14290650Shselasky * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15290650Shselasky * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16290650Shselasky * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17290650Shselasky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18290650Shselasky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19290650Shselasky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20290650Shselasky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21290650Shselasky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22290650Shselasky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23290650Shselasky * SUCH DAMAGE.
24290650Shselasky */
25290650Shselasky
26290650Shselasky#include <sys/cdefs.h>
27290650Shselasky__FBSDID("$FreeBSD: releng/10.3/sys/dev/mlx5/mlx5_en/tcp_tlro.c 291184 2015-11-23 09:32:32Z hselasky $");
28290650Shselasky
29290650Shselasky#include "opt_inet.h"
30290650Shselasky#include "opt_inet6.h"
31290650Shselasky
32290650Shselasky#include <sys/param.h>
33290650Shselasky#include <sys/libkern.h>
34290650Shselasky#include <sys/mbuf.h>
35290650Shselasky#include <sys/lock.h>
36290650Shselasky#include <sys/mutex.h>
37290650Shselasky#include <sys/sysctl.h>
38290650Shselasky#include <sys/malloc.h>
39290650Shselasky#include <sys/kernel.h>
40290650Shselasky#include <sys/endian.h>
41290650Shselasky#include <sys/socket.h>
42290650Shselasky#include <sys/sockopt.h>
43290650Shselasky#include <sys/smp.h>
44290650Shselasky
45290650Shselasky#include <net/if.h>
46290650Shselasky#include <net/if_var.h>
47290650Shselasky#include <net/ethernet.h>
48290650Shselasky
49290650Shselasky#if defined(INET) || defined(INET6)
50290650Shselasky#include <netinet/in.h>
51290650Shselasky#endif
52290650Shselasky
53290650Shselasky#ifdef INET
54290650Shselasky#include <netinet/ip.h>
55290650Shselasky#endif
56290650Shselasky
57290650Shselasky#ifdef INET6
58290650Shselasky#include <netinet/ip6.h>
59290650Shselasky#endif
60290650Shselasky
61290650Shselasky#include <netinet/tcp_var.h>
62290650Shselasky
63290650Shselasky#include "tcp_tlro.h"
64290650Shselasky
/*
 * Fallback for mbuf headers which do not provide M_HASHTYPE_LRO_TCP.
 * A build-time warning is emitted unless compiling as a kernel module
 * (KLD_MODULE). In this file the hash type marks merged packets whose
 * length no longer fits the IP header length field; for such packets the
 * IP length is derived from m_pkthdr.len instead.
 */
65290650Shselasky#ifndef M_HASHTYPE_LRO_TCP
66290650Shselasky#ifndef KLD_MODULE
67290650Shselasky#warning "M_HASHTYPE_LRO_TCP is not defined"
68290650Shselasky#endif
69290650Shselasky#define	M_HASHTYPE_LRO_TCP 254
70290650Shselasky#endif
71290650Shselasky
/* Sysctl node "tlro" below _net_inet_tcp holding the tunables of this file */
72290650Shselaskystatic SYSCTL_NODE(_net_inet_tcp, OID_AUTO, tlro,
73290650Shselasky    CTLFLAG_RW, 0, "TCP turbo LRO parameters");
74290650Shselasky
/* Malloc type used for the queue allocated by tcp_tlro_init() */
75291184Shselaskystatic MALLOC_DEFINE(M_TLRO, "TLRO", "Turbo LRO");
76290650Shselasky
/*
 * Minimum serving rate. tcp_tlro_get_ticks() converts it into the number
 * of ticks (hz / rate, at least 1) after which tcp_tlro_combine() delivers
 * a queued packet. Values below 1 are treated as 1.
 */
77290650Shselaskystatic int tlro_min_rate = 20;		/* Hz */
78290650Shselasky
79290650ShselaskySYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, min_rate, CTLFLAG_RWTUN,
80290650Shselasky    &tlro_min_rate, 0, "Minimum serving rate in Hz");
81290650Shselasky
/*
 * Upper bound for the IP length of a merged packet; tcp_tlro_combine()
 * stops appending segments once this limit would be exceeded.
 */
82290650Shselaskystatic int tlro_max_packet = IP_MAXPACKET;
83290650Shselasky
84290650ShselaskySYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, max_packet, CTLFLAG_RWTUN,
85290650Shselasky    &tlro_max_packet, 0, "Maximum packet size in bytes");
86290650Shselasky
/*
 * Packed wrapper around a single 32-bit value. It is the element type
 * tcp_tlro_csum() iterates over; header pointers are cast to it there.
 * NOTE(review): presumably packed so the compiler does not assume 4-byte
 * alignment for these accesses - confirm.
 */
87290650Shselaskytypedef struct {
88290650Shselasky	uint32_t value;
89290650Shselasky} __packed uint32_p_t;
90290650Shselasky
91290650Shselaskystatic uint16_t
92290650Shselaskytcp_tlro_csum(const uint32_p_t *p, size_t l)
93290650Shselasky{
94290650Shselasky	const uint32_p_t *pend = p + (l / 4);
95290650Shselasky	uint64_t cs;
96290650Shselasky
97290650Shselasky	for (cs = 0; p != pend; p++)
98290650Shselasky		cs += le32toh(p->value);
99290650Shselasky	while (cs > 0xffff)
100290650Shselasky		cs = (cs >> 16) + (cs & 0xffff);
101290650Shselasky	return (cs);
102290650Shselasky}
103290650Shselasky
104290650Shselaskystatic void *
105290650Shselaskytcp_tlro_get_header(const struct mbuf *m, const u_int off,
106290650Shselasky    const u_int len)
107290650Shselasky{
108290650Shselasky	if (m->m_len < (off + len))
109290650Shselasky		return (NULL);
110290650Shselasky	return (mtod(m, char *) + off);
111290650Shselasky}
112290650Shselasky
113290650Shselaskystatic uint8_t
114290650Shselaskytcp_tlro_info_save_timestamp(struct tlro_mbuf_data *pinfo)
115290650Shselasky{
116290650Shselasky	struct tcphdr *tcp = pinfo->tcp;
117290650Shselasky	uint32_t *ts_ptr;
118290650Shselasky
119290650Shselasky	if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
120290650Shselasky		return (0);
121290650Shselasky
122290650Shselasky	ts_ptr = (uint32_t *)(tcp + 1);
123290650Shselasky	if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
124290650Shselasky	    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
125290650Shselasky		return (0);
126290650Shselasky
127291184Shselasky	/* Save timestamps */
128290650Shselasky	pinfo->tcp_ts = ts_ptr[1];
129290650Shselasky	pinfo->tcp_ts_reply = ts_ptr[2];
130290650Shselasky	return (1);
131290650Shselasky}
132290650Shselasky
133290650Shselaskystatic void
134290650Shselaskytcp_tlro_info_restore_timestamp(struct tlro_mbuf_data *pinfoa,
135290650Shselasky    struct tlro_mbuf_data *pinfob)
136290650Shselasky{
137290650Shselasky	struct tcphdr *tcp = pinfoa->tcp;
138290650Shselasky	uint32_t *ts_ptr;
139290650Shselasky
140290650Shselasky	if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
141290650Shselasky		return;
142290650Shselasky
143290650Shselasky	ts_ptr = (uint32_t *)(tcp + 1);
144290650Shselasky	if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
145290650Shselasky	    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
146290650Shselasky		return;
147290650Shselasky
148291184Shselasky	/* Restore timestamps */
149290650Shselasky	ts_ptr[1] = pinfob->tcp_ts;
150290650Shselasky	ts_ptr[2] = pinfob->tcp_ts_reply;
151290650Shselasky}
152290650Shselasky
153290650Shselaskystatic void
154290650Shselaskytcp_tlro_extract_header(struct tlro_mbuf_data *pinfo, struct mbuf *m, int seq)
155290650Shselasky{
156290650Shselasky	uint8_t *phdr = (uint8_t *)pinfo->buf;
157290650Shselasky	struct ether_header *eh;
158290650Shselasky	struct ether_vlan_header *vlan;
159290650Shselasky#ifdef INET
160290650Shselasky	struct ip *ip;
161290650Shselasky#endif
162290650Shselasky#ifdef INET6
163290650Shselasky	struct ip6_hdr *ip6;
164290650Shselasky#endif
165290650Shselasky	struct tcphdr *tcp;
166290650Shselasky	uint16_t etype;
167290650Shselasky	int diff;
168290650Shselasky	int off;
169290650Shselasky
170291184Shselasky	/* Fill in information */
171290650Shselasky	pinfo->head = m;
172290650Shselasky	pinfo->last_tick = ticks;
173290650Shselasky	pinfo->sequence = seq;
174290650Shselasky	pinfo->pprev = &m_last(m)->m_next;
175290650Shselasky
176290650Shselasky	off = sizeof(*eh);
177290650Shselasky	if (m->m_len < off)
178290650Shselasky		goto error;
179290650Shselasky	eh = tcp_tlro_get_header(m, 0, sizeof(*eh));
180290650Shselasky	if (eh == NULL)
181290650Shselasky		goto error;
182290650Shselasky	memcpy(phdr, &eh->ether_dhost, ETHER_ADDR_LEN);
183290650Shselasky	phdr += ETHER_ADDR_LEN;
184290650Shselasky	memcpy(phdr, &eh->ether_type, sizeof(eh->ether_type));
185290650Shselasky	phdr += sizeof(eh->ether_type);
186290650Shselasky	etype = ntohs(eh->ether_type);
187290650Shselasky
188290650Shselasky	if (etype == ETHERTYPE_VLAN) {
189290650Shselasky		vlan = tcp_tlro_get_header(m, off, sizeof(*vlan));
190290650Shselasky		if (vlan == NULL)
191290650Shselasky			goto error;
192290650Shselasky		memcpy(phdr, &vlan->evl_tag, sizeof(vlan->evl_tag) +
193290650Shselasky		    sizeof(vlan->evl_proto));
194290650Shselasky		phdr += sizeof(vlan->evl_tag) + sizeof(vlan->evl_proto);
195290650Shselasky		etype = ntohs(vlan->evl_proto);
196290650Shselasky		off += sizeof(*vlan) - sizeof(*eh);
197290650Shselasky	}
198290650Shselasky	switch (etype) {
199290650Shselasky#ifdef INET
200290650Shselasky	case ETHERTYPE_IP:
201290650Shselasky		/*
202290650Shselasky		 * Cannot LRO:
203290650Shselasky		 * - Non-IP packets
204290650Shselasky		 * - Fragmented packets
205290650Shselasky		 * - Packets with IPv4 options
206290650Shselasky		 * - Non-TCP packets
207290650Shselasky		 */
208290650Shselasky		ip = tcp_tlro_get_header(m, off, sizeof(*ip));
209290650Shselasky		if (ip == NULL ||
210290650Shselasky		    (ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 ||
211290650Shselasky		    (ip->ip_p != IPPROTO_TCP) ||
212290650Shselasky		    (ip->ip_hl << 2) != sizeof(*ip))
213290650Shselasky			goto error;
214290650Shselasky
215290650Shselasky		/* Legacy IP has a header checksum that needs to be correct */
216290650Shselasky		if (!(m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)) {
217290650Shselasky			/* Verify IP header */
218290650Shselasky			if (tcp_tlro_csum((uint32_p_t *)ip, sizeof(*ip)) != 0xFFFF)
219290650Shselasky				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
220290650Shselasky			else
221290650Shselasky				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
222290650Shselasky				    CSUM_IP_VALID;
223290650Shselasky		}
224290650Shselasky		/* Only accept valid checksums */
225290650Shselasky		if (!(m->m_pkthdr.csum_flags & CSUM_IP_VALID) ||
226290650Shselasky		    !(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
227290650Shselasky			goto error;
228290650Shselasky		memcpy(phdr, &ip->ip_src, sizeof(ip->ip_src) +
229290650Shselasky		    sizeof(ip->ip_dst));
230290650Shselasky		phdr += sizeof(ip->ip_src) + sizeof(ip->ip_dst);
231290650Shselasky		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
232290650Shselasky			pinfo->ip_len = m->m_pkthdr.len - off;
233290650Shselasky		else
234290650Shselasky			pinfo->ip_len = ntohs(ip->ip_len);
235290650Shselasky		pinfo->ip_hdrlen = sizeof(*ip);
236290650Shselasky		pinfo->ip.v4 = ip;
237290650Shselasky		pinfo->ip_version = 4;
238290650Shselasky		off += sizeof(*ip);
239290650Shselasky		break;
240290650Shselasky#endif
241290650Shselasky#ifdef INET6
242290650Shselasky	case ETHERTYPE_IPV6:
243290650Shselasky		/*
244290650Shselasky		 * Cannot LRO:
245290650Shselasky		 * - Non-IP packets
246290650Shselasky		 * - Packets with IPv6 options
247290650Shselasky		 * - Non-TCP packets
248290650Shselasky		 */
249290650Shselasky		ip6 = tcp_tlro_get_header(m, off, sizeof(*ip6));
250290650Shselasky		if (ip6 == NULL || ip6->ip6_nxt != IPPROTO_TCP)
251290650Shselasky			goto error;
252290650Shselasky		if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
253290650Shselasky			goto error;
254290650Shselasky		memcpy(phdr, &ip6->ip6_src, sizeof(struct in6_addr) +
255290650Shselasky		    sizeof(struct in6_addr));
256290650Shselasky		phdr += sizeof(struct in6_addr) + sizeof(struct in6_addr);
257290650Shselasky		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
258290650Shselasky			pinfo->ip_len = m->m_pkthdr.len - off;
259290650Shselasky		else
260290650Shselasky			pinfo->ip_len = ntohs(ip6->ip6_plen) + sizeof(*ip6);
261290650Shselasky		pinfo->ip_hdrlen = sizeof(*ip6);
262290650Shselasky		pinfo->ip.v6 = ip6;
263290650Shselasky		pinfo->ip_version = 6;
264290650Shselasky		off += sizeof(*ip6);
265290650Shselasky		break;
266290650Shselasky#endif
267290650Shselasky	default:
268290650Shselasky		goto error;
269290650Shselasky	}
270290650Shselasky	tcp = tcp_tlro_get_header(m, off, sizeof(*tcp));
271290650Shselasky	if (tcp == NULL)
272290650Shselasky		goto error;
273290650Shselasky	memcpy(phdr, &tcp->th_sport, sizeof(tcp->th_sport) +
274290650Shselasky	    sizeof(tcp->th_dport));
275290650Shselasky	phdr += sizeof(tcp->th_sport) +
276290650Shselasky	    sizeof(tcp->th_dport);
277291184Shselasky	/* Store TCP header length */
278290650Shselasky	*phdr++ = tcp->th_off;
279290650Shselasky	if (tcp->th_off < (sizeof(*tcp) >> 2))
280290650Shselasky		goto error;
281290650Shselasky
282291184Shselasky	/* Compute offset to data payload */
283290650Shselasky	pinfo->tcp_len = (tcp->th_off << 2);
284290650Shselasky	off += pinfo->tcp_len;
285290650Shselasky
286291184Shselasky	/* Store more info */
287290650Shselasky	pinfo->data_off = off;
288290650Shselasky	pinfo->tcp = tcp;
289290650Shselasky
290291184Shselasky	/* Try to save timestamp, if any */
291290650Shselasky	*phdr++ = tcp_tlro_info_save_timestamp(pinfo);
292290650Shselasky
293291184Shselasky	/* Verify offset and IP/TCP length */
294290650Shselasky	if (off > m->m_pkthdr.len ||
295290650Shselasky	    pinfo->ip_len < pinfo->tcp_len)
296290650Shselasky		goto error;
297290650Shselasky
298291184Shselasky	/* Compute data payload length */
299290650Shselasky	pinfo->data_len = (pinfo->ip_len - pinfo->tcp_len - pinfo->ip_hdrlen);
300290650Shselasky
301291184Shselasky	/* Trim any padded data */
302290650Shselasky	diff = (m->m_pkthdr.len - off) - pinfo->data_len;
303290650Shselasky	if (diff != 0) {
304290650Shselasky		if (diff < 0)
305290650Shselasky			goto error;
306290650Shselasky		else
307290650Shselasky			m_adj(m, -diff);
308290650Shselasky	}
309291184Shselasky	/* Compute header length */
310290650Shselasky	pinfo->buf_length = phdr - (uint8_t *)pinfo->buf;
311291184Shselasky	/* Zero-pad rest of buffer */
312290650Shselasky	memset(phdr, 0, TLRO_MAX_HEADER - pinfo->buf_length);
313290650Shselasky	return;
314290650Shselaskyerror:
315290650Shselasky	pinfo->buf_length = 0;
316290650Shselasky}
317290650Shselasky
318290650Shselaskystatic int
319290650Shselaskytcp_tlro_cmp64(const uint64_t *pa, const uint64_t *pb)
320290650Shselasky{
321290650Shselasky	int64_t diff = 0;
322290650Shselasky	unsigned x;
323290650Shselasky
324290650Shselasky	for (x = 0; x != TLRO_MAX_HEADER / 8; x++) {
325290650Shselasky		/*
326290650Shselasky		 * NOTE: Endianness does not matter in this
327290650Shselasky		 * comparisation:
328290650Shselasky		 */
329290650Shselasky		diff = pa[x] - pb[x];
330290650Shselasky		if (diff != 0)
331290650Shselasky			goto done;
332290650Shselasky	}
333290650Shselaskydone:
334290650Shselasky	if (diff < 0)
335290650Shselasky		return (-1);
336290650Shselasky	else if (diff > 0)
337290650Shselasky		return (1);
338290650Shselasky	return (0);
339290650Shselasky}
340290650Shselasky
341290650Shselaskystatic int
342290650Shselaskytcp_tlro_compare_header(const void *_ppa, const void *_ppb)
343290650Shselasky{
344290650Shselasky	const struct tlro_mbuf_ptr *ppa = _ppa;
345290650Shselasky	const struct tlro_mbuf_ptr *ppb = _ppb;
346290650Shselasky	struct tlro_mbuf_data *pinfoa = ppa->data;
347290650Shselasky	struct tlro_mbuf_data *pinfob = ppb->data;
348290650Shselasky	int ret;
349290650Shselasky
350290650Shselasky	ret = (pinfoa->head == NULL) - (pinfob->head == NULL);
351290650Shselasky	if (ret != 0)
352290650Shselasky		goto done;
353290650Shselasky
354290650Shselasky	ret = pinfoa->buf_length - pinfob->buf_length;
355290650Shselasky	if (ret != 0)
356290650Shselasky		goto done;
357290650Shselasky	if (pinfoa->buf_length != 0) {
358290650Shselasky		ret = tcp_tlro_cmp64(pinfoa->buf, pinfob->buf);
359290650Shselasky		if (ret != 0)
360290650Shselasky			goto done;
361290650Shselasky		ret = ntohl(pinfoa->tcp->th_seq) - ntohl(pinfob->tcp->th_seq);
362290650Shselasky		if (ret != 0)
363290650Shselasky			goto done;
364290650Shselasky		ret = ntohl(pinfoa->tcp->th_ack) - ntohl(pinfob->tcp->th_ack);
365290650Shselasky		if (ret != 0)
366290650Shselasky			goto done;
367290650Shselasky		ret = pinfoa->sequence - pinfob->sequence;
368290650Shselasky		if (ret != 0)
369290650Shselasky			goto done;
370290650Shselasky	}
371290650Shselaskydone:
372290650Shselasky	return (ret);
373290650Shselasky}
374290650Shselasky
375290650Shselaskystatic void
376290650Shselaskytcp_tlro_sort(struct tlro_ctrl *tlro)
377290650Shselasky{
378290650Shselasky	if (tlro->curr == 0)
379290650Shselasky		return;
380290650Shselasky
381290650Shselasky	qsort(tlro->mbuf, tlro->curr, sizeof(struct tlro_mbuf_ptr),
382290650Shselasky	    &tcp_tlro_compare_header);
383290650Shselasky}
384290650Shselasky
385290650Shselaskystatic int
386290650Shselaskytcp_tlro_get_ticks(void)
387290650Shselasky{
388290650Shselasky	int to = tlro_min_rate;
389290650Shselasky
390290650Shselasky	if (to < 1)
391290650Shselasky		to = 1;
392290650Shselasky	to = hz / to;
393290650Shselasky	if (to < 1)
394290650Shselasky		to = 1;
395290650Shselasky	return (to);
396290650Shselasky}
397290650Shselasky
/*
 * Merge queued TCP segments belonging to the same flow and deliver
 * finished packets to the interface input routine.
 *
 * The queue is walked in array order; tcp_tlro_flush() sorts it before
 * calling this function so that entries with an equal flow key are
 * adjacent. For every run of entries having the same key:
 *
 *  - Entries with an empty key (buf_length == 0) are passed to if_input
 *    unmodified.
 *  - Otherwise following segments are appended to the first entry of the
 *    run for as long as neither packet has TCP flags other than ACK and
 *    PUSH, the merged IP length stays within tlro_max_packet and the
 *    segment starts exactly where the data collected so far ends.
 *  - The TCP checksum and the IP length field of the first entry are
 *    updated to match the merged packet.
 *  - The packet is delivered when "force" is non-zero, when merging
 *    stopped before the end of the run, when it has been queued for at
 *    least tcp_tlro_get_ticks() ticks or when it carries no payload.
 *    Otherwise it stays queued.
 *
 * Finally the remaining entries are moved to the start of the array and
 * tlro->curr is updated.
 */
398290650Shselaskystatic void
399290650Shselaskytcp_tlro_combine(struct tlro_ctrl *tlro, int force)
400290650Shselasky{
401290650Shselasky	struct tlro_mbuf_data *pinfoa;
402290650Shselasky	struct tlro_mbuf_data *pinfob;
403290650Shselasky	uint32_t cs;
404290650Shselasky	int curr_ticks = ticks;
405290650Shselasky	int ticks_limit = tcp_tlro_get_ticks();
406290650Shselasky	unsigned x;
407290650Shselasky	unsigned y;
408290650Shselasky	unsigned z;
409290650Shselasky	int temp;
410290650Shselasky
411290650Shselasky	if (tlro->curr == 0)
412290650Shselasky		return;
413290650Shselasky
	/* "y" is the first entry of the current run, "x" ends up one past its last */
414290650Shselasky	for (y = 0; y != tlro->curr;) {
415290650Shselasky		struct mbuf *m;
416290650Shselasky
417290650Shselasky		pinfoa = tlro->mbuf[y].data;
		/* Find the end of the run of entries sharing the key of [y] */
418290650Shselasky		for (x = y + 1; x != tlro->curr; x++) {
419290650Shselasky			pinfob = tlro->mbuf[x].data;
420290650Shselasky			if (pinfoa->buf_length != pinfob->buf_length ||
421290650Shselasky			    tcp_tlro_cmp64(pinfoa->buf, pinfob->buf) != 0)
422290650Shselasky				break;
423290650Shselasky		}
424290650Shselasky		if (pinfoa->buf_length == 0) {
425291184Shselasky			/* Forward traffic which cannot be combined */
426290650Shselasky			for (z = y; z != x; z++) {
427291184Shselasky				/* Just forward packets */
428290650Shselasky				pinfob = tlro->mbuf[z].data;
429290650Shselasky
430290650Shselasky				m = pinfob->head;
431290650Shselasky
432291184Shselasky				/* Reset info structure */
433290650Shselasky				pinfob->head = NULL;
434290650Shselasky				pinfob->buf_length = 0;
435290650Shselasky
436291184Shselasky				/* Do stats */
437290650Shselasky				tlro->lro_flushed++;
438290650Shselasky
439291184Shselasky				/* Input packet to network layer */
440290650Shselasky				(*tlro->ifp->if_input) (tlro->ifp, m);
441290650Shselasky			}
442290650Shselasky			y = z;
443290650Shselasky			continue;
444290650Shselasky		}
445290650Shselasky
		/*
		 * "cs" starts as the sum of the byte-swapped TCP length and the
		 * TCP header (including the current th_sum) of the first packet.
		 */
446291184Shselasky		/* Compute current checksum subtracted some header parts */
447290650Shselasky		temp = (pinfoa->ip_len - pinfoa->ip_hdrlen);
448290650Shselasky		cs = ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
449290650Shselasky		    tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len);
450290650Shselasky
451291184Shselasky		/* Append all fragments into one block */
452290650Shselasky		for (z = y + 1; z != x; z++) {
453290650Shselasky
454290650Shselasky			pinfob = tlro->mbuf[z].data;
455290650Shselasky
456291184Shselasky			/* Check for command packets */
457290650Shselasky			if ((pinfoa->tcp->th_flags & ~(TH_ACK | TH_PUSH)) ||
458290650Shselasky			    (pinfob->tcp->th_flags & ~(TH_ACK | TH_PUSH)))
459290650Shselasky				break;
460290650Shselasky
461291184Shselasky			/* Check if there is enough space */
462290650Shselasky			if ((pinfoa->ip_len + pinfob->data_len) > tlro_max_packet)
463290650Shselasky				break;
464290650Shselasky
			/* Only a segment starting right behind the collected data is appended */
465291184Shselasky			/* Try to append the new segment */
466290650Shselasky			temp = ntohl(pinfoa->tcp->th_seq) + pinfoa->data_len;
467290650Shselasky			if (temp != (int)ntohl(pinfob->tcp->th_seq))
468290650Shselasky				break;
469290650Shselasky
			/*
			 * Add the TCP length and TCP header sum of the appended
			 * packet to "cs".
			 * NOTE(review): together with the protocol and address
			 * words added below this looks like the pseudo header and
			 * TCP header of the appended packet being taken out of
			 * the merged checksum - confirm.
			 */
470290650Shselasky			temp = pinfob->ip_len - pinfob->ip_hdrlen;
471290650Shselasky			cs += ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
472290650Shselasky			    tcp_tlro_csum((uint32_p_t *)pinfob->tcp, pinfob->tcp_len);
473291184Shselasky			/* Remove fields which appear twice */
474290650Shselasky			cs += (IPPROTO_TCP << 8);
475290650Shselasky			if (pinfob->ip_version == 4) {
476290650Shselasky				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_src, 4);
477290650Shselasky				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_dst, 4);
478290650Shselasky			} else {
479290650Shselasky				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_src, 16);
480290650Shselasky				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_dst, 16);
481290650Shselasky			}
482291184Shselasky			/* Remainder computation */
483290650Shselasky			while (cs > 0xffff)
484290650Shselasky				cs = (cs >> 16) + (cs & 0xffff);
485290650Shselasky
			/* The merged header takes ack and window from the latest segment */
486291184Shselasky			/* Update window and ack sequence number */
487290650Shselasky			pinfoa->tcp->th_ack = pinfob->tcp->th_ack;
488290650Shselasky			pinfoa->tcp->th_win = pinfob->tcp->th_win;
489290650Shselasky
490291184Shselasky			/* Check if we should restore the timestamp */
491290650Shselasky			tcp_tlro_info_restore_timestamp(pinfoa, pinfob);
492290650Shselasky
493291184Shselasky			/* Accumulate TCP flags */
494290650Shselasky			pinfoa->tcp->th_flags |= pinfob->tcp->th_flags;
495290650Shselasky
496290650Shselasky			/* update lengths */
497290650Shselasky			pinfoa->ip_len += pinfob->data_len;
498290650Shselasky			pinfoa->data_len += pinfob->data_len;
499290650Shselasky
500291184Shselasky			/* Clear mbuf pointer - packet is accumulated */
501290650Shselasky			m = pinfob->head;
502290650Shselasky
503291184Shselasky			/* Reset info structure */
504290650Shselasky			pinfob->head = NULL;
505290650Shselasky			pinfob->buf_length = 0;
506290650Shselasky
			/* Strip all headers so that only the TCP payload remains */
507291184Shselasky			/* Append data to mbuf [y] */
508290650Shselasky			m_adj(m, pinfob->data_off);
509291184Shselasky			/* Delete mbuf tags, if any */
510290650Shselasky			m_tag_delete_chain(m, NULL);
511291184Shselasky			/* Clear packet header flag */
512290650Shselasky			m->m_flags &= ~M_PKTHDR;
513290650Shselasky
			/* "pprev" points at the m_next field of the last mbuf of chain [y] */
514291184Shselasky			/* Concat mbuf(s) to end of list */
515290650Shselasky			pinfoa->pprev[0] = m;
516290650Shselasky			m = m_last(m);
517290650Shselasky			pinfoa->pprev = &m->m_next;
518290650Shselasky			pinfoa->head->m_pkthdr.len += pinfob->data_len;
519290650Shselasky		}
		/* The checksum field is zeroed before the merged header is summed */
520291184Shselasky		/* Compute new TCP header checksum */
521290650Shselasky		pinfoa->tcp->th_sum = 0;
522290650Shselasky
523290650Shselasky		temp = pinfoa->ip_len - pinfoa->ip_hdrlen;
524290650Shselasky		cs = (cs ^ 0xFFFF) +
525290650Shselasky		    tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len) +
526290650Shselasky		    ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8);
527290650Shselasky
528291184Shselasky		/* Remainder computation */
529290650Shselasky		while (cs > 0xffff)
530290650Shselasky			cs = (cs >> 16) + (cs & 0xffff);
531290650Shselasky
532291184Shselasky		/* Update new checksum */
533290650Shselasky		pinfoa->tcp->th_sum = ~htole16(cs);
534290650Shselasky
		/*
		 * When the merged length exceeds the limit checked below,
		 * IP_MAXPACKET is stored in the header and the mbuf is marked
		 * M_HASHTYPE_LRO_TCP; tcp_tlro_extract_header() takes the length
		 * of such packets from m_pkthdr.len.
		 */
535291184Shselasky		/* Update IP length, if any */
536290650Shselasky		if (pinfoa->ip_version == 4) {
537290650Shselasky			if (pinfoa->ip_len > IP_MAXPACKET) {
538290650Shselasky				M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
539290650Shselasky				pinfoa->ip.v4->ip_len = htons(IP_MAXPACKET);
540290650Shselasky			} else {
541290650Shselasky				pinfoa->ip.v4->ip_len = htons(pinfoa->ip_len);
542290650Shselasky			}
543290650Shselasky		} else {
544290650Shselasky			if (pinfoa->ip_len > (IP_MAXPACKET + sizeof(*pinfoa->ip.v6))) {
545290650Shselasky				M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
546290650Shselasky				pinfoa->ip.v6->ip6_plen = htons(IP_MAXPACKET);
547290650Shselasky			} else {
548290650Shselasky				temp = pinfoa->ip_len - sizeof(*pinfoa->ip.v6);
549290650Shselasky				pinfoa->ip.v6->ip6_plen = htons(temp);
550290650Shselasky			}
551290650Shselasky		}
552290650Shselasky
		/* Number of ticks this packet has been queued */
553290650Shselasky		temp = curr_ticks - pinfoa->last_tick;
		/* "z != x" means the append loop above stopped before the end of the run */
554291184Shselasky		/* Check if packet should be forwarded */
555290650Shselasky		if (force != 0 || z != x || temp >= ticks_limit ||
556290650Shselasky		    pinfoa->data_len == 0) {
557290650Shselasky
558291184Shselasky			/* Compute new IPv4 header checksum */
559290650Shselasky			if (pinfoa->ip_version == 4) {
560290650Shselasky				pinfoa->ip.v4->ip_sum = 0;
561290650Shselasky				cs = tcp_tlro_csum((uint32_p_t *)pinfoa->ip.v4,
562290650Shselasky				    sizeof(*pinfoa->ip.v4));
563290650Shselasky				pinfoa->ip.v4->ip_sum = ~htole16(cs);
564290650Shselasky			}
565291184Shselasky			/* Forward packet */
566290650Shselasky			m = pinfoa->head;
567290650Shselasky
568291184Shselasky			/* Reset info structure */
569290650Shselasky			pinfoa->head = NULL;
570290650Shselasky			pinfoa->buf_length = 0;
571290650Shselasky
572291184Shselasky			/* Do stats */
573290650Shselasky			tlro->lro_flushed++;
574290650Shselasky
575291184Shselasky			/* Input packet to network layer */
576290650Shselasky			(*tlro->ifp->if_input) (tlro->ifp, m);
577290650Shselasky		}
		/* Continue with the first entry which was not merged */
578290650Shselasky		y = z;
579290650Shselasky	}
580290650Shselasky
	/*
	 * Starting at the first entry without an mbuf chain, swap the
	 * remaining entries having a chain down into the free positions.
	 * Afterwards "y" is the number of entries still queued.
	 */
581291184Shselasky	/* Cleanup all NULL heads */
582290650Shselasky	for (y = 0; y != tlro->curr; y++) {
583290650Shselasky		if (tlro->mbuf[y].data->head == NULL) {
584290650Shselasky			for (z = y + 1; z != tlro->curr; z++) {
585290650Shselasky				struct tlro_mbuf_ptr ptemp;
586290650Shselasky				if (tlro->mbuf[z].data->head == NULL)
587290650Shselasky					continue;
588290650Shselasky				ptemp = tlro->mbuf[y];
589290650Shselasky				tlro->mbuf[y] = tlro->mbuf[z];
590290650Shselasky				tlro->mbuf[z] = ptemp;
591290650Shselasky				y++;
592290650Shselasky			}
593290650Shselasky			break;
594290650Shselasky		}
595290650Shselasky	}
596290650Shselasky	tlro->curr = y;
597290650Shselasky}
598290650Shselasky
599290650Shselaskystatic void
600290650Shselaskytcp_tlro_cleanup(struct tlro_ctrl *tlro)
601290650Shselasky{
602290650Shselasky	while (tlro->curr != 0 &&
603290650Shselasky	    tlro->mbuf[tlro->curr - 1].data->head == NULL)
604290650Shselasky		tlro->curr--;
605290650Shselasky}
606290650Shselasky
607290650Shselaskyvoid
608290650Shselaskytcp_tlro_flush(struct tlro_ctrl *tlro, int force)
609290650Shselasky{
610290650Shselasky	if (tlro->curr == 0)
611290650Shselasky		return;
612290650Shselasky
613290650Shselasky	tcp_tlro_sort(tlro);
614290650Shselasky	tcp_tlro_cleanup(tlro);
615290650Shselasky	tcp_tlro_combine(tlro, force);
616290650Shselasky}
617290650Shselasky
618290650Shselaskyint
619290650Shselaskytcp_tlro_init(struct tlro_ctrl *tlro, struct ifnet *ifp,
620290650Shselasky    int max_mbufs)
621290650Shselasky{
622290650Shselasky	ssize_t size;
623290650Shselasky	uint32_t x;
624290650Shselasky
625291184Shselasky	/* Set zero defaults */
626290650Shselasky	memset(tlro, 0, sizeof(*tlro));
627290650Shselasky
628291184Shselasky	/* Compute size needed for data */
629290650Shselasky	size = (sizeof(struct tlro_mbuf_ptr) * max_mbufs) +
630290650Shselasky	    (sizeof(struct tlro_mbuf_data) * max_mbufs);
631290650Shselasky
632291184Shselasky	/* Range check */
633290650Shselasky	if (max_mbufs <= 0 || size <= 0 || ifp == NULL)
634290650Shselasky		return (EINVAL);
635290650Shselasky
636291184Shselasky	/* Setup tlro control structure */
637290650Shselasky	tlro->mbuf = malloc(size, M_TLRO, M_WAITOK | M_ZERO);
638290650Shselasky	tlro->max = max_mbufs;
639290650Shselasky	tlro->ifp = ifp;
640290650Shselasky
641291184Shselasky	/* Setup pointer array */
642290650Shselasky	for (x = 0; x != tlro->max; x++) {
643290650Shselasky		tlro->mbuf[x].data = ((struct tlro_mbuf_data *)
644290650Shselasky		    &tlro->mbuf[max_mbufs]) + x;
645290650Shselasky	}
646290650Shselasky	return (0);
647290650Shselasky}
648290650Shselasky
649290650Shselaskyvoid
650290650Shselaskytcp_tlro_free(struct tlro_ctrl *tlro)
651290650Shselasky{
652290650Shselasky	struct tlro_mbuf_data *pinfo;
653290650Shselasky	struct mbuf *m;
654290650Shselasky	uint32_t y;
655290650Shselasky
656291184Shselasky	/* Check if not setup */
657290650Shselasky	if (tlro->mbuf == NULL)
658290650Shselasky		return;
659291184Shselasky	/* Free MBUF array and any leftover MBUFs */
660290650Shselasky	for (y = 0; y != tlro->max; y++) {
661290650Shselasky
662290650Shselasky		pinfo = tlro->mbuf[y].data;
663290650Shselasky
664290650Shselasky		m = pinfo->head;
665290650Shselasky
666291184Shselasky		/* Reset info structure */
667290650Shselasky		pinfo->head = NULL;
668290650Shselasky		pinfo->buf_length = 0;
669290650Shselasky
670290650Shselasky		m_freem(m);
671290650Shselasky	}
672290650Shselasky	free(tlro->mbuf, M_TLRO);
673291184Shselasky	/* Reset buffer */
674290650Shselasky	memset(tlro, 0, sizeof(*tlro));
675290650Shselasky}
676290650Shselasky
677290650Shselaskyvoid
678290650Shselaskytcp_tlro_rx(struct tlro_ctrl *tlro, struct mbuf *m)
679290650Shselasky{
680290650Shselasky	if (m->m_len > 0 && tlro->curr < tlro->max) {
681290650Shselasky		/* do stats */
682290650Shselasky		tlro->lro_queued++;
683290650Shselasky
684290650Shselasky		/* extract header */
685290650Shselasky		tcp_tlro_extract_header(tlro->mbuf[tlro->curr++].data,
686290650Shselasky		    m, tlro->sequence++);
687290650Shselasky	} else if (tlro->ifp != NULL) {
688290650Shselasky		/* do stats */
689290650Shselasky		tlro->lro_flushed++;
690290650Shselasky
691290650Shselasky		/* input packet to network layer */
692290650Shselasky		(*tlro->ifp->if_input) (tlro->ifp, m);
693290650Shselasky	} else {
694290650Shselasky		/* packet drop */
695290650Shselasky		m_freem(m);
696290650Shselasky	}
697290650Shselasky}
698