/*-
 * Copyright (c) 2007, Myricom Inc.
 * Copyright (c) 2008, Intel Corporation.
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Bjoern Zeeb
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31179737Sjfv
32235944Sbz#include <sys/cdefs.h>
33235944Sbz__FBSDID("$FreeBSD$");
34235944Sbz
35235944Sbz#include "opt_inet.h"
36235944Sbz#include "opt_inet6.h"
37235944Sbz
38179737Sjfv#include <sys/param.h>
39179737Sjfv#include <sys/systm.h>
40179737Sjfv#include <sys/mbuf.h>
41179737Sjfv#include <sys/kernel.h>
42179737Sjfv#include <sys/socket.h>
43179737Sjfv
44179737Sjfv#include <net/if.h>
45235944Sbz#include <net/if_var.h>
46179737Sjfv#include <net/ethernet.h>
47236394Sbz#include <net/vnet.h>
48179737Sjfv
49179737Sjfv#include <netinet/in_systm.h>
50179737Sjfv#include <netinet/in.h>
51235944Sbz#include <netinet/ip6.h>
52179737Sjfv#include <netinet/ip.h>
53235981Sbz#include <netinet/ip_var.h>
54179737Sjfv#include <netinet/tcp.h>
55179737Sjfv#include <netinet/tcp_lro.h>
56179737Sjfv
57235981Sbz#include <netinet6/ip6_var.h>
58235981Sbz
59179737Sjfv#include <machine/in_cksum.h>
60179737Sjfv
/* Number of LRO entries per RX queue; a build may predefine its own value. */
#if !defined(LRO_ENTRIES)
#define	LRO_ENTRIES	8
#endif

/*
 * With TCP_LRO_UPDATE_CSUM defined the checksum fields of merged packets are
 * recomputed; otherwise they are overwritten with TCP_LRO_INVALID_CSUM.
 */
#define	TCP_LRO_UPDATE_CSUM	1
#if !defined(TCP_LRO_UPDATE_CSUM)
#define	TCP_LRO_INVALID_CSUM	0x0000
#endif
69179737Sjfv
70179737Sjfvint
71235944Sbztcp_lro_init(struct lro_ctrl *lc)
72179737Sjfv{
73235944Sbz	struct lro_entry *le;
74235944Sbz	int error, i;
75179737Sjfv
76235944Sbz	lc->lro_bad_csum = 0;
77235944Sbz	lc->lro_queued = 0;
78235944Sbz	lc->lro_flushed = 0;
79235944Sbz	lc->lro_cnt = 0;
80235944Sbz	SLIST_INIT(&lc->lro_free);
81235944Sbz	SLIST_INIT(&lc->lro_active);
82179737Sjfv
83235944Sbz	error = 0;
84179737Sjfv	for (i = 0; i < LRO_ENTRIES; i++) {
85235944Sbz		le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
86235944Sbz		    M_NOWAIT | M_ZERO);
87235944Sbz                if (le == NULL) {
88179737Sjfv			if (i == 0)
89179737Sjfv				error = ENOMEM;
90179737Sjfv                        break;
91179737Sjfv                }
92235944Sbz		lc->lro_cnt = i + 1;
93235944Sbz		SLIST_INSERT_HEAD(&lc->lro_free, le, next);
94179737Sjfv        }
95179737Sjfv
96179737Sjfv	return (error);
97179737Sjfv}
98179737Sjfv
99179737Sjfvvoid
100235944Sbztcp_lro_free(struct lro_ctrl *lc)
101179737Sjfv{
102235944Sbz	struct lro_entry *le;
103179737Sjfv
104235944Sbz	while (!SLIST_EMPTY(&lc->lro_free)) {
105235944Sbz		le = SLIST_FIRST(&lc->lro_free);
106235944Sbz		SLIST_REMOVE_HEAD(&lc->lro_free, next);
107235944Sbz		free(le, M_DEVBUF);
108179737Sjfv	}
109179737Sjfv}
110179737Sjfv
111235944Sbz#ifdef TCP_LRO_UPDATE_CSUM
112235944Sbzstatic uint16_t
113235944Sbztcp_lro_csum_th(struct tcphdr *th)
114235944Sbz{
115235944Sbz	uint32_t ch;
116235944Sbz	uint16_t *p, l;
117235944Sbz
118235944Sbz	ch = th->th_sum = 0x0000;
119235944Sbz	l = th->th_off;
120235944Sbz	p = (uint16_t *)th;
121235944Sbz	while (l > 0) {
122235944Sbz		ch += *p;
123235944Sbz		p++;
124235944Sbz		ch += *p;
125235944Sbz		p++;
126235944Sbz		l--;
127235944Sbz	}
128235944Sbz	while (ch > 0xffff)
129235944Sbz		ch = (ch >> 16) + (ch & 0xffff);
130235944Sbz
131235944Sbz	return (ch & 0xffff);
132235944Sbz}
133235944Sbz
134235944Sbzstatic uint16_t
135235944Sbztcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
136235944Sbz    uint16_t tcp_data_len, uint16_t csum)
137235944Sbz{
138235944Sbz	uint32_t c;
139235944Sbz	uint16_t cs;
140235944Sbz
141235944Sbz	c = csum;
142235944Sbz
143235944Sbz	/* Remove length from checksum. */
144235944Sbz	switch (le->eh_type) {
145235944Sbz#ifdef INET6
146235944Sbz	case ETHERTYPE_IPV6:
147235944Sbz	{
148235944Sbz		struct ip6_hdr *ip6;
149235944Sbz
150235944Sbz		ip6 = (struct ip6_hdr *)l3hdr;
151235944Sbz		if (le->append_cnt == 0)
152235944Sbz			cs = ip6->ip6_plen;
153235944Sbz		else {
154235944Sbz			uint32_t cx;
155235944Sbz
156235944Sbz			cx = ntohs(ip6->ip6_plen);
157235944Sbz			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
158235944Sbz		}
159235944Sbz		break;
160235944Sbz	}
161235944Sbz#endif
162235944Sbz#ifdef INET
163235944Sbz	case ETHERTYPE_IP:
164235944Sbz	{
165235944Sbz		struct ip *ip4;
166235944Sbz
167235944Sbz		ip4 = (struct ip *)l3hdr;
168235944Sbz		if (le->append_cnt == 0)
169235944Sbz			cs = ip4->ip_len;
170235944Sbz		else {
171235944Sbz			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
172235944Sbz			    IPPROTO_TCP);
173235944Sbz			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
174235944Sbz			    htons(cs));
175235944Sbz		}
176235944Sbz		break;
177235944Sbz	}
178235944Sbz#endif
179235944Sbz	default:
180235944Sbz		cs = 0;		/* Keep compiler happy. */
181235944Sbz	}
182235944Sbz
183235944Sbz	cs = ~cs;
184235944Sbz	c += cs;
185235944Sbz
186235944Sbz	/* Remove TCP header csum. */
187235944Sbz	cs = ~tcp_lro_csum_th(th);
188235944Sbz	c += cs;
189235944Sbz	while (c > 0xffff)
190235944Sbz		c = (c >> 16) + (c & 0xffff);
191235944Sbz
192235944Sbz	return (c & 0xffff);
193235944Sbz}
194235944Sbz#endif
195235944Sbz
196179737Sjfvvoid
197255010Snptcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
198255010Snp{
199255010Snp	struct lro_entry *le, *le_tmp;
200255010Snp	struct timeval tv;
201255010Snp
202255010Snp	if (SLIST_EMPTY(&lc->lro_active))
203255010Snp		return;
204255010Snp
205255010Snp	getmicrotime(&tv);
206255010Snp	timevalsub(&tv, timeout);
207255010Snp	SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
208255010Snp		if (timevalcmp(&tv, &le->mtime, >=)) {
209255010Snp			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
210255010Snp			tcp_lro_flush(lc, le);
211255010Snp		}
212255010Snp	}
213255010Snp}
214255010Snp
215255010Snpvoid
216235944Sbztcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
217179737Sjfv{
218179737Sjfv
219235944Sbz	if (le->append_cnt > 0) {
220235944Sbz		struct tcphdr *th;
221235944Sbz		uint16_t p_len;
222179737Sjfv
223235944Sbz		p_len = htons(le->p_len);
224235944Sbz		switch (le->eh_type) {
225235944Sbz#ifdef INET6
226235944Sbz		case ETHERTYPE_IPV6:
227235944Sbz		{
228235944Sbz			struct ip6_hdr *ip6;
229179737Sjfv
230235944Sbz			ip6 = le->le_ip6;
231235944Sbz			ip6->ip6_plen = p_len;
232235944Sbz			th = (struct tcphdr *)(ip6 + 1);
233235944Sbz			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
234235944Sbz			    CSUM_PSEUDO_HDR;
235235944Sbz			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
236235944Sbz			break;
237235944Sbz		}
238235944Sbz#endif
239235944Sbz#ifdef INET
240235944Sbz		case ETHERTYPE_IP:
241235944Sbz		{
242235944Sbz			struct ip *ip4;
243235944Sbz#ifdef TCP_LRO_UPDATE_CSUM
244235944Sbz			uint32_t cl;
245235944Sbz			uint16_t c;
246235944Sbz#endif
247179737Sjfv
248235944Sbz			ip4 = le->le_ip4;
249235944Sbz#ifdef TCP_LRO_UPDATE_CSUM
250235944Sbz			/* Fix IP header checksum for new length. */
251235944Sbz			c = ~ip4->ip_sum;
252235944Sbz			cl = c;
253235944Sbz			c = ~ip4->ip_len;
254235944Sbz			cl += c + p_len;
255235944Sbz			while (cl > 0xffff)
256235944Sbz				cl = (cl >> 16) + (cl & 0xffff);
257235944Sbz			c = cl;
258235944Sbz			ip4->ip_sum = ~c;
259235944Sbz#else
260235944Sbz			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
261235944Sbz#endif
262235944Sbz			ip4->ip_len = p_len;
263235944Sbz			th = (struct tcphdr *)(ip4 + 1);
264235944Sbz			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
265235944Sbz			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
266235944Sbz			le->p_len += ETHER_HDR_LEN;
267235944Sbz			break;
268179737Sjfv		}
269235944Sbz#endif
270235944Sbz		default:
271235944Sbz			th = NULL;	/* Keep compiler happy. */
272235944Sbz		}
273235944Sbz		le->m_head->m_pkthdr.csum_data = 0xffff;
274235944Sbz		le->m_head->m_pkthdr.len = le->p_len;
275235944Sbz
276235944Sbz		/* Incorporate the latest ACK into the TCP header. */
277235944Sbz		th->th_ack = le->ack_seq;
278235944Sbz		th->th_win = le->window;
279235944Sbz		/* Incorporate latest timestamp into the TCP header. */
280235944Sbz		if (le->timestamp != 0) {
281235944Sbz			uint32_t *ts_ptr;
282235944Sbz
283235944Sbz			ts_ptr = (uint32_t *)(th + 1);
284235944Sbz			ts_ptr[1] = htonl(le->tsval);
285235944Sbz			ts_ptr[2] = le->tsecr;
286235944Sbz		}
287235944Sbz#ifdef TCP_LRO_UPDATE_CSUM
288235944Sbz		/* Update the TCP header checksum. */
289235944Sbz		le->ulp_csum += p_len;
290235944Sbz		le->ulp_csum += tcp_lro_csum_th(th);
291235944Sbz		while (le->ulp_csum > 0xffff)
292235944Sbz			le->ulp_csum = (le->ulp_csum >> 16) +
293235944Sbz			    (le->ulp_csum & 0xffff);
294235944Sbz		th->th_sum = (le->ulp_csum & 0xffff);
295235944Sbz		th->th_sum = ~th->th_sum;
296235944Sbz#else
297235944Sbz		th->th_sum = TCP_LRO_INVALID_CSUM;
298235944Sbz#endif
299179737Sjfv	}
300235944Sbz
301235944Sbz	(*lc->ifp->if_input)(lc->ifp, le->m_head);
302235944Sbz	lc->lro_queued += le->append_cnt + 1;
303235944Sbz	lc->lro_flushed++;
304235944Sbz	bzero(le, sizeof(*le));
305235944Sbz	SLIST_INSERT_HEAD(&lc->lro_free, le, next);
306179737Sjfv}
307179737Sjfv
308235944Sbz#ifdef INET6
309235944Sbzstatic int
310235944Sbztcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
311235944Sbz    struct tcphdr **th)
312179737Sjfv{
313179737Sjfv
314235944Sbz	/* XXX-BZ we should check the flow-label. */
315179737Sjfv
316235944Sbz	/* XXX-BZ We do not yet support ext. hdrs. */
317235944Sbz	if (ip6->ip6_nxt != IPPROTO_TCP)
318235944Sbz		return (TCP_LRO_NOT_SUPPORTED);
319179737Sjfv
320235944Sbz	/* Find the TCP header. */
321235944Sbz	*th = (struct tcphdr *)(ip6 + 1);
322179737Sjfv
323235944Sbz	return (0);
324235944Sbz}
325235944Sbz#endif
326235944Sbz
327235944Sbz#ifdef INET
328235944Sbzstatic int
329235944Sbztcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
330235944Sbz    struct tcphdr **th)
331235944Sbz{
332235944Sbz	int csum_flags;
333235944Sbz	uint16_t csum;
334235944Sbz
335235944Sbz	if (ip4->ip_p != IPPROTO_TCP)
336235944Sbz		return (TCP_LRO_NOT_SUPPORTED);
337235944Sbz
338235944Sbz	/* Ensure there are no options. */
339235944Sbz	if ((ip4->ip_hl << 2) != sizeof (*ip4))
340235944Sbz		return (TCP_LRO_CANNOT);
341235944Sbz
342235944Sbz	/* .. and the packet is not fragmented. */
343235944Sbz	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
344235944Sbz		return (TCP_LRO_CANNOT);
345235944Sbz
346235944Sbz	/* Legacy IP has a header checksum that needs to be correct. */
347235944Sbz	csum_flags = m->m_pkthdr.csum_flags;
348182089Skmacy	if (csum_flags & CSUM_IP_CHECKED) {
349182089Skmacy		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
350235944Sbz			lc->lro_bad_csum++;
351235944Sbz			return (TCP_LRO_CANNOT);
352182089Skmacy		}
353182089Skmacy	} else {
354235944Sbz		csum = in_cksum_hdr(ip4);
355247104Sgallatin		if (__predict_false((csum) != 0)) {
356235944Sbz			lc->lro_bad_csum++;
357235944Sbz			return (TCP_LRO_CANNOT);
358182089Skmacy		}
359179737Sjfv	}
360179737Sjfv
361235944Sbz	/* Find the TCP header (we assured there are no IP options). */
362235944Sbz	*th = (struct tcphdr *)(ip4 + 1);
363179737Sjfv
364235944Sbz	return (0);
365235944Sbz}
366235944Sbz#endif
367179737Sjfv
368235944Sbzint
369235944Sbztcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
370235944Sbz{
371235944Sbz	struct lro_entry *le;
372235944Sbz	struct ether_header *eh;
373235944Sbz#ifdef INET6
374235944Sbz	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
375235944Sbz#endif
376235944Sbz#ifdef INET
377235944Sbz	struct ip *ip4 = NULL;		/* Keep compiler happy. */
378235944Sbz#endif
379235944Sbz	struct tcphdr *th;
380235944Sbz	void *l3hdr = NULL;		/* Keep compiler happy. */
381235944Sbz	uint32_t *ts_ptr;
382235944Sbz	tcp_seq seq;
383235944Sbz	int error, ip_len, l;
384235944Sbz	uint16_t eh_type, tcp_data_len;
385179737Sjfv
386235944Sbz	/* We expect a contiguous header [eh, ip, tcp]. */
387235944Sbz
388235944Sbz	eh = mtod(m, struct ether_header *);
389235944Sbz	eh_type = ntohs(eh->ether_type);
390235944Sbz	switch (eh_type) {
391235944Sbz#ifdef INET6
392235944Sbz	case ETHERTYPE_IPV6:
393236394Sbz	{
394236394Sbz		CURVNET_SET(lc->ifp->if_vnet);
395235981Sbz		if (V_ip6_forwarding != 0) {
396235981Sbz			/* XXX-BZ stats but changing lro_ctrl is a problem. */
397236394Sbz			CURVNET_RESTORE();
398235981Sbz			return (TCP_LRO_CANNOT);
399235981Sbz		}
400236394Sbz		CURVNET_RESTORE();
401235944Sbz		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
402235944Sbz		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
403235944Sbz		if (error != 0)
404235944Sbz			return (error);
405235944Sbz		tcp_data_len = ntohs(ip6->ip6_plen);
406235944Sbz		ip_len = sizeof(*ip6) + tcp_data_len;
407235944Sbz		break;
408236394Sbz	}
409235944Sbz#endif
410235944Sbz#ifdef INET
411235944Sbz	case ETHERTYPE_IP:
412236394Sbz	{
413236394Sbz		CURVNET_SET(lc->ifp->if_vnet);
414235981Sbz		if (V_ipforwarding != 0) {
415235981Sbz			/* XXX-BZ stats but changing lro_ctrl is a problem. */
416236394Sbz			CURVNET_RESTORE();
417235981Sbz			return (TCP_LRO_CANNOT);
418235981Sbz		}
419236394Sbz		CURVNET_RESTORE();
420235944Sbz		l3hdr = ip4 = (struct ip *)(eh + 1);
421235944Sbz		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
422235944Sbz		if (error != 0)
423235944Sbz			return (error);
424235944Sbz		ip_len = ntohs(ip4->ip_len);
425235944Sbz		tcp_data_len = ip_len - sizeof(*ip4);
426235944Sbz		break;
427236394Sbz	}
428235944Sbz#endif
429235944Sbz	/* XXX-BZ what happens in case of VLAN(s)? */
430235944Sbz	default:
431235944Sbz		return (TCP_LRO_NOT_SUPPORTED);
432179737Sjfv	}
433179737Sjfv
434235944Sbz	/*
435235944Sbz	 * If the frame is padded beyond the end of the IP packet, then we must
436235944Sbz	 * trim the extra bytes off.
437235944Sbz	 */
438235944Sbz	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
439235944Sbz	if (l != 0) {
440235944Sbz		if (l < 0)
441235944Sbz			/* Truncated packet. */
442235944Sbz			return (TCP_LRO_CANNOT);
443179737Sjfv
444235944Sbz		m_adj(m, -l);
445235944Sbz	}
446235944Sbz
447235944Sbz	/*
448235944Sbz	 * Check TCP header constraints.
449179737Sjfv	 */
450235944Sbz	/* Ensure no bits set besides ACK or PSH. */
451235944Sbz	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
452235944Sbz		return (TCP_LRO_CANNOT);
453235944Sbz
454235944Sbz	/* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
455235944Sbz	/* XXX-BZ Ideally we'd flush on PUSH? */
456235944Sbz
457235944Sbz	/*
458235944Sbz	 * Check for timestamps.
459235944Sbz	 * Since the only option we handle are timestamps, we only have to
460235944Sbz	 * handle the simple case of aligned timestamps.
461235944Sbz	 */
462235944Sbz	l = (th->th_off << 2);
463235944Sbz	tcp_data_len -= l;
464235944Sbz	l -= sizeof(*th);
465235944Sbz	ts_ptr = (uint32_t *)(th + 1);
466235944Sbz	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
467235944Sbz	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
468235944Sbz	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
469235944Sbz		return (TCP_LRO_CANNOT);
470235944Sbz
471235944Sbz	/* If the driver did not pass in the checksum, set it now. */
472235944Sbz	if (csum == 0x0000)
473235944Sbz		csum = th->th_sum;
474235944Sbz
475235944Sbz	seq = ntohl(th->th_seq);
476235944Sbz
477235944Sbz	/* Try to find a matching previous segment. */
478235944Sbz	SLIST_FOREACH(le, &lc->lro_active, next) {
479235944Sbz		if (le->eh_type != eh_type)
480235944Sbz			continue;
481235944Sbz		if (le->source_port != th->th_sport ||
482235944Sbz		    le->dest_port != th->th_dport)
483235944Sbz			continue;
484235944Sbz		switch (eh_type) {
485235944Sbz#ifdef INET6
486235944Sbz		case ETHERTYPE_IPV6:
487235944Sbz			if (bcmp(&le->source_ip6, &ip6->ip6_src,
488235944Sbz			    sizeof(struct in6_addr)) != 0 ||
489235944Sbz			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
490235944Sbz			    sizeof(struct in6_addr)) != 0)
491235944Sbz				continue;
492235944Sbz			break;
493235944Sbz#endif
494235944Sbz#ifdef INET
495235944Sbz		case ETHERTYPE_IP:
496235944Sbz			if (le->source_ip4 != ip4->ip_src.s_addr ||
497235944Sbz			    le->dest_ip4 != ip4->ip_dst.s_addr)
498235944Sbz				continue;
499235944Sbz			break;
500235944Sbz#endif
501179737Sjfv		}
502179737Sjfv
503235944Sbz		/* Flush now if appending will result in overflow. */
504235944Sbz		if (le->p_len > (65535 - tcp_data_len)) {
505235944Sbz			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
506235944Sbz			tcp_lro_flush(lc, le);
507235944Sbz			break;
508235944Sbz		}
509179737Sjfv
510235944Sbz		/* Try to append the new segment. */
511235944Sbz		if (__predict_false(seq != le->next_seq ||
512235944Sbz		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
513235944Sbz			/* Out of order packet or duplicate ACK. */
514235944Sbz			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
515235944Sbz			tcp_lro_flush(lc, le);
516235944Sbz			return (TCP_LRO_CANNOT);
517235944Sbz		}
518179737Sjfv
519235944Sbz		if (l != 0) {
520235944Sbz			uint32_t tsval = ntohl(*(ts_ptr + 1));
521235944Sbz			/* Make sure timestamp values are increasing. */
522235944Sbz			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
523235944Sbz			if (__predict_false(le->tsval > tsval ||
524235944Sbz			    *(ts_ptr + 2) == 0))
525235944Sbz				return (TCP_LRO_CANNOT);
526235944Sbz			le->tsval = tsval;
527235944Sbz			le->tsecr = *(ts_ptr + 2);
528235944Sbz		}
529223797Scperciva
530235944Sbz		le->next_seq += tcp_data_len;
531235944Sbz		le->ack_seq = th->th_ack;
532235944Sbz		le->window = th->th_win;
533235944Sbz		le->append_cnt++;
534179737Sjfv
535235944Sbz#ifdef TCP_LRO_UPDATE_CSUM
536235944Sbz		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
537235944Sbz		    tcp_data_len, ~csum);
538235944Sbz#endif
539179737Sjfv
540235944Sbz		if (tcp_data_len == 0) {
541235944Sbz			m_freem(m);
542235944Sbz			return (0);
543235944Sbz		}
544179737Sjfv
545235944Sbz		le->p_len += tcp_data_len;
546179737Sjfv
547235944Sbz		/*
548235944Sbz		 * Adjust the mbuf so that m_data points to the first byte of
549235944Sbz		 * the ULP payload.  Adjust the mbuf to avoid complications and
550235944Sbz		 * append new segment to existing mbuf chain.
551235944Sbz		 */
552235944Sbz		m_adj(m, m->m_pkthdr.len - tcp_data_len);
553235944Sbz		m->m_flags &= ~M_PKTHDR;
554179737Sjfv
555235944Sbz		le->m_tail->m_next = m;
556235944Sbz		le->m_tail = m_last(m);
557235944Sbz
558235944Sbz		/*
559235944Sbz		 * If a possible next full length packet would cause an
560235944Sbz		 * overflow, pro-actively flush now.
561235944Sbz		 */
562235944Sbz		if (le->p_len > (65535 - lc->ifp->if_mtu)) {
563235944Sbz			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
564235944Sbz			tcp_lro_flush(lc, le);
565255010Snp		} else
566255010Snp			getmicrotime(&le->mtime);
567235944Sbz
568235944Sbz		return (0);
569179737Sjfv	}
570179737Sjfv
571235944Sbz	/* Try to find an empty slot. */
572235944Sbz	if (SLIST_EMPTY(&lc->lro_free))
573235944Sbz		return (TCP_LRO_CANNOT);
574179737Sjfv
575235944Sbz	/* Start a new segment chain. */
576235944Sbz	le = SLIST_FIRST(&lc->lro_free);
577235944Sbz	SLIST_REMOVE_HEAD(&lc->lro_free, next);
578235944Sbz	SLIST_INSERT_HEAD(&lc->lro_active, le, next);
579255010Snp	getmicrotime(&le->mtime);
580179737Sjfv
581235944Sbz	/* Start filling in details. */
582235944Sbz	switch (eh_type) {
583235944Sbz#ifdef INET6
584235944Sbz	case ETHERTYPE_IPV6:
585235944Sbz		le->le_ip6 = ip6;
586235944Sbz		le->source_ip6 = ip6->ip6_src;
587235944Sbz		le->dest_ip6 = ip6->ip6_dst;
588235944Sbz		le->eh_type = eh_type;
589235944Sbz		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
590235944Sbz		break;
591235944Sbz#endif
592235944Sbz#ifdef INET
593235944Sbz	case ETHERTYPE_IP:
594235944Sbz		le->le_ip4 = ip4;
595235944Sbz		le->source_ip4 = ip4->ip_src.s_addr;
596235944Sbz		le->dest_ip4 = ip4->ip_dst.s_addr;
597235944Sbz		le->eh_type = eh_type;
598235944Sbz		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
599235944Sbz		break;
600235944Sbz#endif
601235944Sbz	}
602235944Sbz	le->source_port = th->th_sport;
603235944Sbz	le->dest_port = th->th_dport;
604235944Sbz
605235944Sbz	le->next_seq = seq + tcp_data_len;
606235944Sbz	le->ack_seq = th->th_ack;
607235944Sbz	le->window = th->th_win;
608235944Sbz	if (l != 0) {
609235944Sbz		le->timestamp = 1;
610235944Sbz		le->tsval = ntohl(*(ts_ptr + 1));
611235944Sbz		le->tsecr = *(ts_ptr + 2);
612235944Sbz	}
613235944Sbz
614235944Sbz#ifdef TCP_LRO_UPDATE_CSUM
615235944Sbz	/*
616235944Sbz	 * Do not touch the csum of the first packet.  However save the
617235944Sbz	 * "adjusted" checksum of just the source and destination addresses,
618235944Sbz	 * the next header and the TCP payload.  The length and TCP header
619235944Sbz	 * parts may change, so we remove those from the saved checksum and
620235944Sbz	 * re-add with final values on tcp_lro_flush() if needed.
621179737Sjfv	 */
622235944Sbz	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
623235944Sbz	    __func__, le, le->ulp_csum));
624235944Sbz
625235944Sbz	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
626235944Sbz	    ~csum);
627235944Sbz	th->th_sum = csum;	/* Restore checksum on first packet. */
628235944Sbz#endif
629235944Sbz
630235944Sbz	le->m_head = m;
631235944Sbz	le->m_tail = m_last(m);
632235944Sbz
633235944Sbz	return (0);
634179737Sjfv}
635235944Sbz
/* end */
637