tcp_lro.c revision 295126
/*-
 * Copyright (c) 2007, Myricom Inc.
 * Copyright (c) 2008, Intel Corporation.
 * Copyright (c) 2012 The FreeBSD Foundation
 * Copyright (c) 2016 Mellanox Technologies.
 * All rights reserved.
 *
 * Portions of this software were developed by Bjoern Zeeb
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/netinet/tcp_lro.c 295126 2016-02-01 17:41:21Z glebius $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/vnet.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>

#include <netinet6/ip6_var.h>

#include <machine/in_cksum.h>

static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");

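/*
 * With TCP_LRO_UPDATE_CSUM defined (the default below), the IP and TCP
 * checksums of a merged chain are patched up incrementally on flush.
 * Without it, they are simply overwritten with TCP_LRO_INVALID_CSUM.
 */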
#define	TCP_LRO_UPDATE_CSUM	1
#ifndef	TCP_LRO_UPDATE_CSUM
#define	TCP_LRO_INVALID_CSUM	0x0000
#endif

int
tcp_lro_init(struct lro_ctrl *lc)
{
	return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0));
}

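/*
 * Initialize an LRO context.  A single contiguous allocation holds both the
 * deferred-mbuf pointer array (lro_mbufs entries) and the lro_entry pool
 * (lro_entries entries); the entries are strung onto the free list.
 * Returns ENOMEM if the allocation fails.
 */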
int
tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
    unsigned lro_entries, unsigned lro_mbufs)
{
	struct lro_entry *le;
	size_t size;
	unsigned i;

	lc->lro_bad_csum = 0;
	lc->lro_queued = 0;
	lc->lro_flushed = 0;
	lc->lro_cnt = 0;
	lc->lro_mbuf_count = 0;
	lc->lro_mbuf_max = lro_mbufs;
	lc->lro_cnt = lro_entries;
	lc->ifp = ifp;
	SLIST_INIT(&lc->lro_free);
	SLIST_INIT(&lc->lro_active);

	/* compute size to allocate */
	size = (lro_mbufs * sizeof(struct mbuf *)) +
	    (lro_entries * sizeof(*le));
	lc->lro_mbuf_data = (struct mbuf **)
	    malloc(size, M_LRO, M_NOWAIT | M_ZERO);

	/* check for out of memory */
	if (lc->lro_mbuf_data == NULL) {
		memset(lc, 0, sizeof(*lc));
		return (ENOMEM);
	}
	/* compute offset for LRO entries */
	le = (struct lro_entry *)
	    (lc->lro_mbuf_data + lro_mbufs);

	/* setup linked list */
	for (i = 0; i != lro_entries; i++)
		SLIST_INSERT_HEAD(&lc->lro_free, le + i, next);

	return (0);
}

void
tcp_lro_free(struct lro_ctrl *lc)
{
	struct lro_entry *le;
	unsigned x;

	/* reset LRO free list */
	SLIST_INIT(&lc->lro_free);

	/* free active mbufs, if any */
	while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lc->lro_active, next);
		m_freem(le->m_head);
	}

	/* free mbuf array, if any */
	for (x = 0; x != lc->lro_mbuf_count; x++)
		m_freem(lc->lro_mbuf_data[x]);
	lc->lro_mbuf_count = 0;

	/* free allocated memory, if any */
	free(lc->lro_mbuf_data, M_LRO);
	lc->lro_mbuf_data = NULL;
}

#ifdef TCP_LRO_UPDATE_CSUM
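/*
 * Return the folded 16-bit one's complement sum of the TCP header
 * (th_off 32-bit words), clearing th_sum as a side effect.
 */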
static uint16_t
tcp_lro_csum_th(struct tcphdr *th)
{
	uint32_t ch;
	uint16_t *p, l;

	ch = th->th_sum = 0x0000;
	l = th->th_off;
	p = (uint16_t *)th;
	while (l > 0) {
		ch += *p;
		p++;
		ch += *p;
		p++;
		l--;
	}
	while (ch > 0xffff)
		ch = (ch >> 16) + (ch & 0xffff);

	return (ch & 0xffff);
}

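/*
 * Remove the variable parts (IP length or pseudo-header, and the TCP header
 * itself) from the checksum that came with the segment, so the remainder can
 * be accumulated into le->ulp_csum and reapplied at flush time.
 */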
static uint16_t
tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
    uint16_t tcp_data_len, uint16_t csum)
{
	uint32_t c;
	uint16_t cs;

	c = csum;

	/* Remove length from checksum. */
	switch (le->eh_type) {
#ifdef INET6
	case ETHERTYPE_IPV6:
	{
		struct ip6_hdr *ip6;

		ip6 = (struct ip6_hdr *)l3hdr;
		if (le->append_cnt == 0)
			cs = ip6->ip6_plen;
		else {
			uint32_t cx;

			cx = ntohs(ip6->ip6_plen);
			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
		}
		break;
	}
#endif
#ifdef INET
	case ETHERTYPE_IP:
	{
		struct ip *ip4;

		ip4 = (struct ip *)l3hdr;
		if (le->append_cnt == 0)
			cs = ip4->ip_len;
		else {
			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
			    IPPROTO_TCP);
			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
			    htons(cs));
		}
		break;
	}
#endif
	default:
		cs = 0;		/* Keep compiler happy. */
	}

	cs = ~cs;
	c += cs;

	/* Remove TCP header csum. */
	cs = ~tcp_lro_csum_th(th);
	c += cs;
	while (c > 0xffff)
		c = (c >> 16) + (c & 0xffff);

	return (c & 0xffff);
}
#endif

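/*
 * Flush any active entries that have not been updated within *timeout.
 */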
void
tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
{
	struct lro_entry *le, *le_tmp;
	struct timeval tv;

	if (SLIST_EMPTY(&lc->lro_active))
		return;

	getmicrotime(&tv);
	timevalsub(&tv, timeout);
	SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
		if (timevalcmp(&tv, &le->mtime, >=)) {
			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
			tcp_lro_flush(lc, le);
		}
	}
}

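/*
 * Complete an LRO entry: if anything was merged, rewrite the length, ACK,
 * window, timestamp and checksum fields in the stored header, then pass the
 * aggregated chain to the interface's if_input and recycle the entry onto
 * the free list.
 */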
void
tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
{

	if (le->append_cnt > 0) {
		struct tcphdr *th;
		uint16_t p_len;

		p_len = htons(le->p_len);
		switch (le->eh_type) {
#ifdef INET6
		case ETHERTYPE_IPV6:
		{
			struct ip6_hdr *ip6;

			ip6 = le->le_ip6;
			ip6->ip6_plen = p_len;
			th = (struct tcphdr *)(ip6 + 1);
			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
			break;
		}
#endif
#ifdef INET
		case ETHERTYPE_IP:
		{
			struct ip *ip4;
#ifdef TCP_LRO_UPDATE_CSUM
			uint32_t cl;
			uint16_t c;
#endif

			ip4 = le->le_ip4;
#ifdef TCP_LRO_UPDATE_CSUM
			/* Fix IP header checksum for new length. */
			c = ~ip4->ip_sum;
			cl = c;
			c = ~ip4->ip_len;
			cl += c + p_len;
			while (cl > 0xffff)
				cl = (cl >> 16) + (cl & 0xffff);
			c = cl;
			ip4->ip_sum = ~c;
#else
			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
#endif
			ip4->ip_len = p_len;
			th = (struct tcphdr *)(ip4 + 1);
			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
			le->p_len += ETHER_HDR_LEN;
			break;
		}
#endif
		default:
			th = NULL;	/* Keep compiler happy. */
		}
		le->m_head->m_pkthdr.csum_data = 0xffff;
		le->m_head->m_pkthdr.len = le->p_len;

		/* Incorporate the latest ACK into the TCP header. */
		th->th_ack = le->ack_seq;
		th->th_win = le->window;
		/* Incorporate latest timestamp into the TCP header. */
		if (le->timestamp != 0) {
			uint32_t *ts_ptr;

			ts_ptr = (uint32_t *)(th + 1);
			ts_ptr[1] = htonl(le->tsval);
			ts_ptr[2] = le->tsecr;
		}
#ifdef TCP_LRO_UPDATE_CSUM
		/* Update the TCP header checksum. */
		le->ulp_csum += p_len;
		le->ulp_csum += tcp_lro_csum_th(th);
		while (le->ulp_csum > 0xffff)
			le->ulp_csum = (le->ulp_csum >> 16) +
			    (le->ulp_csum & 0xffff);
		th->th_sum = (le->ulp_csum & 0xffff);
		th->th_sum = ~th->th_sum;
#else
		th->th_sum = TCP_LRO_INVALID_CSUM;
#endif
	}

	(*lc->ifp->if_input)(lc->ifp, le->m_head);
	lc->lro_queued += le->append_cnt + 1;
	lc->lro_flushed++;
	bzero(le, sizeof(*le));
	SLIST_INSERT_HEAD(&lc->lro_free, le, next);
}

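/*
 * qsort comparator: order deferred mbufs by hash type, then flowid, then
 * queueing sequence, so segments of the same flow end up adjacent and in
 * arrival order for tcp_lro_flush_all().
 */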
static int
tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb)
{
	const struct mbuf *ma = *((const struct mbuf * const *)ppa);
	const struct mbuf *mb = *((const struct mbuf * const *)ppb);
	int ret;

	ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb);
	if (ret != 0)
		goto done;

	ret = ma->m_pkthdr.flowid - mb->m_pkthdr.flowid;
	if (ret != 0)
		goto done;

	ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb);
done:
	return (ret);
}

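/*
 * Sort the deferred mbufs by stream and feed them to tcp_lro_rx(), flushing
 * the active list whenever the stream changes; any mbuf the LRO engine
 * rejects is input directly.  All active entries are flushed at the end.
 */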
void
tcp_lro_flush_all(struct lro_ctrl *lc)
{
	struct lro_entry *le;
	uint32_t hashtype;
	uint32_t flowid;
	unsigned x;

	/* check if no mbufs to flush */
	if (__predict_false(lc->lro_mbuf_count == 0))
		goto done;

	/* sort all mbufs according to stream */
	qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *),
	    &tcp_lro_mbuf_compare_header);

	/* input data into LRO engine, stream by stream */
	flowid = 0;
	hashtype = M_HASHTYPE_NONE;
	for (x = 0; x != lc->lro_mbuf_count; x++) {
		struct mbuf *mb;

		mb = lc->lro_mbuf_data[x];

		/* check for new stream */
		if (mb->m_pkthdr.flowid != flowid ||
		    M_HASHTYPE_GET(mb) != hashtype) {
			flowid = mb->m_pkthdr.flowid;
			hashtype = M_HASHTYPE_GET(mb);

			/* flush active streams */
			while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
				SLIST_REMOVE_HEAD(&lc->lro_active, next);
				tcp_lro_flush(lc, le);
			}
		}
#ifdef TCP_LRO_RESET_SEQUENCE
		/* reset sequence number */
		TCP_LRO_SEQUENCE(mb) = 0;
#endif
		/* add packet to LRO engine */
		if (tcp_lro_rx(lc, mb, 0) != 0) {
			/* input packet to network layer */
			(*lc->ifp->if_input)(lc->ifp, mb);
			lc->lro_queued++;
			lc->lro_flushed++;
		}
	}
done:
	/* flush active streams */
	while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lc->lro_active, next);
		tcp_lro_flush(lc, le);
	}
	lc->lro_mbuf_count = 0;
}

#ifdef INET6
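/*
 * IPv6 sanity checks: only plain TCP without extension headers is
 * aggregated; on success *th points at the TCP header.
 */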
static int
tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
    struct tcphdr **th)
{

	/* XXX-BZ we should check the flow-label. */

	/* XXX-BZ We do not yet support ext. hdrs. */
	if (ip6->ip6_nxt != IPPROTO_TCP)
		return (TCP_LRO_NOT_SUPPORTED);

	/* Find the TCP header. */
	*th = (struct tcphdr *)(ip6 + 1);

	return (0);
}
#endif

#ifdef INET
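/*
 * IPv4 sanity checks: require TCP, no IP options, no fragments and a valid
 * IP header checksum; on success *th points at the TCP header.
 */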
static int
tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
    struct tcphdr **th)
{
	int csum_flags;
	uint16_t csum;

	if (ip4->ip_p != IPPROTO_TCP)
		return (TCP_LRO_NOT_SUPPORTED);

	/* Ensure there are no options. */
	if ((ip4->ip_hl << 2) != sizeof (*ip4))
		return (TCP_LRO_CANNOT);

	/* .. and the packet is not fragmented. */
	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
		return (TCP_LRO_CANNOT);

	/* Legacy IP has a header checksum that needs to be correct. */
	csum_flags = m->m_pkthdr.csum_flags;
	if (csum_flags & CSUM_IP_CHECKED) {
		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
			lc->lro_bad_csum++;
			return (TCP_LRO_CANNOT);
		}
	} else {
		csum = in_cksum_hdr(ip4);
		if (__predict_false((csum) != 0)) {
			lc->lro_bad_csum++;
			return (TCP_LRO_CANNOT);
		}
	}

	/* Find the TCP header (we assured there are no IP options). */
	*th = (struct tcphdr *)(ip4 + 1);

	return (0);
}
#endif

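/*
 * Main receive entry point.  Parse the Ethernet/IP/TCP headers, trim frame
 * padding, and either append the segment to a matching active entry or start
 * a new one.  Returns non-zero when the packet cannot be aggregated and must
 * be passed to the stack unmodified; on success the mbuf is owned (or freed)
 * by the LRO code.
 */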
int
tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
{
	struct lro_entry *le;
	struct ether_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
#endif
#ifdef INET
	struct ip *ip4 = NULL;		/* Keep compiler happy. */
#endif
	struct tcphdr *th;
	void *l3hdr = NULL;		/* Keep compiler happy. */
	uint32_t *ts_ptr;
	tcp_seq seq;
	int error, ip_len, l;
	uint16_t eh_type, tcp_data_len;

	/* We expect a contiguous header [eh, ip, tcp]. */

	eh = mtod(m, struct ether_header *);
	eh_type = ntohs(eh->ether_type);
	switch (eh_type) {
#ifdef INET6
	case ETHERTYPE_IPV6:
	{
		CURVNET_SET(lc->ifp->if_vnet);
		if (V_ip6_forwarding != 0) {
			/* XXX-BZ stats but changing lro_ctrl is a problem. */
			CURVNET_RESTORE();
			return (TCP_LRO_CANNOT);
		}
		CURVNET_RESTORE();
		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
		if (error != 0)
			return (error);
		tcp_data_len = ntohs(ip6->ip6_plen);
		ip_len = sizeof(*ip6) + tcp_data_len;
		break;
	}
#endif
#ifdef INET
	case ETHERTYPE_IP:
	{
		CURVNET_SET(lc->ifp->if_vnet);
		if (V_ipforwarding != 0) {
			/* XXX-BZ stats but changing lro_ctrl is a problem. */
			CURVNET_RESTORE();
			return (TCP_LRO_CANNOT);
		}
		CURVNET_RESTORE();
		l3hdr = ip4 = (struct ip *)(eh + 1);
		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
		if (error != 0)
			return (error);
		ip_len = ntohs(ip4->ip_len);
		tcp_data_len = ip_len - sizeof(*ip4);
		break;
	}
#endif
	/* XXX-BZ what happens in case of VLAN(s)? */
	default:
		return (TCP_LRO_NOT_SUPPORTED);
	}

	/*
	 * If the frame is padded beyond the end of the IP packet, then we must
	 * trim the extra bytes off.
	 */
	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
	if (l != 0) {
		if (l < 0)
			/* Truncated packet. */
			return (TCP_LRO_CANNOT);

		m_adj(m, -l);
	}

	/*
	 * Check TCP header constraints.
	 */
	/* Ensure no bits set besides ACK or PSH. */
	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
		return (TCP_LRO_CANNOT);

	/* XXX-BZ We lose an ACK|PUSH flag concatenating multiple segments. */
	/* XXX-BZ Ideally we'd flush on PUSH? */

	/*
	 * Check for timestamps.
	 * Since the only option we handle is timestamps, we only have to
	 * handle the simple case of aligned timestamps.
	 */
	l = (th->th_off << 2);
	tcp_data_len -= l;
	l -= sizeof(*th);
	ts_ptr = (uint32_t *)(th + 1);
	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
		return (TCP_LRO_CANNOT);

	/* If the driver did not pass in the checksum, set it now. */
	if (csum == 0x0000)
		csum = th->th_sum;

	seq = ntohl(th->th_seq);

	/* Try to find a matching previous segment. */
	SLIST_FOREACH(le, &lc->lro_active, next) {
		if (le->eh_type != eh_type)
			continue;
		if (le->source_port != th->th_sport ||
		    le->dest_port != th->th_dport)
			continue;
		switch (eh_type) {
#ifdef INET6
		case ETHERTYPE_IPV6:
			if (bcmp(&le->source_ip6, &ip6->ip6_src,
			    sizeof(struct in6_addr)) != 0 ||
			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
			    sizeof(struct in6_addr)) != 0)
				continue;
			break;
#endif
#ifdef INET
		case ETHERTYPE_IP:
			if (le->source_ip4 != ip4->ip_src.s_addr ||
			    le->dest_ip4 != ip4->ip_dst.s_addr)
				continue;
			break;
#endif
		}

		/* Flush now if appending will result in overflow. */
		if (le->p_len > (65535 - tcp_data_len)) {
			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
			tcp_lro_flush(lc, le);
			break;
		}

		/* Try to append the new segment. */
		if (__predict_false(seq != le->next_seq ||
		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
			/* Out of order packet or duplicate ACK. */
			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
			tcp_lro_flush(lc, le);
			return (TCP_LRO_CANNOT);
		}

		if (l != 0) {
			uint32_t tsval = ntohl(*(ts_ptr + 1));
			/* Make sure timestamp values are increasing. */
			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
			if (__predict_false(le->tsval > tsval ||
			    *(ts_ptr + 2) == 0))
				return (TCP_LRO_CANNOT);
			le->tsval = tsval;
			le->tsecr = *(ts_ptr + 2);
		}

		le->next_seq += tcp_data_len;
		le->ack_seq = th->th_ack;
		le->window = th->th_win;
		le->append_cnt++;

#ifdef TCP_LRO_UPDATE_CSUM
		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
		    tcp_data_len, ~csum);
#endif

		if (tcp_data_len == 0) {
			m_freem(m);
			return (0);
		}

		le->p_len += tcp_data_len;

		/*
		 * Adjust the mbuf so that m_data points to the first byte of
		 * the ULP payload.  Adjust the mbuf to avoid complications and
		 * append new segment to existing mbuf chain.
		 */
		m_adj(m, m->m_pkthdr.len - tcp_data_len);
		m_demote_pkthdr(m);

		le->m_tail->m_next = m;
		le->m_tail = m_last(m);

		/*
		 * If a possible next full length packet would cause an
		 * overflow, pro-actively flush now.
		 */
		if (le->p_len > (65535 - lc->ifp->if_mtu)) {
			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
			tcp_lro_flush(lc, le);
		} else
			getmicrotime(&le->mtime);

		return (0);
	}

	/* Try to find an empty slot. */
	if (SLIST_EMPTY(&lc->lro_free))
		return (TCP_LRO_CANNOT);

	/* Start a new segment chain. */
	le = SLIST_FIRST(&lc->lro_free);
	SLIST_REMOVE_HEAD(&lc->lro_free, next);
	SLIST_INSERT_HEAD(&lc->lro_active, le, next);
	getmicrotime(&le->mtime);

	/* Start filling in details. */
	switch (eh_type) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		le->le_ip6 = ip6;
		le->source_ip6 = ip6->ip6_src;
		le->dest_ip6 = ip6->ip6_dst;
		le->eh_type = eh_type;
		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		le->le_ip4 = ip4;
		le->source_ip4 = ip4->ip_src.s_addr;
		le->dest_ip4 = ip4->ip_dst.s_addr;
		le->eh_type = eh_type;
		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
		break;
#endif
	}
	le->source_port = th->th_sport;
	le->dest_port = th->th_dport;

	le->next_seq = seq + tcp_data_len;
	le->ack_seq = th->th_ack;
	le->window = th->th_win;
	if (l != 0) {
		le->timestamp = 1;
		le->tsval = ntohl(*(ts_ptr + 1));
		le->tsecr = *(ts_ptr + 2);
	}

#ifdef TCP_LRO_UPDATE_CSUM
	/*
	 * Do not touch the csum of the first packet.  However save the
	 * "adjusted" checksum of just the source and destination addresses,
	 * the next header and the TCP payload.  The length and TCP header
	 * parts may change, so we remove those from the saved checksum and
	 * re-add with final values on tcp_lro_flush() if needed.
	 */
	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
	    __func__, le, le->ulp_csum));

	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
	    ~csum);
	th->th_sum = csum;	/* Restore checksum on first packet. */
#endif

	le->m_head = m;
	le->m_tail = m_last(m);

	return (0);
}

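/*
 * Queue an mbuf for deferred LRO processing.  Packets without checksum
 * offload information or on interfaces with LRO disabled are input directly;
 * when the array fills up it is flushed through tcp_lro_flush_all().
 */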
void
tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
{
	/* sanity checks */
	if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
	    lc->lro_mbuf_max == 0)) {
		/* packet drop */
		m_freem(mb);
		return;
	}

	/* check if packet is not LRO capable */
	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
		lc->lro_flushed++;
		lc->lro_queued++;

		/* input packet to network layer */
		(*lc->ifp->if_input) (lc->ifp, mb);
		return;
	}

	/* check if array is full */
	if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
		tcp_lro_flush_all(lc);

	/* store sequence number */
	TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count;

	/* enter mbuf */
	lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb;
}

/* end */