Deleted Added
full compact
tcp_lro.c (235474) tcp_lro.c (235944)
1/*-
2 * Copyright (c) 2007, Myricom Inc.
3 * Copyright (c) 2008, Intel Corporation.
1/*-
2 * Copyright (c) 2007, Myricom Inc.
3 * Copyright (c) 2008, Intel Corporation.
4 * Copyright (c) 2012 The FreeBSD Foundation
4 * All rights reserved.
5 *
5 * All rights reserved.
6 *
7 * Portions of this software were developed by Bjoern Zeeb
8 * under sponsorship from the FreeBSD Foundation.
9 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.

--- 5 unchanged lines hidden (view full) ---

19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.

--- 5 unchanged lines hidden (view full) ---

23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
27 * $FreeBSD: head/sys/netinet/tcp_lro.c 235474 2012-05-15 13:23:44Z bz $
31 * $FreeBSD: head/sys/netinet/tcp_lro.c 235944 2012-05-24 23:03:23Z bz $
28 */
29
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/netinet/tcp_lro.c 235944 2012-05-24 23:03:23Z bz $");
36
37#include "opt_inet.h"
38#include "opt_inet6.h"
39
30#include <sys/param.h>
31#include <sys/systm.h>
40#include <sys/param.h>
41#include <sys/systm.h>
32#include <sys/endian.h>
33#include <sys/mbuf.h>
34#include <sys/kernel.h>
35#include <sys/socket.h>
36
37#include <net/if.h>
42#include <sys/mbuf.h>
43#include <sys/kernel.h>
44#include <sys/socket.h>
45
46#include <net/if.h>
47#include <net/if_var.h>
38#include <net/ethernet.h>
48#include <net/ethernet.h>
39#include <net/if_media.h>
40
41#include <netinet/in_systm.h>
42#include <netinet/in.h>
49
50#include <netinet/in_systm.h>
51#include <netinet/in.h>
52#include <netinet/ip6.h>
43#include <netinet/ip.h>
44#include <netinet/tcp.h>
45#include <netinet/tcp_lro.h>
46
53#include <netinet/ip.h>
54#include <netinet/tcp.h>
55#include <netinet/tcp_lro.h>
56
47#include <machine/bus.h>
48#include <machine/in_cksum.h>
49
57#include <machine/in_cksum.h>
58
59#ifndef LRO_ENTRIES
60#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */
61#endif
50
62
/*
 * Accumulate the 16-bit words starting at 'raw' into a 32-bit sum and fold
 * that sum down to 16 bits.  The folded sum is returned as-is; it is not
 * complemented here, so callers do the final inversion themselves.
 *
 * 'len' is consumed four bytes (two 16-bit words) per loop iteration, so
 * data is always read in whole 4-byte groups: a 'len' that is not a
 * multiple of four results in reads up to the next multiple of four.
 * The callers visible in this file pass sizeof(struct ip) or
 * (th_off << 2).
 */
51static uint16_t do_csum_data(uint16_t *raw, int len)
52{
53 uint32_t csum;
54 csum = 0;
55 while (len > 0) {
56 csum += *raw;
57 raw++;
58 csum += *raw;
59 raw++;
60 len -= 4;
61 }
/* Fold twice so a carry produced by the first fold is absorbed as well. */
62 csum = (csum >> 16) + (csum & 0xffff);
63 csum = (csum >> 16) + (csum & 0xffff);
64 return (uint16_t)csum;
65}
63#define TCP_LRO_UPDATE_CSUM 1
64#ifndef TCP_LRO_UPDATE_CSUM
65#define TCP_LRO_INVALID_CSUM 0x0000
66#endif
66
67
67/*
68 * Allocate and init the LRO data structures
69 */
70int
68int
71tcp_lro_init(struct lro_ctrl *cntl)
69tcp_lro_init(struct lro_ctrl *lc)
72{
70{
73 struct lro_entry *lro;
74 int i, error = 0;
71 struct lro_entry *le;
72 int error, i;
75
73
76 SLIST_INIT(&cntl->lro_free);
77 SLIST_INIT(&cntl->lro_active);
74 lc->lro_bad_csum = 0;
75 lc->lro_queued = 0;
76 lc->lro_flushed = 0;
77 lc->lro_cnt = 0;
78 SLIST_INIT(&lc->lro_free);
79 SLIST_INIT(&lc->lro_active);
78
80
79 cntl->lro_bad_csum = 0;
80 cntl->lro_queued = 0;
81 cntl->lro_flushed = 0;
82
81 error = 0;
83 for (i = 0; i < LRO_ENTRIES; i++) {
82 for (i = 0; i < LRO_ENTRIES; i++) {
84 lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
85 M_DEVBUF, M_NOWAIT | M_ZERO);
86 if (lro == NULL) {
83 le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
84 M_NOWAIT | M_ZERO);
85 if (le == NULL) {
87 if (i == 0)
88 error = ENOMEM;
89 break;
90 }
86 if (i == 0)
87 error = ENOMEM;
88 break;
89 }
91 cntl->lro_cnt = i;
92 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
90 lc->lro_cnt = i + 1;
91 SLIST_INSERT_HEAD(&lc->lro_free, le, next);
93 }
94
95 return (error);
96}
97
98void
92 }
93
94 return (error);
95}
96
97void
99tcp_lro_free(struct lro_ctrl *cntl)
98tcp_lro_free(struct lro_ctrl *lc)
100{
99{
101 struct lro_entry *entry;
100 struct lro_entry *le;
102
101
103 while (!SLIST_EMPTY(&cntl->lro_free)) {
104 entry = SLIST_FIRST(&cntl->lro_free);
105 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
106 free(entry, M_DEVBUF);
102 while (!SLIST_EMPTY(&lc->lro_free)) {
103 le = SLIST_FIRST(&lc->lro_free);
104 SLIST_REMOVE_HEAD(&lc->lro_free, next);
105 free(le, M_DEVBUF);
107 }
108}
109
106 }
107}
108
109#ifdef TCP_LRO_UPDATE_CSUM
/*
 * Sum the TCP header pointed to by 'th' as 16-bit words and return the
 * folded, un-complemented 16-bit result.
 *
 * Side effect: th->th_sum is overwritten with 0x0000 before summing, so the
 * checksum field itself does not contribute to the result and the caller
 * must store a new value (or restore the old one) afterwards.
 *
 * The loop runs th->th_off times and adds two 16-bit words per iteration,
 * i.e. it covers th_off 32-bit words starting at the beginning of the
 * header.
 */
110static uint16_t
111tcp_lro_csum_th(struct tcphdr *th)
112{
113 uint32_t ch;
114 uint16_t *p, l;
115
116 ch = th->th_sum = 0x0000;
117 l = th->th_off;
118 p = (uint16_t *)th;
119 while (l > 0) {
120 ch += *p;
121 p++;
122 ch += *p;
123 p++;
124 l--;
125 }
/* Fold carries back in until the sum fits in 16 bits. */
126 while (ch > 0xffff)
127 ch = (ch >> 16) + (ch & 0xffff);
128
129 return (ch & 0xffff);
130}
131
/*
 * Starting from 'csum', add the bitwise complement of a length-dependent
 * term and of the TCP header sum, fold the result to 16 bits and return it.
 * The in-line comments describe these two steps as removing the length and
 * the TCP header checksum from 'csum'.
 *
 * The length-dependent term is selected by le->eh_type:
 *  - ETHERTYPE_IPV6: the raw ip6_plen field when le->append_cnt == 0,
 *    otherwise in6_cksum_pseudo() over the header with the host-order
 *    payload length.
 *  - ETHERTYPE_IP: the raw ip_len field when le->append_cnt == 0, otherwise
 *    in_pseudo() over source/destination address and the TCP length
 *    (ip_len minus the IP header size) combined with IPPROTO_TCP.
 *  - anything else: 0.
 *
 * 'l3hdr' is cast to the header type matching le->eh_type.
 * 'tcp_data_len' is not referenced in the body.
 * Side effect: tcp_lro_csum_th() zeroes th->th_sum, so th->th_sum is 0 on
 * return.
 */
132static uint16_t
133tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
134 uint16_t tcp_data_len, uint16_t csum)
135{
136 uint32_t c;
137 uint16_t cs;
138
139 c = csum;
140
141 /* Remove length from checksum. */
142 switch (le->eh_type) {
143#ifdef INET6
144 case ETHERTYPE_IPV6:
145 {
146 struct ip6_hdr *ip6;
147
148 ip6 = (struct ip6_hdr *)l3hdr;
149 if (le->append_cnt == 0)
150 cs = ip6->ip6_plen;
151 else {
152 uint32_t cx;
153
154 cx = ntohs(ip6->ip6_plen);
155 cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
156 }
157 break;
158 }
159#endif
160#ifdef INET
161 case ETHERTYPE_IP:
162 {
163 struct ip *ip4;
164
165 ip4 = (struct ip *)l3hdr;
166 if (le->append_cnt == 0)
167 cs = ip4->ip_len;
168 else {
169 cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
170 IPPROTO_TCP);
171 cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
172 htons(cs));
173 }
174 break;
175 }
176#endif
177 default:
178 cs = 0; /* Keep compiler happy. */
179 }
180
181 cs = ~cs;
182 c += cs;
183
184 /* Remove TCP header csum. */
185 cs = ~tcp_lro_csum_th(th);
186 c += cs;
/* Fold carries back in until the sum fits in 16 bits. */
187 while (c > 0xffff)
188 c = (c >> 16) + (c & 0xffff);
189
190 return (c & 0xffff);
191}
192#endif
193
110void
194void
111tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
195tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
112{
196{
113 struct ifnet *ifp;
114 struct ip *ip;
115 struct tcphdr *tcp;
116 uint32_t *ts_ptr;
117 uint32_t tcplen, tcp_csum;
118
197
198 if (le->append_cnt > 0) {
199 struct tcphdr *th;
200 uint16_t p_len;
119
201
120 if (lro->append_cnt) {
121 /* incorporate the new len into the ip header and
122 * re-calculate the checksum */
123 ip = lro->ip;
124 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
125 ip->ip_sum = 0;
126 ip->ip_sum = 0xffff ^
127 do_csum_data((uint16_t*)ip,
128 sizeof (*ip));
202 p_len = htons(le->p_len);
203 switch (le->eh_type) {
204#ifdef INET6
205 case ETHERTYPE_IPV6:
206 {
207 struct ip6_hdr *ip6;
129
208
130 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
131 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
132 lro->m_head->m_pkthdr.csum_data = 0xffff;
133 lro->m_head->m_pkthdr.len = lro->len;
209 ip6 = le->le_ip6;
210 ip6->ip6_plen = p_len;
211 th = (struct tcphdr *)(ip6 + 1);
212 le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
213 CSUM_PSEUDO_HDR;
214 le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
215 break;
216 }
217#endif
218#ifdef INET
219 case ETHERTYPE_IP:
220 {
221 struct ip *ip4;
222#ifdef TCP_LRO_UPDATE_CSUM
223 uint32_t cl;
224 uint16_t c;
225#endif
134
226
135 /* incorporate the latest ack into the tcp header */
136 tcp = (struct tcphdr *) (ip + 1);
137 tcp->th_ack = lro->ack_seq;
138 tcp->th_win = lro->window;
139 /* incorporate latest timestamp into the tcp header */
140 if (lro->timestamp) {
141 ts_ptr = (uint32_t *)(tcp + 1);
142 ts_ptr[1] = htonl(lro->tsval);
143 ts_ptr[2] = lro->tsecr;
227 ip4 = le->le_ip4;
228#ifdef TCP_LRO_UPDATE_CSUM
229 /* Fix IP header checksum for new length. */
230 c = ~ip4->ip_sum;
231 cl = c;
232 c = ~ip4->ip_len;
233 cl += c + p_len;
234 while (cl > 0xffff)
235 cl = (cl >> 16) + (cl & 0xffff);
236 c = cl;
237 ip4->ip_sum = ~c;
238#else
239 ip4->ip_sum = TCP_LRO_INVALID_CSUM;
240#endif
241 ip4->ip_len = p_len;
242 th = (struct tcphdr *)(ip4 + 1);
243 le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
244 CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
245 le->p_len += ETHER_HDR_LEN;
246 break;
144 }
247 }
145 /*
146 * update checksum in tcp header by re-calculating the
147 * tcp pseudoheader checksum, and adding it to the checksum
148 * of the tcp payload data
149 */
150 tcp->th_sum = 0;
151 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
152 tcp_csum = lro->data_csum;
153 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
154 htons(tcplen + IPPROTO_TCP));
155 tcp_csum += do_csum_data((uint16_t*)tcp,
156 tcp->th_off << 2);
157 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
158 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
159 tcp->th_sum = 0xffff ^ tcp_csum;
248#endif
249 default:
250 th = NULL; /* Keep compiler happy. */
251 }
252 le->m_head->m_pkthdr.csum_data = 0xffff;
253 le->m_head->m_pkthdr.len = le->p_len;
254
255 /* Incorporate the latest ACK into the TCP header. */
256 th->th_ack = le->ack_seq;
257 th->th_win = le->window;
258 /* Incorporate latest timestamp into the TCP header. */
259 if (le->timestamp != 0) {
260 uint32_t *ts_ptr;
261
262 ts_ptr = (uint32_t *)(th + 1);
263 ts_ptr[1] = htonl(le->tsval);
264 ts_ptr[2] = le->tsecr;
265 }
266#ifdef TCP_LRO_UPDATE_CSUM
267 /* Update the TCP header checksum. */
268 le->ulp_csum += p_len;
269 le->ulp_csum += tcp_lro_csum_th(th);
270 while (le->ulp_csum > 0xffff)
271 le->ulp_csum = (le->ulp_csum >> 16) +
272 (le->ulp_csum & 0xffff);
273 th->th_sum = (le->ulp_csum & 0xffff);
274 th->th_sum = ~th->th_sum;
275#else
276 th->th_sum = TCP_LRO_INVALID_CSUM;
277#endif
160 }
278 }
161 ifp = cntl->ifp;
162 (*ifp->if_input)(cntl->ifp, lro->m_head);
163 cntl->lro_queued += lro->append_cnt + 1;
164 cntl->lro_flushed++;
165 lro->m_head = NULL;
166 lro->timestamp = 0;
167 lro->append_cnt = 0;
168 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
279
280 (*lc->ifp->if_input)(lc->ifp, le->m_head);
281 lc->lro_queued += le->append_cnt + 1;
282 lc->lro_flushed++;
283 bzero(le, sizeof(*le));
284 SLIST_INSERT_HEAD(&lc->lro_free, le, next);
169}
170
285}
286
171int
172tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
287#ifdef INET6
288static int
289tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
290 struct tcphdr **th)
173{
291{
174 struct ether_header *eh;
175 struct ip *ip;
176 struct tcphdr *tcp;
177 uint32_t *ts_ptr;
178 struct mbuf *m_nxt, *m_tail;
179 struct lro_entry *lro;
180 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
181 int opt_bytes, trim, csum_flags;
182 uint32_t seq, tmp_csum, device_mtu;
183
292
293 /* XXX-BZ we should check the flow-label. */
184
294
185 eh = mtod(m_head, struct ether_header *);
186 if (eh->ether_type != htons(ETHERTYPE_IP))
187 return 1;
188 ip = (struct ip *) (eh + 1);
189 if (ip->ip_p != IPPROTO_TCP)
190 return 1;
191
192 /* ensure there are no options */
193 if ((ip->ip_hl << 2) != sizeof (*ip))
194 return -1;
295 /* XXX-BZ We do not yet support ext. hdrs. */
296 if (ip6->ip6_nxt != IPPROTO_TCP)
297 return (TCP_LRO_NOT_SUPPORTED);
195
298
196 /* .. and the packet is not fragmented */
197 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
198 return -1;
299 /* Find the TCP header. */
300 *th = (struct tcphdr *)(ip6 + 1);
199
301
200 /* verify that the IP header checksum is correct */
201 csum_flags = m_head->m_pkthdr.csum_flags;
302 return (0);
303}
304#endif
305
306#ifdef INET
307static int
308tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
309 struct tcphdr **th)
310{
311 int csum_flags;
312 uint16_t csum;
313
314 if (ip4->ip_p != IPPROTO_TCP)
315 return (TCP_LRO_NOT_SUPPORTED);
316
317 /* Ensure there are no options. */
318 if ((ip4->ip_hl << 2) != sizeof (*ip4))
319 return (TCP_LRO_CANNOT);
320
321 /* .. and the packet is not fragmented. */
322 if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
323 return (TCP_LRO_CANNOT);
324
325 /* Legacy IP has a header checksum that needs to be correct. */
326 csum_flags = m->m_pkthdr.csum_flags;
202 if (csum_flags & CSUM_IP_CHECKED) {
203 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
327 if (csum_flags & CSUM_IP_CHECKED) {
328 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
204 cntl->lro_bad_csum++;
205 return -1;
329 lc->lro_bad_csum++;
330 return (TCP_LRO_CANNOT);
206 }
207 } else {
331 }
332 } else {
208 tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
209 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
210 cntl->lro_bad_csum++;
211 return -1;
333 csum = in_cksum_hdr(ip4);
334 if (__predict_false((csum ^ 0xffff) != 0)) {
335 lc->lro_bad_csum++;
336 return (TCP_LRO_CANNOT);
212 }
213 }
337 }
338 }
214
215 /* find the TCP header */
216 tcp = (struct tcphdr *) (ip + 1);
217
339
218 /* Get the TCP checksum if we don't have it */
219 if (!csum)
220 csum = tcp->th_sum;
340 /* Find the TCP header (we assured there are no IP options). */
341 *th = (struct tcphdr *)(ip4 + 1);
221
342
222 /* ensure no bits set besides ack or psh */
223 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
224 return -1;
343 return (0);
344}
345#endif
225
346
226 /* check for timestamps. Since the only option we handle are
227 timestamps, we only have to handle the simple case of
228 aligned timestamps */
347int
348tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
349{
350 struct lro_entry *le;
351 struct ether_header *eh;
352#ifdef INET6
353 struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
354#endif
355#ifdef INET
356 struct ip *ip4 = NULL; /* Keep compiler happy. */
357#endif
358 struct tcphdr *th;
359 void *l3hdr = NULL; /* Keep compiler happy. */
360 uint32_t *ts_ptr;
361 tcp_seq seq;
362 int error, ip_len, l;
363 uint16_t eh_type, tcp_data_len;
229
364
230 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
231 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
232 ts_ptr = (uint32_t *)(tcp + 1);
233 if (opt_bytes != 0) {
234 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
235 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
236 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
237 return -1;
365 /* We expect a contiguous header [eh, ip, tcp]. */
366
367 eh = mtod(m, struct ether_header *);
368 eh_type = ntohs(eh->ether_type);
369 switch (eh_type) {
370#ifdef INET6
371 case ETHERTYPE_IPV6:
372 l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
373 error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
374 if (error != 0)
375 return (error);
376 tcp_data_len = ntohs(ip6->ip6_plen);
377 ip_len = sizeof(*ip6) + tcp_data_len;
378 break;
379#endif
380#ifdef INET
381 case ETHERTYPE_IP:
382 l3hdr = ip4 = (struct ip *)(eh + 1);
383 error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
384 if (error != 0)
385 return (error);
386 ip_len = ntohs(ip4->ip_len);
387 tcp_data_len = ip_len - sizeof(*ip4);
388 break;
389#endif
390 /* XXX-BZ what happens in case of VLAN(s)? */
391 default:
392 return (TCP_LRO_NOT_SUPPORTED);
238 }
239
393 }
394
240 ip_len = ntohs(ip->ip_len);
241 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
242
395 /*
396 * If the frame is padded beyond the end of the IP packet, then we must
397 * trim the extra bytes off.
398 */
399 l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
400 if (l != 0) {
401 if (l < 0)
402 /* Truncated packet. */
403 return (TCP_LRO_CANNOT);
243
404
244 /*
245 * If frame is padded beyond the end of the IP packet,
246 * then we must trim the extra bytes off the end.
405 m_adj(m, -l);
406 }
407
408 /*
409 * Check TCP header constraints.
247 */
410 */
248 tot_len = m_head->m_pkthdr.len;
249 trim = tot_len - (ip_len + ETHER_HDR_LEN);
250 if (trim != 0) {
251 if (trim < 0) {
252 /* truncated packet */
253 return -1;
411 /* Ensure no bits set besides ACK or PSH. */
412 if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
413 return (TCP_LRO_CANNOT);
414
415 /* XXX-BZ We lose an ACK|PUSH flag when concatenating multiple segments. */
416 /* XXX-BZ Ideally we'd flush on PUSH? */
417
418 /*
419 * Check for timestamps.
420 * Since the only option we handle are timestamps, we only have to
421 * handle the simple case of aligned timestamps.
422 */
423 l = (th->th_off << 2);
424 tcp_data_len -= l;
425 l -= sizeof(*th);
426 ts_ptr = (uint32_t *)(th + 1);
427 if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
428 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
429 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
430 return (TCP_LRO_CANNOT);
431
432 /* If the driver did not pass in the checksum, set it now. */
433 if (csum == 0x0000)
434 csum = th->th_sum;
435
436 seq = ntohl(th->th_seq);
437
438 /* Try to find a matching previous segment. */
439 SLIST_FOREACH(le, &lc->lro_active, next) {
440 if (le->eh_type != eh_type)
441 continue;
442 if (le->source_port != th->th_sport ||
443 le->dest_port != th->th_dport)
444 continue;
445 switch (eh_type) {
446#ifdef INET6
447 case ETHERTYPE_IPV6:
448 if (bcmp(&le->source_ip6, &ip6->ip6_src,
449 sizeof(struct in6_addr)) != 0 ||
450 bcmp(&le->dest_ip6, &ip6->ip6_dst,
451 sizeof(struct in6_addr)) != 0)
452 continue;
453 break;
454#endif
455#ifdef INET
456 case ETHERTYPE_IP:
457 if (le->source_ip4 != ip4->ip_src.s_addr ||
458 le->dest_ip4 != ip4->ip_dst.s_addr)
459 continue;
460 break;
461#endif
254 }
462 }
255 m_adj(m_head, -trim);
256 tot_len = m_head->m_pkthdr.len;
257 }
258
463
259 m_nxt = m_head;
260 m_tail = NULL; /* -Wuninitialized */
261 while (m_nxt != NULL) {
262 m_tail = m_nxt;
263 m_nxt = m_tail->m_next;
264 }
464 /* Flush now if appending will result in overflow. */
465 if (le->p_len > (65535 - tcp_data_len)) {
466 SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
467 tcp_lro_flush(lc, le);
468 break;
469 }
265
470
266 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
267 seq = ntohl(tcp->th_seq);
471 /* Try to append the new segment. */
472 if (__predict_false(seq != le->next_seq ||
473 (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
474 /* Out of order packet or duplicate ACK. */
475 SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
476 tcp_lro_flush(lc, le);
477 return (TCP_LRO_CANNOT);
478 }
268
479
269 SLIST_FOREACH(lro, &cntl->lro_active, next) {
270 if (lro->source_port == tcp->th_sport &&
271 lro->dest_port == tcp->th_dport &&
272 lro->source_ip == ip->ip_src.s_addr &&
273 lro->dest_ip == ip->ip_dst.s_addr) {
274 /* Flush now if appending will result in overflow. */
275 if (lro->len > (65535 - tcp_data_len)) {
276 SLIST_REMOVE(&cntl->lro_active, lro,
277 lro_entry, next);
278 tcp_lro_flush(cntl, lro);
279 break;
280 }
480 if (l != 0) {
481 uint32_t tsval = ntohl(*(ts_ptr + 1));
482 /* Make sure timestamp values are increasing. */
483 /* XXX-BZ flip and use TSTMP_GEQ macro for this? */
484 if (__predict_false(le->tsval > tsval ||
485 *(ts_ptr + 2) == 0))
486 return (TCP_LRO_CANNOT);
487 le->tsval = tsval;
488 le->tsecr = *(ts_ptr + 2);
489 }
281
490
282 /* Try to append it */
491 le->next_seq += tcp_data_len;
492 le->ack_seq = th->th_ack;
493 le->window = th->th_win;
494 le->append_cnt++;
283
495
284 if (__predict_false(seq != lro->next_seq ||
285 (tcp_data_len == 0 &&
286 lro->ack_seq == tcp->th_ack))) {
287 /* out of order packet or dup ack */
288 SLIST_REMOVE(&cntl->lro_active, lro,
289 lro_entry, next);
290 tcp_lro_flush(cntl, lro);
291 return -1;
292 }
496#ifdef TCP_LRO_UPDATE_CSUM
497 le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
498 tcp_data_len, ~csum);
499#endif
293
500
294 if (opt_bytes) {
295 uint32_t tsval = ntohl(*(ts_ptr + 1));
296 /* make sure timestamp values are increasing */
297 if (__predict_false(lro->tsval > tsval ||
298 *(ts_ptr + 2) == 0)) {
299 return -1;
300 }
301 lro->tsval = tsval;
302 lro->tsecr = *(ts_ptr + 2);
303 }
501 if (tcp_data_len == 0) {
502 m_freem(m);
503 return (0);
504 }
304
505
305 lro->next_seq += tcp_data_len;
306 lro->ack_seq = tcp->th_ack;
307 lro->window = tcp->th_win;
308 lro->append_cnt++;
309 if (tcp_data_len == 0) {
310 m_freem(m_head);
311 return 0;
312 }
313 /* subtract off the checksum of the tcp header
314 * from the hardware checksum, and add it to the
315 * stored tcp data checksum. Byteswap the checksum
316 * if the total length so far is odd
317 */
318 tmp_csum = do_csum_data((uint16_t*)tcp,
319 tcp_hdr_len);
320 csum = csum + (tmp_csum ^ 0xffff);
321 csum = (csum & 0xffff) + (csum >> 16);
322 csum = (csum & 0xffff) + (csum >> 16);
323 if (lro->len & 0x1) {
324 /* Odd number of bytes so far, flip bytes */
325 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
326 }
327 csum = csum + lro->data_csum;
328 csum = (csum & 0xffff) + (csum >> 16);
329 csum = (csum & 0xffff) + (csum >> 16);
330 lro->data_csum = csum;
506 le->p_len += tcp_data_len;
331
507
332 lro->len += tcp_data_len;
508 /*
509 * Adjust the mbuf so that m_data points to the first byte of
510 * the ULP payload. Adjust the mbuf to avoid complications and
511 * append new segment to existing mbuf chain.
512 */
513 m_adj(m, m->m_pkthdr.len - tcp_data_len);
514 m->m_flags &= ~M_PKTHDR;
333
515
334 /* adjust mbuf so that m->m_data points to
335 the first byte of the payload */
336 m_adj(m_head, hlen);
337 /* append mbuf chain */
338 lro->m_tail->m_next = m_head;
339 /* advance the last pointer */
340 lro->m_tail = m_tail;
341 /* flush packet if required */
342 device_mtu = cntl->ifp->if_mtu;
343 if (lro->len > (65535 - device_mtu)) {
344 SLIST_REMOVE(&cntl->lro_active, lro,
345 lro_entry, next);
346 tcp_lro_flush(cntl, lro);
347 }
348 return 0;
516 le->m_tail->m_next = m;
517 le->m_tail = m_last(m);
518
519 /*
520 * If a possible next full length packet would cause an
521 * overflow, pro-actively flush now.
522 */
523 if (le->p_len > (65535 - lc->ifp->if_mtu)) {
524 SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
525 tcp_lro_flush(lc, le);
349 }
526 }
527
528 return (0);
350 }
351
529 }
530
352 if (SLIST_EMPTY(&cntl->lro_free))
353 return -1;
531 /* Try to find an empty slot. */
532 if (SLIST_EMPTY(&lc->lro_free))
533 return (TCP_LRO_CANNOT);
354
534
355 /* start a new chain */
356 lro = SLIST_FIRST(&cntl->lro_free);
357 SLIST_REMOVE_HEAD(&cntl->lro_free, next);
358 SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
359 lro->source_port = tcp->th_sport;
360 lro->dest_port = tcp->th_dport;
361 lro->source_ip = ip->ip_src.s_addr;
362 lro->dest_ip = ip->ip_dst.s_addr;
363 lro->next_seq = seq + tcp_data_len;
364 lro->mss = tcp_data_len;
365 lro->ack_seq = tcp->th_ack;
366 lro->window = tcp->th_win;
535 /* Start a new segment chain. */
536 le = SLIST_FIRST(&lc->lro_free);
537 SLIST_REMOVE_HEAD(&lc->lro_free, next);
538 SLIST_INSERT_HEAD(&lc->lro_active, le, next);
367
539
368 /* save the checksum of just the TCP payload by
369 * subtracting off the checksum of the TCP header from
370 * the entire hardware checksum
371 * Since IP header checksum is correct, checksum over
372 * the IP header is -0. Substracting -0 is unnecessary.
373 */
374 tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
375 csum = csum + (tmp_csum ^ 0xffff);
376 csum = (csum & 0xffff) + (csum >> 16);
377 csum = (csum & 0xffff) + (csum >> 16);
378 lro->data_csum = csum;
379
380 lro->ip = ip;
381 /* record timestamp if it is present */
382 if (opt_bytes) {
383 lro->timestamp = 1;
384 lro->tsval = ntohl(*(ts_ptr + 1));
385 lro->tsecr = *(ts_ptr + 2);
540 /* Start filling in details. */
541 switch (eh_type) {
542#ifdef INET6
543 case ETHERTYPE_IPV6:
544 le->le_ip6 = ip6;
545 le->source_ip6 = ip6->ip6_src;
546 le->dest_ip6 = ip6->ip6_dst;
547 le->eh_type = eh_type;
548 le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
549 break;
550#endif
551#ifdef INET
552 case ETHERTYPE_IP:
553 le->le_ip4 = ip4;
554 le->source_ip4 = ip4->ip_src.s_addr;
555 le->dest_ip4 = ip4->ip_dst.s_addr;
556 le->eh_type = eh_type;
557 le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
558 break;
559#endif
386 }
560 }
387 lro->len = tot_len;
388 lro->m_head = m_head;
389 lro->m_tail = m_tail;
390 return 0;
561 le->source_port = th->th_sport;
562 le->dest_port = th->th_dport;
563
564 le->next_seq = seq + tcp_data_len;
565 le->ack_seq = th->th_ack;
566 le->window = th->th_win;
567 if (l != 0) {
568 le->timestamp = 1;
569 le->tsval = ntohl(*(ts_ptr + 1));
570 le->tsecr = *(ts_ptr + 2);
571 }
572
573#ifdef TCP_LRO_UPDATE_CSUM
574 /*
575 * Do not touch the csum of the first packet. However save the
576 * "adjusted" checksum of just the source and destination addresses,
577 * the next header and the TCP payload. The length and TCP header
578 * parts may change, so we remove those from the saved checksum and
579 * re-add with final values on tcp_lro_flush() if needed.
580 */
581 KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
582 __func__, le, le->ulp_csum));
583
584 le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
585 ~csum);
586 th->th_sum = csum; /* Restore checksum on first packet. */
587#endif
588
589 le->m_head = m;
590 le->m_tail = m_last(m);
591
592 return (0);
391}
593}
594
595/* end */