tcp_lro.c (235474) | tcp_lro.c (235944) |
---|---|
1/*- 2 * Copyright (c) 2007, Myricom Inc. 3 * Copyright (c) 2008, Intel Corporation. | 1/*- 2 * Copyright (c) 2007, Myricom Inc. 3 * Copyright (c) 2008, Intel Corporation. |
4 * Copyright (c) 2012 The FreeBSD Foundation |
|
4 * All rights reserved. 5 * | 5 * All rights reserved. 6 * |
7 * Portions of this software were developed by Bjoern Zeeb 8 * under sponsorship from the FreeBSD Foundation. 9 * |
|
6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. --- 5 unchanged lines hidden (view full) --- 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * | 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 
--- 5 unchanged lines hidden (view full) --- 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * |
27 * $FreeBSD: head/sys/netinet/tcp_lro.c 235474 2012-05-15 13:23:44Z bz $ | 31 * $FreeBSD: head/sys/netinet/tcp_lro.c 235944 2012-05-24 23:03:23Z bz $ |
28 */ 29 | 32 */ 33 |
34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: head/sys/netinet/tcp_lro.c 235944 2012-05-24 23:03:23Z bz $"); 36 37#include "opt_inet.h" 38#include "opt_inet6.h" 39 |
|
30#include <sys/param.h> 31#include <sys/systm.h> | 40#include <sys/param.h> 41#include <sys/systm.h> |
32#include <sys/endian.h> | |
33#include <sys/mbuf.h> 34#include <sys/kernel.h> 35#include <sys/socket.h> 36 37#include <net/if.h> | 42#include <sys/mbuf.h> 43#include <sys/kernel.h> 44#include <sys/socket.h> 45 46#include <net/if.h> |
47#include <net/if_var.h> |
|
38#include <net/ethernet.h> | 48#include <net/ethernet.h> |
39#include <net/if_media.h> | |
40 41#include <netinet/in_systm.h> 42#include <netinet/in.h> | 49 50#include <netinet/in_systm.h> 51#include <netinet/in.h> |
52#include <netinet/ip6.h> |
|
43#include <netinet/ip.h> 44#include <netinet/tcp.h> 45#include <netinet/tcp_lro.h> 46 | 53#include <netinet/ip.h> 54#include <netinet/tcp.h> 55#include <netinet/tcp_lro.h> 56 |
47#include <machine/bus.h> | |
48#include <machine/in_cksum.h> 49 | 57#include <machine/in_cksum.h> 58 |
59#ifndef LRO_ENTRIES 60#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ 61#endif |
|
50 | 62 |
51static uint16_t do_csum_data(uint16_t *raw, int len) 52{ 53 uint32_t csum; 54 csum = 0; 55 while (len > 0) { 56 csum += *raw; 57 raw++; 58 csum += *raw; 59 raw++; 60 len -= 4; 61 } 62 csum = (csum >> 16) + (csum & 0xffff); 63 csum = (csum >> 16) + (csum & 0xffff); 64 return (uint16_t)csum; 65} | 63#define TCP_LRO_UPDATE_CSUM 1 64#ifndef TCP_LRO_UPDATE_CSUM 65#define TCP_LRO_INVALID_CSUM 0x0000 66#endif |
66 | 67 |
67/* 68 * Allocate and init the LRO data structures 69 */ | |
70int | 68int |
71tcp_lro_init(struct lro_ctrl *cntl) | 69tcp_lro_init(struct lro_ctrl *lc) |
72{ | 70{ |
73 struct lro_entry *lro; 74 int i, error = 0; | 71 struct lro_entry *le; 72 int error, i; |
75 | 73 |
76 SLIST_INIT(&cntl->lro_free); 77 SLIST_INIT(&cntl->lro_active); | 74 lc->lro_bad_csum = 0; 75 lc->lro_queued = 0; 76 lc->lro_flushed = 0; 77 lc->lro_cnt = 0; 78 SLIST_INIT(&lc->lro_free); 79 SLIST_INIT(&lc->lro_active); |
78 | 80 |
79 cntl->lro_bad_csum = 0; 80 cntl->lro_queued = 0; 81 cntl->lro_flushed = 0; 82 | 81 error = 0; |
83 for (i = 0; i < LRO_ENTRIES; i++) { | 82 for (i = 0; i < LRO_ENTRIES; i++) { |
84 lro = (struct lro_entry *) malloc(sizeof (struct lro_entry), 85 M_DEVBUF, M_NOWAIT | M_ZERO); 86 if (lro == NULL) { | 83 le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF, 84 M_NOWAIT | M_ZERO); 85 if (le == NULL) { |
87 if (i == 0) 88 error = ENOMEM; 89 break; 90 } | 86 if (i == 0) 87 error = ENOMEM; 88 break; 89 } |
91 cntl->lro_cnt = i; 92 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next); | 90 lc->lro_cnt = i + 1; 91 SLIST_INSERT_HEAD(&lc->lro_free, le, next); |
93 } 94 95 return (error); 96} 97 98void | 92 } 93 94 return (error); 95} 96 97void |
99tcp_lro_free(struct lro_ctrl *cntl) | 98tcp_lro_free(struct lro_ctrl *lc) |
100{ | 99{ |
101 struct lro_entry *entry; | 100 struct lro_entry *le; |
102 | 101 |
103 while (!SLIST_EMPTY(&cntl->lro_free)) { 104 entry = SLIST_FIRST(&cntl->lro_free); 105 SLIST_REMOVE_HEAD(&cntl->lro_free, next); 106 free(entry, M_DEVBUF); | 102 while (!SLIST_EMPTY(&lc->lro_free)) { 103 le = SLIST_FIRST(&lc->lro_free); 104 SLIST_REMOVE_HEAD(&lc->lro_free, next); 105 free(le, M_DEVBUF); |
107 } 108} 109 | 106 } 107} 108 |
109#ifdef TCP_LRO_UPDATE_CSUM 110static uint16_t 111tcp_lro_csum_th(struct tcphdr *th) 112{ 113 uint32_t ch; 114 uint16_t *p, l; 115 116 ch = th->th_sum = 0x0000; 117 l = th->th_off; 118 p = (uint16_t *)th; 119 while (l > 0) { 120 ch += *p; 121 p++; 122 ch += *p; 123 p++; 124 l--; 125 } 126 while (ch > 0xffff) 127 ch = (ch >> 16) + (ch & 0xffff); 128 129 return (ch & 0xffff); 130} 131 132static uint16_t 133tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th, 134 uint16_t tcp_data_len, uint16_t csum) 135{ 136 uint32_t c; 137 uint16_t cs; 138 139 c = csum; 140 141 /* Remove length from checksum. */ 142 switch (le->eh_type) { 143#ifdef INET6 144 case ETHERTYPE_IPV6: 145 { 146 struct ip6_hdr *ip6; 147 148 ip6 = (struct ip6_hdr *)l3hdr; 149 if (le->append_cnt == 0) 150 cs = ip6->ip6_plen; 151 else { 152 uint32_t cx; 153 154 cx = ntohs(ip6->ip6_plen); 155 cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0); 156 } 157 break; 158 } 159#endif 160#ifdef INET 161 case ETHERTYPE_IP: 162 { 163 struct ip *ip4; 164 165 ip4 = (struct ip *)l3hdr; 166 if (le->append_cnt == 0) 167 cs = ip4->ip_len; 168 else { 169 cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4), 170 IPPROTO_TCP); 171 cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr, 172 htons(cs)); 173 } 174 break; 175 } 176#endif 177 default: 178 cs = 0; /* Keep compiler happy. */ 179 } 180 181 cs = ~cs; 182 c += cs; 183 184 /* Remove TCP header csum. */ 185 cs = ~tcp_lro_csum_th(th); 186 c += cs; 187 while (c > 0xffff) 188 c = (c >> 16) + (c & 0xffff); 189 190 return (c & 0xffff); 191} 192#endif 193 |
|
110void | 194void |
111tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro) | 195tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) |
112{ | 196{ |
113 struct ifnet *ifp; 114 struct ip *ip; 115 struct tcphdr *tcp; 116 uint32_t *ts_ptr; 117 uint32_t tcplen, tcp_csum; | |
118 | 197 |
198 if (le->append_cnt > 0) { 199 struct tcphdr *th; 200 uint16_t p_len; |
|
119 | 201 |
120 if (lro->append_cnt) { 121 /* incorporate the new len into the ip header and 122 * re-calculate the checksum */ 123 ip = lro->ip; 124 ip->ip_len = htons(lro->len - ETHER_HDR_LEN); 125 ip->ip_sum = 0; 126 ip->ip_sum = 0xffff ^ 127 do_csum_data((uint16_t*)ip, 128 sizeof (*ip)); | 202 p_len = htons(le->p_len); 203 switch (le->eh_type) { 204#ifdef INET6 205 case ETHERTYPE_IPV6: 206 { 207 struct ip6_hdr *ip6; |
129 | 208 |
130 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED | 131 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 132 lro->m_head->m_pkthdr.csum_data = 0xffff; 133 lro->m_head->m_pkthdr.len = lro->len; | 209 ip6 = le->le_ip6; 210 ip6->ip6_plen = p_len; 211 th = (struct tcphdr *)(ip6 + 1); 212 le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 213 CSUM_PSEUDO_HDR; 214 le->p_len += ETHER_HDR_LEN + sizeof(*ip6); 215 break; 216 } 217#endif 218#ifdef INET 219 case ETHERTYPE_IP: 220 { 221 struct ip *ip4; 222#ifdef TCP_LRO_UPDATE_CSUM 223 uint32_t cl; 224 uint16_t c; 225#endif |
134 | 226 |
135 /* incorporate the latest ack into the tcp header */ 136 tcp = (struct tcphdr *) (ip + 1); 137 tcp->th_ack = lro->ack_seq; 138 tcp->th_win = lro->window; 139 /* incorporate latest timestamp into the tcp header */ 140 if (lro->timestamp) { 141 ts_ptr = (uint32_t *)(tcp + 1); 142 ts_ptr[1] = htonl(lro->tsval); 143 ts_ptr[2] = lro->tsecr; | 227 ip4 = le->le_ip4; 228#ifdef TCP_LRO_UPDATE_CSUM 229 /* Fix IP header checksum for new length. */ 230 c = ~ip4->ip_sum; 231 cl = c; 232 c = ~ip4->ip_len; 233 cl += c + p_len; 234 while (cl > 0xffff) 235 cl = (cl >> 16) + (cl & 0xffff); 236 c = cl; 237 ip4->ip_sum = ~c; 238#else 239 ip4->ip_sum = TCP_LRO_INVALID_CSUM; 240#endif 241 ip4->ip_len = p_len; 242 th = (struct tcphdr *)(ip4 + 1); 243 le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 244 CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; 245 le->p_len += ETHER_HDR_LEN; 246 break; |
144 } | 247 } |
145 /* 146 * update checksum in tcp header by re-calculating the 147 * tcp pseudoheader checksum, and adding it to the checksum 148 * of the tcp payload data 149 */ 150 tcp->th_sum = 0; 151 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN; 152 tcp_csum = lro->data_csum; 153 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 154 htons(tcplen + IPPROTO_TCP)); 155 tcp_csum += do_csum_data((uint16_t*)tcp, 156 tcp->th_off << 2); 157 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); 158 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); 159 tcp->th_sum = 0xffff ^ tcp_csum; | 248#endif 249 default: 250 th = NULL; /* Keep compiler happy. */ 251 } 252 le->m_head->m_pkthdr.csum_data = 0xffff; 253 le->m_head->m_pkthdr.len = le->p_len; 254 255 /* Incorporate the latest ACK into the TCP header. */ 256 th->th_ack = le->ack_seq; 257 th->th_win = le->window; 258 /* Incorporate latest timestamp into the TCP header. */ 259 if (le->timestamp != 0) { 260 uint32_t *ts_ptr; 261 262 ts_ptr = (uint32_t *)(th + 1); 263 ts_ptr[1] = htonl(le->tsval); 264 ts_ptr[2] = le->tsecr; 265 } 266#ifdef TCP_LRO_UPDATE_CSUM 267 /* Update the TCP header checksum. */ 268 le->ulp_csum += p_len; 269 le->ulp_csum += tcp_lro_csum_th(th); 270 while (le->ulp_csum > 0xffff) 271 le->ulp_csum = (le->ulp_csum >> 16) + 272 (le->ulp_csum & 0xffff); 273 th->th_sum = (le->ulp_csum & 0xffff); 274 th->th_sum = ~th->th_sum; 275#else 276 th->th_sum = TCP_LRO_INVALID_CSUM; 277#endif |
160 } | 278 } |
161 ifp = cntl->ifp; 162 (*ifp->if_input)(cntl->ifp, lro->m_head); 163 cntl->lro_queued += lro->append_cnt + 1; 164 cntl->lro_flushed++; 165 lro->m_head = NULL; 166 lro->timestamp = 0; 167 lro->append_cnt = 0; 168 SLIST_INSERT_HEAD(&cntl->lro_free, lro, next); | 279 280 (*lc->ifp->if_input)(lc->ifp, le->m_head); 281 lc->lro_queued += le->append_cnt + 1; 282 lc->lro_flushed++; 283 bzero(le, sizeof(*le)); 284 SLIST_INSERT_HEAD(&lc->lro_free, le, next); |
169} 170 | 285} 286 |
171int 172tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum) | 287#ifdef INET6 288static int 289tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, 290 struct tcphdr **th) |
173{ | 291{ |
174 struct ether_header *eh; 175 struct ip *ip; 176 struct tcphdr *tcp; 177 uint32_t *ts_ptr; 178 struct mbuf *m_nxt, *m_tail; 179 struct lro_entry *lro; 180 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len; 181 int opt_bytes, trim, csum_flags; 182 uint32_t seq, tmp_csum, device_mtu; | |
183 | 292 |
293 /* XXX-BZ we should check the flow-label. */ |
|
184 | 294 |
185 eh = mtod(m_head, struct ether_header *); 186 if (eh->ether_type != htons(ETHERTYPE_IP)) 187 return 1; 188 ip = (struct ip *) (eh + 1); 189 if (ip->ip_p != IPPROTO_TCP) 190 return 1; 191 192 /* ensure there are no options */ 193 if ((ip->ip_hl << 2) != sizeof (*ip)) 194 return -1; | 295 /* XXX-BZ We do not yet support ext. hdrs. */ 296 if (ip6->ip6_nxt != IPPROTO_TCP) 297 return (TCP_LRO_NOT_SUPPORTED); |
195 | 298 |
196 /* .. and the packet is not fragmented */ 197 if (ip->ip_off & htons(IP_MF|IP_OFFMASK)) 198 return -1; | 299 /* Find the TCP header. */ 300 *th = (struct tcphdr *)(ip6 + 1); |
199 | 301 |
200 /* verify that the IP header checksum is correct */ 201 csum_flags = m_head->m_pkthdr.csum_flags; | 302 return (0); 303} 304#endif 305 306#ifdef INET 307static int 308tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4, 309 struct tcphdr **th) 310{ 311 int csum_flags; 312 uint16_t csum; 313 314 if (ip4->ip_p != IPPROTO_TCP) 315 return (TCP_LRO_NOT_SUPPORTED); 316 317 /* Ensure there are no options. */ 318 if ((ip4->ip_hl << 2) != sizeof (*ip4)) 319 return (TCP_LRO_CANNOT); 320 321 /* .. and the packet is not fragmented. */ 322 if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) 323 return (TCP_LRO_CANNOT); 324 325 /* Legacy IP has a header checksum that needs to be correct. */ 326 csum_flags = m->m_pkthdr.csum_flags; |
202 if (csum_flags & CSUM_IP_CHECKED) { 203 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { | 327 if (csum_flags & CSUM_IP_CHECKED) { 328 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { |
204 cntl->lro_bad_csum++; 205 return -1; | 329 lc->lro_bad_csum++; 330 return (TCP_LRO_CANNOT); |
206 } 207 } else { | 331 } 332 } else { |
208 tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip)); 209 if (__predict_false((tmp_csum ^ 0xffff) != 0)) { 210 cntl->lro_bad_csum++; 211 return -1; | 333 csum = in_cksum_hdr(ip4); 334 if (__predict_false((csum ^ 0xffff) != 0)) { 335 lc->lro_bad_csum++; 336 return (TCP_LRO_CANNOT); |
212 } 213 } | 337 } 338 } |
214 215 /* find the TCP header */ 216 tcp = (struct tcphdr *) (ip + 1); | |
217 | 339 |
218 /* Get the TCP checksum if we don't have it */ 219 if (!csum) 220 csum = tcp->th_sum; | 340 /* Find the TCP header (we assured there are no IP options). */ 341 *th = (struct tcphdr *)(ip4 + 1); |
221 | 342 |
222 /* ensure no bits set besides ack or psh */ 223 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) 224 return -1; | 343 return (0); 344} 345#endif |
225 | 346 |
226 /* check for timestamps. Since the only option we handle are 227 timestamps, we only have to handle the simple case of 228 aligned timestamps */ | 347int 348tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) 349{ 350 struct lro_entry *le; 351 struct ether_header *eh; 352#ifdef INET6 353 struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ 354#endif 355#ifdef INET 356 struct ip *ip4 = NULL; /* Keep compiler happy. */ 357#endif 358 struct tcphdr *th; 359 void *l3hdr = NULL; /* Keep compiler happy. */ 360 uint32_t *ts_ptr; 361 tcp_seq seq; 362 int error, ip_len, l; 363 uint16_t eh_type, tcp_data_len; |
229 | 364 |
230 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp); 231 tcp_hdr_len = sizeof (*tcp) + opt_bytes; 232 ts_ptr = (uint32_t *)(tcp + 1); 233 if (opt_bytes != 0) { 234 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) || 235 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 236 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))) 237 return -1; | 365 /* We expect a contiguous header [eh, ip, tcp]. */ 366 367 eh = mtod(m, struct ether_header *); 368 eh_type = ntohs(eh->ether_type); 369 switch (eh_type) { 370#ifdef INET6 371 case ETHERTYPE_IPV6: 372 l3hdr = ip6 = (struct ip6_hdr *)(eh + 1); 373 error = tcp_lro_rx_ipv6(lc, m, ip6, &th); 374 if (error != 0) 375 return (error); 376 tcp_data_len = ntohs(ip6->ip6_plen); 377 ip_len = sizeof(*ip6) + tcp_data_len; 378 break; 379#endif 380#ifdef INET 381 case ETHERTYPE_IP: 382 l3hdr = ip4 = (struct ip *)(eh + 1); 383 error = tcp_lro_rx_ipv4(lc, m, ip4, &th); 384 if (error != 0) 385 return (error); 386 ip_len = ntohs(ip4->ip_len); 387 tcp_data_len = ip_len - sizeof(*ip4); 388 break; 389#endif 390 /* XXX-BZ what happens in case of VLAN(s)? */ 391 default: 392 return (TCP_LRO_NOT_SUPPORTED); |
238 } 239 | 393 } 394 |
240 ip_len = ntohs(ip->ip_len); 241 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip); 242 | 395 /* 396 * If the frame is padded beyond the end of the IP packet, then we must 397 * trim the extra bytes off. 398 */ 399 l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len); 400 if (l != 0) { 401 if (l < 0) 402 /* Truncated packet. */ 403 return (TCP_LRO_CANNOT); |
243 | 404 |
244 /* 245 * If frame is padded beyond the end of the IP packet, 246 * then we must trim the extra bytes off the end. | 405 m_adj(m, -l); 406 } 407 408 /* 409 * Check TCP header constraints. |
247 */ | 410 */ |
248 tot_len = m_head->m_pkthdr.len; 249 trim = tot_len - (ip_len + ETHER_HDR_LEN); 250 if (trim != 0) { 251 if (trim < 0) { 252 /* truncated packet */ 253 return -1; | 411 /* Ensure no bits set besides ACK or PSH. */ 412 if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) 413 return (TCP_LRO_CANNOT); 414 415 /* XXX-BZ We lose an ACK|PUSH flag concatenating multiple segments. */ 416 /* XXX-BZ Ideally we'd flush on PUSH? */ 417 418 /* 419 * Check for timestamps. 420 * Since the only option we handle are timestamps, we only have to 421 * handle the simple case of aligned timestamps. 422 */ 423 l = (th->th_off << 2); 424 tcp_data_len -= l; 425 l -= sizeof(*th); 426 ts_ptr = (uint32_t *)(th + 1); 427 if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || 428 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 429 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) 430 return (TCP_LRO_CANNOT); 431 432 /* If the driver did not pass in the checksum, set it now. */ 433 if (csum == 0x0000) 434 csum = th->th_sum; 435 436 seq = ntohl(th->th_seq); 437 438 /* Try to find a matching previous segment. */ 439 SLIST_FOREACH(le, &lc->lro_active, next) { 440 if (le->eh_type != eh_type) 441 continue; 442 if (le->source_port != th->th_sport || 443 le->dest_port != th->th_dport) 444 continue; 445 switch (eh_type) { 446#ifdef INET6 447 case ETHERTYPE_IPV6: 448 if (bcmp(&le->source_ip6, &ip6->ip6_src, 449 sizeof(struct in6_addr)) != 0 || 450 bcmp(&le->dest_ip6, &ip6->ip6_dst, 451 sizeof(struct in6_addr)) != 0) 452 continue; 453 break; 454#endif 455#ifdef INET 456 case ETHERTYPE_IP: 457 if (le->source_ip4 != ip4->ip_src.s_addr || 458 le->dest_ip4 != ip4->ip_dst.s_addr) 459 continue; 460 break; 461#endif |
254 } | 462 } |
255 m_adj(m_head, -trim); 256 tot_len = m_head->m_pkthdr.len; 257 } | |
258 | 463 |
259 m_nxt = m_head; 260 m_tail = NULL; /* -Wuninitialized */ 261 while (m_nxt != NULL) { 262 m_tail = m_nxt; 263 m_nxt = m_tail->m_next; 264 } | 464 /* Flush now if appending will result in overflow. */ 465 if (le->p_len > (65535 - tcp_data_len)) { 466 SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 467 tcp_lro_flush(lc, le); 468 break; 469 } |
265 | 470 |
266 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len; 267 seq = ntohl(tcp->th_seq); | 471 /* Try to append the new segment. */ 472 if (__predict_false(seq != le->next_seq || 473 (tcp_data_len == 0 && le->ack_seq == th->th_ack))) { 474 /* Out of order packet or duplicate ACK. */ 475 SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 476 tcp_lro_flush(lc, le); 477 return (TCP_LRO_CANNOT); 478 } |
268 | 479 |
269 SLIST_FOREACH(lro, &cntl->lro_active, next) { 270 if (lro->source_port == tcp->th_sport && 271 lro->dest_port == tcp->th_dport && 272 lro->source_ip == ip->ip_src.s_addr && 273 lro->dest_ip == ip->ip_dst.s_addr) { 274 /* Flush now if appending will result in overflow. */ 275 if (lro->len > (65535 - tcp_data_len)) { 276 SLIST_REMOVE(&cntl->lro_active, lro, 277 lro_entry, next); 278 tcp_lro_flush(cntl, lro); 279 break; 280 } | 480 if (l != 0) { 481 uint32_t tsval = ntohl(*(ts_ptr + 1)); 482 /* Make sure timestamp values are increasing. */ 483 /* XXX-BZ flip and use TSTMP_GEQ macro for this? */ 484 if (__predict_false(le->tsval > tsval || 485 *(ts_ptr + 2) == 0)) 486 return (TCP_LRO_CANNOT); 487 le->tsval = tsval; 488 le->tsecr = *(ts_ptr + 2); 489 } |
281 | 490 |
282 /* Try to append it */ | 491 le->next_seq += tcp_data_len; 492 le->ack_seq = th->th_ack; 493 le->window = th->th_win; 494 le->append_cnt++; |
283 | 495 |
284 if (__predict_false(seq != lro->next_seq || 285 (tcp_data_len == 0 && 286 lro->ack_seq == tcp->th_ack))) { 287 /* out of order packet or dup ack */ 288 SLIST_REMOVE(&cntl->lro_active, lro, 289 lro_entry, next); 290 tcp_lro_flush(cntl, lro); 291 return -1; 292 } | 496#ifdef TCP_LRO_UPDATE_CSUM 497 le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th, 498 tcp_data_len, ~csum); 499#endif |
293 | 500 |
294 if (opt_bytes) { 295 uint32_t tsval = ntohl(*(ts_ptr + 1)); 296 /* make sure timestamp values are increasing */ 297 if (__predict_false(lro->tsval > tsval || 298 *(ts_ptr + 2) == 0)) { 299 return -1; 300 } 301 lro->tsval = tsval; 302 lro->tsecr = *(ts_ptr + 2); 303 } | 501 if (tcp_data_len == 0) { 502 m_freem(m); 503 return (0); 504 } |
304 | 505 |
305 lro->next_seq += tcp_data_len; 306 lro->ack_seq = tcp->th_ack; 307 lro->window = tcp->th_win; 308 lro->append_cnt++; 309 if (tcp_data_len == 0) { 310 m_freem(m_head); 311 return 0; 312 } 313 /* subtract off the checksum of the tcp header 314 * from the hardware checksum, and add it to the 315 * stored tcp data checksum. Byteswap the checksum 316 * if the total length so far is odd 317 */ 318 tmp_csum = do_csum_data((uint16_t*)tcp, 319 tcp_hdr_len); 320 csum = csum + (tmp_csum ^ 0xffff); 321 csum = (csum & 0xffff) + (csum >> 16); 322 csum = (csum & 0xffff) + (csum >> 16); 323 if (lro->len & 0x1) { 324 /* Odd number of bytes so far, flip bytes */ 325 csum = ((csum << 8) | (csum >> 8)) & 0xffff; 326 } 327 csum = csum + lro->data_csum; 328 csum = (csum & 0xffff) + (csum >> 16); 329 csum = (csum & 0xffff) + (csum >> 16); 330 lro->data_csum = csum; | 506 le->p_len += tcp_data_len; |
331 | 507 |
332 lro->len += tcp_data_len; | 508 /* 509 * Adjust the mbuf so that m_data points to the first byte of 510 * the ULP payload. Adjust the mbuf to avoid complications and 511 * append new segment to existing mbuf chain. 512 */ 513 m_adj(m, m->m_pkthdr.len - tcp_data_len); 514 m->m_flags &= ~M_PKTHDR; |
333 | 515 |
334 /* adjust mbuf so that m->m_data points to 335 the first byte of the payload */ 336 m_adj(m_head, hlen); 337 /* append mbuf chain */ 338 lro->m_tail->m_next = m_head; 339 /* advance the last pointer */ 340 lro->m_tail = m_tail; 341 /* flush packet if required */ 342 device_mtu = cntl->ifp->if_mtu; 343 if (lro->len > (65535 - device_mtu)) { 344 SLIST_REMOVE(&cntl->lro_active, lro, 345 lro_entry, next); 346 tcp_lro_flush(cntl, lro); 347 } 348 return 0; | 516 le->m_tail->m_next = m; 517 le->m_tail = m_last(m); 518 519 /* 520 * If a possible next full length packet would cause an 521 * overflow, pro-actively flush now. 522 */ 523 if (le->p_len > (65535 - lc->ifp->if_mtu)) { 524 SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 525 tcp_lro_flush(lc, le); |
349 } | 526 } |
527 528 return (0); |
|
350 } 351 | 529 } 530 |
352 if (SLIST_EMPTY(&cntl->lro_free)) 353 return -1; | 531 /* Try to find an empty slot. */ 532 if (SLIST_EMPTY(&lc->lro_free)) 533 return (TCP_LRO_CANNOT); |
354 | 534 |
355 /* start a new chain */ 356 lro = SLIST_FIRST(&cntl->lro_free); 357 SLIST_REMOVE_HEAD(&cntl->lro_free, next); 358 SLIST_INSERT_HEAD(&cntl->lro_active, lro, next); 359 lro->source_port = tcp->th_sport; 360 lro->dest_port = tcp->th_dport; 361 lro->source_ip = ip->ip_src.s_addr; 362 lro->dest_ip = ip->ip_dst.s_addr; 363 lro->next_seq = seq + tcp_data_len; 364 lro->mss = tcp_data_len; 365 lro->ack_seq = tcp->th_ack; 366 lro->window = tcp->th_win; | 535 /* Start a new segment chain. */ 536 le = SLIST_FIRST(&lc->lro_free); 537 SLIST_REMOVE_HEAD(&lc->lro_free, next); 538 SLIST_INSERT_HEAD(&lc->lro_active, le, next); |
367 | 539 |
368 /* save the checksum of just the TCP payload by 369 * subtracting off the checksum of the TCP header from 370 * the entire hardware checksum 371 * Since IP header checksum is correct, checksum over 372 * the IP header is -0. Substracting -0 is unnecessary. 373 */ 374 tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len); 375 csum = csum + (tmp_csum ^ 0xffff); 376 csum = (csum & 0xffff) + (csum >> 16); 377 csum = (csum & 0xffff) + (csum >> 16); 378 lro->data_csum = csum; 379 380 lro->ip = ip; 381 /* record timestamp if it is present */ 382 if (opt_bytes) { 383 lro->timestamp = 1; 384 lro->tsval = ntohl(*(ts_ptr + 1)); 385 lro->tsecr = *(ts_ptr + 2); | 540 /* Start filling in details. */ 541 switch (eh_type) { 542#ifdef INET6 543 case ETHERTYPE_IPV6: 544 le->le_ip6 = ip6; 545 le->source_ip6 = ip6->ip6_src; 546 le->dest_ip6 = ip6->ip6_dst; 547 le->eh_type = eh_type; 548 le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6); 549 break; 550#endif 551#ifdef INET 552 case ETHERTYPE_IP: 553 le->le_ip4 = ip4; 554 le->source_ip4 = ip4->ip_src.s_addr; 555 le->dest_ip4 = ip4->ip_dst.s_addr; 556 le->eh_type = eh_type; 557 le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN; 558 break; 559#endif |
386 } | 560 } |
387 lro->len = tot_len; 388 lro->m_head = m_head; 389 lro->m_tail = m_tail; 390 return 0; | 561 le->source_port = th->th_sport; 562 le->dest_port = th->th_dport; 563 564 le->next_seq = seq + tcp_data_len; 565 le->ack_seq = th->th_ack; 566 le->window = th->th_win; 567 if (l != 0) { 568 le->timestamp = 1; 569 le->tsval = ntohl(*(ts_ptr + 1)); 570 le->tsecr = *(ts_ptr + 2); 571 } 572 573#ifdef TCP_LRO_UPDATE_CSUM 574 /* 575 * Do not touch the csum of the first packet. However save the 576 * "adjusted" checksum of just the source and destination addresses, 577 * the next header and the TCP payload. The length and TCP header 578 * parts may change, so we remove those from the saved checksum and 579 * re-add with final values on tcp_lro_flush() if needed. 580 */ 581 KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n", 582 __func__, le, le->ulp_csum)); 583 584 le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, 585 ~csum); 586 th->th_sum = csum; /* Restore checksum on first packet. */ 587#endif 588 589 le->m_head = m; 590 le->m_tail = m_last(m); 591 592 return (0); |
391} | 593} |
594 595/* end */ |
|