tcp_sack.c revision 144855
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95 30 * $FreeBSD: head/sys/netinet/tcp_sack.c 144855 2005-04-10 05:19:22Z ps $ 31 */ 32 33/*- 34 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995 62 * 63 * NRL grants permission for redistribution and use in source and binary 64 * forms, with or without modification, of the software and documentation 65 * created at NRL provided that the following conditions are met: 66 * 67 * 1. Redistributions of source code must retain the above copyright 68 * notice, this list of conditions and the following disclaimer. 69 * 2. Redistributions in binary form must reproduce the above copyright 70 * notice, this list of conditions and the following disclaimer in the 71 * documentation and/or other materials provided with the distribution. 72 * 3. All advertising materials mentioning features or use of this software 73 * must display the following acknowledgements: 74 * This product includes software developed by the University of 75 * California, Berkeley and its contributors. 76 * This product includes software developed at the Information 77 * Technology Division, US Naval Research Laboratory. 78 * 4. Neither the name of the NRL nor the names of its contributors 79 * may be used to endorse or promote products derived from this software 80 * without specific prior written permission. 81 * 82 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 83 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 84 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 85 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 86 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 87 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 88 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 89 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 90 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 91 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 92 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 93 * 94 * The views and conclusions contained in the software and documentation 95 * are those of the authors and should not be interpreted as representing 96 * official policies, either expressed or implied, of the US Naval 97 * Research Laboratory (NRL). 98 */ 99#include "opt_inet.h" 100#include "opt_inet6.h" 101#include "opt_ipsec.h" 102#include "opt_tcpdebug.h" 103#include "opt_tcp_input.h" 104#include "opt_tcp_sack.h" 105 106#include <sys/param.h> 107#include <sys/systm.h> 108#include <sys/kernel.h> 109#include <sys/sysctl.h> 110#include <sys/malloc.h> 111#include <sys/mbuf.h> 112#include <sys/proc.h> /* for proc0 declaration */ 113#include <sys/protosw.h> 114#include <sys/socket.h> 115#include <sys/socketvar.h> 116#include <sys/syslog.h> 117#include <sys/systm.h> 118 119#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ 120 121#include <vm/uma.h> 122 123#include <net/if.h> 124#include <net/route.h> 125 126#include <netinet/in.h> 127#include <netinet/in_systm.h> 128#include <netinet/ip.h> 129#include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */ 130#include <netinet/in_var.h> 131#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */ 132#include <netinet/in_pcb.h> 133#include <netinet/ip_var.h> 134#include <netinet/ip6.h> 135#include <netinet/icmp6.h> 136#include <netinet6/nd6.h> 137#include <netinet6/ip6_var.h> 138#include <netinet6/in6_pcb.h> 139#include <netinet/tcp.h> 140#include <netinet/tcp_fsm.h> 141#include <netinet/tcp_seq.h> 142#include <netinet/tcp_timer.h> 143#include <netinet/tcp_var.h> 144#include <netinet6/tcp6_var.h> 145#include <netinet/tcpip.h> 146#ifdef TCPDEBUG 147#include <netinet/tcp_debug.h> 148#endif /* TCPDEBUG */ 149 150#ifdef FAST_IPSEC 151#include <netipsec/ipsec.h> 152#include <netipsec/ipsec6.h> 153#endif 154 155#ifdef IPSEC 156#include <netinet6/ipsec.h> 157#include <netinet6/ipsec6.h> 158#include <netkey/key.h> 159#endif /*IPSEC*/ 160#include <machine/in_cksum.h> 161 162extern struct uma_zone *sack_hole_zone; 163 164SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); 165int tcp_do_sack = 1; 166SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, 167 &tcp_do_sack, 0, "Enable/Disable TCP SACK support"); 168TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack); 169 170static int tcp_sack_maxholes = 128; 171SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW, 172 &tcp_sack_maxholes, 0, 173 "Maximum number of TCP SACK holes allowed per connection"); 174 175static int tcp_sack_globalmaxholes = 65536; 176SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW, 177 &tcp_sack_globalmaxholes, 0, 178 "Global maximum number of TCP SACK holes"); 179 180static int tcp_sack_globalholes = 0; 181SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_RD, 182 &tcp_sack_globalholes, 0, 183 "Global number of TCP SACK holes currently allocated"); 184/* 185 * This function is called upon receipt of new valid data (while not in header 186 * prediction mode), and it updates the ordered list of sacks. 187 */ 188void 189tcp_update_sack_list(tp, rcv_laststart, rcv_lastend) 190 struct tcpcb *tp; 191 tcp_seq rcv_laststart, rcv_lastend; 192{ 193 /* 194 * First reported block MUST be the most recent one. Subsequent 195 * blocks SHOULD be in the order in which they arrived at the 196 * receiver. These two conditions make the implementation fully 197 * compliant with RFC 2018. 198 */ 199 int i, j = 0, count = 0, lastpos = -1; 200 struct sackblk sack, firstsack, temp[MAX_SACK_BLKS]; 201 202 INP_LOCK_ASSERT(tp->t_inpcb); 203 /* First clean up current list of sacks */ 204 for (i = 0; i < tp->rcv_numsacks; i++) { 205 sack = tp->sackblks[i]; 206 if (sack.start == 0 && sack.end == 0) { 207 count++; /* count = number of blocks to be discarded */ 208 continue; 209 } 210 if (SEQ_LEQ(sack.end, tp->rcv_nxt)) { 211 tp->sackblks[i].start = tp->sackblks[i].end = 0; 212 count++; 213 } else { 214 temp[j].start = tp->sackblks[i].start; 215 temp[j++].end = tp->sackblks[i].end; 216 } 217 } 218 tp->rcv_numsacks -= count; 219 if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */ 220 tcp_clean_sackreport(tp); 221 if (SEQ_LT(tp->rcv_nxt, rcv_laststart)) { 222 /* ==> need first sack block */ 223 tp->sackblks[0].start = rcv_laststart; 224 tp->sackblks[0].end = rcv_lastend; 225 tp->rcv_numsacks = 1; 226 } 227 return; 228 } 229 /* Otherwise, sack blocks are already present. */ 230 for (i = 0; i < tp->rcv_numsacks; i++) 231 tp->sackblks[i] = temp[i]; /* first copy back sack list */ 232 if (SEQ_GEQ(tp->rcv_nxt, rcv_lastend)) 233 return; /* sack list remains unchanged */ 234 /* 235 * From here, segment just received should be (part of) the 1st sack. 236 * Go through list, possibly coalescing sack block entries. 237 */ 238 firstsack.start = rcv_laststart; 239 firstsack.end = rcv_lastend; 240 for (i = 0; i < tp->rcv_numsacks; i++) { 241 sack = tp->sackblks[i]; 242 if (SEQ_LT(sack.end, firstsack.start) || 243 SEQ_GT(sack.start, firstsack.end)) 244 continue; /* no overlap */ 245 if (sack.start == firstsack.start && sack.end == firstsack.end){ 246 /* 247 * identical block; delete it here since we will 248 * move it to the front of the list. 249 */ 250 tp->sackblks[i].start = tp->sackblks[i].end = 0; 251 lastpos = i; /* last posn with a zero entry */ 252 continue; 253 } 254 if (SEQ_LEQ(sack.start, firstsack.start)) 255 firstsack.start = sack.start; /* merge blocks */ 256 if (SEQ_GEQ(sack.end, firstsack.end)) 257 firstsack.end = sack.end; /* merge blocks */ 258 tp->sackblks[i].start = tp->sackblks[i].end = 0; 259 lastpos = i; /* last posn with a zero entry */ 260 } 261 if (lastpos != -1) { /* at least one merge */ 262 for (i = 0, j = 1; i < tp->rcv_numsacks; i++) { 263 sack = tp->sackblks[i]; 264 if (sack.start == 0 && sack.end == 0) 265 continue; 266 temp[j++] = sack; 267 } 268 tp->rcv_numsacks = j; /* including first blk (added later) */ 269 for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */ 270 tp->sackblks[i] = temp[i]; 271 } else { /* no merges -- shift sacks by 1 */ 272 if (tp->rcv_numsacks < MAX_SACK_BLKS) 273 tp->rcv_numsacks++; 274 for (i = tp->rcv_numsacks-1; i > 0; i--) 275 tp->sackblks[i] = tp->sackblks[i-1]; 276 } 277 tp->sackblks[0] = firstsack; 278 return; 279} 280 281/* 282 * Delete all receiver-side SACK information. 283 */ 284void 285tcp_clean_sackreport(tp) 286 struct tcpcb *tp; 287{ 288 int i; 289 290 INP_LOCK_ASSERT(tp->t_inpcb); 291 tp->rcv_numsacks = 0; 292 for (i = 0; i < MAX_SACK_BLKS; i++) 293 tp->sackblks[i].start = tp->sackblks[i].end=0; 294} 295 296/* 297 * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue, 298 * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list 299 * of holes (oldest to newest, in terms of the sequence space). 300 */ 301int 302tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) 303{ 304 int tmp_olen; 305 u_char *tmp_cp; 306 struct sackhole *cur, *p, *temp; 307 308 INP_LOCK_ASSERT(tp->t_inpcb); 309 if (!tp->sack_enable) 310 return (1); 311 if ((th->th_flags & TH_ACK) == 0) 312 return (1); 313 /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */ 314 if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) 315 return (1); 316 /* If ack is outside [snd_una, snd_max], ignore the SACK options */ 317 if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max)) 318 return (1); 319 tmp_cp = cp + 2; 320 tmp_olen = optlen - 2; 321 tcpstat.tcps_sack_rcv_blocks++; 322 if (tp->snd_numholes < 0) /* XXX panic? */ 323 tp->snd_numholes = 0; 324 if (tp->t_maxseg == 0) 325 panic("tcp_sack_option"); /* Should never happen */ 326 while (tmp_olen > 0) { 327 struct sackblk sack; 328 329 bcopy(tmp_cp, (char *) &(sack.start), sizeof(tcp_seq)); 330 sack.start = ntohl(sack.start); 331 bcopy(tmp_cp + sizeof(tcp_seq), 332 (char *) &(sack.end), sizeof(tcp_seq)); 333 sack.end = ntohl(sack.end); 334 tmp_olen -= TCPOLEN_SACK; 335 tmp_cp += TCPOLEN_SACK; 336 if (SEQ_LEQ(sack.end, sack.start)) 337 continue; /* bad SACK fields */ 338 if (SEQ_LEQ(sack.end, tp->snd_una)) 339 continue; /* old block */ 340 if (SEQ_GT(th->th_ack, tp->snd_una)) { 341 if (SEQ_LT(sack.start, th->th_ack)) 342 continue; 343 } 344 if (SEQ_GT(sack.end, tp->snd_max)) 345 continue; 346 if (tp->snd_holes == NULL) { /* first hole */ 347 if (tcp_sack_globalholes >= tcp_sack_globalmaxholes || 348 tcp_sack_maxholes == 0) { 349 tcpstat.tcps_sack_sboverflow++; 350 continue; 351 } 352 tp->snd_holes = (struct sackhole *) 353 uma_zalloc(sack_hole_zone,M_NOWAIT); 354 if (tp->snd_holes == NULL) { 355 /* ENOBUFS, so ignore SACKed block for now*/ 356 continue; 357 } 358 cur = tp->snd_holes; 359 cur->start = th->th_ack; 360 cur->end = sack.start; 361 cur->rxmit = cur->start; 362 cur->next = NULL; 363 tp->snd_numholes = 1; 364 tcp_sack_globalholes++; 365 tp->rcv_lastsack = sack.end; 366 continue; /* with next sack block */ 367 } 368 /* Go thru list of holes: p = previous, cur = current */ 369 p = cur = tp->snd_holes; 370 while (cur) { 371 if (SEQ_LEQ(sack.end, cur->start)) 372 /* SACKs data before the current hole */ 373 break; /* no use going through more holes */ 374 if (SEQ_GEQ(sack.start, cur->end)) { 375 /* SACKs data beyond the current hole */ 376 p = cur; 377 cur = cur->next; 378 continue; 379 } 380 if (SEQ_LEQ(sack.start, cur->start)) { 381 /* Data acks at least the beginning of hole */ 382 if (SEQ_GEQ(sack.end, cur->end)) { 383 /* Acks entire hole, so delete hole */ 384 if (p != cur) { 385 p->next = cur->next; 386 uma_zfree(sack_hole_zone, cur); 387 cur = p->next; 388 } else { 389 cur = cur->next; 390 uma_zfree(sack_hole_zone, p); 391 p = cur; 392 tp->snd_holes = p; 393 } 394 tp->snd_numholes--; 395 tcp_sack_globalholes--; 396 continue; 397 } 398 /* otherwise, move start of hole forward */ 399 cur->start = sack.end; 400 cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); 401 p = cur; 402 cur = cur->next; 403 continue; 404 } 405 /* move end of hole backward */ 406 if (SEQ_GEQ(sack.end, cur->end)) { 407 cur->end = sack.start; 408 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); 409 p = cur; 410 cur = cur->next; 411 continue; 412 } 413 if (SEQ_LT(cur->start, sack.start) && 414 SEQ_GT(cur->end, sack.end)) { 415 /* 416 * ACKs some data in middle of a hole; need to 417 * split current hole 418 */ 419 if (tp->snd_numholes >= tcp_sack_maxholes || 420 tcp_sack_globalholes >= 421 tcp_sack_globalmaxholes) { 422 tcpstat.tcps_sack_sboverflow++; 423 continue; 424 } 425 temp = (struct sackhole *) 426 uma_zalloc(sack_hole_zone,M_NOWAIT); 427 if (temp == NULL) 428 continue; /* ENOBUFS */ 429 temp->next = cur->next; 430 temp->start = sack.end; 431 temp->end = cur->end; 432 temp->rxmit = SEQ_MAX(cur->rxmit, temp->start); 433 cur->end = sack.start; 434 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); 435 cur->next = temp; 436 p = temp; 437 cur = p->next; 438 tp->snd_numholes++; 439 tcp_sack_globalholes++; 440 } 441 } 442 /* At this point, p points to the last hole on the list */ 443 if (SEQ_LT(tp->rcv_lastsack, sack.start)) { 444 /* 445 * Need to append new hole at end. 446 * Last hole is p (and it's not NULL). 447 */ 448 if (tp->snd_numholes >= tcp_sack_maxholes || 449 tcp_sack_globalholes >= tcp_sack_globalmaxholes) { 450 tcpstat.tcps_sack_sboverflow++; 451 continue; 452 } 453 temp = (struct sackhole *) 454 uma_zalloc(sack_hole_zone,M_NOWAIT); 455 if (temp == NULL) 456 continue; /* ENOBUFS */ 457 temp->start = tp->rcv_lastsack; 458 temp->end = sack.start; 459 temp->rxmit = temp->start; 460 temp->next = 0; 461 p->next = temp; 462 tp->rcv_lastsack = sack.end; 463 tp->snd_numholes++; 464 tcp_sack_globalholes++; 465 } 466 } 467 return (0); 468} 469 470/* 471 * Delete stale (i.e, cumulatively ack'd) holes. Hole is deleted only if 472 * it is completely acked; otherwise, tcp_sack_option(), called from 473 * tcp_dooptions(), will fix up the hole. 474 */ 475void 476tcp_del_sackholes(tp, th) 477 struct tcpcb *tp; 478 struct tcphdr *th; 479{ 480 INP_LOCK_ASSERT(tp->t_inpcb); 481 if (tp->sack_enable && tp->t_state != TCPS_LISTEN) { 482 /* max because this could be an older ack just arrived */ 483 tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ? 484 th->th_ack : tp->snd_una; 485 struct sackhole *cur = tp->snd_holes; 486 struct sackhole *prev; 487 while (cur) 488 if (SEQ_LEQ(cur->end, lastack)) { 489 prev = cur; 490 cur = cur->next; 491 uma_zfree(sack_hole_zone, prev); 492 tp->snd_numholes--; 493 tcp_sack_globalholes--; 494 } else if (SEQ_LT(cur->start, lastack)) { 495 cur->start = lastack; 496 if (SEQ_LT(cur->rxmit, cur->start)) 497 cur->rxmit = cur->start; 498 break; 499 } else 500 break; 501 tp->snd_holes = cur; 502 } 503} 504 505void 506tcp_free_sackholes(struct tcpcb *tp) 507{ 508 struct sackhole *p, *q; 509 510 INP_LOCK_ASSERT(tp->t_inpcb); 511 q = tp->snd_holes; 512 while (q != NULL) { 513 p = q; 514 q = q->next; 515 uma_zfree(sack_hole_zone, p); 516 tcp_sack_globalholes--; 517 } 518 tp->snd_holes = 0; 519 tp->snd_numholes = 0; 520} 521 522/* 523 * Partial ack handling within a sack recovery episode. 524 * Keeping this very simple for now. When a partial ack 525 * is received, force snd_cwnd to a value that will allow 526 * the sender to transmit no more than 2 segments. 527 * If necessary, a better scheme can be adopted at a 528 * later point, but for now, the goal is to prevent the 529 * sender from bursting a large amount of data in the midst 530 * of sack recovery. 531 */ 532void 533tcp_sack_partialack(tp, th) 534 struct tcpcb *tp; 535 struct tcphdr *th; 536{ 537 INP_LOCK_ASSERT(tp->t_inpcb); 538 int num_segs = 1; 539 int sack_bytes_rxmt = 0; 540 541 callout_stop(tp->tt_rexmt); 542 tp->t_rtttime = 0; 543 /* send one or 2 segments based on how much new data was acked */ 544 if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2) 545 num_segs = 2; 546 (void)tcp_sack_output(tp, &sack_bytes_rxmt); 547 tp->snd_cwnd = sack_bytes_rxmt + (tp->snd_nxt - tp->sack_newdata) + 548 num_segs * tp->t_maxseg; 549 tp->t_flags |= TF_ACKNOW; 550 (void) tcp_output(tp); 551} 552 553#ifdef TCP_SACK_DEBUG 554void 555tcp_print_holes(struct tcpcb *tp) 556{ 557 struct sackhole *p = tp->snd_holes; 558 if (p == 0) 559 return; 560 printf("Hole report: start--end dups rxmit\n"); 561 while (p) { 562 printf("%x--%x r %x\n", p->start, p->end, p->rxmit); 563 p = p->next; 564 } 565 printf("\n"); 566} 567#endif /* TCP_SACK_DEBUG */ 568 569/* 570 * Returns pointer to a sackhole if there are any pending retransmissions; 571 * NULL otherwise. 572 */ 573struct sackhole * 574tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) 575{ 576 struct sackhole *p = NULL; 577 578 INP_LOCK_ASSERT(tp->t_inpcb); 579 if (!tp->sack_enable) 580 return (NULL); 581 *sack_bytes_rexmt = 0; 582 for (p = tp->snd_holes; p ; p = p->next) { 583 if (SEQ_LT(p->rxmit, p->end)) { 584 if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ 585 continue; 586 } 587#ifdef TCP_SACK_DEBUG 588 if (p) 589 tcp_print_holes(tp); 590#endif 591 *sack_bytes_rexmt += (p->rxmit - p->start); 592 break; 593 } 594 *sack_bytes_rexmt += (p->rxmit - p->start); 595 } 596 return (p); 597} 598 599/* 600 * After a timeout, the SACK list may be rebuilt. This SACK information 601 * should be used to avoid retransmitting SACKed data. This function 602 * traverses the SACK list to see if snd_nxt should be moved forward. 603 */ 604void 605tcp_sack_adjust(struct tcpcb *tp) 606{ 607 INP_LOCK_ASSERT(tp->t_inpcb); 608 struct sackhole *cur = tp->snd_holes; 609 if (cur == NULL) 610 return; /* No holes */ 611 if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack)) 612 return; /* We're already beyond any SACKed blocks */ 613 /* 614 * Two cases for which we want to advance snd_nxt: 615 * i) snd_nxt lies between end of one hole and beginning of another 616 * ii) snd_nxt lies between end of last hole and rcv_lastsack 617 */ 618 while (cur->next) { 619 if (SEQ_LT(tp->snd_nxt, cur->end)) 620 return; 621 if (SEQ_GEQ(tp->snd_nxt, cur->next->start)) 622 cur = cur->next; 623 else { 624 tp->snd_nxt = cur->next->start; 625 return; 626 } 627 } 628 if (SEQ_LT(tp->snd_nxt, cur->end)) 629 return; 630 tp->snd_nxt = tp->rcv_lastsack; 631 return; 632} 633