tcp_output.c (50477) | tcp_output.c (50673) |
---|---|
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 17 unchanged lines hidden (view full) --- 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 | 1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 17 unchanged lines hidden (view full) --- 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 |
34 * $FreeBSD: head/sys/netinet/tcp_output.c 50477 1999-08-28 01:08:13Z peter $ | 34 * $FreeBSD: head/sys/netinet/tcp_output.c 50673 1999-08-30 21:17:07Z jlemon $ |
35 */ 36 37#include "opt_tcpdebug.h" 38 39#include <stddef.h> 40 41#include <sys/param.h> 42#include <sys/systm.h> --- 25 unchanged lines hidden (view full) --- 68#ifdef notyet 69extern struct mbuf *m_copypack(); 70#endif 71 72static int path_mtu_discovery = 1; 73SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW, 74 &path_mtu_discovery, 1, "Enable Path MTU Discovery"); 75 | 35 */ 36 37#include "opt_tcpdebug.h" 38 39#include <stddef.h> 40 41#include <sys/param.h> 42#include <sys/systm.h> --- 25 unchanged lines hidden (view full) --- 68#ifdef notyet 69extern struct mbuf *m_copypack(); 70#endif 71 72static int path_mtu_discovery = 1; 73SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW, 74 &path_mtu_discovery, 1, "Enable Path MTU Discovery"); 75 |
76int ss_fltsz = 1; 77SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW, 78 &ss_fltsz, 1, "Slow start flight size"); |
|
76 | 79 |
80int ss_fltsz_local = TCP_MAXWIN; /* something large */ 81SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW, 82 &ss_fltsz_local, 1, "Slow start flight size for local networks"); 83 |
|
77/* 78 * Tcp output routine: figure out what should be sent and send it. 79 */ 80int 81tcp_output(tp) 82 register struct tcpcb *tp; 83{ 84 register struct socket *so = tp->t_inpcb->inp_socket; --- 9 unchanged lines hidden (view full) --- 94 95 /* 96 * Determine length of data that should be transmitted, 97 * and flags that will be used. 98 * If there is some data or critical controls (SYN, RST) 99 * to send, then transmit; otherwise, investigate further. 100 */ 101 idle = (tp->snd_max == tp->snd_una); | 84/* 85 * Tcp output routine: figure out what should be sent and send it. 86 */ 87int 88tcp_output(tp) 89 register struct tcpcb *tp; 90{ 91 register struct socket *so = tp->t_inpcb->inp_socket; --- 9 unchanged lines hidden (view full) --- 101 102 /* 103 * Determine length of data that should be transmitted, 104 * and flags that will be used. 105 * If there is some data or critical controls (SYN, RST) 106 * to send, then transmit; otherwise, investigate further. 107 */ 108 idle = (tp->snd_max == tp->snd_una); |
102 if (idle && tp->t_idle >= tp->t_rxtcur) | 109 if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) { |
103 /* 104 * We have been idle for "a while" and no acks are 105 * expected to clock out any data we send -- 106 * slow start to get ack "clock" running again. | 110 /* 111 * We have been idle for "a while" and no acks are 112 * expected to clock out any data we send -- 113 * slow start to get ack "clock" running again. |
107 */ 108 tp->snd_cwnd = tp->t_maxseg; | 114 * 115 * Set the slow-start flight size depending on whether 116 * this is a local network or not. 117 */ 118 if (in_localaddr(tp->t_inpcb->inp_faddr)) 119 tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local; 120 else 121 tp->snd_cwnd = tp->t_maxseg * ss_fltsz; 122 } |
109again: 110 sendalot = 0; 111 off = tp->snd_nxt - tp->snd_una; 112 win = min(tp->snd_wnd, tp->snd_cwnd); 113 114 flags = tcp_outflags[tp->t_state]; 115 /* 116 * Get standard flags, and add SYN or FIN if requested by 'hidden' --- 27 unchanged lines hidden (view full) --- 144 * because if we don't have any more data 145 * to send then the probe will be the FIN 146 * itself. 147 */ 148 if (off < so->so_snd.sb_cc) 149 flags &= ~TH_FIN; 150 win = 1; 151 } else { | 123again: 124 sendalot = 0; 125 off = tp->snd_nxt - tp->snd_una; 126 win = min(tp->snd_wnd, tp->snd_cwnd); 127 128 flags = tcp_outflags[tp->t_state]; 129 /* 130 * Get standard flags, and add SYN or FIN if requested by 'hidden' --- 27 unchanged lines hidden (view full) --- 158 * because if we don't have any more data 159 * to send then the probe will be the FIN 160 * itself. 161 */ 162 if (off < so->so_snd.sb_cc) 163 flags &= ~TH_FIN; 164 win = 1; 165 } else { |
152 tp->t_timer[TCPT_PERSIST] = 0; | 166 callout_stop(tp->tt_persist); |
153 tp->t_rxtshift = 0; 154 } 155 } 156 157 len = (long)ulmin(so->so_snd.sb_cc, win) - off; 158 159 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 160 taop = &tao_noncached; --- 34 unchanged lines hidden (view full) --- 195 * after we sent into it. If window shrank to 0, 196 * cancel pending retransmit, pull snd_nxt back 197 * to (closed) window, and set the persist timer 198 * if it isn't already going. If the window didn't 199 * close completely, just wait for an ACK. 200 */ 201 len = 0; 202 if (win == 0) { | 167 tp->t_rxtshift = 0; 168 } 169 } 170 171 len = (long)ulmin(so->so_snd.sb_cc, win) - off; 172 173 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 174 taop = &tao_noncached; --- 34 unchanged lines hidden (view full) --- 209 * after we sent into it. If window shrank to 0, 210 * cancel pending retransmit, pull snd_nxt back 211 * to (closed) window, and set the persist timer 212 * if it isn't already going. If the window didn't 213 * close completely, just wait for an ACK. 214 */ 215 len = 0; 216 if (win == 0) { |
203 tp->t_timer[TCPT_REXMT] = 0; | 217 callout_stop(tp->tt_rexmt); |
204 tp->t_rxtshift = 0; 205 tp->snd_nxt = tp->snd_una; | 218 tp->t_rxtshift = 0; 219 tp->snd_nxt = tp->snd_una; |
206 if (tp->t_timer[TCPT_PERSIST] == 0) | 220 if (!callout_active(tp->tt_persist)) |
207 tcp_setpersist(tp); 208 } 209 } 210 if (len > tp->t_maxseg) { 211 len = tp->t_maxseg; 212 sendalot = 1; 213 } 214 if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) --- 71 unchanged lines hidden (view full) --- 286 /* 287 * TCP window updates are not reliable, rather a polling protocol 288 * using ``persist'' packets is used to insure receipt of window 289 * updates. The three ``states'' for the output side are: 290 * idle not doing retransmits or persists 291 * persisting to move a small or zero window 292 * (re)transmitting and thereby not persisting 293 * | 221 tcp_setpersist(tp); 222 } 223 } 224 if (len > tp->t_maxseg) { 225 len = tp->t_maxseg; 226 sendalot = 1; 227 } 228 if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) --- 71 unchanged lines hidden (view full) --- 300 /* 301 * TCP window updates are not reliable, rather a polling protocol 302 * using ``persist'' packets is used to insure receipt of window 303 * updates. The three ``states'' for the output side are: 304 * idle not doing retransmits or persists 305 * persisting to move a small or zero window 306 * (re)transmitting and thereby not persisting 307 * |
294 * tp->t_timer[TCPT_PERSIST] 295 * is set when we are in persist state. | 308 * callout_active(tp->tt_persist) 309 * is true when we are in persist state. |
296 * tp->t_force 297 * is set when we are called to send a persist packet. | 310 * tp->t_force 311 * is set when we are called to send a persist packet. |
298 * tp->t_timer[TCPT_REXMT] | 312 * callout_active(tp->tt_rexmt) |
299 * is set when we are retransmitting 300 * The output side is idle when both timers are zero. 301 * 302 * If send window is too small, there is data to transmit, and no 303 * retransmit or persist is pending, then go to persist state. 304 * If nothing happens soon, send when timer expires: 305 * if window is nonzero, transmit what we can, 306 * otherwise force out a byte. 307 */ | 313 * is set when we are retransmitting 314 * The output side is idle when both timers are zero. 315 * 316 * If send window is too small, there is data to transmit, and no 317 * retransmit or persist is pending, then go to persist state. 318 * If nothing happens soon, send when timer expires: 319 * if window is nonzero, transmit what we can, 320 * otherwise force out a byte. 321 */ |
308 if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && 309 tp->t_timer[TCPT_PERSIST] == 0) { | 322 if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) && 323 !callout_active(tp->tt_persist)) { |
310 tp->t_rxtshift = 0; 311 tcp_setpersist(tp); 312 } 313 314 /* 315 * No reason to send a segment, just return. 316 */ 317 return (0); --- 41 unchanged lines hidden (view full) --- 359 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 360 (flags & TH_RST) == 0 && 361 ((flags & TH_ACK) == 0 || 362 (tp->t_flags & TF_RCVD_TSTMP))) { 363 u_int32_t *lp = (u_int32_t *)(opt + optlen); 364 365 /* Form timestamp option as shown in appendix A of RFC 1323. */ 366 *lp++ = htonl(TCPOPT_TSTAMP_HDR); | 324 tp->t_rxtshift = 0; 325 tcp_setpersist(tp); 326 } 327 328 /* 329 * No reason to send a segment, just return. 330 */ 331 return (0); --- 41 unchanged lines hidden (view full) --- 373 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 374 (flags & TH_RST) == 0 && 375 ((flags & TH_ACK) == 0 || 376 (tp->t_flags & TF_RCVD_TSTMP))) { 377 u_int32_t *lp = (u_int32_t *)(opt + optlen); 378 379 /* Form timestamp option as shown in appendix A of RFC 1323. */ 380 *lp++ = htonl(TCPOPT_TSTAMP_HDR); |
367 *lp++ = htonl(tcp_now); | 381 *lp++ = htonl(ticks); |
368 *lp = htonl(tp->ts_recent); 369 optlen += TCPOLEN_TSTAMP_APPA; 370 } 371 372 /* 373 * Send `CC-family' options if our side wants to use them (TF_REQ_CC), 374 * options are allowed (!TF_NOOPT) and it's not a RST. 375 */ --- 188 unchanged lines hidden (view full) --- 564 * of the next unsent octet. So, if there is no data 565 * (and no SYN or FIN), use snd_max instead of snd_nxt 566 * when filling in ti_seq. But if we are in persist 567 * state, snd_max might reflect one byte beyond the 568 * right edge of the window, so use snd_nxt in that 569 * case, since we know we aren't doing a retransmission. 570 * (retransmit and persist are mutually exclusive...) 571 */ | 382 *lp = htonl(tp->ts_recent); 383 optlen += TCPOLEN_TSTAMP_APPA; 384 } 385 386 /* 387 * Send `CC-family' options if our side wants to use them (TF_REQ_CC), 388 * options are allowed (!TF_NOOPT) and it's not a RST. 389 */ --- 188 unchanged lines hidden (view full) --- 578 * of the next unsent octet. So, if there is no data 579 * (and no SYN or FIN), use snd_max instead of snd_nxt 580 * when filling in ti_seq. But if we are in persist 581 * state, snd_max might reflect one byte beyond the 582 * right edge of the window, so use snd_nxt in that 583 * case, since we know we aren't doing a retransmission. 584 * (retransmit and persist are mutually exclusive...) 585 */ |
572 if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) | 586 if (len || (flags & (TH_SYN|TH_FIN)) 587 || callout_active(tp->tt_persist)) |
573 ti->ti_seq = htonl(tp->snd_nxt); 574 else 575 ti->ti_seq = htonl(tp->snd_max); 576 ti->ti_ack = htonl(tp->rcv_nxt); 577 if (optlen) { 578 bcopy(opt, ti + 1, optlen); 579 ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 580 } --- 29 unchanged lines hidden (view full) --- 610 ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + 611 optlen + len)); 612 ti->ti_sum = in_cksum(m, (int)(hdrlen + len)); 613 614 /* 615 * In transmit state, time the transmission and arrange for 616 * the retransmit. In persist state, just set snd_max. 617 */ | 588 ti->ti_seq = htonl(tp->snd_nxt); 589 else 590 ti->ti_seq = htonl(tp->snd_max); 591 ti->ti_ack = htonl(tp->rcv_nxt); 592 if (optlen) { 593 bcopy(opt, ti + 1, optlen); 594 ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 595 } --- 29 unchanged lines hidden (view full) --- 625 ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + 626 optlen + len)); 627 ti->ti_sum = in_cksum(m, (int)(hdrlen + len)); 628 629 /* 630 * In transmit state, time the transmission and arrange for 631 * the retransmit. In persist state, just set snd_max. 632 */ |
618 if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { | 633 if (tp->t_force == 0 || !callout_active(tp->tt_persist)) { |
619 tcp_seq startseq = tp->snd_nxt; 620 621 /* 622 * Advance snd_nxt over sequence space of this segment. 623 */ 624 if (flags & (TH_SYN|TH_FIN)) { 625 if (flags & TH_SYN) 626 tp->snd_nxt++; --- 4 unchanged lines hidden (view full) --- 631 } 632 tp->snd_nxt += len; 633 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 634 tp->snd_max = tp->snd_nxt; 635 /* 636 * Time this transmission if not a retransmission and 637 * not currently timing anything. 638 */ | 634 tcp_seq startseq = tp->snd_nxt; 635 636 /* 637 * Advance snd_nxt over sequence space of this segment. 638 */ 639 if (flags & (TH_SYN|TH_FIN)) { 640 if (flags & TH_SYN) 641 tp->snd_nxt++; --- 4 unchanged lines hidden (view full) --- 646 } 647 tp->snd_nxt += len; 648 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 649 tp->snd_max = tp->snd_nxt; 650 /* 651 * Time this transmission if not a retransmission and 652 * not currently timing anything. 653 */ |
639 if (tp->t_rtt == 0) { 640 tp->t_rtt = 1; | 654 if (tp->t_rtttime == 0) { 655 tp->t_rtttime = ticks; |
641 tp->t_rtseq = startseq; 642 tcpstat.tcps_segstimed++; 643 } 644 } 645 646 /* 647 * Set retransmit timer if not currently set, 648 * and not doing an ack or a keep-alive probe. 649 * Initial value for retransmit timer is smoothed 650 * round-trip time + 2 * round-trip time variance. 651 * Initialize shift counter which is used for backoff 652 * of retransmit time. 653 */ | 656 tp->t_rtseq = startseq; 657 tcpstat.tcps_segstimed++; 658 } 659 } 660 661 /* 662 * Set retransmit timer if not currently set, 663 * and not doing an ack or a keep-alive probe. 664 * Initial value for retransmit timer is smoothed 665 * round-trip time + 2 * round-trip time variance. 666 * Initialize shift counter which is used for backoff 667 * of retransmit time. 668 */ |
654 if (tp->t_timer[TCPT_REXMT] == 0 && | 669 if (!callout_active(tp->tt_rexmt) && |
655 tp->snd_nxt != tp->snd_una) { | 670 tp->snd_nxt != tp->snd_una) { |
656 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 657 if (tp->t_timer[TCPT_PERSIST]) { 658 tp->t_timer[TCPT_PERSIST] = 0; | 671 callout_reset(tp->tt_rexmt, tp->t_rxtcur, 672 tcp_timer_rexmt, tp); 673 if (callout_active(tp->tt_persist)) { 674 callout_stop(tp->tt_persist); |
659 tp->t_rxtshift = 0; 660 } 661 } 662 } else 663 if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) 664 tp->snd_max = tp->snd_nxt + len; 665 666#ifdef TCPDEBUG --- 61 unchanged lines hidden (view full) --- 728 * Data sent (as far as we can tell). 729 * If this advertises a larger window than any other segment, 730 * then remember the size of the advertised window. 731 * Any pending ACK has now been sent. 732 */ 733 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 734 tp->rcv_adv = tp->rcv_nxt + win; 735 tp->last_ack_sent = tp->rcv_nxt; | 675 tp->t_rxtshift = 0; 676 } 677 } 678 } else 679 if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) 680 tp->snd_max = tp->snd_nxt + len; 681 682#ifdef TCPDEBUG --- 61 unchanged lines hidden (view full) --- 744 * Data sent (as far as we can tell). 745 * If this advertises a larger window than any other segment, 746 * then remember the size of the advertised window. 747 * Any pending ACK has now been sent. 748 */ 749 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 750 tp->rcv_adv = tp->rcv_nxt + win; 751 tp->last_ack_sent = tp->rcv_nxt; |
736 tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); | 752 tp->t_flags &= ~TF_ACKNOW; 753 if (tcp_delack_enabled) 754 callout_stop(tp->tt_delack); |
737 if (sendalot) 738 goto again; 739 return (0); 740} 741 742void 743tcp_setpersist(tp) 744 register struct tcpcb *tp; 745{ | 755 if (sendalot) 756 goto again; 757 return (0); 758} 759 760void 761tcp_setpersist(tp) 762 register struct tcpcb *tp; 763{ |
746 register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; | 764 int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; 765 int tt; |
747 | 766 |
748 if (tp->t_timer[TCPT_REXMT]) 749 panic("tcp_output REXMT"); | 767 if (callout_active(tp->tt_rexmt)) 768 panic("tcp_setpersist: retransmit pending"); |
750 /* 751 * Start/restart persistence timer. 752 */ | 769 /* 770 * Start/restart persistence timer. 771 */ |
753 TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 754 t * tcp_backoff[tp->t_rxtshift], 755 TCPTV_PERSMIN, TCPTV_PERSMAX); | 772 TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], 773 TCPTV_PERSMIN, TCPTV_PERSMAX); 774 callout_reset(tp->tt_persist, tt, tcp_timer_persist, tp); |
756 if (tp->t_rxtshift < TCP_MAXRXTSHIFT) 757 tp->t_rxtshift++; 758} | 775 if (tp->t_rxtshift < TCP_MAXRXTSHIFT) 776 tp->t_rxtshift++; 777} |