tcp_timer.c revision 1.50
1/* $NetBSD: tcp_timer.c,v 1.50 2001/09/10 15:23:10 thorpej Exp $ */ 2 3/* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32/*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69/* 70 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 102 */ 103 104#include "opt_inet.h" 105 106#include <sys/param.h> 107#include <sys/systm.h> 108#include <sys/malloc.h> 109#include <sys/mbuf.h> 110#include <sys/socket.h> 111#include <sys/socketvar.h> 112#include <sys/protosw.h> 113#include <sys/errno.h> 114#include <sys/kernel.h> 115 116#include <net/if.h> 117#include <net/route.h> 118 119#include <netinet/in.h> 120#include <netinet/in_systm.h> 121#include <netinet/ip.h> 122#include <netinet/in_pcb.h> 123#include <netinet/ip_var.h> 124 125#ifdef INET6 126#ifndef INET 127#include <netinet/in.h> 128#endif 129#include <netinet/ip6.h> 130#include <netinet6/in6_pcb.h> 131#endif 132 133#include <netinet/tcp.h> 134#include <netinet/tcp_fsm.h> 135#include <netinet/tcp_seq.h> 136#include <netinet/tcp_timer.h> 137#include <netinet/tcp_var.h> 138#include <netinet/tcpip.h> 139 140int tcp_keepidle = TCPTV_KEEP_IDLE; 141int tcp_keepintvl = TCPTV_KEEPINTVL; 142int tcp_keepcnt = TCPTV_KEEPCNT; /* max idle probes */ 143int tcp_maxpersistidle = TCPTV_KEEP_IDLE; /* max idle time in persist */ 144int tcp_maxidle; 145 146/* 147 * Time to delay the ACK. This is initialized in tcp_init(), unless 148 * its patched. 149 */ 150int tcp_delack_ticks = 0; 151 152/* 153 * Callout to process delayed ACKs for a TCPCB. 154 */ 155void 156tcp_delack(void *arg) 157{ 158 struct tcpcb *tp = arg; 159 int s; 160 161 /* 162 * If tcp_output() wasn't able to transmit the ACK 163 * for whatever reason, it will restart the delayed 164 * ACK callout. 165 */ 166 167 s = splsoftnet(); 168 tp->t_flags |= TF_ACKNOW; 169 (void) tcp_output(tp); 170 splx(s); 171} 172 173/* 174 * Tcp protocol timeout routine called every 500 ms. 175 * Updates the timers in all active tcb's and 176 * causes finite state machine actions if timers expire. 177 */ 178void 179tcp_slowtimo() 180{ 181 struct inpcb *inp, *ninp; 182 struct tcpcb *tp; 183#ifdef INET6 184 struct in6pcb *in6p, *nin6p; 185#endif 186 int s; 187 long i; 188 static int syn_cache_last = 0; 189 int skip, mask; 190 191 skip = mask = 0; 192 193 s = splsoftnet(); 194 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 195 /* 196 * Search through tcb's and update active timers. 197 */ 198 mask |= 1; 199 inp = tcbtable.inpt_queue.cqh_first; 200 if (inp == (struct inpcb *)0) { /* XXX */ 201 skip |= 1; 202 goto dotcb6; 203 } 204 for (; inp != (struct inpcb *)&tcbtable.inpt_queue; inp = ninp) { 205 ninp = inp->inp_queue.cqe_next; 206 tp = intotcpcb(inp); 207 if (tp == 0 || tp->t_state == TCPS_LISTEN) 208 continue; 209 for (i = 0; i < TCPT_NTIMERS; i++) { 210 if (TCP_TIMER_ISEXPIRED(tp, i)) { 211 TCP_TIMER_DISARM(tp, i); 212 (void) tcp_usrreq(tp->t_inpcb->inp_socket, 213 PRU_SLOWTIMO, (struct mbuf *)0, 214 (struct mbuf *)i, (struct mbuf *)0, 215 (struct proc *)0); 216 /* XXX NOT MP SAFE */ 217 if ((ninp == (void *)&tcbtable.inpt_queue && 218 tcbtable.inpt_queue.cqh_last != inp) || 219 ninp->inp_queue.cqe_prev != inp) 220 goto tpgone; 221 } 222 } 223tpgone: 224 ; 225 } 226dotcb6: 227#ifdef INET6 228 mask |= 2; 229 in6p = tcb6.in6p_next; 230 if (in6p == (struct in6pcb *)0) { /* XXX */ 231 skip |= 2; 232 goto doiss; 233 } 234 for (; in6p != (struct in6pcb *)&tcb6; in6p = nin6p) { 235 nin6p = in6p->in6p_next; 236 tp = in6totcpcb(in6p); 237 if (tp == 0 || tp->t_state == TCPS_LISTEN) 238 continue; 239 for (i = 0; i < TCPT_NTIMERS; i++) { 240 if (TCP_TIMER_ISEXPIRED(tp, i)) { 241 TCP_TIMER_DISARM(tp, i); 242 (void) tcp_usrreq(tp->t_in6pcb->in6p_socket, 243 PRU_SLOWTIMO, (struct mbuf *)0, 244 (struct mbuf *)i, (struct mbuf *)0, 245 (struct proc *)0); 246 /* XXX NOT MP SAFE */ 247 if ((nin6p == (void *)&tcb6 && 248 tcb6.in6p_prev != in6p) || 249 nin6p->in6p_prev != in6p) 250 goto tp6gone; 251 } 252 } 253tp6gone: 254 ; 255 } 256 257doiss: 258#endif 259 if (mask == skip) 260 goto done; 261 tcp_iss_seq += TCP_ISSINCR; /* increment iss */ 262 tcp_now++; /* for timestamps */ 263 if (++syn_cache_last >= tcp_syn_cache_interval) { 264 syn_cache_timer(); 265 syn_cache_last = 0; 266 } 267done: 268 splx(s); 269} 270 271/* 272 * Cancel all timers for TCP tp. 273 */ 274void 275tcp_canceltimers(tp) 276 struct tcpcb *tp; 277{ 278 int i; 279 280 for (i = 0; i < TCPT_NTIMERS; i++) 281 TCP_TIMER_DISARM(tp, i); 282} 283 284int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 285 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 286 287int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 288 289/* 290 * TCP timer processing. 291 */ 292struct tcpcb * 293tcp_timers(tp, timer) 294 struct tcpcb *tp; 295 int timer; 296{ 297 short rto; 298 299#ifdef DIAGNOSTIC 300 if (tp->t_inpcb && tp->t_in6pcb) 301 panic("tcp_timers: both t_inpcb and t_in6pcb are set"); 302#endif 303 304 switch (timer) { 305 306 /* 307 * 2 MSL timeout in shutdown went off. If we're closed but 308 * still waiting for peer to close and connection has been idle 309 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 310 * control block. Otherwise, check again in a bit. 311 */ 312 case TCPT_2MSL: 313 if (tp->t_state != TCPS_TIME_WAIT && 314 ((tcp_maxidle == 0) || 315 ((tcp_now - tp->t_rcvtime) <= tcp_maxidle))) 316 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); 317 else 318 tp = tcp_close(tp); 319 break; 320 321 /* 322 * Retransmission timer went off. Message has not 323 * been acked within retransmit interval. Back off 324 * to a longer retransmit interval and retransmit one segment. 325 */ 326 case TCPT_REXMT: 327 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 328 tp->t_rxtshift = TCP_MAXRXTSHIFT; 329 tcpstat.tcps_timeoutdrop++; 330 tp = tcp_drop(tp, tp->t_softerror ? 331 tp->t_softerror : ETIMEDOUT); 332 break; 333 } 334 tcpstat.tcps_rexmttimeo++; 335 rto = TCP_REXMTVAL(tp); 336 if (rto < tp->t_rttmin) 337 rto = tp->t_rttmin; 338 TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift], 339 tp->t_rttmin, TCPTV_REXMTMAX); 340 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 341#if 0 342 /* 343 * If we are losing and we are trying path MTU discovery, 344 * try turning it off. This will avoid black holes in 345 * the network which suppress or fail to send "packet 346 * too big" ICMP messages. We should ideally do 347 * lots more sophisticated searching to find the right 348 * value here... 349 */ 350 if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { 351 struct rtentry *rt = NULL; 352 353#ifdef INET 354 if (tp->t_inpcb) 355 rt = in_pcbrtentry(tp->t_inpcb); 356#endif 357#ifdef INET6 358 if (tp->t_in6pcb) 359 rt = in6_pcbrtentry(tp->t_in6pcb); 360#endif 361 362 /* XXX: Black hole recovery code goes here */ 363 } 364#endif 365 /* 366 * If losing, let the lower level know and try for 367 * a better route. Also, if we backed off this far, 368 * our srtt estimate is probably bogus. Clobber it 369 * so we'll take the next rtt measurement as our srtt; 370 * move the current srtt into rttvar to keep the current 371 * retransmit times until then. 372 */ 373 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 374#ifdef INET 375 if (tp->t_inpcb) 376 in_losing(tp->t_inpcb); 377#endif 378#ifdef INET6 379 if (tp->t_in6pcb) 380 in6_losing(tp->t_in6pcb); 381#endif 382 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 383 tp->t_srtt = 0; 384 } 385 tp->snd_nxt = tp->snd_una; 386 /* 387 * If timing a segment in this window, stop the timer. 388 */ 389 tp->t_rtttime = 0; 390 /* 391 * Remember if we are retransmitting a SYN, because if 392 * we do, set the initial congestion window must be set 393 * to 1 segment. 394 */ 395 if (tp->t_state == TCPS_SYN_SENT) 396 tp->t_flags |= TF_SYN_REXMT; 397 /* 398 * Close the congestion window down to one segment 399 * (we'll open it by one segment for each ack we get). 400 * Since we probably have a window's worth of unacked 401 * data accumulated, this "slow start" keeps us from 402 * dumping all that data as back-to-back packets (which 403 * might overwhelm an intermediate gateway). 404 * 405 * There are two phases to the opening: Initially we 406 * open by one mss on each ack. This makes the window 407 * size increase exponentially with time. If the 408 * window is larger than the path can handle, this 409 * exponential growth results in dropped packet(s) 410 * almost immediately. To get more time between 411 * drops but still "push" the network to take advantage 412 * of improving conditions, we switch from exponential 413 * to linear window opening at some threshhold size. 414 * For a threshhold, we use half the current window 415 * size, truncated to a multiple of the mss. 416 * 417 * (the minimum cwnd that will give us exponential 418 * growth is 2 mss. We don't allow the threshhold 419 * to go below this.) 420 */ 421 { 422 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; 423 if (win < 2) 424 win = 2; 425 /* Loss Window MUST be one segment. */ 426 tp->snd_cwnd = tp->t_segsz; 427 tp->snd_ssthresh = win * tp->t_segsz; 428 tp->t_dupacks = 0; 429 } 430 (void) tcp_output(tp); 431 break; 432 433 /* 434 * Persistance timer into zero window. 435 * Force a byte to be output, if possible. 436 */ 437 case TCPT_PERSIST: 438 /* 439 * Hack: if the peer is dead/unreachable, we do not 440 * time out if the window is closed. After a full 441 * backoff, drop the connection if the idle time 442 * (no responses to probes) reaches the maximum 443 * backoff that we would use if retransmitting. 444 */ 445 rto = TCP_REXMTVAL(tp); 446 if (rto < tp->t_rttmin) 447 rto = tp->t_rttmin; 448 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 449 ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle || 450 (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { 451 tcpstat.tcps_persistdrops++; 452 tp = tcp_drop(tp, ETIMEDOUT); 453 break; 454 } 455 tcpstat.tcps_persisttimeo++; 456 tcp_setpersist(tp); 457 tp->t_force = 1; 458 (void) tcp_output(tp); 459 tp->t_force = 0; 460 break; 461 462 /* 463 * Keep-alive timer went off; send something 464 * or drop connection if idle for too long. 465 */ 466 case TCPT_KEEP: 467 { 468 struct socket *so = NULL; 469 470 tcpstat.tcps_keeptimeo++; 471 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 472 goto dropit; 473#ifdef INET 474 if (tp->t_inpcb) 475 so = tp->t_inpcb->inp_socket; 476#endif 477#ifdef INET6 478 if (tp->t_in6pcb) 479 so = tp->t_in6pcb->in6p_socket; 480#endif 481 if (so->so_options & SO_KEEPALIVE && 482 tp->t_state <= TCPS_CLOSE_WAIT) { 483 if ((tcp_maxidle > 0) && 484 ((tcp_now - tp->t_rcvtime) >= 485 tcp_keepidle + tcp_maxidle)) 486 goto dropit; 487 /* 488 * Send a packet designed to force a response 489 * if the peer is up and reachable: 490 * either an ACK if the connection is still alive, 491 * or an RST if the peer has closed the connection 492 * due to timeout or reboot. 493 * Using sequence number tp->snd_una-1 494 * causes the transmitted zero-length segment 495 * to lie outside the receive window; 496 * by the protocol spec, this requires the 497 * correspondent TCP to respond. 498 */ 499 tcpstat.tcps_keepprobe++; 500 if (tcp_compat_42) { 501 /* 502 * The keepalive packet must have nonzero 503 * length to get a 4.2 host to respond. 504 */ 505 (void)tcp_respond(tp, tp->t_template, 506 (struct mbuf *)NULL, NULL, tp->rcv_nxt - 1, 507 tp->snd_una - 1, 0); 508 } else { 509 (void)tcp_respond(tp, tp->t_template, 510 (struct mbuf *)NULL, NULL, tp->rcv_nxt, 511 tp->snd_una - 1, 0); 512 } 513 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); 514 } else 515 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); 516 break; 517 } 518 dropit: 519 tcpstat.tcps_keepdrops++; 520 tp = tcp_drop(tp, ETIMEDOUT); 521 break; 522 } 523 return (tp); 524} 525