/* tcp_timer.c revision 1.63 */
1/* $NetBSD: tcp_timer.c,v 1.63 2003/07/20 16:35:09 he Exp $ */ 2 3/* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32/*- 33 * Copyright (c) 1997, 1998, 2001 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69/* 70 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 
72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 
100 * 101 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 102 */ 103 104#include <sys/cdefs.h> 105__KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.63 2003/07/20 16:35:09 he Exp $"); 106 107#include "opt_inet.h" 108#include "opt_tcp_debug.h" 109 110#include <sys/param.h> 111#include <sys/systm.h> 112#include <sys/malloc.h> 113#include <sys/mbuf.h> 114#include <sys/socket.h> 115#include <sys/socketvar.h> 116#include <sys/protosw.h> 117#include <sys/errno.h> 118#include <sys/kernel.h> 119 120#include <net/if.h> 121#include <net/route.h> 122 123#include <netinet/in.h> 124#include <netinet/in_systm.h> 125#include <netinet/ip.h> 126#include <netinet/in_pcb.h> 127#include <netinet/ip_var.h> 128 129#ifdef INET6 130#ifndef INET 131#include <netinet/in.h> 132#endif 133#include <netinet/ip6.h> 134#include <netinet6/in6_pcb.h> 135#endif 136 137#include <netinet/tcp.h> 138#include <netinet/tcp_fsm.h> 139#include <netinet/tcp_seq.h> 140#include <netinet/tcp_timer.h> 141#include <netinet/tcp_var.h> 142#include <netinet/tcpip.h> 143#ifdef TCP_DEBUG 144#include <netinet/tcp_debug.h> 145#endif 146 147/* 148 * Various tunable timer parameters. These are initialized in tcp_init(), 149 * unless they are patched. 150 */ 151int tcp_keepidle = 0; 152int tcp_keepintvl = 0; 153int tcp_keepcnt = 0; /* max idle probes */ 154int tcp_maxpersistidle = 0; /* max idle time in persist */ 155int tcp_maxidle; /* computed in tcp_slowtimo() */ 156 157/* 158 * Time to delay the ACK. This is initialized in tcp_init(), unless 159 * its patched. 160 */ 161int tcp_delack_ticks = 0; 162 163void tcp_timer_rexmt(void *); 164void tcp_timer_persist(void *); 165void tcp_timer_keep(void *); 166void tcp_timer_2msl(void *); 167 168const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = { 169 tcp_timer_rexmt, 170 tcp_timer_persist, 171 tcp_timer_keep, 172 tcp_timer_2msl, 173}; 174 175/* 176 * Timer state initialization, called from tcp_init(). 
177 */ 178void 179tcp_timer_init(void) 180{ 181 182 if (tcp_keepidle == 0) 183 tcp_keepidle = TCPTV_KEEP_IDLE; 184 185 if (tcp_keepintvl == 0) 186 tcp_keepintvl = TCPTV_KEEPINTVL; 187 188 if (tcp_keepcnt == 0) 189 tcp_keepcnt = TCPTV_KEEPCNT; 190 191 if (tcp_maxpersistidle == 0) 192 tcp_maxpersistidle = TCPTV_KEEP_IDLE; 193 194 if (tcp_delack_ticks == 0) 195 tcp_delack_ticks = TCP_DELACK_TICKS; 196} 197 198/* 199 * Return how many timers are currently being invoked. 200 */ 201int 202tcp_timers_invoking(struct tcpcb *tp) 203{ 204 int i; 205 int count = 0; 206 207 for (i = 0; i < TCPT_NTIMERS; i++) 208 if (callout_invoking(&tp->t_timer[i])) 209 count++; 210 if (callout_invoking(&tp->t_delack_ch)) 211 count++; 212 213 return count; 214} 215 216/* 217 * Callout to process delayed ACKs for a TCPCB. 218 */ 219void 220tcp_delack(void *arg) 221{ 222 struct tcpcb *tp = arg; 223 int s; 224 225 /* 226 * If tcp_output() wasn't able to transmit the ACK 227 * for whatever reason, it will restart the delayed 228 * ACK callout. 229 */ 230 231 s = splsoftnet(); 232 callout_ack(&tp->t_delack_ch); 233 if (tcp_isdead(tp)) { 234 splx(s); 235 return; 236 } 237 238 tp->t_flags |= TF_ACKNOW; 239 (void) tcp_output(tp); 240 splx(s); 241} 242 243/* 244 * Tcp protocol timeout routine called every 500 ms. 245 * Updates the timers in all active tcb's and 246 * causes finite state machine actions if timers expire. 247 */ 248void 249tcp_slowtimo() 250{ 251 int s; 252 253 s = splsoftnet(); 254 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 255 tcp_iss_seq += TCP_ISSINCR; /* increment iss */ 256 tcp_now++; /* for timestamps */ 257 splx(s); 258} 259 260/* 261 * Cancel all timers for TCP tp. 
262 */ 263void 264tcp_canceltimers(tp) 265 struct tcpcb *tp; 266{ 267 int i; 268 269 for (i = 0; i < TCPT_NTIMERS; i++) 270 TCP_TIMER_DISARM(tp, i); 271} 272 273const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 274 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 275 276const int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 277 278/* 279 * TCP timer processing. 280 */ 281 282void 283tcp_timer_rexmt(void *arg) 284{ 285 struct tcpcb *tp = arg; 286 uint32_t rto; 287 int s; 288#ifdef TCP_DEBUG 289 struct socket *so; 290 short ostate; 291#endif 292 293 s = splsoftnet(); 294 callout_ack(&tp->t_timer[TCPT_KEEP]); 295 if (tcp_isdead(tp)) { 296 splx(s); 297 return; 298 } 299 300#ifdef TCP_DEBUG 301#ifdef INET 302 if (tp->t_inpcb) 303 so = tp->t_inpcb->inp_socket; 304#endif 305#ifdef INET6 306 if (tp->t_in6pcb) 307 so = tp->t_in6pcb->in6p_socket; 308#endif 309 ostate = tp->t_state; 310#endif /* TCP_DEBUG */ 311 312 /* 313 * Retransmission timer went off. Message has not 314 * been acked within retransmit interval. Back off 315 * to a longer retransmit interval and retransmit one segment. 316 */ 317 318 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 319 tp->t_rxtshift = TCP_MAXRXTSHIFT; 320 tcpstat.tcps_timeoutdrop++; 321 tp = tcp_drop(tp, tp->t_softerror ? 322 tp->t_softerror : ETIMEDOUT); 323 goto out; 324 } 325 tcpstat.tcps_rexmttimeo++; 326 rto = TCP_REXMTVAL(tp); 327 if (rto < tp->t_rttmin) 328 rto = tp->t_rttmin; 329 TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift], 330 tp->t_rttmin, TCPTV_REXMTMAX); 331 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 332 333 /* 334 * If we are losing and we are trying path MTU discovery, 335 * try turning it off. This will avoid black holes in 336 * the network which suppress or fail to send "packet 337 * too big" ICMP messages. We should ideally do 338 * lots more sophisticated searching to find the right 339 * value here... 
340 */ 341 if (tp->t_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { 342 tcpstat.tcps_pmtublackhole++; 343 344#ifdef INET 345 /* try turning PMTUD off */ 346 if (tp->t_inpcb) 347 tp->t_mtudisc = 0; 348#endif 349#ifdef INET6 350 /* try using IPv6 minimum MTU */ 351 if (tp->t_in6pcb) 352 tp->t_mtudisc = 0; 353#endif 354 355 /* XXX: more sophisticated Black hole recovery code? */ 356 } 357 358 /* 359 * If losing, let the lower level know and try for 360 * a better route. Also, if we backed off this far, 361 * our srtt estimate is probably bogus. Clobber it 362 * so we'll take the next rtt measurement as our srtt; 363 * move the current srtt into rttvar to keep the current 364 * retransmit times until then. 365 */ 366 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 367#ifdef INET 368 if (tp->t_inpcb) 369 in_losing(tp->t_inpcb); 370#endif 371#ifdef INET6 372 if (tp->t_in6pcb) 373 in6_losing(tp->t_in6pcb); 374#endif 375 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 376 tp->t_srtt = 0; 377 } 378 tp->snd_nxt = tp->snd_una; 379 /* 380 * If timing a segment in this window, stop the timer. 381 */ 382 tp->t_rtttime = 0; 383 /* 384 * Remember if we are retransmitting a SYN, because if 385 * we do, set the initial congestion window must be set 386 * to 1 segment. 387 */ 388 if (tp->t_state == TCPS_SYN_SENT) 389 tp->t_flags |= TF_SYN_REXMT; 390 /* 391 * Close the congestion window down to one segment 392 * (we'll open it by one segment for each ack we get). 393 * Since we probably have a window's worth of unacked 394 * data accumulated, this "slow start" keeps us from 395 * dumping all that data as back-to-back packets (which 396 * might overwhelm an intermediate gateway). 397 * 398 * There are two phases to the opening: Initially we 399 * open by one mss on each ack. This makes the window 400 * size increase exponentially with time. If the 401 * window is larger than the path can handle, this 402 * exponential growth results in dropped packet(s) 403 * almost immediately. 
To get more time between 404 * drops but still "push" the network to take advantage 405 * of improving conditions, we switch from exponential 406 * to linear window opening at some threshhold size. 407 * For a threshhold, we use half the current window 408 * size, truncated to a multiple of the mss. 409 * 410 * (the minimum cwnd that will give us exponential 411 * growth is 2 mss. We don't allow the threshhold 412 * to go below this.) 413 */ 414 { 415 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz; 416 if (win < 2) 417 win = 2; 418 /* Loss Window MUST be one segment. */ 419 tp->snd_cwnd = tp->t_segsz; 420 tp->snd_ssthresh = win * tp->t_segsz; 421 tp->t_dupacks = 0; 422 } 423 (void) tcp_output(tp); 424 425 out: 426#ifdef TCP_DEBUG 427 if (tp && so->so_options & SO_DEBUG) 428 tcp_trace(TA_USER, ostate, tp, NULL, 429 PRU_SLOWTIMO | (TCPT_REXMT << 8)); 430#endif 431 splx(s); 432} 433 434void 435tcp_timer_persist(void *arg) 436{ 437 struct tcpcb *tp = arg; 438 uint32_t rto; 439 int s; 440#ifdef TCP_DEBUG 441 struct socket *so; 442 short ostate; 443#endif 444 445 s = splsoftnet(); 446 callout_ack(&tp->t_timer[TCPT_PERSIST]); 447 if (tcp_isdead(tp)) { 448 splx(s); 449 return; 450 } 451 452#ifdef TCP_DEBUG 453#ifdef INET 454 if (tp->t_inpcb) 455 so = tp->t_inpcb->inp_socket; 456#endif 457#ifdef INET6 458 if (tp->t_in6pcb) 459 so = tp->t_in6pcb->in6p_socket; 460#endif 461 462 ostate = tp->t_state; 463#endif /* TCP_DEBUG */ 464 465 /* 466 * Persistance timer into zero window. 467 * Force a byte to be output, if possible. 468 */ 469 470 /* 471 * Hack: if the peer is dead/unreachable, we do not 472 * time out if the window is closed. After a full 473 * backoff, drop the connection if the idle time 474 * (no responses to probes) reaches the maximum 475 * backoff that we would use if retransmitting. 
476 */ 477 rto = TCP_REXMTVAL(tp); 478 if (rto < tp->t_rttmin) 479 rto = tp->t_rttmin; 480 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 481 ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle || 482 (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { 483 tcpstat.tcps_persistdrops++; 484 tp = tcp_drop(tp, ETIMEDOUT); 485 goto out; 486 } 487 tcpstat.tcps_persisttimeo++; 488 tcp_setpersist(tp); 489 tp->t_force = 1; 490 (void) tcp_output(tp); 491 tp->t_force = 0; 492 493 out: 494#ifdef TCP_DEBUG 495 if (tp && so->so_options & SO_DEBUG) 496 tcp_trace(TA_USER, ostate, tp, NULL, 497 PRU_SLOWTIMO | (TCPT_PERSIST << 8)); 498#endif 499 splx(s); 500} 501 502void 503tcp_timer_keep(void *arg) 504{ 505 struct tcpcb *tp = arg; 506 struct socket *so = NULL; /* Quell compiler warning */ 507 int s; 508#ifdef TCP_DEBUG 509 short ostate; 510#endif 511 512 s = splsoftnet(); 513 callout_ack(&tp->t_timer[TCPT_KEEP]); 514 if (tcp_isdead(tp)) { 515 splx(s); 516 return; 517 } 518 519#ifdef TCP_DEBUG 520 ostate = tp->t_state; 521#endif /* TCP_DEBUG */ 522 523 /* 524 * Keep-alive timer went off; send something 525 * or drop connection if idle for too long. 526 */ 527 528 tcpstat.tcps_keeptimeo++; 529 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 530 goto dropit; 531#ifdef INET 532 if (tp->t_inpcb) 533 so = tp->t_inpcb->inp_socket; 534#endif 535#ifdef INET6 536 if (tp->t_in6pcb) 537 so = tp->t_in6pcb->in6p_socket; 538#endif 539 if (so->so_options & SO_KEEPALIVE && 540 tp->t_state <= TCPS_CLOSE_WAIT) { 541 if ((tcp_maxidle > 0) && 542 ((tcp_now - tp->t_rcvtime) >= 543 tcp_keepidle + tcp_maxidle)) 544 goto dropit; 545 /* 546 * Send a packet designed to force a response 547 * if the peer is up and reachable: 548 * either an ACK if the connection is still alive, 549 * or an RST if the peer has closed the connection 550 * due to timeout or reboot. 
551 * Using sequence number tp->snd_una-1 552 * causes the transmitted zero-length segment 553 * to lie outside the receive window; 554 * by the protocol spec, this requires the 555 * correspondent TCP to respond. 556 */ 557 tcpstat.tcps_keepprobe++; 558 if (tcp_compat_42) { 559 /* 560 * The keepalive packet must have nonzero 561 * length to get a 4.2 host to respond. 562 */ 563 (void)tcp_respond(tp, tp->t_template, 564 (struct mbuf *)NULL, NULL, tp->rcv_nxt - 1, 565 tp->snd_una - 1, 0); 566 } else { 567 (void)tcp_respond(tp, tp->t_template, 568 (struct mbuf *)NULL, NULL, tp->rcv_nxt, 569 tp->snd_una - 1, 0); 570 } 571 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); 572 } else 573 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); 574 575#ifdef TCP_DEBUG 576 if (tp && so->so_options & SO_DEBUG) 577 tcp_trace(TA_USER, ostate, tp, NULL, 578 PRU_SLOWTIMO | (TCPT_KEEP << 8)); 579#endif 580 splx(s); 581 return; 582 583 dropit: 584 tcpstat.tcps_keepdrops++; 585 (void) tcp_drop(tp, ETIMEDOUT); 586 splx(s); 587} 588 589void 590tcp_timer_2msl(void *arg) 591{ 592 struct tcpcb *tp = arg; 593 int s; 594#ifdef TCP_DEBUG 595 struct socket *so; 596 short ostate; 597#endif 598 599 s = splsoftnet(); 600 callout_ack(&tp->t_timer[TCPT_2MSL]); 601 if (tcp_isdead(tp)) { 602 splx(s); 603 return; 604 } 605 606#ifdef TCP_DEBUG 607#ifdef INET 608 if (tp->t_inpcb) 609 so = tp->t_inpcb->inp_socket; 610#endif 611#ifdef INET6 612 if (tp->t_in6pcb) 613 so = tp->t_in6pcb->in6p_socket; 614#endif 615 616 ostate = tp->t_state; 617#endif /* TCP_DEBUG */ 618 619 /* 620 * 2 MSL timeout in shutdown went off. If we're closed but 621 * still waiting for peer to close and connection has been idle 622 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 623 * control block. Otherwise, check again in a bit. 
624 */ 625 if (tp->t_state != TCPS_TIME_WAIT && 626 ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle))) 627 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); 628 else 629 tp = tcp_close(tp); 630 631#ifdef TCP_DEBUG 632 if (tp && so->so_options & SO_DEBUG) 633 tcp_trace(TA_USER, ostate, tp, NULL, 634 PRU_SLOWTIMO | (TCPT_2MSL << 8)); 635#endif 636 splx(s); 637} 638