tcp_input.c revision 6348
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 34 * $Id: tcp_input.c,v 1.11 1995/02/09 23:13:23 wollman Exp $ 35 */ 36 37#ifndef TUBA_INCLUDE 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/malloc.h> 41#include <sys/mbuf.h> 42#include <sys/protosw.h> 43#include <sys/socket.h> 44#include <sys/socketvar.h> 45#include <sys/errno.h> 46 47#include <net/if.h> 48#include <net/route.h> 49 50#include <netinet/in.h> 51#include <netinet/in_systm.h> 52#include <netinet/ip.h> 53#include <netinet/in_pcb.h> 54#include <netinet/ip_var.h> 55#include <netinet/tcp.h> 56#include <netinet/tcp_fsm.h> 57#include <netinet/tcp_seq.h> 58#include <netinet/tcp_timer.h> 59#include <netinet/tcp_var.h> 60#include <netinet/tcpip.h> 61#ifdef TCPDEBUG 62#include <netinet/tcp_debug.h> 63struct tcpiphdr tcp_saveti; 64#endif 65 66int tcprexmtthresh = 3; 67struct inpcb *tcp_last_inpcb = &tcb; 68tcp_seq tcp_iss; 69tcp_cc tcp_ccgen; 70struct inpcb tcb; 71struct tcpstat tcpstat; 72u_long tcp_now; 73 74#endif /* TUBA_INCLUDE */ 75 76/* 77 * Insert segment ti into reassembly queue of tcp with 78 * control block tp. Return TH_FIN if reassembly now includes 79 * a segment with FIN. The macro form does the common case inline 80 * (segment is the next to be received on an established connection, 81 * and the queue is empty), avoiding linkage into and removal 82 * from the queue and repetition of various conversions. 83 * Set DELACK for segments received in order, but ack immediately 84 * when segments are out of order (so fast retransmit can work). 85 */ 86#define TCP_REASS(tp, ti, m, so, flags) { \ 87 if ((ti)->ti_seq == (tp)->rcv_nxt && \ 88 (tp)->seg_next == (struct tcpiphdr *)(tp) && \ 89 (tp)->t_state == TCPS_ESTABLISHED) { \ 90 tp->t_flags |= TF_DELACK; \ 91 (tp)->rcv_nxt += (ti)->ti_len; \ 92 flags = (ti)->ti_flags & TH_FIN; \ 93 tcpstat.tcps_rcvpack++;\ 94 tcpstat.tcps_rcvbyte += (ti)->ti_len;\ 95 sbappend(&(so)->so_rcv, (m)); \ 96 sorwakeup(so); \ 97 } else { \ 98 (flags) = tcp_reass((tp), (ti), (m)); \ 99 tp->t_flags |= TF_ACKNOW; \ 100 } \ 101} 102#ifndef TUBA_INCLUDE 103 104int 105tcp_reass(tp, ti, m) 106 register struct tcpcb *tp; 107 register struct tcpiphdr *ti; 108 struct mbuf *m; 109{ 110 register struct tcpiphdr *q; 111 struct socket *so = tp->t_inpcb->inp_socket; 112 int flags; 113 114 /* 115 * Call with ti==0 after become established to 116 * force pre-ESTABLISHED data up to user socket. 117 */ 118 if (ti == 0) 119 goto present; 120 121 /* 122 * Find a segment which begins after this one does. 123 */ 124 for (q = tp->seg_next; q != (struct tcpiphdr *)tp; 125 q = (struct tcpiphdr *)q->ti_next) 126 if (SEQ_GT(q->ti_seq, ti->ti_seq)) 127 break; 128 129 /* 130 * If there is a preceding segment, it may provide some of 131 * our data already. If so, drop the data from the incoming 132 * segment. If it provides all of our data, drop us. 133 */ 134 if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { 135 register int i; 136 q = (struct tcpiphdr *)q->ti_prev; 137 /* conversion to int (in i) handles seq wraparound */ 138 i = q->ti_seq + q->ti_len - ti->ti_seq; 139 if (i > 0) { 140 if (i >= ti->ti_len) { 141 tcpstat.tcps_rcvduppack++; 142 tcpstat.tcps_rcvdupbyte += ti->ti_len; 143 m_freem(m); 144#ifdef TTCP 145 /* 146 * Try to present any queued data 147 * at the left window edge to the user. 148 * This is needed after the 3-WHS 149 * completes. 150 */ 151 goto present; /* ??? */ 152#else 153 return (0); 154#endif 155 } 156 m_adj(m, i); 157 ti->ti_len -= i; 158 ti->ti_seq += i; 159 } 160 q = (struct tcpiphdr *)(q->ti_next); 161 } 162 tcpstat.tcps_rcvoopack++; 163 tcpstat.tcps_rcvoobyte += ti->ti_len; 164 REASS_MBUF(ti) = m; /* XXX */ 165 166 /* 167 * While we overlap succeeding segments trim them or, 168 * if they are completely covered, dequeue them. 169 */ 170 while (q != (struct tcpiphdr *)tp) { 171 register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; 172 if (i <= 0) 173 break; 174 if (i < q->ti_len) { 175 q->ti_seq += i; 176 q->ti_len -= i; 177 m_adj(REASS_MBUF(q), i); 178 break; 179 } 180 q = (struct tcpiphdr *)q->ti_next; 181 m = REASS_MBUF((struct tcpiphdr *)q->ti_prev); 182 remque(q->ti_prev); 183 m_freem(m); 184 } 185 186 /* 187 * Stick new segment in its place. 188 */ 189 insque(ti, q->ti_prev); 190 191present: 192 /* 193 * Present data to user, advancing rcv_nxt through 194 * completed sequence space. 195 */ 196 if (!TCPS_HAVEESTABLISHED(tp->t_state)) 197 return (0); 198 ti = tp->seg_next; 199 if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) 200 return (0); 201 if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) 202 return (0); 203 do { 204 tp->rcv_nxt += ti->ti_len; 205 flags = ti->ti_flags & TH_FIN; 206 remque(ti); 207 m = REASS_MBUF(ti); 208 ti = (struct tcpiphdr *)ti->ti_next; 209 if (so->so_state & SS_CANTRCVMORE) 210 m_freem(m); 211 else 212 sbappend(&so->so_rcv, m); 213 } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); 214 sorwakeup(so); 215 return (flags); 216} 217 218/* 219 * TCP input routine, follows pages 65-76 of the 220 * protocol specification dated September, 1981 very closely. 221 */ 222void 223tcp_input(m, iphlen) 224 register struct mbuf *m; 225 int iphlen; 226{ 227 register struct tcpiphdr *ti; 228 register struct inpcb *inp; 229 caddr_t optp = NULL; 230 int optlen = 0; 231 int len, tlen, off; 232 register struct tcpcb *tp = 0; 233 register int tiflags; 234 struct socket *so = 0; 235 int todrop, acked, ourfinisacked, needoutput = 0; 236 struct in_addr laddr; 237 int dropsocket = 0; 238 int iss = 0; 239#ifdef TTCP 240 u_long tiwin; 241 struct tcpopt to; /* options in this segment */ 242 struct rmxp_tao *taop; /* pointer to our TAO cache entry */ 243 struct rmxp_tao tao_noncached; /* in case there's no cached entry */ 244#else 245 u_long tiwin, ts_val, ts_ecr; 246 int ts_present = 0; 247#endif 248#ifdef TCPDEBUG 249 short ostate = 0; 250#endif 251 252#ifdef TTCP 253 bzero((char *)&to, sizeof(to)); 254#endif 255 256 tcpstat.tcps_rcvtotal++; 257 /* 258 * Get IP and TCP header together in first mbuf. 259 * Note: IP leaves IP header in first mbuf. 260 */ 261 ti = mtod(m, struct tcpiphdr *); 262 if (iphlen > sizeof (struct ip)) 263 ip_stripoptions(m, (struct mbuf *)0); 264 if (m->m_len < sizeof (struct tcpiphdr)) { 265 if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { 266 tcpstat.tcps_rcvshort++; 267 return; 268 } 269 ti = mtod(m, struct tcpiphdr *); 270 } 271 272 /* 273 * Checksum extended TCP header and data. 274 */ 275 tlen = ((struct ip *)ti)->ip_len; 276 len = sizeof (struct ip) + tlen; 277 ti->ti_next = ti->ti_prev = 0; 278 ti->ti_x1 = 0; 279 ti->ti_len = (u_short)tlen; 280 HTONS(ti->ti_len); 281 ti->ti_sum = in_cksum(m, len); 282 if (ti->ti_sum) { 283 tcpstat.tcps_rcvbadsum++; 284 goto drop; 285 } 286#endif /* TUBA_INCLUDE */ 287 288 /* 289 * Check that TCP offset makes sense, 290 * pull out TCP options and adjust length. XXX 291 */ 292 off = ti->ti_off << 2; 293 if (off < sizeof (struct tcphdr) || off > tlen) { 294 tcpstat.tcps_rcvbadoff++; 295 goto drop; 296 } 297 tlen -= off; 298 ti->ti_len = tlen; 299 if (off > sizeof (struct tcphdr)) { 300 if (m->m_len < sizeof(struct ip) + off) { 301 if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { 302 tcpstat.tcps_rcvshort++; 303 return; 304 } 305 ti = mtod(m, struct tcpiphdr *); 306 } 307 optlen = off - sizeof (struct tcphdr); 308 optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr); 309 /* 310 * Do quick retrieval of timestamp options ("options 311 * prediction?"). If timestamp is the only option and it's 312 * formatted as recommended in RFC 1323 appendix A, we 313 * quickly get the values now and not bother calling 314 * tcp_dooptions(), etc. 315 */ 316 if ((optlen == TCPOLEN_TSTAMP_APPA || 317 (optlen > TCPOLEN_TSTAMP_APPA && 318 optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && 319 *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) && 320 (ti->ti_flags & TH_SYN) == 0) { 321#ifdef TTCP 322 to.to_flag |= TOF_TS; 323 to.to_tsval = ntohl(*(u_long *)(optp + 4)); 324 to.to_tsecr = ntohl(*(u_long *)(optp + 8)); 325#else 326 ts_present = 1; 327 ts_val = ntohl(*(u_long *)(optp + 4)); 328 ts_ecr = ntohl(*(u_long *)(optp + 8)); 329#endif 330 optp = NULL; /* we've parsed the options */ 331 } 332 } 333 tiflags = ti->ti_flags; 334 335 /* 336 * Convert TCP protocol specific fields to host format. 337 */ 338 NTOHL(ti->ti_seq); 339 NTOHL(ti->ti_ack); 340 NTOHS(ti->ti_win); 341 NTOHS(ti->ti_urp); 342 343 /* 344 * Locate pcb for segment. 345 */ 346findpcb: 347 inp = tcp_last_inpcb; 348 if (inp->inp_lport != ti->ti_dport || 349 inp->inp_fport != ti->ti_sport || 350 inp->inp_faddr.s_addr != ti->ti_src.s_addr || 351 inp->inp_laddr.s_addr != ti->ti_dst.s_addr) { 352 inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport, 353 ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD); 354 if (inp) 355 tcp_last_inpcb = inp; 356 ++tcpstat.tcps_pcbcachemiss; 357 } 358 359 /* 360 * If the state is CLOSED (i.e., TCB does not exist) then 361 * all data in the incoming segment is discarded. 362 * If the TCB exists but is in CLOSED state, it is embryonic, 363 * but should either do a listen or a connect soon. 364 */ 365 if (inp == 0) 366 goto dropwithreset; 367 tp = intotcpcb(inp); 368 if (tp == 0) 369 goto dropwithreset; 370 if (tp->t_state == TCPS_CLOSED) 371 goto drop; 372 373 /* Unscale the window into a 32-bit value. */ 374 if ((tiflags & TH_SYN) == 0) 375 tiwin = ti->ti_win << tp->snd_scale; 376 else 377 tiwin = ti->ti_win; 378 379 so = inp->inp_socket; 380 if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { 381#ifdef TCPDEBUG 382 if (so->so_options & SO_DEBUG) { 383 ostate = tp->t_state; 384 tcp_saveti = *ti; 385 } 386#endif 387 if (so->so_options & SO_ACCEPTCONN) { 388#ifdef TTCP 389 register struct tcpcb *tp0 = tp; 390#endif 391 so = sonewconn(so, 0); 392 if (so == 0) 393 goto drop; 394 /* 395 * This is ugly, but .... 396 * 397 * Mark socket as temporary until we're 398 * committed to keeping it. The code at 399 * ``drop'' and ``dropwithreset'' check the 400 * flag dropsocket to see if the temporary 401 * socket created here should be discarded. 402 * We mark the socket as discardable until 403 * we're committed to it below in TCPS_LISTEN. 404 */ 405 dropsocket++; 406 inp = (struct inpcb *)so->so_pcb; 407 inp->inp_laddr = ti->ti_dst; 408 inp->inp_lport = ti->ti_dport; 409#if BSD>=43 410 inp->inp_options = ip_srcroute(); 411#endif 412 tp = intotcpcb(inp); 413 tp->t_state = TCPS_LISTEN; 414#ifdef TTCP 415 tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT); 416#endif 417 418 /* Compute proper scaling value from buffer space */ 419 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 420 TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat) 421 tp->request_r_scale++; 422 } 423 } 424 425 /* 426 * Segment received on connection. 427 * Reset idle time and keep-alive timer. 428 */ 429 tp->t_idle = 0; 430 tp->t_timer[TCPT_KEEP] = tcp_keepidle; 431 432 /* 433 * Process options if not in LISTEN state, 434 * else do it below (after getting remote address). 435 */ 436 if (optp && tp->t_state != TCPS_LISTEN) 437 tcp_dooptions(tp, optp, optlen, ti, 438#ifdef TTCP 439 &to); 440#else 441 &ts_present, &ts_val, &ts_ecr); 442#endif 443 444 /* 445 * Header prediction: check for the two common cases 446 * of a uni-directional data xfer. If the packet has 447 * no control flags, is in-sequence, the window didn't 448 * change and we're not retransmitting, it's a 449 * candidate. If the length is zero and the ack moved 450 * forward, we're the sender side of the xfer. Just 451 * free the data acked & wake any higher level process 452 * that was blocked waiting for space. If the length 453 * is non-zero and the ack didn't move, we're the 454 * receiver side. If we're getting packets in-order 455 * (the reassembly queue is empty), add the data to 456 * the socket buffer and note that we need a delayed ack. 457#ifdef TTCP 458 * Make sure that the hidden state-flags are also off. 459 * Since we check for TCPS_ESTABLISHED above, it can only 460 * be TH_NEEDSYN. 461#endif 462 */ 463 if (tp->t_state == TCPS_ESTABLISHED && 464 (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && 465#ifdef TTCP 466 ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && 467 ((to.to_flag & TOF_TS) == 0 || 468 TSTMP_GEQ(to.to_tsval, tp->ts_recent)) && 469 /* 470 * Using the CC option is compulsory if once started: 471 * the segment is OK if no T/TCP was negotiated or 472 * if the segment has a CC option equal to CCrecv 473 */ 474 ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) || 475 (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) && 476#else 477 (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && 478#endif 479 ti->ti_seq == tp->rcv_nxt && 480 tiwin && tiwin == tp->snd_wnd && 481 tp->snd_nxt == tp->snd_max) { 482 483 /* 484 * If last ACK falls within this segment's sequence numbers, 485 * record the timestamp. 486 * NOTE that the test is modified according to the latest 487 * proposal of the tcplw@cray.com list (Braden 1993/04/26). 488 */ 489#ifdef TTCP 490 if ((to.to_flag & TOF_TS) != 0 && 491 SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { 492 tp->ts_recent_age = tcp_now; 493 tp->ts_recent = to.to_tsval; 494#else 495 if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { 496 tp->ts_recent_age = tcp_now; 497 tp->ts_recent = ts_val; 498#endif 499 } 500 501 if (ti->ti_len == 0) { 502 if (SEQ_GT(ti->ti_ack, tp->snd_una) && 503 SEQ_LEQ(ti->ti_ack, tp->snd_max) && 504 tp->snd_cwnd >= tp->snd_wnd) { 505 /* 506 * this is a pure ack for outstanding data. 507 */ 508 ++tcpstat.tcps_predack; 509#ifdef TTCP 510 if ((to.to_flag & TOF_TS) != 0) 511 tcp_xmit_timer(tp, 512 tcp_now - to.to_tsecr + 1); 513#else 514 if (ts_present) 515 tcp_xmit_timer(tp, tcp_now-ts_ecr+1); 516#endif 517 else if (tp->t_rtt && 518 SEQ_GT(ti->ti_ack, tp->t_rtseq)) 519 tcp_xmit_timer(tp, tp->t_rtt); 520 acked = ti->ti_ack - tp->snd_una; 521 tcpstat.tcps_rcvackpack++; 522 tcpstat.tcps_rcvackbyte += acked; 523 sbdrop(&so->so_snd, acked); 524 tp->snd_una = ti->ti_ack; 525 m_freem(m); 526 527 /* 528 * If all outstanding data are acked, stop 529 * retransmit timer, otherwise restart timer 530 * using current (possibly backed-off) value. 531 * If process is waiting for space, 532 * wakeup/selwakeup/signal. If data 533 * are ready to send, let tcp_output 534 * decide between more output or persist. 535 */ 536 if (tp->snd_una == tp->snd_max) 537 tp->t_timer[TCPT_REXMT] = 0; 538 else if (tp->t_timer[TCPT_PERSIST] == 0) 539 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 540 541 if (so->so_snd.sb_flags & SB_NOTIFY) 542 sowwakeup(so); 543 if (so->so_snd.sb_cc) 544 (void) tcp_output(tp); 545 return; 546 } 547 } else if (ti->ti_ack == tp->snd_una && 548 tp->seg_next == (struct tcpiphdr *)tp && 549 ti->ti_len <= sbspace(&so->so_rcv)) { 550 /* 551 * this is a pure, in-sequence data packet 552 * with nothing on the reassembly queue and 553 * we have enough buffer space to take it. 554 */ 555 ++tcpstat.tcps_preddat; 556 tp->rcv_nxt += ti->ti_len; 557 tcpstat.tcps_rcvpack++; 558 tcpstat.tcps_rcvbyte += ti->ti_len; 559 /* 560 * Drop TCP, IP headers and TCP options then add data 561 * to socket buffer. 562 */ 563 m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); 564 m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); 565 sbappend(&so->so_rcv, m); 566 sorwakeup(so); 567 tp->t_flags |= TF_DELACK; 568 return; 569 } 570 } 571 572 /* 573 * Drop TCP, IP headers and TCP options. 574 */ 575 m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); 576 m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); 577 578 /* 579 * Calculate amount of space in receive window, 580 * and then do TCP input processing. 581 * Receive window is amount of space in rcv queue, 582 * but not less than advertised window. 583 */ 584 { int win; 585 586 win = sbspace(&so->so_rcv); 587 if (win < 0) 588 win = 0; 589 tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); 590 } 591 592 switch (tp->t_state) { 593 594 /* 595 * If the state is LISTEN then ignore segment if it contains an RST. 596 * If the segment contains an ACK then it is bad and send a RST. 597 * If it does not contain a SYN then it is not interesting; drop it. 598 * Don't bother responding if the destination was a broadcast. 599 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial 600 * tp->iss, and send a segment: 601 * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 602 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 603 * Fill in remote peer address fields if not previously specified. 604 * Enter SYN_RECEIVED state, and process any other fields of this 605 * segment in this state. 606 */ 607 case TCPS_LISTEN: { 608 struct mbuf *am; 609 register struct sockaddr_in *sin; 610 611 if (tiflags & TH_RST) 612 goto drop; 613 if (tiflags & TH_ACK) 614 goto dropwithreset; 615 if ((tiflags & TH_SYN) == 0) 616 goto drop; 617 /* 618 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN 619 * in_broadcast() should never return true on a received 620 * packet with M_BCAST not set. 621 */ 622 if (m->m_flags & (M_BCAST|M_MCAST) || 623 IN_MULTICAST(ntohl(ti->ti_dst.s_addr))) 624 goto drop; 625 am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */ 626 if (am == NULL) 627 goto drop; 628 am->m_len = sizeof (struct sockaddr_in); 629 sin = mtod(am, struct sockaddr_in *); 630 sin->sin_family = AF_INET; 631 sin->sin_len = sizeof(*sin); 632 sin->sin_addr = ti->ti_src; 633 sin->sin_port = ti->ti_sport; 634 bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero)); 635 laddr = inp->inp_laddr; 636 if (inp->inp_laddr.s_addr == INADDR_ANY) 637 inp->inp_laddr = ti->ti_dst; 638 if (in_pcbconnect(inp, am)) { 639 inp->inp_laddr = laddr; 640 (void) m_free(am); 641 goto drop; 642 } 643 (void) m_free(am); 644 tp->t_template = tcp_template(tp); 645 if (tp->t_template == 0) { 646 tp = tcp_drop(tp, ENOBUFS); 647 dropsocket = 0; /* socket is already gone */ 648 goto drop; 649 } 650#ifdef TTCP 651 if ((taop = tcp_gettaocache(inp)) == NULL) { 652 taop = &tao_noncached; 653 bzero(taop, sizeof(*taop)); 654 } 655#endif /* TTCP */ 656 if (optp) 657 tcp_dooptions(tp, optp, optlen, ti, 658#ifdef TTCP 659 &to); 660#else 661 &ts_present, &ts_val, &ts_ecr); 662#endif 663 if (iss) 664 tp->iss = iss; 665 else 666 tp->iss = tcp_iss; 667 tcp_iss += TCP_ISSINCR/2; 668 tp->irs = ti->ti_seq; 669 tcp_sendseqinit(tp); 670 tcp_rcvseqinit(tp); 671#ifdef TTCP 672 /* 673 * Initialization of the tcpcb for transaction; 674 * set SND.WND = SEG.WND, 675 * initialize CCsend and CCrecv. 676 */ 677 tp->snd_wnd = tiwin; /* initial send-window */ 678 tp->cc_send = CC_INC(tcp_ccgen); 679 tp->cc_recv = to.to_cc; 680 /* 681 * Perform TAO test on incoming CC (SEG.CC) option, if any. 682 * - compare SEG.CC against cached CC from the same host, 683 * if any. 684 * - if SEG.CC > chached value, SYN must be new and is accepted 685 * immediately: save new CC in the cache, mark the socket 686 * connected, enter ESTABLISHED state, turn on flag to 687 * send a SYN in the next segment. 688 * A virtual advertised window is set in rcv_adv to 689 * initialize SWS prevention. Then enter normal segment 690 * processing: drop SYN, process data and FIN. 691 * - otherwise do a normal 3-way handshake. 692 */ 693 if ((to.to_flag & TOF_CC) != 0) { 694 if (taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) { 695 taop->tao_cc = to.to_cc; 696 tp->t_state = TCPS_ESTABLISHED; 697 698 /* 699 * If there is a FIN, or if there is data and the 700 * connection is local, then delay SYN,ACK(SYN) in 701 * the hope of piggy-backing it on a response 702 * segment. Otherwise must send ACK now in case 703 * the other side is slow starting. 704 */ 705 if ((tiflags & TH_FIN) || (ti->ti_len != 0 && 706 in_localaddr(inp->inp_faddr))) 707 tp->t_flags |= (TF_DELACK | TF_NEEDSYN); 708 else 709 tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); 710 tp->rcv_adv += tp->rcv_wnd; 711 tcpstat.tcps_connects++; 712 soisconnected(so); 713 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; 714 dropsocket = 0; /* committed to socket */ 715 tcpstat.tcps_accepts++; 716 goto trimthenstep6; 717 } 718 /* else do standard 3-way handshake */ 719 } else { 720 /* 721 * No CC option, but maybe CC.NEW: 722 * invalidate cached value. 723 */ 724 taop->tao_cc = 0; 725 } 726 /* 727 * TAO test failed or there was no CC option, 728 * do a standard 3-way handshake. 729 */ 730#endif /* TTCP */ 731 tp->t_flags |= TF_ACKNOW; 732 tp->t_state = TCPS_SYN_RECEIVED; 733 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; 734 dropsocket = 0; /* committed to socket */ 735 tcpstat.tcps_accepts++; 736 goto trimthenstep6; 737 } 738 739 /* 740 * If the state is SYN_SENT: 741 * if seg contains an ACK, but not for our SYN, drop the input. 742 * if seg contains a RST, then drop the connection. 743 * if seg does not contain SYN, then drop it. 744 * Otherwise this is an acceptable SYN segment 745 * initialize tp->rcv_nxt and tp->irs 746 * if seg contains ack then advance tp->snd_una 747 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 748 * arrange for segment to be acked (eventually) 749 * continue processing rest of data/controls, beginning with URG 750 */ 751 case TCPS_SYN_SENT: 752#ifdef TTCP 753 if ((taop = tcp_gettaocache(inp)) == NULL) { 754 taop = &tao_noncached; 755 bzero(taop, sizeof(*taop)); 756 } 757 758 if ((tiflags & TH_ACK) && 759 (SEQ_LEQ(ti->ti_ack, tp->iss) || 760 SEQ_GT(ti->ti_ack, tp->snd_max))) { 761 /* 762 * If we have a cached CCsent for the remote host, 763 * hence we haven't just crashed and restarted, 764 * do not send a RST. This may be a retransmission 765 * from the other side after our earlier ACK was lost. 766 * Our new SYN, when it arrives, will serve as the 767 * needed ACK. 768 */ 769 if (taop->tao_ccsent != 0) 770 goto drop; 771 else 772 goto dropwithreset; 773 } 774#else 775 if ((tiflags & TH_ACK) && 776 (SEQ_LEQ(ti->ti_ack, tp->iss) || 777 SEQ_GT(ti->ti_ack, tp->snd_max))) 778 goto dropwithreset; 779#endif 780 if (tiflags & TH_RST) { 781 if (tiflags & TH_ACK) 782 tp = tcp_drop(tp, ECONNREFUSED); 783 goto drop; 784 } 785 if ((tiflags & TH_SYN) == 0) 786 goto drop; 787#ifdef TTCP 788 tp->snd_wnd = ti->ti_win; /* initial send window */ 789 tp->cc_recv = to.to_cc; /* foreign CC */ 790#else 791 if (tiflags & TH_ACK) { 792 tp->snd_una = ti->ti_ack; 793 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 794 tp->snd_nxt = tp->snd_una; 795 } 796 tp->t_timer[TCPT_REXMT] = 0; 797#endif 798 799 tp->irs = ti->ti_seq; 800 tcp_rcvseqinit(tp); 801#ifndef TTCP 802 tp->t_flags |= TF_ACKNOW; 803 if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { 804#else 805 if (tiflags & TH_ACK && SEQ_GT(ti->ti_ack, tp->iss)) { 806#endif 807 tcpstat.tcps_connects++; 808 soisconnected(so); 809 /* Do window scaling on this connection? */ 810 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 811 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 812 tp->snd_scale = tp->requested_s_scale; 813 tp->rcv_scale = tp->request_r_scale; 814 } 815#ifdef TTCP 816 /* 817 * Our SYN was acked. If segment contains CC.ECHO 818 * option, check it to make sure this segment really 819 * matches our SYN. If not, just drop it as old 820 * duplicate, but send an RST if we're still playing 821 * by the old rules. 822 */ 823 if ((to.to_flag & TOF_CCECHO) && 824 tp->cc_send != to.to_ccecho) { 825 if (taop->tao_ccsent != 0) 826 goto drop; 827 else 828 goto dropwithreset; 829 } 830 /* Segment is acceptable, update cache if undefined. */ 831 if (taop->tao_ccsent == 0) 832 taop->tao_ccsent = to.to_ccecho; 833 834 tp->rcv_adv += tp->rcv_wnd; 835 tp->snd_una++; /* SYN is acked */ 836 /* 837 * If there's data, delay ACK; if there's also a FIN 838 * ACKNOW will be turned on later. 839 */ 840 if (ti->ti_len != 0) 841 tp->t_flags |= TF_DELACK; 842 else 843 tp->t_flags |= TF_ACKNOW; 844 /* 845 * Received <SYN,ACK> in SYN_SENT[*] state. 846 * Transitions: 847 * SYN_SENT --> ESTABLISHED 848 * SYN_SENT* --> FIN_WAIT_1 849 */ 850 if (tp->t_flags & TF_NEEDFIN) { 851 tp->t_state = TCPS_FIN_WAIT_1; 852 tp->t_flags &= ~TF_NEEDFIN; 853 tiflags &= ~TH_SYN; 854 } else 855 tp->t_state = TCPS_ESTABLISHED; 856#else 857 tp->t_state = TCPS_ESTABLISHED; 858 (void) tcp_reass(tp, (struct tcpiphdr *)0, 859 (struct mbuf *)0); 860 /* 861 * if we didn't have to retransmit the SYN, 862 * use its rtt as our initial srtt & rtt var. 863 */ 864 if (tp->t_rtt) 865 tcp_xmit_timer(tp, tp->t_rtt); 866#endif 867 868#ifdef TTCP 869 } else { 870 /* 871 * Received initial SYN in SYN-SENT[*] state => simul- 872 * taneous open. If segment contains CC option and there is 873 * a cached CC, apply TAO test; if it succeeds, connection is 874 * half-synchronized. Otherwise, do 3-way handshake: 875 * SYN-SENT -> SYN-RECEIVED 876 * SYN-SENT* -> SYN-RECEIVED* 877 * If there was no CC option, clear cached CC value. 878 */ 879 tp->t_flags |= TF_ACKNOW; 880 tp->t_timer[TCPT_REXMT] = 0; 881 if (to.to_flag & TOF_CC) { 882 if (taop->tao_cc != 0 && 883 CC_GT(to.to_cc, taop->tao_cc)) { 884 /* 885 * update cache and make transition: 886 * SYN-SENT -> ESTABLISHED* 887 * SYN-SENT* -> FIN-WAIT-1* 888 */ 889 taop->tao_cc = to.to_cc; 890 if (tp->t_flags & TF_NEEDFIN) { 891 tp->t_state = TCPS_FIN_WAIT_1; 892 tp->t_flags &= ~TF_NEEDFIN; 893 } else 894 tp->t_state = TCPS_ESTABLISHED; 895 tp->t_flags |= TF_NEEDSYN; 896 } else 897 tp->t_state = TCPS_SYN_RECEIVED; 898 } else { 899 /* CC.NEW or no option => invalidate cache */ 900 taop->tao_cc = 0; 901 tp->t_state = TCPS_SYN_RECEIVED; 902 } 903 } 904#else 905 } else 906 tp->t_state = TCPS_SYN_RECEIVED; 907#endif 908 909trimthenstep6: 910 /* 911 * Advance ti->ti_seq to correspond to first data byte. 912 * If data, trim to stay within window, 913 * dropping FIN if necessary. 914 */ 915 ti->ti_seq++; 916 if (ti->ti_len > tp->rcv_wnd) { 917 todrop = ti->ti_len - tp->rcv_wnd; 918 m_adj(m, -todrop); 919 ti->ti_len = tp->rcv_wnd; 920 tiflags &= ~TH_FIN; 921 tcpstat.tcps_rcvpackafterwin++; 922 tcpstat.tcps_rcvbyteafterwin += todrop; 923 } 924 tp->snd_wl1 = ti->ti_seq - 1; 925 tp->rcv_up = ti->ti_seq; 926#ifdef TTCP 927 /* 928 * Client side of transaction: already sent SYN and data. 929 * If the remote host used T/TCP to validate the SYN, 930 * our data will be ACK'd; if so, enter normal data segment 931 * processing in the middle of step 5, ack processing. 932 * Otherwise, goto step 6. 933 */ 934 if (tiflags & TH_ACK) 935 goto process_ACK; 936#endif 937 goto step6; 938#ifdef TTCP 939 /* 940 * If the state is LAST_ACK or CLOSING or TIME_WAIT: 941 * if segment contains a SYN and CC [not CC.NEW] option: 942 * if state == TIME_WAIT and connection duration > MSL, 943 * drop packet and send RST; 944 * 945 * if SEG.CC > CCrecv then is new SYN, and can implicitly 946 * ack the FIN (and data) in retransmission queue. 947 * Complete close and delete TCPCB. Then reprocess 948 * segment, hoping to find new TCPCB in LISTEN state; 949 * 950 * else must be old SYN; drop it. 951 * else do normal processing. 952 */ 953 case TCPS_LAST_ACK: 954 case TCPS_CLOSING: 955 case TCPS_TIME_WAIT: 956 if ((tiflags & TH_SYN) && 957 (to.to_flag & TOF_CC) && tp->cc_recv != 0) { 958 if (tp->t_state == TCPS_TIME_WAIT && 959 tp->t_duration > TCPTV_MSL) 960 goto dropwithreset; 961 if (CC_GT(to.to_cc, tp->cc_recv)) { 962 tp = tcp_close(tp); 963 goto findpcb; 964 } 965 else 966 goto drop; 967 } 968 break; /* continue normal processing */ 969#endif 970 } 971 972 /* 973 * States other than LISTEN or SYN_SENT. 974 * First check timestamp, if present. 975#ifdef TTCP 976 * Then check the connection count, if present. 977#endif 978 * Then check that at least some bytes of segment are within 979 * receive window. If segment begins before rcv_nxt, 980 * drop leading data (and SYN); if nothing left, just ack. 981 * 982 * RFC 1323 PAWS: If we have a timestamp reply on this segment 983 * and it's less than ts_recent, drop it. 984 */ 985#ifdef TTCP 986 if ((to.to_flag & TOF_TS) != 0 && (tiflags & TH_RST) == 0 && 987 tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { 988#else 989 if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && 990 TSTMP_LT(ts_val, tp->ts_recent)) { 991#endif 992 993 /* Check to see if ts_recent is over 24 days old. */ 994 if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { 995 /* 996 * Invalidate ts_recent. If this segment updates 997 * ts_recent, the age will be reset later and ts_recent 998 * will get a valid value. If it does not, setting 999 * ts_recent to zero will at least satisfy the 1000 * requirement that zero be placed in the timestamp 1001 * echo reply when ts_recent isn't valid. The 1002 * age isn't reset until we get a valid ts_recent 1003 * because we don't want out-of-order segments to be 1004 * dropped when ts_recent is old. 1005 */ 1006 tp->ts_recent = 0; 1007 } else { 1008 tcpstat.tcps_rcvduppack++; 1009 tcpstat.tcps_rcvdupbyte += ti->ti_len; 1010 tcpstat.tcps_pawsdrop++; 1011 goto dropafterack; 1012 } 1013 } 1014 1015#ifdef TTCP 1016 /* 1017 * T/TCP mechanism 1018 * If T/TCP was negotiated and the segment doesn't have CC, 1019 * or if it's CC is wrong then drop the segment. 1020 * RST segments do not have to comply with this. 1021 */ 1022 if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) && 1023 ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc) && 1024 (tiflags & TH_RST) == 0) 1025 goto dropafterack; 1026#endif 1027 1028 todrop = tp->rcv_nxt - ti->ti_seq; 1029 if (todrop > 0) { 1030 if (tiflags & TH_SYN) { 1031 tiflags &= ~TH_SYN; 1032 ti->ti_seq++; 1033 if (ti->ti_urp > 1) 1034 ti->ti_urp--; 1035 else 1036 tiflags &= ~TH_URG; 1037 todrop--; 1038 } 1039 if (todrop >= ti->ti_len) { 1040 tcpstat.tcps_rcvduppack++; 1041 tcpstat.tcps_rcvdupbyte += ti->ti_len; 1042 /* 1043 * If segment is just one to the left of the window, 1044 * check two special cases: 1045 * 1. Don't toss RST in response to 4.2-style keepalive. 1046 * 2. If the only thing to drop is a FIN, we can drop 1047 * it, but check the ACK or we will get into FIN 1048 * wars if our FINs crossed (both CLOSING). 1049 * In either case, send ACK to resynchronize, 1050 * but keep on processing for RST or ACK. 1051 */ 1052 if ((tiflags & TH_FIN && todrop == ti->ti_len + 1) 1053#ifdef TCP_COMPAT_42 1054 || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1) 1055#endif 1056 ) { 1057 todrop = ti->ti_len; 1058 tiflags &= ~TH_FIN; 1059 tp->t_flags |= TF_ACKNOW; 1060 } else { 1061 /* 1062 * Handle the case when a bound socket connects 1063 * to itself. Allow packets with a SYN and 1064 * an ACK to continue with the processing. 1065 */ 1066 if (todrop != 0 || (tiflags & TH_ACK) == 0) 1067 goto dropafterack; 1068 } 1069 } else { 1070 tcpstat.tcps_rcvpartduppack++; 1071 tcpstat.tcps_rcvpartdupbyte += todrop; 1072 } 1073 m_adj(m, todrop); 1074 ti->ti_seq += todrop; 1075 ti->ti_len -= todrop; 1076 if (ti->ti_urp > todrop) 1077 ti->ti_urp -= todrop; 1078 else { 1079 tiflags &= ~TH_URG; 1080 ti->ti_urp = 0; 1081 } 1082 } 1083 1084 /* 1085 * If new data are received on a connection after the 1086 * user processes are gone, then RST the other end. 1087 */ 1088 if ((so->so_state & SS_NOFDREF) && 1089 tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { 1090 tp = tcp_close(tp); 1091 tcpstat.tcps_rcvafterclose++; 1092 goto dropwithreset; 1093 } 1094 1095 /* 1096 * If segment ends after window, drop trailing data 1097 * (and PUSH and FIN); if nothing left, just ACK. 1098 */ 1099 todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); 1100 if (todrop > 0) { 1101 tcpstat.tcps_rcvpackafterwin++; 1102 if (todrop >= ti->ti_len) { 1103 tcpstat.tcps_rcvbyteafterwin += ti->ti_len; 1104 /* 1105 * If a new connection request is received 1106 * while in TIME_WAIT, drop the old connection 1107 * and start over if the sequence numbers 1108 * are above the previous ones. 1109 */ 1110 if (tiflags & TH_SYN && 1111 tp->t_state == TCPS_TIME_WAIT && 1112 SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { 1113 iss = tp->rcv_nxt + TCP_ISSINCR; 1114 tp = tcp_close(tp); 1115 goto findpcb; 1116 } 1117 /* 1118 * If window is closed can only take segments at 1119 * window edge, and have to drop data and PUSH from 1120 * incoming segments. Continue processing, but 1121 * remember to ack. Otherwise, drop segment 1122 * and ack. 1123 */ 1124 if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { 1125 tp->t_flags |= TF_ACKNOW; 1126 tcpstat.tcps_rcvwinprobe++; 1127 } else 1128 goto dropafterack; 1129 } else 1130 tcpstat.tcps_rcvbyteafterwin += todrop; 1131 m_adj(m, -todrop); 1132 ti->ti_len -= todrop; 1133 tiflags &= ~(TH_PUSH|TH_FIN); 1134 } 1135 1136 /* 1137 * If last ACK falls within this segment's sequence numbers, 1138 * record its timestamp. 1139 * NOTE that the test is modified according to the latest 1140 * proposal of the tcplw@cray.com list (Braden 1993/04/26). 1141 */ 1142#ifdef TTCP 1143 if ((to.to_flag & TOF_TS) != 0 && 1144 SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { 1145 tp->ts_recent_age = tcp_now; 1146 tp->ts_recent = to.to_tsval; 1147#else 1148 if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { 1149 tp->ts_recent_age = tcp_now; 1150 tp->ts_recent = ts_val; 1151#endif 1152 } 1153 1154 /* 1155 * If the RST bit is set examine the state: 1156 * SYN_RECEIVED STATE: 1157 * If passive open, return to LISTEN state. 1158 * If active open, inform user that connection was refused. 1159 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: 1160 * Inform user that connection was reset, and close tcb. 1161 * CLOSING, LAST_ACK, TIME_WAIT STATES 1162 * Close the tcb. 1163 */ 1164 if (tiflags&TH_RST) switch (tp->t_state) { 1165 1166 case TCPS_SYN_RECEIVED: 1167 so->so_error = ECONNREFUSED; 1168 goto close; 1169 1170 case TCPS_ESTABLISHED: 1171 case TCPS_FIN_WAIT_1: 1172 case TCPS_FIN_WAIT_2: 1173 case TCPS_CLOSE_WAIT: 1174 so->so_error = ECONNRESET; 1175 close: 1176 tp->t_state = TCPS_CLOSED; 1177 tcpstat.tcps_drops++; 1178 tp = tcp_close(tp); 1179 goto drop; 1180 1181 case TCPS_CLOSING: 1182 case TCPS_LAST_ACK: 1183 case TCPS_TIME_WAIT: 1184 tp = tcp_close(tp); 1185 goto drop; 1186 } 1187 1188 /* 1189 * If a SYN is in the window, then this is an 1190 * error and we send an RST and drop the connection. 1191 */ 1192 if (tiflags & TH_SYN) { 1193 tp = tcp_drop(tp, ECONNRESET); 1194 goto dropwithreset; 1195 } 1196 1197#ifdef TTCP 1198 /* 1199 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN 1200 * flag is on (half-synchronized state), then queue data for 1201 * later processing; else drop segment and return. 1202 */ 1203 if ((tiflags & TH_ACK) == 0) { 1204 if (tp->t_state == TCPS_SYN_RECEIVED || 1205 (tp->t_flags & TF_NEEDSYN)) 1206 goto step6; 1207 else 1208 goto drop; 1209 } 1210#else 1211 /* 1212 * If the ACK bit is off we drop the segment and return. 1213 */ 1214 if ((tiflags & TH_ACK) == 0) 1215 goto drop; 1216#endif 1217 1218 /* 1219 * Ack processing. 1220 */ 1221 switch (tp->t_state) { 1222 1223 /* 1224 * In SYN_RECEIVED state if the ack ACKs our SYN then enter 1225 * ESTABLISHED state and continue processing, otherwise 1226 * send an RST. 1227 */ 1228 case TCPS_SYN_RECEIVED: 1229 if (SEQ_GT(tp->snd_una, ti->ti_ack) || 1230 SEQ_GT(ti->ti_ack, tp->snd_max)) 1231 goto dropwithreset; 1232 1233 tcpstat.tcps_connects++; 1234 soisconnected(so); 1235 /* Do window scaling? */ 1236 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 1237 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 1238 tp->snd_scale = tp->requested_s_scale; 1239 tp->rcv_scale = tp->request_r_scale; 1240 } 1241#ifdef TTCP 1242 /* 1243 * Upon successful completion of 3-way handshake, 1244 * update cache.CC if it was undefined, pass any queued 1245 * data to the user, and advance state appropriately. 1246 */ 1247 if ((taop = tcp_gettaocache(inp)) != NULL && 1248 taop->tao_cc == 0) 1249 taop->tao_cc = tp->cc_recv; 1250 1251 /* 1252 * Make transitions: 1253 * SYN-RECEIVED -> ESTABLISHED 1254 * SYN-RECEIVED* -> FIN-WAIT-1 1255 */ 1256 if (tp->t_flags & TF_NEEDFIN) { 1257 tp->t_state = TCPS_FIN_WAIT_1; 1258 tp->t_flags &= ~TF_NEEDFIN; 1259 } else 1260 tp->t_state = TCPS_ESTABLISHED; 1261 /* 1262 * If segment contains data or ACK, will call tcp_reass() 1263 * later; if not, do so now to pass queued data to user. 1264 */ 1265 if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0) 1266 (void) tcp_reass(tp, (struct tcpiphdr *)0, 1267 (struct mbuf *)0); 1268#else /* TTCP */ 1269 tp->t_state = TCPS_ESTABLISHED; 1270 (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); 1271#endif /* TTCP */ 1272 tp->snd_wl1 = ti->ti_seq - 1; 1273 /* fall into ... */ 1274 1275 /* 1276 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 1277 * ACKs. If the ack is in the range 1278 * tp->snd_una < ti->ti_ack <= tp->snd_max 1279 * then advance tp->snd_una to ti->ti_ack and drop 1280 * data from the retransmission queue. If this ACK reflects 1281 * more up to date window information we update our window information. 1282 */ 1283 case TCPS_ESTABLISHED: 1284 case TCPS_FIN_WAIT_1: 1285 case TCPS_FIN_WAIT_2: 1286 case TCPS_CLOSE_WAIT: 1287 case TCPS_CLOSING: 1288 case TCPS_LAST_ACK: 1289 case TCPS_TIME_WAIT: 1290 1291 if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { 1292 if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { 1293 tcpstat.tcps_rcvdupack++; 1294 /* 1295 * If we have outstanding data (other than 1296 * a window probe), this is a completely 1297 * duplicate ack (ie, window info didn't 1298 * change), the ack is the biggest we've 1299 * seen and we've seen exactly our rexmt 1300 * threshhold of them, assume a packet 1301 * has been dropped and retransmit it. 1302 * Kludge snd_nxt & the congestion 1303 * window so we send only this one 1304 * packet. 1305 * 1306 * We know we're losing at the current 1307 * window size so do congestion avoidance 1308 * (set ssthresh to half the current window 1309 * and pull our congestion window back to 1310 * the new ssthresh). 1311 * 1312 * Dup acks mean that packets have left the 1313 * network (they're now cached at the receiver) 1314 * so bump cwnd by the amount in the receiver 1315 * to keep a constant cwnd packets in the 1316 * network. 1317 */ 1318 if (tp->t_timer[TCPT_REXMT] == 0 || 1319 ti->ti_ack != tp->snd_una) 1320 tp->t_dupacks = 0; 1321 else if (++tp->t_dupacks == tcprexmtthresh) { 1322 tcp_seq onxt = tp->snd_nxt; 1323 u_int win = 1324 min(tp->snd_wnd, tp->snd_cwnd) / 2 / 1325 tp->t_maxseg; 1326 1327 if (win < 2) 1328 win = 2; 1329 tp->snd_ssthresh = win * tp->t_maxseg; 1330 tp->t_timer[TCPT_REXMT] = 0; 1331 tp->t_rtt = 0; 1332 tp->snd_nxt = ti->ti_ack; 1333 tp->snd_cwnd = tp->t_maxseg; 1334 (void) tcp_output(tp); 1335 tp->snd_cwnd = tp->snd_ssthresh + 1336 tp->t_maxseg * tp->t_dupacks; 1337 if (SEQ_GT(onxt, tp->snd_nxt)) 1338 tp->snd_nxt = onxt; 1339 goto drop; 1340 } else if (tp->t_dupacks > tcprexmtthresh) { 1341 tp->snd_cwnd += tp->t_maxseg; 1342 (void) tcp_output(tp); 1343 goto drop; 1344 } 1345 } else 1346 tp->t_dupacks = 0; 1347 break; 1348 } 1349 /* 1350 * If the congestion window was inflated to account 1351 * for the other side's cached packets, retract it. 1352 */ 1353 if (tp->t_dupacks > tcprexmtthresh && 1354 tp->snd_cwnd > tp->snd_ssthresh) 1355 tp->snd_cwnd = tp->snd_ssthresh; 1356 tp->t_dupacks = 0; 1357 if (SEQ_GT(ti->ti_ack, tp->snd_max)) { 1358 tcpstat.tcps_rcvacktoomuch++; 1359 goto dropafterack; 1360 } 1361#ifdef TTCP 1362 /* 1363 * If we reach this point, ACK is not a duplicate, 1364 * i.e., it ACKs something we sent. 1365 */ 1366 if (tp->t_flags & TF_NEEDSYN) { 1367 /* 1368 * T/TCP: Connection was half-synchronized, and our 1369 * SYN has been ACK'd (so connection is now fully 1370 * synchronized). Go to non-starred state and 1371 * increment snd_una for ACK of SYN. 1372 */ 1373 tp->t_flags &= ~TF_NEEDSYN; 1374 tp->snd_una++; 1375 } 1376 1377process_ACK: 1378#endif 1379 acked = ti->ti_ack - tp->snd_una; 1380 tcpstat.tcps_rcvackpack++; 1381 tcpstat.tcps_rcvackbyte += acked; 1382 1383 /* 1384 * If we have a timestamp reply, update smoothed 1385 * round trip time. If no timestamp is present but 1386 * transmit timer is running and timed sequence 1387 * number was acked, update smoothed round trip time. 1388 * Since we now have an rtt measurement, cancel the 1389 * timer backoff (cf., Phil Karn's retransmit alg.). 1390 * Recompute the initial retransmit timer. 1391 */ 1392#ifdef TTCP 1393 if (to.to_flag & TOF_TS) 1394 tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1); 1395#else 1396 if (ts_present) 1397 tcp_xmit_timer(tp, tcp_now-ts_ecr+1); 1398#endif 1399 else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) 1400 tcp_xmit_timer(tp,tp->t_rtt); 1401 1402 /* 1403 * If all outstanding data is acked, stop retransmit 1404 * timer and remember to restart (more output or persist). 1405 * If there is more data to be acked, restart retransmit 1406 * timer, using current (possibly backed-off) value. 1407 */ 1408 if (ti->ti_ack == tp->snd_max) { 1409 tp->t_timer[TCPT_REXMT] = 0; 1410 needoutput = 1; 1411 } else if (tp->t_timer[TCPT_PERSIST] == 0) 1412 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 1413 1414#ifdef TTCP 1415 /* 1416 * If no data (only SYN) was ACK'd, 1417 * skip rest of ACK processing. 1418 */ 1419 if (acked == 0) 1420 goto step6; 1421#endif 1422 1423 /* 1424 * When new data is acked, open the congestion window. 1425 * If the window gives us less than ssthresh packets 1426 * in flight, open exponentially (maxseg per packet). 1427 * Otherwise open linearly: maxseg per window 1428 * (maxseg^2 / cwnd per packet). 1429 */ 1430 { 1431 register u_int cw = tp->snd_cwnd; 1432 register u_int incr = tp->t_maxseg; 1433 1434 if (cw > tp->snd_ssthresh) 1435 incr = incr * incr / cw; 1436 tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); 1437 } 1438 if (acked > so->so_snd.sb_cc) { 1439 tp->snd_wnd -= so->so_snd.sb_cc; 1440 sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); 1441 ourfinisacked = 1; 1442 } else { 1443 sbdrop(&so->so_snd, acked); 1444 tp->snd_wnd -= acked; 1445 ourfinisacked = 0; 1446 } 1447 if (so->so_snd.sb_flags & SB_NOTIFY) 1448 sowwakeup(so); 1449 tp->snd_una = ti->ti_ack; 1450 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) 1451 tp->snd_nxt = tp->snd_una; 1452 1453 switch (tp->t_state) { 1454 1455 /* 1456 * In FIN_WAIT_1 STATE in addition to the processing 1457 * for the ESTABLISHED state if our FIN is now acknowledged 1458 * then enter FIN_WAIT_2. 1459 */ 1460 case TCPS_FIN_WAIT_1: 1461 if (ourfinisacked) { 1462 /* 1463 * If we can't receive any more 1464 * data, then closing user can proceed. 1465 * Starting the timer is contrary to the 1466 * specification, but if we don't get a FIN 1467 * we'll hang forever. 1468 */ 1469 if (so->so_state & SS_CANTRCVMORE) { 1470 soisdisconnected(so); 1471 tp->t_timer[TCPT_2MSL] = tcp_maxidle; 1472 } 1473 tp->t_state = TCPS_FIN_WAIT_2; 1474 } 1475 break; 1476 1477 /* 1478 * In CLOSING STATE in addition to the processing for 1479 * the ESTABLISHED state if the ACK acknowledges our FIN 1480 * then enter the TIME-WAIT state, otherwise ignore 1481 * the segment. 1482 */ 1483 case TCPS_CLOSING: 1484 if (ourfinisacked) { 1485 tp->t_state = TCPS_TIME_WAIT; 1486 tcp_canceltimers(tp); 1487#ifdef TTCP 1488 /* Shorten TIME_WAIT [RFC-1644, p.28] */ 1489 if (tp->cc_recv != 0 && 1490 tp->t_duration < TCPTV_MSL) 1491 tp->t_timer[TCPT_2MSL] = 1492 tp->t_rxtcur * TCPTV_TWTRUNC; 1493 else 1494#endif 1495 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 1496 soisdisconnected(so); 1497 } 1498 break; 1499 1500 /* 1501 * In LAST_ACK, we may still be waiting for data to drain 1502 * and/or to be acked, as well as for the ack of our FIN. 1503 * If our FIN is now acknowledged, delete the TCB, 1504 * enter the closed state and return. 1505 */ 1506 case TCPS_LAST_ACK: 1507 if (ourfinisacked) { 1508 tp = tcp_close(tp); 1509 goto drop; 1510 } 1511 break; 1512 1513 /* 1514 * In TIME_WAIT state the only thing that should arrive 1515 * is a retransmission of the remote FIN. Acknowledge 1516 * it and restart the finack timer. 1517 */ 1518 case TCPS_TIME_WAIT: 1519 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 1520 goto dropafterack; 1521 } 1522 } 1523 1524step6: 1525 /* 1526 * Update window information. 1527 * Don't look at window if no ACK: TAC's send garbage on first SYN. 1528 */ 1529 if ((tiflags & TH_ACK) && 1530 (SEQ_LT(tp->snd_wl1, ti->ti_seq) || 1531 (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) || 1532 (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { 1533 /* keep track of pure window updates */ 1534 if (ti->ti_len == 0 && 1535 tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd) 1536 tcpstat.tcps_rcvwinupd++; 1537 tp->snd_wnd = tiwin; 1538 tp->snd_wl1 = ti->ti_seq; 1539 tp->snd_wl2 = ti->ti_ack; 1540 if (tp->snd_wnd > tp->max_sndwnd) 1541 tp->max_sndwnd = tp->snd_wnd; 1542 needoutput = 1; 1543 } 1544 1545 /* 1546 * Process segments with URG. 1547 */ 1548 if ((tiflags & TH_URG) && ti->ti_urp && 1549 TCPS_HAVERCVDFIN(tp->t_state) == 0) { 1550 /* 1551 * This is a kludge, but if we receive and accept 1552 * random urgent pointers, we'll crash in 1553 * soreceive. It's hard to imagine someone 1554 * actually wanting to send this much urgent data. 1555 */ 1556 if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) { 1557 ti->ti_urp = 0; /* XXX */ 1558 tiflags &= ~TH_URG; /* XXX */ 1559 goto dodata; /* XXX */ 1560 } 1561 /* 1562 * If this segment advances the known urgent pointer, 1563 * then mark the data stream. This should not happen 1564 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since 1565 * a FIN has been received from the remote side. 1566 * In these states we ignore the URG. 1567 * 1568 * According to RFC961 (Assigned Protocols), 1569 * the urgent pointer points to the last octet 1570 * of urgent data. We continue, however, 1571 * to consider it to indicate the first octet 1572 * of data past the urgent section as the original 1573 * spec states (in one of two places). 1574 */ 1575 if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { 1576 tp->rcv_up = ti->ti_seq + ti->ti_urp; 1577 so->so_oobmark = so->so_rcv.sb_cc + 1578 (tp->rcv_up - tp->rcv_nxt) - 1; 1579 if (so->so_oobmark == 0) 1580 so->so_state |= SS_RCVATMARK; 1581 sohasoutofband(so); 1582 tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1583 } 1584 /* 1585 * Remove out of band data so doesn't get presented to user. 1586 * This can happen independent of advancing the URG pointer, 1587 * but if two URG's are pending at once, some out-of-band 1588 * data may creep in... ick. 1589 */ 1590 if (ti->ti_urp <= (u_long)ti->ti_len 1591#ifdef SO_OOBINLINE 1592 && (so->so_options & SO_OOBINLINE) == 0 1593#endif 1594 ) 1595 tcp_pulloutofband(so, ti, m); 1596 } else 1597 /* 1598 * If no out of band data is expected, 1599 * pull receive urgent pointer along 1600 * with the receive window. 1601 */ 1602 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) 1603 tp->rcv_up = tp->rcv_nxt; 1604dodata: /* XXX */ 1605 1606 /* 1607 * Process the segment text, merging it into the TCP sequencing queue, 1608 * and arranging for acknowledgment of receipt if necessary. 1609 * This process logically involves adjusting tp->rcv_wnd as data 1610 * is presented to the user (this happens in tcp_usrreq.c, 1611 * case PRU_RCVD). If a FIN has already been received on this 1612 * connection then we just ignore the text. 1613 */ 1614 if ((ti->ti_len || (tiflags&TH_FIN)) && 1615 TCPS_HAVERCVDFIN(tp->t_state) == 0) { 1616 TCP_REASS(tp, ti, m, so, tiflags); 1617 /* 1618 * Note the amount of data that peer has sent into 1619 * our window, in order to estimate the sender's 1620 * buffer size. 1621 */ 1622 len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); 1623 } else { 1624 m_freem(m); 1625 tiflags &= ~TH_FIN; 1626 } 1627 1628 /* 1629 * If FIN is received ACK the FIN and let the user know 1630 * that the connection is closing. 1631 */ 1632 if (tiflags & TH_FIN) { 1633 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 1634 socantrcvmore(so); 1635#ifdef TTCP 1636 /* 1637 * If connection is half-synchronized 1638 * (ie SEND_SYN flag on) then delay ACK, 1639 * so it may be piggybacked when SYN is sent. 1640 * Otherwise, since we received a FIN then no 1641 * more input can be expected, send ACK now. 1642 */ 1643 if (tp->t_flags & TF_NEEDSYN) 1644 tp->t_flags |= TF_DELACK; 1645 else 1646#endif /* TTCP */ 1647 tp->t_flags |= TF_ACKNOW; 1648 tp->rcv_nxt++; 1649 } 1650 switch (tp->t_state) { 1651 1652 /* 1653 * In SYN_RECEIVED and ESTABLISHED STATES 1654 * enter the CLOSE_WAIT state. 1655 */ 1656 case TCPS_SYN_RECEIVED: 1657 case TCPS_ESTABLISHED: 1658 tp->t_state = TCPS_CLOSE_WAIT; 1659 break; 1660 1661 /* 1662 * If still in FIN_WAIT_1 STATE FIN has not been acked so 1663 * enter the CLOSING state. 1664 */ 1665 case TCPS_FIN_WAIT_1: 1666 tp->t_state = TCPS_CLOSING; 1667 break; 1668 1669 /* 1670 * In FIN_WAIT_2 state enter the TIME_WAIT state, 1671 * starting the time-wait timer, turning off the other 1672 * standard timers. 1673 */ 1674 case TCPS_FIN_WAIT_2: 1675 tp->t_state = TCPS_TIME_WAIT; 1676 tcp_canceltimers(tp); 1677#ifdef TTCP 1678 /* Shorten TIME_WAIT [RFC-1644, p.28] */ 1679 if (tp->cc_recv != 0 && 1680 tp->t_duration < TCPTV_MSL) { 1681 tp->t_timer[TCPT_2MSL] = 1682 tp->t_rxtcur * TCPTV_TWTRUNC; 1683 /* For transaction client, force ACK now. */ 1684 tp->t_flags |= TF_ACKNOW; 1685 } 1686 else 1687#endif 1688 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 1689 soisdisconnected(so); 1690 break; 1691 1692 /* 1693 * In TIME_WAIT state restart the 2 MSL time_wait timer. 1694 */ 1695 case TCPS_TIME_WAIT: 1696 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; 1697 break; 1698 } 1699 } 1700#ifdef TCPDEBUG 1701 if (so->so_options & SO_DEBUG) 1702 tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); 1703#endif 1704 1705 /* 1706 * Return any desired output. 1707 */ 1708 if (needoutput || (tp->t_flags & TF_ACKNOW)) 1709 (void) tcp_output(tp); 1710 return; 1711 1712dropafterack: 1713 /* 1714 * Generate an ACK dropping incoming segment if it occupies 1715 * sequence space, where the ACK reflects our state. 1716 */ 1717 if (tiflags & TH_RST) 1718 goto drop; 1719#ifdef TTCP 1720#ifdef TCPDEBUG 1721 if (so->so_options & SO_DEBUG) 1722 tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 1723#endif 1724#endif 1725 m_freem(m); 1726 tp->t_flags |= TF_ACKNOW; 1727 (void) tcp_output(tp); 1728 return; 1729 1730dropwithreset: 1731 /* 1732 * Generate a RST, dropping incoming segment. 1733 * Make ACK acceptable to originator of segment. 1734 * Don't bother to respond if destination was broadcast/multicast. 1735 */ 1736 if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) || 1737 IN_MULTICAST(ntohl(ti->ti_dst.s_addr))) 1738 goto drop; 1739#ifdef TTCP 1740#ifdef TCPDEBUG 1741 if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 1742 tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 1743#endif 1744#endif 1745 if (tiflags & TH_ACK) 1746 tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); 1747 else { 1748 if (tiflags & TH_SYN) 1749 ti->ti_len++; 1750 tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, 1751 TH_RST|TH_ACK); 1752 } 1753 /* destroy temporarily created socket */ 1754 if (dropsocket) 1755 (void) soabort(so); 1756 return; 1757 1758drop: 1759 /* 1760 * Drop space held by incoming segment and return. 1761 */ 1762#ifdef TCPDEBUG 1763#ifdef TTCP 1764 if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 1765 tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 1766#else 1767 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 1768 tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); 1769#endif 1770#endif 1771 m_freem(m); 1772 /* destroy temporarily created socket */ 1773 if (dropsocket) 1774 (void) soabort(so); 1775 return; 1776#ifndef TUBA_INCLUDE 1777} 1778 1779void 1780#ifdef TTCP 1781tcp_dooptions(tp, cp, cnt, ti, to) 1782#else 1783tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) 1784#endif 1785 struct tcpcb *tp; 1786 u_char *cp; 1787 int cnt; 1788 struct tcpiphdr *ti; 1789#ifdef TTCP 1790 struct tcpopt *to; 1791#else 1792 int *ts_present; 1793 u_long *ts_val, *ts_ecr; 1794#endif 1795{ 1796 u_short mss = 0; 1797 int opt, optlen; 1798 1799 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1800 opt = cp[0]; 1801 if (opt == TCPOPT_EOL) 1802 break; 1803 if (opt == TCPOPT_NOP) 1804 optlen = 1; 1805 else { 1806 optlen = cp[1]; 1807 if (optlen <= 0) 1808 break; 1809 } 1810 switch (opt) { 1811 1812 default: 1813 continue; 1814 1815 case TCPOPT_MAXSEG: 1816 if (optlen != TCPOLEN_MAXSEG) 1817 continue; 1818 if (!(ti->ti_flags & TH_SYN)) 1819 continue; 1820 bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); 1821 NTOHS(mss); 1822 break; 1823 1824 case TCPOPT_WINDOW: 1825 if (optlen != TCPOLEN_WINDOW) 1826 continue; 1827 if (!(ti->ti_flags & TH_SYN)) 1828 continue; 1829 tp->t_flags |= TF_RCVD_SCALE; 1830 tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); 1831 break; 1832 1833 case TCPOPT_TIMESTAMP: 1834 if (optlen != TCPOLEN_TIMESTAMP) 1835 continue; 1836#ifdef TTCP 1837 to->to_flag |= TOF_TS; 1838 bcopy((char *)cp + 2, 1839 (char *)&to->to_tsval, sizeof(to->to_tsval)); 1840 NTOHL(to->to_tsval); 1841 bcopy((char *)cp + 6, 1842 (char *)&to->to_tsecr, sizeof(to->to_tsecr)); 1843 NTOHL(to->to_tsecr); 1844#else 1845 *ts_present = 1; 1846 bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val)); 1847 NTOHL(*ts_val); 1848 bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr)); 1849 NTOHL(*ts_ecr); 1850#endif 1851 1852 /* 1853 * A timestamp received in a SYN makes 1854 * it ok to send timestamp requests and replies. 1855 */ 1856 if (ti->ti_flags & TH_SYN) { 1857 tp->t_flags |= TF_RCVD_TSTMP; 1858#ifdef TTCP 1859 tp->ts_recent = to->to_tsval; 1860#else 1861 tp->ts_recent = *ts_val; 1862#endif 1863 tp->ts_recent_age = tcp_now; 1864 } 1865 break; 1866#ifdef TTCP 1867 case TCPOPT_CC: 1868 if (optlen != TCPOLEN_CC) 1869 continue; 1870 to->to_flag |= TCPOPT_CC; 1871 bcopy((char *)cp + 2, 1872 (char *)&to->to_cc, sizeof(to->to_cc)); 1873 NTOHL(to->to_cc); 1874 /* 1875 * A CC or CC.new option received in a SYN makes 1876 * it ok to send CC in subsequent segments. 1877 */ 1878 if (ti->ti_flags & TH_SYN) 1879 tp->t_flags |= TF_RCVD_CC; 1880 break; 1881 case TCPOPT_CCNEW: 1882 if (optlen != TCPOLEN_CC) 1883 continue; 1884 if (!(ti->ti_flags & TH_SYN)) 1885 continue; 1886 to->to_flag |= TOF_CCNEW; 1887 bcopy((char *)cp + 2, 1888 (char *)&to->to_cc, sizeof(to->to_cc)); 1889 NTOHL(to->to_cc); 1890 /* 1891 * A CC or CC.new option received in a SYN makes 1892 * it ok to send CC in subsequent segments. 1893 */ 1894 tp->t_flags |= TF_RCVD_CC; 1895 break; 1896 case TCPOPT_CCECHO: 1897 if (optlen != TCPOLEN_CC) 1898 continue; 1899 if (!(ti->ti_flags & TH_SYN)) 1900 continue; 1901 to->to_flag |= TOF_CCECHO; 1902 bcopy((char *)cp + 2, 1903 (char *)&to->to_ccecho, sizeof(to->to_ccecho)); 1904 NTOHL(to->to_ccecho); 1905 break; 1906#endif /* TTCP*/ 1907 } 1908 } 1909 if (ti->ti_flags & TH_SYN) 1910 tcp_mss(tp, mss); /* sets t_maxseg */ 1911} 1912 1913/* 1914 * Pull out of band byte out of a segment so 1915 * it doesn't appear in the user's data queue. 1916 * It is still reflected in the segment length for 1917 * sequencing purposes. 1918 */ 1919void 1920tcp_pulloutofband(so, ti, m) 1921 struct socket *so; 1922 struct tcpiphdr *ti; 1923 register struct mbuf *m; 1924{ 1925 int cnt = ti->ti_urp - 1; 1926 1927 while (cnt >= 0) { 1928 if (m->m_len > cnt) { 1929 char *cp = mtod(m, caddr_t) + cnt; 1930 struct tcpcb *tp = sototcpcb(so); 1931 1932 tp->t_iobc = *cp; 1933 tp->t_oobflags |= TCPOOB_HAVEDATA; 1934 bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); 1935 m->m_len--; 1936 return; 1937 } 1938 cnt -= m->m_len; 1939 m = m->m_next; 1940 if (m == 0) 1941 break; 1942 } 1943 panic("tcp_pulloutofband"); 1944} 1945 1946/* 1947 * Collect new round-trip time estimate 1948 * and update averages and current timeout. 1949 */ 1950void 1951tcp_xmit_timer(tp, rtt) 1952 register struct tcpcb *tp; 1953 short rtt; 1954{ 1955 register short delta; 1956 1957 tcpstat.tcps_rttupdated++; 1958 if (tp->t_srtt != 0) { 1959 /* 1960 * srtt is stored as fixed point with 3 bits after the 1961 * binary point (i.e., scaled by 8). The following magic 1962 * is equivalent to the smoothing algorithm in rfc793 with 1963 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed 1964 * point). Adjust rtt to origin 0. 1965 */ 1966 delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); 1967 if ((tp->t_srtt += delta) <= 0) 1968 tp->t_srtt = 1; 1969 /* 1970 * We accumulate a smoothed rtt variance (actually, a 1971 * smoothed mean difference), then set the retransmit 1972 * timer to smoothed rtt + 4 times the smoothed variance. 1973 * rttvar is stored as fixed point with 2 bits after the 1974 * binary point (scaled by 4). The following is 1975 * equivalent to rfc793 smoothing with an alpha of .75 1976 * (rttvar = rttvar*3/4 + |delta| / 4). This replaces 1977 * rfc793's wired-in beta. 1978 */ 1979 if (delta < 0) 1980 delta = -delta; 1981 delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); 1982 if ((tp->t_rttvar += delta) <= 0) 1983 tp->t_rttvar = 1; 1984 } else { 1985 /* 1986 * No rtt measurement yet - use the unsmoothed rtt. 1987 * Set the variance to half the rtt (so our first 1988 * retransmit happens at 3*rtt). 1989 */ 1990 tp->t_srtt = rtt << TCP_RTT_SHIFT; 1991 tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); 1992 } 1993 tp->t_rtt = 0; 1994 tp->t_rxtshift = 0; 1995 1996 /* 1997 * the retransmit should happen at rtt + 4 * rttvar. 1998 * Because of the way we do the smoothing, srtt and rttvar 1999 * will each average +1/2 tick of bias. When we compute 2000 * the retransmit timer, we want 1/2 tick of rounding and 2001 * 1 extra tick because of +-1/2 tick uncertainty in the 2002 * firing of the timer. The bias will give us exactly the 2003 * 1.5 tick we need. But, because the bias is 2004 * statistical, we have to test that we don't drop below 2005 * the minimum feasible timer (which is 2 ticks). 2006 */ 2007 TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), 2008 tp->t_rttmin, TCPTV_REXMTMAX); 2009 2010 /* 2011 * We received an ack for a packet that wasn't retransmitted; 2012 * it is probably safe to discard any error indications we've 2013 * received recently. This isn't quite right, but close enough 2014 * for now (a route might have failed after we sent a segment, 2015 * and the return path might not be symmetrical). 2016 */ 2017 tp->t_softerror = 0; 2018} 2019 2020/* 2021 * Determine a reasonable value for maxseg size. 2022 * If the route is known, check route for mtu. 2023 * If none, use an mss that can be handled on the outgoing 2024 * interface without forcing IP to fragment; if bigger than 2025 * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES 2026 * to utilize large mbufs. If no route is found, route has no mtu, 2027 * or the destination isn't local, use a default, hopefully conservative 2028 * size (usually 512 or the default IP max size, but no more than the mtu 2029 * of the interface), as we can't discover anything about intervening 2030 * gateways or networks. We also initialize the congestion/slow start 2031 * window to be a single segment if the destination isn't local. 2032 * While looking at the routing entry, we also initialize other path-dependent 2033 * parameters from pre-set or cached values in the routing entry. 2034 * 2035 * Also take into account the space needed for options that we 2036 * send regularly. Make maxseg shorter by that amount to assure 2037 * that we can send maxseg amount of data even when the options 2038 * are present. Store the upper limit of the length of options plus 2039 * data in maxopd. 2040 * 2041 * NOTE that this routine is only called when we process an incoming 2042 * segment, for outgoing segments only tcp_mssopt is called. 2043 * 2044#ifdef TTCP 2045 * In case of T/TCP, we call this routine during implicit connection 2046 * setup as well (offer = -1), to initialize maxseg from the cached 2047 * MSS of our peer. 2048#endif 2049 */ 2050void 2051tcp_mss(tp, offer) 2052 struct tcpcb *tp; 2053 int offer; 2054{ 2055 register struct rtentry *rt; 2056 struct ifnet *ifp; 2057 register int rtt, mss; 2058 u_long bufsize; 2059 struct inpcb *inp; 2060 struct socket *so; 2061#ifdef TTCP 2062 struct rmxp_tao *taop; 2063 int origoffer = offer; 2064 extern int tcp_do_rfc1644; 2065#endif 2066 extern int tcp_mssdflt; 2067 extern int tcp_do_rfc1323; 2068 2069 inp = tp->t_inpcb; 2070 if ((rt = tcp_rtlookup(inp)) == NULL) { 2071 tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; 2072 return; 2073 } 2074 ifp = rt->rt_ifp; 2075 so = inp->inp_socket; 2076 2077#ifdef TTCP 2078 taop = rmx_taop(rt->rt_rmx); 2079 /* 2080 * Offer == -1 means that we didn't receive SYN yet, 2081 * use cached value in that case; 2082 */ 2083 if (offer == -1) 2084 offer = taop->tao_mssopt; 2085#endif /* TTCP */ 2086 /* 2087 * Offer == 0 means that there was no MSS on the SYN segment, 2088 * in this case we use tcp_mssdflt. 2089 */ 2090 if (offer == 0) 2091 offer = tcp_mssdflt; 2092 else 2093 /* 2094 * Sanity check: make sure that maxopd will be large 2095 * enough to allow some data on segments even is the 2096 * all the option space is used (40bytes). Otherwise 2097 * funny things may happen in tcp_output. 2098 */ 2099 offer = max(offer, 64); 2100#ifdef TTCP 2101 taop->tao_mssopt = offer; 2102#endif /* TTCP */ 2103 2104#ifdef RTV_MTU /* if route characteristics exist ... */ 2105 /* 2106 * While we're here, check if there's an initial rtt 2107 * or rttvar. Convert from the route-table units 2108 * to scaled multiples of the slow timeout timer. 2109 */ 2110 if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { 2111 /* 2112 * XXX the lock bit for RTT indicates that the value 2113 * is also a minimum value; this is subject to time. 2114 */ 2115 if (rt->rt_rmx.rmx_locks & RTV_RTT) 2116 tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ); 2117 tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); 2118 if (rt->rt_rmx.rmx_rttvar) 2119 tp->t_rttvar = rt->rt_rmx.rmx_rttvar / 2120 (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); 2121 else 2122 /* default variation is +- 1 rtt */ 2123 tp->t_rttvar = 2124 tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; 2125 TCPT_RANGESET(tp->t_rxtcur, 2126 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, 2127 tp->t_rttmin, TCPTV_REXMTMAX); 2128 } 2129 /* 2130 * if there's an mtu associated with the route, use it 2131 */ 2132 if (rt->rt_rmx.rmx_mtu) 2133 mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); 2134 else 2135#endif /* RTV_MTU */ 2136 { 2137 mss = ifp->if_mtu - sizeof(struct tcpiphdr); 2138 if (!in_localaddr(inp->inp_faddr)) 2139 mss = min(mss, tcp_mssdflt); 2140 } 2141 mss = min(mss, offer); 2142 /* 2143 * maxopd stores the maximum length of data AND options 2144 * in a segment; maxseg is the amount of data in a normal 2145 * segment. We need to store this value (maxopd) apart 2146 * from maxseg, because now every segment carries options 2147 * and thus we normally have somewhat less data in segments. 2148 */ 2149 tp->t_maxopd = mss; 2150 2151#ifdef TTCP 2152 /* 2153 * In case of T/TCP, origoffer==-1 indicates, that no segments 2154 * were received yet. In this case we just guess, otherwise 2155 * we do the same as before T/TCP. 2156 */ 2157 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 2158 (origoffer == -1 || 2159 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) 2160 mss -= TCPOLEN_TSTAMP_APPA; 2161 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && 2162 (origoffer == -1 || 2163 (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)) 2164 mss -= TCPOLEN_CC_APPA; 2165#else /* TTCP */ 2166 /* 2167 * Adjust mss to leave space for the usual options. We're 2168 * called from the end of tcp_dooptions so we can use the 2169 * REQ/RCVD flags to see if options will be used. 2170 */ 2171 if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == 2172 (TF_REQ_TSTMP|TF_RCVD_TSTMP)) 2173 mss -= TCPOLEN_TSTAMP_APPA; 2174#endif /* TTCP */ 2175 2176#if (MCLBYTES & (MCLBYTES - 1)) == 0 2177 if (mss > MCLBYTES) 2178 mss &= ~(MCLBYTES-1); 2179#else 2180 if (mss > MCLBYTES) 2181 mss = mss / MCLBYTES * MCLBYTES; 2182#endif 2183 /* 2184 * If there's a pipesize, change the socket buffer 2185 * to that size. Make the socket buffers an integral 2186 * number of mss units; if the mss is larger than 2187 * the socket buffer, decrease the mss. 2188 */ 2189#ifdef RTV_SPIPE 2190 if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) 2191#endif 2192 bufsize = so->so_snd.sb_hiwat; 2193 if (bufsize < mss) 2194 mss = bufsize; 2195 else { 2196 bufsize = roundup(bufsize, mss); 2197 if (bufsize > sb_max) 2198 bufsize = sb_max; 2199 (void)sbreserve(&so->so_snd, bufsize); 2200 } 2201 tp->t_maxseg = mss; 2202 2203#ifdef RTV_RPIPE 2204 if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) 2205#endif 2206 bufsize = so->so_rcv.sb_hiwat; 2207 if (bufsize > mss) { 2208 bufsize = roundup(bufsize, mss); 2209 if (bufsize > sb_max) 2210 bufsize = sb_max; 2211 (void)sbreserve(&so->so_rcv, bufsize); 2212 } 2213#ifdef TTCP 2214 /* 2215 * Don't force slow-start on local network. 2216 */ 2217 if (!in_localaddr(inp->inp_faddr)) 2218#endif /* TTCP */ 2219 tp->snd_cwnd = mss; 2220 2221#ifdef RTV_SSTHRESH 2222 if (rt->rt_rmx.rmx_ssthresh) { 2223 /* 2224 * There's some sort of gateway or interface 2225 * buffer limit on the path. Use this to set 2226 * the slow start threshhold, but set the 2227 * threshold to no less than 2*mss. 2228 */ 2229 tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); 2230 } 2231#endif 2232} 2233 2234/* 2235 * Determine the MSS option to send on an outgoing SYN. 2236 */ 2237int 2238tcp_mssopt(tp) 2239 struct tcpcb *tp; 2240{ 2241 struct rtentry *rt; 2242 extern int tcp_mssdflt; 2243 2244 rt = tcp_rtlookup(tp->t_inpcb); 2245 if (rt == NULL) 2246 return tcp_mssdflt; 2247 2248 /* 2249 * if there's an mtu associated with the route, use it 2250 */ 2251 if (rt->rt_rmx.rmx_mtu) 2252 return rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); 2253 2254 return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr); 2255} 2256#endif /* TUBA_INCLUDE */ 2257