1/* $NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93 32 */ 33 34/*********************************************************** 35 Copyright IBM Corporation 1987 36 37 All Rights Reserved 38 39Permission to use, copy, modify, and distribute this software and its 40documentation for any purpose and without fee is hereby granted, 41provided that the above copyright notice appear in all copies and that 42both that copyright notice and this permission notice appear in 43supporting documentation, and that the name of IBM not be 44used in advertising or publicity pertaining to distribution of the 45software without specific, written prior permission. 46 47IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 48ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL 49IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR 50ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 51WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 52ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 53SOFTWARE. 54 55******************************************************************/ 56 57/* 58 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison 59 */ 60/* 61 * The main work of data transfer is done here. These routines are called 62 * from tp.trans. They include the routines that check the validity of acks 63 * and Xacks, (tp_goodack() and tp_goodXack() ) take packets from socket 64 * buffers and send them (tp_send()), drop the data from the socket buffers 65 * (tp_sbdrop()), and put incoming packet data into socket buffers 66 * (tp_stash()). 67 */ 68 69#include <sys/cdefs.h> 70__KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $"); 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/mbuf.h> 75#include <sys/socket.h> 76#include <sys/socketvar.h> 77#include <sys/protosw.h> 78#include <sys/errno.h> 79#include <sys/time.h> 80#include <sys/kernel.h> 81 82#include <netiso/tp_ip.h> 83#include <netiso/iso.h> 84#include <netiso/argo_debug.h> 85#include <netiso/tp_timer.h> 86#include <netiso/tp_param.h> 87#include <netiso/tp_stat.h> 88#include <netiso/tp_pcb.h> 89#include <netiso/tp_tpdu.h> 90#include <netiso/tp_trace.h> 91#include <netiso/tp_meas.h> 92#include <netiso/tp_seq.h> 93#include <netiso/tp_var.h> 94 95int tprexmtthresh = 3; 96 97/* 98 * CALLED FROM: 99 * tp.trans, when an XAK arrives 100 * FUNCTION and ARGUMENTS: 101 * Determines if the sequence number (seq) from the XAK 102 * acks anything new. If so, drop the appropriate tpdu 103 * from the XPD send queue. 104 * RETURN VALUE: 105 * Returns 1 if it did this, 0 if the ack caused no action. 106 */ 107int 108tp_goodXack(struct tp_pcb *tpcb, SeqNum seq) 109{ 110 111#ifdef TPPT 112 if (tp_traceflags[D_XPD]) { 113 tptraceTPCB(TPPTgotXack, 114 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew, 115 tpcb->tp_snduna); 116 } 117#endif 118 119 if (seq == tpcb->tp_Xuna) { 120 tpcb->tp_Xuna = tpcb->tp_Xsndnxt; 121 122 /* 123 * DROP 1 packet from the Xsnd socket buf - just so happens 124 * that only one packet can be there at any time so drop the 125 * whole thing. If you allow > 1 packet the socket buffer, 126 * then you'll have to keep track of how many characters went 127 * w/ each XPD tpdu, so this will get messier 128 */ 129#ifdef ARGO_DEBUG 130 if (argo_debug[D_XPD]) { 131 dump_mbuf(tpcb->tp_Xsnd.sb_mb, 132 "tp_goodXack Xsnd before sbdrop"); 133 } 134#endif 135 136#ifdef TPPT 137 if (tp_traceflags[D_XPD]) { 138 tptraceTPCB(TPPTmisc, 139 "goodXack: dropping cc ", 140 (int) (tpcb->tp_Xsnd.sb_cc), 141 0, 0, 0); 142 } 143#endif 144 sbdroprecord(&tpcb->tp_Xsnd); 145 return 1; 146 } 147 return 0; 148} 149 150/* 151 * CALLED FROM: 152 * tp_good_ack() 153 * FUNCTION and ARGUMENTS: 154 * updates 155 * smoothed average round trip time (*rtt) 156 * roundtrip time variance (*rtv) - actually deviation, not variance 157 * given the new value (diff) 158 * RETURN VALUE: 159 * void 160 */ 161 162void 163tp_rtt_rtv(struct tp_pcb *tpcb) 164{ 165 int old = tpcb->tp_rtt; 166 int elapsed, delta = 0; 167 168 elapsed = hardclock_ticks - tpcb->tp_rttemit; 169 170 if (tpcb->tp_rtt != 0) { 171 /* 172 * rtt is the smoothed round trip time in machine clock 173 * ticks (hz). It is stored as a fixed point number, 174 * unscaled (unlike the tcp srtt). The rationale here 175 * is that it is only significant to the nearest unit of 176 * slowtimo, which is at least 8 machine clock ticks 177 * so there is no need to scale. The smoothing is done 178 * according to the same formula as TCP (rtt = rtt*7/8 179 * + measured_rtt/8). 180 */ 181 delta = elapsed - tpcb->tp_rtt; 182 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0) 183 tpcb->tp_rtt = 1; 184 /* 185 * rtv is a smoothed accumulated mean difference, unscaled 186 * for reasons expressed above. 187 * It is smoothed with an alpha of .75, and the round trip timer 188 * will be set to rtt + 4*rtv, also as TCP does. 189 */ 190 if (delta < 0) 191 delta = -delta; 192 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0) 193 tpcb->tp_rtv = 1; 194 } else { 195 /* 196 * No rtt measurement yet - use the unsmoothed rtt. Set the 197 * variance to half the rtt (so our first retransmit happens 198 * at 3*rtt) 199 */ 200 tpcb->tp_rtt = elapsed; 201 tpcb->tp_rtv = elapsed >> 1; 202 } 203 tpcb->tp_rttemit = 0; 204 tpcb->tp_rxtshift = 0; 205 /* 206 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar. 207 * Because of the way we do the smoothing, srtt and rttvar 208 * will each average +1/2 tick of bias. When we compute 209 * the retransmit timer, we want 1/2 tick of rounding and 210 * 1 extra tick because of +-1/2 tick uncertainty in the 211 * firing of the timer. The bias will give us exactly the 212 * 1.5 tick we need. But, because the bias is 213 * statistical, we have to test that we don't drop below 214 * the minimum feasible timer (which is 2 ticks)." 215 */ 216 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb), 217 tpcb->tp_peer_acktime, 128 /* XXX */ ); 218#ifdef ARGO_DEBUG 219 if (argo_debug[D_RTT]) { 220 printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n", 221 "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old); 222 } 223#endif 224 tpcb->tp_rxtcur = tpcb->tp_dt_ticks; 225} 226 227/* 228 * CALLED FROM: 229 * tp.trans when an AK arrives 230 * FUNCTION and ARGUMENTS: 231 * Given (cdt), the credit from the AK tpdu, and 232 * (seq), the sequence number from the AK tpdu, 233 * tp_goodack() determines if the AK acknowledges something in the send 234 * window, and if so, drops the appropriate packets from the retransmission 235 * list, computes the round trip time, and updates the retransmission timer 236 * based on the new smoothed round trip time. 237 * RETURN VALUE: 238 * Returns 1 if 239 * EITHER it actually acked something heretofore unacknowledged 240 * OR no news but the credit should be processed. 241 * If something heretofore unacked was acked with this sequence number, 242 * the appropriate tpdus are dropped from the retransmission control list, 243 * by calling tp_sbdrop(). 244 * No need to see the tpdu itself. 245 */ 246int 247tp_goodack(struct tp_pcb *tpcb, u_int cdt, SeqNum seq, u_int subseq) 248{ 249 int old_fcredit = 0; 250 int bang = 0; /* bang --> ack for something 251 * heretofore unacked */ 252 u_int bytes_acked; 253 254#ifdef ARGO_DEBUG 255 if (argo_debug[D_ACKRECV]) { 256 printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n", 257 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt); 258 } 259#endif 260 261#ifdef TPPT 262 if (tp_traceflags[D_ACKRECV]) { 263 tptraceTPCB(TPPTgotack, 264 seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq); 265 } 266#endif 267 268#ifdef TP_PERF_MEAS 269 if (DOPERF(tpcb)) { 270 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0); 271 } 272#endif 273 274 if (seq == tpcb->tp_snduna) { 275 if (subseq < tpcb->tp_r_subseq || 276 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) { 277 discard_the_ack: 278#ifdef ARGO_DEBUG 279 if (argo_debug[D_ACKRECV]) { 280 printf("goodack discard : tpcb %p subseq %d r_subseq %d\n", 281 tpcb, subseq, tpcb->tp_r_subseq); 282 } 283#endif 284 goto done; 285 } 286 if (cdt == tpcb->tp_fcredit /* && thus subseq > 287 tpcb->tp_r_subseq */ ) { 288 tpcb->tp_r_subseq = subseq; 289 if (tpcb->tp_timer[TM_data_retrans] == 0) 290 tpcb->tp_dupacks = 0; 291 else if (++tpcb->tp_dupacks == tprexmtthresh) { 292 /* 293 * partner went out of his way to signal with 294 * different subsequences that he has the 295 * same lack of an expected packet. This may 296 * be an early indiciation of a loss 297 */ 298 299 SeqNum onxt = tpcb->tp_sndnxt; 300 struct mbuf *onxt_m = tpcb->tp_sndnxt_m; 301 u_int win = min(tpcb->tp_fcredit, 302 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2; 303#ifdef ARGO_DEBUG 304 if (argo_debug[D_ACKRECV]) { 305 printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n", 306 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt); 307 } 308#endif 309 if (win < 2) 310 win = 2; 311 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize; 312 tpcb->tp_timer[TM_data_retrans] = 0; 313 tpcb->tp_rttemit = 0; 314 tpcb->tp_sndnxt = tpcb->tp_snduna; 315 tpcb->tp_sndnxt_m = 0; 316 tpcb->tp_cong_win = tpcb->tp_l_tpdusize; 317 tp_send(tpcb); 318 tpcb->tp_cong_win = tpcb->tp_ssthresh + 319 tpcb->tp_dupacks * tpcb->tp_l_tpdusize; 320 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) { 321 tpcb->tp_sndnxt = onxt; 322 tpcb->tp_sndnxt_m = onxt_m; 323 } 324 } else if (tpcb->tp_dupacks > tprexmtthresh) { 325 tpcb->tp_cong_win += tpcb->tp_l_tpdusize; 326 } 327 goto done; 328 } 329 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna)) 330 goto discard_the_ack; 331 /* 332 * If the congestion window was inflated to account 333 * for the other side's cached packets, retract it. 334 */ 335 if (tpcb->tp_dupacks > tprexmtthresh && 336 tpcb->tp_cong_win > tpcb->tp_ssthresh) 337 tpcb->tp_cong_win = tpcb->tp_ssthresh; 338 tpcb->tp_r_subseq = subseq; 339 old_fcredit = tpcb->tp_fcredit; 340 tpcb->tp_fcredit = cdt; 341 if (cdt > tpcb->tp_maxfcredit) 342 tpcb->tp_maxfcredit = cdt; 343 tpcb->tp_dupacks = 0; 344 345 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) { 346 347 tpsbcheck(tpcb, 0); 348 bytes_acked = tp_sbdrop(tpcb, seq); 349 tpsbcheck(tpcb, 1); 350 /* 351 * If transmit timer is running and timed sequence 352 * number was acked, update smoothed round trip time. 353 * Since we now have an rtt measurement, cancel the 354 * timer backoff (cf., Phil Karn's retransmit alg.). 355 * Recompute the initial retransmit timer. 356 */ 357 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq)) 358 tp_rtt_rtv(tpcb); 359 /* 360 * If all outstanding data is acked, stop retransmit timer. 361 * If there is more data to be acked, restart retransmit 362 * timer, using current (possibly backed-off) value. 363 * OSI combines the keepalive and persistance functions. 364 * So, there is no persistance timer per se, to restart. 365 */ 366 if (tpcb->tp_class != TP_CLASS_0) 367 tpcb->tp_timer[TM_data_retrans] = 368 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur; 369 /* 370 * When new data is acked, open the congestion window. 371 * If the window gives us less than ssthresh packets 372 * in flight, open exponentially (maxseg per packet). 373 * Otherwise open linearly: maxseg per window 374 * (maxseg^2 / cwnd per packet), plus a constant 375 * fraction of a packet (maxseg/8) to help larger windows 376 * open quickly enough. 377 */ 378 { 379 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize; 380 381 incr = min(incr, bytes_acked); 382 if (cw > tpcb->tp_ssthresh) 383 incr = incr * incr / cw + incr / 8; 384 tpcb->tp_cong_win = 385 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat); 386 } 387 tpcb->tp_snduna = seq; 388 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) { 389 tpcb->tp_sndnxt = seq; 390 tpcb->tp_sndnxt_m = 0; 391 } 392 bang++; 393 } 394 if (cdt != 0 && old_fcredit == 0) { 395 tpcb->tp_sendfcc = 1; 396 } 397 if (cdt == 0) { 398 if (old_fcredit != 0) 399 IncStat(ts_zfcdt); 400 /* The following might mean that the window shrunk */ 401 if (tpcb->tp_timer[TM_data_retrans]) { 402 tpcb->tp_timer[TM_data_retrans] = 0; 403 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks; 404 if (tpcb->tp_sndnxt != tpcb->tp_snduna) { 405 tpcb->tp_sndnxt = tpcb->tp_snduna; 406 tpcb->tp_sndnxt_m = 0; 407 } 408 } 409 } 410 tpcb->tp_fcredit = cdt; 411 bang |= (old_fcredit < cdt); 412 413done: 414#ifdef ARGO_DEBUG 415 if (argo_debug[D_ACKRECV]) { 416 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n", 417 bang, cdt, old_fcredit, tpcb->tp_cong_win); 418 } 419#endif 420 /* 421 * if (bang) XXXXX Very bad to remove this test, but somethings 422 * broken 423 */ 424 tp_send(tpcb); 425 return (bang); 426} 427 428/* 429 * CALLED FROM: 430 * tp_goodack() 431 * FUNCTION and ARGUMENTS: 432 * drops everything up TO but not INCLUDING seq # (seq) 433 * from the retransmission queue. 434 */ 435int 436tp_sbdrop(struct tp_pcb *tpcb, SeqNum seq) 437{ 438 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 439 int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna); 440 int oldcc = sb->sb_cc, oldi = i; 441 442 if (i >= tpcb->tp_seqhalf) 443 printf("tp_spdropping too much -- should panic"); 444 while (i-- > 0) 445 sbdroprecord(sb); 446#ifdef ARGO_DEBUG 447 if (argo_debug[D_ACKRECV]) { 448 printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n", 449 oldi, oldcc - sb->sb_cc, tpcb, seq); 450 } 451#endif 452 if (sb_notify(sb)) 453 sowwakeup(tpcb->tp_sock); 454 return (oldcc - sb->sb_cc); 455} 456 457/* 458 * CALLED FROM: 459 * tp.trans on user send request, arrival of AK and arrival of XAK 460 * FUNCTION and ARGUMENTS: 461 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt). 462 * Emits until a) runs out of data, or b) runs into an XPD mark, or 463 * c) it hits seq number (highseq) limited by cong or credit. 464 * 465 * If you want XPD to buffer > 1 du per socket buffer, you can 466 * modifiy this to issue XPD tpdus also, but then it'll have 467 * to take some argument(s) to distinguish between the type of DU to 468 * hand tp_emit. 469 * 470 * When something is sent for the first time, its time-of-send 471 * is stashed (in system clock ticks rather than pf_slowtimo ticks). 472 * When the ack arrives, the smoothed round-trip time is figured 473 * using this value. 474 */ 475void 476tp_send(struct tp_pcb *tpcb) 477{ 478 int len; 479 struct mbuf *m; 480 struct mbuf *mb = 0; 481 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 482 unsigned int eotsdu = 0; 483 SeqNum highseq, checkseq; 484 int idle, idleticks, off, cong_win; 485#ifdef TP_PERF_MEAS 486 int send_start_time = hardclock_ticks; 487 SeqNum oldnxt = tpcb->tp_sndnxt; 488#endif /* TP_PERF_MEAS */ 489 490 idle = (tpcb->tp_snduna == tpcb->tp_sndnew); 491 if (idle) { 492 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact]; 493 if (idleticks > tpcb->tp_dt_ticks) 494 /* 495 * We have been idle for "a while" and no acks are 496 * expected to clock out any data we send -- 497 * slow start to get ack "clock" running again. 498 */ 499 tpcb->tp_cong_win = tpcb->tp_l_tpdusize; 500 } 501 cong_win = tpcb->tp_cong_win; 502 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna); 503 if (tpcb->tp_Xsnd.sb_mb) 504 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew); 505 506#ifdef ARGO_DEBUG 507 if (argo_debug[D_DATA]) { 508 printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n", 509 tpcb, tpcb->tp_sndnxt, cong_win, highseq); 510 } 511#endif 512#ifdef TPPT 513 if (tp_traceflags[D_DATA]) { 514 tptraceTPCB(TPPTmisc, "tp_send sndnew snduna", 515 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0); 516 tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin", 517 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win); 518 } 519#endif 520#ifdef TPPT 521 if (tp_traceflags[D_DATA]) { 522 tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin", 523 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win); 524 } 525#endif 526 527 if (tpcb->tp_sndnxt_m) 528 m = tpcb->tp_sndnxt_m; 529 else { 530 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna); 531 for (m = sb->sb_mb; m && off > 0; m = m->m_next) 532 off--; 533 } 534 /* 535 * Avoid silly window syndrome here . . . figure out how! 536 */ 537 checkseq = tpcb->tp_sndnum; 538 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq)) 539 checkseq = highseq; /* i.e. DON'T retain highest assigned 540 * packet */ 541 542 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) { 543 544 eotsdu = (m->m_flags & M_EOR) != 0; 545 len = m->m_pkthdr.len; 546 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 && 547 len < (tpcb->tp_l_tpdusize / 2)) 548 break; /* Nagle . . . . . */ 549 cong_win -= len; 550 /* 551 * make a copy - mb goes into the retransmission list while m 552 * gets emitted. m_copy won't copy a zero-length mbuf. 553 */ 554 mb = m; 555 m = m_copy(mb, 0, M_COPYALL); 556 if (m == NULL) 557 break; 558#ifdef TPPT 559 if (tp_traceflags[D_STASH]) { 560 tptraceTPCB(TPPTmisc, 561 "tp_send mcopy nxt high eotsdu len", 562 tpcb->tp_sndnxt, highseq, eotsdu, len); 563 } 564#endif 565 566#ifdef ARGO_DEBUG 567 if (argo_debug[D_DATA]) { 568 printf("tp_sending tpcb %p nxt 0x%x\n", 569 tpcb, tpcb->tp_sndnxt); 570 } 571#endif 572 /* 573 * when headers are precomputed, may need to fill in checksum 574 * here 575 */ 576 tpcb->tp_sock->so_error = 577 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m); 578 if (tpcb->tp_sock->so_error != 0) 579 /* error */ 580 break; 581 m = mb->m_nextpkt; 582 tpcb->tp_sndnxt_m = m; 583 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) { 584 SEQ_INC(tpcb, tpcb->tp_sndnew); 585 /* 586 * Time this transmission if not a retransmission and 587 * not currently timing anything. 588 */ 589 if (tpcb->tp_rttemit == 0) { 590 tpcb->tp_rttemit = hardclock_ticks; 591 tpcb->tp_rttseq = tpcb->tp_sndnxt; 592 } 593 tpcb->tp_sndnxt = tpcb->tp_sndnew; 594 } else 595 SEQ_INC(tpcb, tpcb->tp_sndnxt); 596 /* 597 * Set retransmit timer if not currently set. 598 * Initial value for retransmit timer is smoothed 599 * round-trip time + 2 * round-trip time variance. 600 * Initialize shift counter which is used for backoff 601 * of retransmit time. 602 */ 603 if (tpcb->tp_timer[TM_data_retrans] == 0 && 604 tpcb->tp_class != TP_CLASS_0) { 605 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks; 606 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks; 607 tpcb->tp_rxtshift = 0; 608 } 609 } 610 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum)) 611 tpcb->tp_oktonagle = 0; 612#ifdef TP_PERF_MEAS 613 if (DOPERF(tpcb)) { 614 int npkts; 615 int s, elapsed, *t; 616 struct timeval now; 617 618 elapsed = hardclock_ticks - send_start_time; 619 620 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt); 621 622 if (npkts > 0) 623 tpcb->tp_Nwindow++; 624 625 if (npkts > TP_PM_MAX) 626 npkts = TP_PM_MAX; 627 628 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]); 629 *t += (t - elapsed) >> TP_RTT_ALPHA; 630 631 if (mb == 0) { 632 IncPStat(tpcb, tps_win_lim_by_data[npkts]); 633 } else { 634 IncPStat(tpcb, tps_win_lim_by_cdt[npkts]); 635 /* not true with congestion-window being used */ 636 } 637 now.tv_sec = elapsed / hz; 638 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz; 639 tpmeas(tpcb->tp_lref, 640 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts); 641 } 642#endif /* TP_PERF_MEAS */ 643 644 645#ifdef TPPT 646 if (tp_traceflags[D_DATA]) { 647 tptraceTPCB(TPPTmisc, 648 "tp_send at end: new nxt eotsdu error", 649 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, 650 tpcb->tp_sock->so_error); 651 652 } 653#endif 654} 655 656int TPNagleok; 657int TPNagled; 658 659int 660tp_packetize(struct tp_pcb *tpcb, struct mbuf *m, int eotsdu) 661{ 662 struct mbuf *n = NULL; 663 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 664 int maxsize = tpcb->tp_l_tpdusize 665 - tp_headersize(DT_TPDU_type, tpcb) 666 - (tpcb->tp_use_checksum ? 4 : 0); 667 int totlen = m->m_pkthdr.len; 668 669 /* 670 * Pre-packetize the data in the sockbuf 671 * according to negotiated mtu. Do it here 672 * where we can safely wait for mbufs. 673 * 674 * This presumes knowledge of sockbuf conventions. 675 * TODO: allocate space for header and fill it in (once!). 676 */ 677#ifdef ARGO_DEBUG 678 if (argo_debug[D_DATA]) { 679 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n", 680 maxsize, totlen, eotsdu, tpcb->tp_sndnum); 681 } 682#endif 683 if (tpcb->tp_oktonagle) { 684 if ((n = sb->sb_mb) == 0) 685 panic("tp_packetize"); 686 while (n->m_nextpkt) 687 n = n->m_nextpkt; 688 if (n->m_flags & M_EOR) 689 panic("tp_packetize 2"); 690 SEQ_INC(tpcb, tpcb->tp_sndnum); 691 if (totlen + n->m_pkthdr.len < maxsize) { 692 /* 693 * There is an unsent packet with space, 694 * combine data 695 */ 696 struct mbuf *old_n = n; 697 tpsbcheck(tpcb, 3); 698 n->m_pkthdr.len += totlen; 699 while (n->m_next) 700 n = n->m_next; 701 sbcompress(sb, m, n); 702 tpsbcheck(tpcb, 4); 703 n = old_n; 704 TPNagled++; 705 goto out; 706 } 707 } 708 709 while (m) { 710 n = m; 711 if (totlen > maxsize) { 712 if ((m = m_split(n, maxsize, M_WAIT)) == 0) 713 panic("tp_packetize"); 714 } else 715 m = 0; 716 totlen -= maxsize; 717 tpsbcheck(tpcb, 5); 718 sbappendrecord(sb, n); 719 tpsbcheck(tpcb, 6); 720 SEQ_INC(tpcb, tpcb->tp_sndnum); 721 } 722out: 723 if (eotsdu) { 724 n->m_flags |= M_EOR; /* XXX belongs at end */ 725 tpcb->tp_oktonagle = 0; 726 } else { 727 SEQ_DEC(tpcb, tpcb->tp_sndnum); 728 tpcb->tp_oktonagle = 1; 729 TPNagleok++; 730 } 731 732#ifdef ARGO_DEBUG 733 if (argo_debug[D_DATA]) { 734 printf("SEND out: oktonagle %d sndnum 0x%x\n", 735 tpcb->tp_oktonagle, tpcb->tp_sndnum); 736 } 737#endif 738 return 0; 739} 740 741 742/* 743 * NAME: tp_stash() 744 * CALLED FROM: 745 * tp.trans on arrival of a DT tpdu 746 * FUNCTION, ARGUMENTS, and RETURN VALUE: 747 * Returns 1 if 748 * a) something new arrived and it's got eotsdu_reached bit on, 749 * b) this arrival was caused other out-of-sequence things to be 750 * accepted, or 751 * c) this arrival is the highest seq # for which we last gave credit 752 * (sender just sent a whole window) 753 * In other words, returns 1 if tp should send an ack immediately, 0 if 754 * the ack can wait a while. 755 * 756 * Note: this implementation no longer renegs on credit, (except 757 * when debugging option D_RENEG is on, for the purpose of testing 758 * ack subsequencing), so we don't need to check for incoming tpdus 759 * being in a reneged portion of the window. 760 */ 761 762int 763tp_stash(struct tp_pcb *tpcb, struct tp_event *e) 764{ 765 int ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH; 766 /* 0--> delay acks until full window */ 767 /* 1--> ack each tpdu */ 768#define E e->TPDU_ATTR(DT) 769 770 if (E.e_eot) { 771 struct mbuf *n = E.e_data; 772 n->m_flags |= M_EOR; 773 n->m_nextpkt = 0; 774 } 775#ifdef ARGO_DEBUG 776 if (argo_debug[D_STASH]) { 777 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 778 "stash: so_rcv before appending"); 779 dump_mbuf(E.e_data, 780 "stash: e_data before appending"); 781 } 782#endif 783 784#ifdef TP_PERF_MEAS 785 if (DOPERF(tpcb)) { 786 PStat(tpcb, Nb_from_ll) += E.e_datalen; 787 tpmeas(tpcb->tp_lref, TPtime_from_ll, 788 &e->e_time, E.e_seq, 789 (u_int) PStat(tpcb, Nb_from_ll), 790 (u_int) E.e_datalen); 791 } 792#endif 793 794 if (E.e_seq == tpcb->tp_rcvnxt) { 795 796#ifdef ARGO_DEBUG 797 if (argo_debug[D_STASH]) { 798 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n", 799 E.e_seq, E.e_datalen, E.e_eot); 800 } 801#endif 802 803#ifdef TPPT 804 if (tp_traceflags[D_STASH]) { 805 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot", 806 E.e_seq, E.e_datalen, E.e_eot, 0); 807 } 808#endif 809 810 SET_DELACK(tpcb); 811 812 sbappend(&tpcb->tp_sock->so_rcv, E.e_data); 813 814 SEQ_INC(tpcb, tpcb->tp_rcvnxt); 815 /* 816 * move chains from the reassembly queue to the socket buffer 817 */ 818 if (tpcb->tp_rsycnt) { 819 struct mbuf **mp; 820 struct mbuf **mplim; 821 822 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % 823 tpcb->tp_maxlcredit); 824 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit; 825 826 while (tpcb->tp_rsycnt && *mp) { 827 sbappend(&tpcb->tp_sock->so_rcv, *mp); 828 tpcb->tp_rsycnt--; 829 *mp = 0; 830 SEQ_INC(tpcb, tpcb->tp_rcvnxt); 831 ack_reason |= ACK_REORDER; 832 if (++mp == mplim) 833 mp = tpcb->tp_rsyq; 834 } 835 } 836#ifdef ARGO_DEBUG 837 if (argo_debug[D_STASH]) { 838 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 839 "stash: so_rcv after appending"); 840 } 841#endif 842 843 } else { 844 struct mbuf **mp; 845 SeqNum uwe; 846 847#ifdef TPPT 848 if (tp_traceflags[D_STASH]) { 849 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt", 850 E.e_seq, tpcb->tp_rcvnxt, 851 tpcb->tp_lcredit, 0); 852 } 853#endif 854 855 if (tpcb->tp_rsyq == 0) 856 tp_rsyset(tpcb); 857 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit); 858 if (tpcb->tp_rsyq == 0 || 859 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) { 860 ack_reason = ACK_DONT; 861 m_freem(E.e_data); 862 } else if (*(mp = tpcb->tp_rsyq + 863 (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) { 864#ifdef ARGO_DEBUG 865 if (argo_debug[D_STASH]) { 866 printf("tp_stash - drop & ack\n"); 867 } 868#endif 869 870 /* 871 * retransmission - drop it and force 872 * an ack 873 */ 874 IncStat(ts_dt_dup); 875#ifdef TP_PERF_MEAS 876 if (DOPERF(tpcb)) { 877 IncPStat(tpcb, tps_n_ack_cuz_dup); 878 } 879#endif 880 881 m_freem(E.e_data); 882 ack_reason |= ACK_DUP; 883 } else { 884 *mp = E.e_data; 885 tpcb->tp_rsycnt++; 886 ack_reason = ACK_DONT; 887 } 888 } 889 /* 890 * there were some comments of historical interest 891 * here. 892 */ 893 { 894 LOCAL_CREDIT(tpcb); 895 896 if (E.e_seq == tpcb->tp_sent_uwe) 897 ack_reason |= ACK_STRAT_FULLWIN; 898 899#ifdef TPPT 900 if (tp_traceflags[D_STASH]) { 901 tptraceTPCB(TPPTmisc, 902 "end of stash, eot, ack_reason, sent_uwe ", 903 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0); 904 } 905#endif 906 907 if (ack_reason == ACK_DONT) { 908 IncStat(ts_ackreason[ACK_DONT]); 909 return 0; 910 } else { 911#ifdef TP_PERF_MEAS 912 if (DOPERF(tpcb)) { 913 if (ack_reason & ACK_STRAT_EACH) { 914 IncPStat(tpcb, tps_n_ack_cuz_strat); 915 } else if (ack_reason & ACK_STRAT_FULLWIN) { 916 IncPStat(tpcb, tps_n_ack_cuz_fullwin); 917 } else if (ack_reason & ACK_REORDER) { 918 IncPStat(tpcb, tps_n_ack_cuz_reorder); 919 } 920 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0, 921 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0); 922 } 923#endif 924 { 925 int i; 926 927 /* 928 * keep track of all reasons 929 * that apply 930 */ 931 for (i = 1; i < _ACK_NUM_REASONS_; i++) { 932 if (ack_reason & (1 << i)) 933 IncStat(ts_ackreason[i]); 934 } 935 } 936 return 1; 937 } 938 } 939} 940 941/* 942 * tp_rsyflush - drop all the packets on the reassembly queue. 943 * Do this when closing the socket, or when somebody has changed 944 * the space avaible in the receive socket (XXX). 945 */ 946void 947tp_rsyflush(struct tp_pcb *tpcb) 948{ 949 struct mbuf **mp; 950 if (tpcb->tp_rsycnt) { 951 for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit; 952 --mp >= tpcb->tp_rsyq;) 953 if (*mp) { 954 tpcb->tp_rsycnt--; 955 m_freem(*mp); 956 } 957 if (tpcb->tp_rsycnt) { 958 printf("tp_rsyflush %p\n", tpcb); 959 tpcb->tp_rsycnt = 0; 960 } 961 } 962 free((void *) tpcb->tp_rsyq, M_PCB); 963 tpcb->tp_rsyq = 0; 964} 965 966void 967tp_rsyset(struct tp_pcb *tpcb) 968{ 969 struct socket *so = tpcb->tp_sock; 970 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf; 971 int old_credit = tpcb->tp_maxlcredit; 972 void * rsyq; 973 974 tpcb->tp_maxlcredit = maxcredit = min(maxcredit, 975 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize); 976 977 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0) 978 return; 979 maxcredit *= sizeof(struct mbuf *); 980 if (tpcb->tp_rsyq) 981 tp_rsyflush(tpcb); 982 rsyq = malloc(maxcredit, M_PCB, M_NOWAIT|M_ZERO); 983 tpcb->tp_rsyq = (struct mbuf **) rsyq; 984} 985 986 987void 988tpsbcheck(struct tp_pcb *tpcb, int i) 989{ 990 struct mbuf *n, *m; 991 int len = 0, mbcnt = 0, pktlen; 992 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 993 994 for (n = sb->sb_mb; n; n = n->m_nextpkt) { 995 if ((n->m_flags & M_PKTHDR) == 0) 996 panic("tpsbcheck nohdr"); 997 pktlen = len + n->m_pkthdr.len; 998 for (m = n; m; m = m->m_next) { 999 len += m->m_len; 1000 mbcnt += MSIZE; 1001 if (m->m_flags & M_EXT) 1002 mbcnt += m->m_ext.ext_size; 1003 } 1004 if (len != pktlen) { 1005 printf("test %d; len %d != pktlen %d on mbuf %p\n", 1006 i, len, pktlen, n); 1007 panic("tpsbcheck short"); 1008 } 1009 } 1010 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 1011 printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc, 1012 mbcnt, sb->sb_mbcnt); 1013 panic("tpsbcheck"); 1014 } 1015} 1016