1/*	$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)tp_subr.c	8.1 (Berkeley) 6/10/93
32 */
33
34/***********************************************************
35		Copyright IBM Corporation 1987
36
37                      All Rights Reserved
38
39Permission to use, copy, modify, and distribute this software and its
40documentation for any purpose and without fee is hereby granted,
41provided that the above copyright notice appear in all copies and that
42both that copyright notice and this permission notice appear in
43supporting documentation, and that the name of IBM not be
44used in advertising or publicity pertaining to distribution of the
45software without specific, written prior permission.
46
47IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
48ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
49IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
50ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
51WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
52ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53SOFTWARE.
54
55******************************************************************/
56
57/*
58 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
59 */
/*
 * The main work of data transfer is done here. These routines are called
 * from tp.trans. They include the routines that check the validity of acks
 * and Xacks (tp_goodack() and tp_goodXack()), take packets from socket
 * buffers and send them (tp_send()), drop the data from the socket buffers
 * (tp_sbdrop()), and put incoming packet data into socket buffers
 * (tp_stash()).
 */
68
69#include <sys/cdefs.h>
70__KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $");
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/protosw.h>
78#include <sys/errno.h>
79#include <sys/time.h>
80#include <sys/kernel.h>
81
82#include <netiso/tp_ip.h>
83#include <netiso/iso.h>
84#include <netiso/argo_debug.h>
85#include <netiso/tp_timer.h>
86#include <netiso/tp_param.h>
87#include <netiso/tp_stat.h>
88#include <netiso/tp_pcb.h>
89#include <netiso/tp_tpdu.h>
90#include <netiso/tp_trace.h>
91#include <netiso/tp_meas.h>
92#include <netiso/tp_seq.h>
93#include <netiso/tp_var.h>
94
/*
 * Duplicate-AK threshold used by tp_goodack(): when tp_dupacks reaches
 * this value, transmission is restarted from tp_snduna without waiting
 * for the retransmission timer.
 */
int             tprexmtthresh = 3;
96
97/*
98 * CALLED FROM:
99 *	tp.trans, when an XAK arrives
100 * FUNCTION and ARGUMENTS:
101 * 	Determines if the sequence number (seq) from the XAK
102 * 	acks anything new.  If so, drop the appropriate tpdu
103 * 	from the XPD send queue.
104 * RETURN VALUE:
105 * 	Returns 1 if it did this, 0 if the ack caused no action.
106 */
107int
108tp_goodXack(struct tp_pcb  *tpcb, SeqNum seq)
109{
110
111#ifdef TPPT
112	if (tp_traceflags[D_XPD]) {
113		tptraceTPCB(TPPTgotXack,
114		      seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
115			    tpcb->tp_snduna);
116	}
117#endif
118
119	if (seq == tpcb->tp_Xuna) {
120		tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
121
122		/*
123		 * DROP 1 packet from the Xsnd socket buf - just so happens
124		 * that only one packet can be there at any time so drop the
125		 * whole thing.  If you allow > 1 packet the socket buffer,
126		 * then you'll have to keep track of how many characters went
127		 * w/ each XPD tpdu, so this will get messier
128		 */
129#ifdef ARGO_DEBUG
130		if (argo_debug[D_XPD]) {
131			dump_mbuf(tpcb->tp_Xsnd.sb_mb,
132				  "tp_goodXack Xsnd before sbdrop");
133		}
134#endif
135
136#ifdef TPPT
137		if (tp_traceflags[D_XPD]) {
138			tptraceTPCB(TPPTmisc,
139				    "goodXack: dropping cc ",
140				    (int) (tpcb->tp_Xsnd.sb_cc),
141				    0, 0, 0);
142		}
143#endif
144		sbdroprecord(&tpcb->tp_Xsnd);
145		return 1;
146	}
147	return 0;
148}
149
150/*
151 * CALLED FROM:
152 *  tp_good_ack()
153 * FUNCTION and ARGUMENTS:
154 *  updates
155 *  smoothed average round trip time (*rtt)
156 *  roundtrip time variance (*rtv) - actually deviation, not variance
157 *  given the new value (diff)
158 * RETURN VALUE:
159 * void
160 */
161
162void
163tp_rtt_rtv(struct tp_pcb *tpcb)
164{
165	int             old = tpcb->tp_rtt;
166	int             elapsed, delta = 0;
167
168	elapsed = hardclock_ticks - tpcb->tp_rttemit;
169
170	if (tpcb->tp_rtt != 0) {
171		/*
172		 * rtt is the smoothed round trip time in machine clock
173		 * ticks (hz). It is stored as a fixed point number,
174		 * unscaled (unlike the tcp srtt).  The rationale here
175		 * is that it is only significant to the nearest unit of
176		 * slowtimo, which is at least 8 machine clock ticks
177		 * so there is no need to scale.  The smoothing is done
178		 * according to the same formula as TCP (rtt = rtt*7/8
179		 * + measured_rtt/8).
180		 */
181		delta = elapsed - tpcb->tp_rtt;
182		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
183			tpcb->tp_rtt = 1;
184		/*
185		 * rtv is a smoothed accumulated mean difference, unscaled
186		 * for reasons expressed above.
187		 * It is smoothed with an alpha of .75, and the round trip timer
188		 * will be set to rtt + 4*rtv, also as TCP does.
189		 */
190		if (delta < 0)
191			delta = -delta;
192		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
193			tpcb->tp_rtv = 1;
194	} else {
195		/*
196		 * No rtt measurement yet - use the unsmoothed rtt. Set the
197		 * variance to half the rtt (so our first retransmit happens
198		 * at 3*rtt)
199		 */
200		tpcb->tp_rtt = elapsed;
201		tpcb->tp_rtv = elapsed >> 1;
202	}
203	tpcb->tp_rttemit = 0;
204	tpcb->tp_rxtshift = 0;
205	/*
206	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
207	 * Because of the way we do the smoothing, srtt and rttvar
208	 * will each average +1/2 tick of bias.  When we compute
209	 * the retransmit timer, we want 1/2 tick of rounding and
210	 * 1 extra tick because of +-1/2 tick uncertainty in the
211	 * firing of the timer.  The bias will give us exactly the
212	 * 1.5 tick we need.  But, because the bias is
213	 * statistical, we have to test that we don't drop below
214	 * the minimum feasible timer (which is 2 ticks)."
215	 */
216	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
217		    tpcb->tp_peer_acktime, 128 /* XXX */ );
218#ifdef ARGO_DEBUG
219	if (argo_debug[D_RTT]) {
220		printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
221		       "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old);
222	}
223#endif
224	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
225}
226
/*
 * CALLED FROM:
 *  tp.trans when an AK arrives
 * FUNCTION and ARGUMENTS:
 * 	Given (cdt), the credit from the AK tpdu,
 *	(seq), the sequence number from the AK tpdu, and
 *	(subseq), the subsequence number from the AK tpdu,
 *  tp_goodack() determines if the AK acknowledges something in the send
 * 	window, and if so, drops the appropriate packets from the retransmission
 *  list, computes the round trip time, and updates the retransmission timer
 *  based on the new smoothed round trip time.
 *  AKs that repeat the current left window edge and credit are counted as
 *  duplicates; reaching tprexmtthresh of them restarts transmission from
 *  tp_snduna.
 *  tp_send() is called before returning on every path.
 * RETURN VALUE:
 * 	Returns 1 if
 * 	EITHER it actually acked something heretofore unacknowledged
 * 	OR no news but the credit should be processed (credit grew).
 * 	Returns 0 for discarded and duplicate AKs.
 * 	If something heretofore unacked was acked with this sequence number,
 * 	the appropriate tpdus are dropped from the retransmission control list,
 * 	by calling tp_sbdrop().
 * 	No need to see the tpdu itself.
 */
int
tp_goodack(struct tp_pcb *tpcb, u_int cdt, SeqNum seq, u_int subseq)
{
	int             old_fcredit = 0;
	int             bang = 0;	/* bang --> ack for something
					 * heretofore unacked */
	u_int           bytes_acked;

#ifdef ARGO_DEBUG
	if (argo_debug[D_ACKRECV]) {
		printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
		       tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
	}
#endif

#ifdef TPPT
	if (tp_traceflags[D_ACKRECV]) {
		tptraceTPCB(TPPTgotack,
			seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq);
	}
#endif

#ifdef TP_PERF_MEAS
		if (DOPERF(tpcb)) {
		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0);
	}
#endif

	/*
	 * The AK does not move the left window edge: use the subsequence
	 * number and the credit to decide whether it carries any news.
	 */
	if (seq == tpcb->tp_snduna) {
		/*
		 * An older subsequence, or the same subsequence without a
		 * larger credit, tells us nothing new.
		 */
		if (subseq < tpcb->tp_r_subseq ||
		 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
	discard_the_ack:
#ifdef ARGO_DEBUG
			if (argo_debug[D_ACKRECV]) {
				printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
				       tpcb, subseq, tpcb->tp_r_subseq);
			}
#endif
			goto done;
		}
		/* Same edge, same credit, newer subsequence: a duplicate AK. */
		if (cdt == tpcb->tp_fcredit	/* && thus subseq >
		        tpcb->tp_r_subseq */ ) {
			tpcb->tp_r_subseq = subseq;
			/* Duplicates only count while the retransmit timer runs. */
			if (tpcb->tp_timer[TM_data_retrans] == 0)
				tpcb->tp_dupacks = 0;
			else if (++tpcb->tp_dupacks == tprexmtthresh) {
				/*
				 * partner went out of his way to signal with
				 * different subsequences that he has the
				 * same lack of an expected packet.  This may
				 * be an early indication of a loss
				 */

				/* Remember where we were so it can be restored below. */
				SeqNum          onxt = tpcb->tp_sndnxt;
				struct mbuf    *onxt_m = tpcb->tp_sndnxt_m;
				/* Half of the smaller of credit and cong. window, in tpdus. */
				u_int           win = min(tpcb->tp_fcredit,
				tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
#ifdef ARGO_DEBUG
				if (argo_debug[D_ACKRECV]) {
					printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
					       "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
				}
#endif
				if (win < 2)
					win = 2;
				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
				/* Stop the timer and abandon the rtt measurement in progress. */
				tpcb->tp_timer[TM_data_retrans] = 0;
				tpcb->tp_rttemit = 0;
				/*
				 * Rewind to the first unacked tpdu and shrink the
				 * congestion window to one tpdu for the tp_send()
				 * call below.
				 */
				tpcb->tp_sndnxt = tpcb->tp_snduna;
				tpcb->tp_sndnxt_m = 0;
				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
				tp_send(tpcb);
				/* Then reopen it to ssthresh plus one tpdu per duplicate. */
				tpcb->tp_cong_win = tpcb->tp_ssthresh +
					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
				/* Resume from the old send point if it was further along. */
				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
					tpcb->tp_sndnxt = onxt;
					tpcb->tp_sndnxt_m = onxt_m;
				}
			} else if (tpcb->tp_dupacks > tprexmtthresh) {
				/* Each further duplicate inflates the window by one tpdu. */
				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
			}
			goto done;
		}
	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
		/* AK for something below the left window edge. */
		goto discard_the_ack;
	/*
	 * If the congestion window was inflated to account
	 * for the other side's cached packets, retract it.
	 */
	if (tpcb->tp_dupacks > tprexmtthresh &&
	    tpcb->tp_cong_win > tpcb->tp_ssthresh)
		tpcb->tp_cong_win = tpcb->tp_ssthresh;
	/* Record the subsequence and credit carried by this AK. */
	tpcb->tp_r_subseq = subseq;
	old_fcredit = tpcb->tp_fcredit;
	tpcb->tp_fcredit = cdt;
	if (cdt > tpcb->tp_maxfcredit)
		tpcb->tp_maxfcredit = cdt;
	tpcb->tp_dupacks = 0;

	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {

		/* Drop the acked tpdus, sanity-checking the sockbuf around it. */
		tpsbcheck(tpcb, 0);
		bytes_acked = tp_sbdrop(tpcb, seq);
		tpsbcheck(tpcb, 1);
		/*
		 * If transmit timer is running and timed sequence
		 * number was acked, update smoothed round trip time.
		 * Since we now have an rtt measurement, cancel the
		 * timer backoff (cf., Phil Karn's retransmit alg.).
		 * Recompute the initial retransmit timer.
		 */
		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
			tp_rtt_rtv(tpcb);
		/*
		 * If all outstanding data is acked, stop retransmit timer.
		 * If there is more data to be acked, restart retransmit
		 * timer, using current (possibly backed-off) value.
		 * OSI combines the keepalive and persistence functions.
		 * So, there is no persistence timer per se, to restart.
		 */
		if (tpcb->tp_class != TP_CLASS_0)
			tpcb->tp_timer[TM_data_retrans] =
				(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
		/*
		 * When new data is acked, open the congestion window.
		 * If the window gives us less than ssthresh packets
		 * in flight, open exponentially (maxseg per packet).
		 * Otherwise open linearly: maxseg per window
		 * (maxseg^2 / cwnd per packet), plus a constant
		 * fraction of a packet (maxseg/8) to help larger windows
		 * open quickly enough.
		 */
		{
			u_int           cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;

			/* Never grow by more than was actually acked. */
			incr = min(incr, bytes_acked);
			if (cw > tpcb->tp_ssthresh)
				incr = incr * incr / cw + incr / 8;
			/* The window is capped at the send buffer high-water mark. */
			tpcb->tp_cong_win =
				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
		}
		/* Advance the left edge, dragging the send point along if needed. */
		tpcb->tp_snduna = seq;
		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
			tpcb->tp_sndnxt = seq;
			tpcb->tp_sndnxt_m = 0;
		}
		bang++;
	}
	/* Credit went from zero to nonzero: set tp_sendfcc. */
	if (cdt != 0 && old_fcredit == 0) {
		tpcb->tp_sendfcc = 1;
	}
	if (cdt == 0) {
		/* Count transitions to a closed window. */
		if (old_fcredit != 0)
			IncStat(ts_zfcdt);
		/* The following might mean that the window shrunk */
		if (tpcb->tp_timer[TM_data_retrans]) {
			/*
			 * Stop retransmitting, arm the sendack timer instead
			 * and rewind the send point to the left window edge.
			 */
			tpcb->tp_timer[TM_data_retrans] = 0;
			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
				tpcb->tp_sndnxt = tpcb->tp_snduna;
				tpcb->tp_sndnxt_m = 0;
			}
		}
	}
	tpcb->tp_fcredit = cdt;
	/* A larger credit is news even when nothing new was acked. */
	bang |= (old_fcredit < cdt);

done:
#ifdef ARGO_DEBUG
	if (argo_debug[D_ACKRECV]) {
		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
		       bang, cdt, old_fcredit, tpcb->tp_cong_win);
	}
#endif
	/*
	 * if (bang) XXXXX Very bad to remove this test, but somethings
	 * broken
	 */
	tp_send(tpcb);
	return (bang);
}
427
428/*
429 * CALLED FROM:
430 *  tp_goodack()
431 * FUNCTION and ARGUMENTS:
432 *  drops everything up TO but not INCLUDING seq # (seq)
433 *  from the retransmission queue.
434 */
435int
436tp_sbdrop(struct tp_pcb *tpcb, SeqNum seq)
437{
438	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
439	int    i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
440	int             oldcc = sb->sb_cc, oldi = i;
441
442	if (i >= tpcb->tp_seqhalf)
443		printf("tp_spdropping too much -- should panic");
444	while (i-- > 0)
445		sbdroprecord(sb);
446#ifdef ARGO_DEBUG
447	if (argo_debug[D_ACKRECV]) {
448		printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
449		       oldi, oldcc - sb->sb_cc, tpcb, seq);
450	}
451#endif
452	if (sb_notify(sb))
453		sowwakeup(tpcb->tp_sock);
454	return (oldcc - sb->sb_cc);
455}
456
457/*
458 * CALLED FROM:
459 * 	tp.trans on user send request, arrival of AK and arrival of XAK
460 * FUNCTION and ARGUMENTS:
461 * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
462 * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
463 * 			c) it hits seq number (highseq) limited by cong or credit.
464 *
465 * 	If you want XPD to buffer > 1 du per socket buffer, you can
466 * 	modifiy this to issue XPD tpdus also, but then it'll have
467 * 	to take some argument(s) to distinguish between the type of DU to
468 * 	hand tp_emit.
469 *
470 * 	When something is sent for the first time, its time-of-send
471 * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
472 *  When the ack arrives, the smoothed round-trip time is figured
473 *  using this value.
474 */
475void
476tp_send(struct tp_pcb *tpcb)
477{
478	int    len;
479	struct mbuf *m;
480	struct mbuf    *mb = 0;
481	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
482	unsigned int    eotsdu = 0;
483	SeqNum          highseq, checkseq;
484	int             idle, idleticks, off, cong_win;
485#ifdef TP_PERF_MEAS
486	int             send_start_time = hardclock_ticks;
487	SeqNum          oldnxt = tpcb->tp_sndnxt;
488#endif /* TP_PERF_MEAS */
489
490	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
491	if (idle) {
492		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
493		if (idleticks > tpcb->tp_dt_ticks)
494			/*
495			 * We have been idle for "a while" and no acks are
496			 * expected to clock out any data we send --
497			 * slow start to get ack "clock" running again.
498			 */
499			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
500	}
501	cong_win = tpcb->tp_cong_win;
502	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
503	if (tpcb->tp_Xsnd.sb_mb)
504		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
505
506#ifdef ARGO_DEBUG
507	if (argo_debug[D_DATA]) {
508		printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
509		       tpcb, tpcb->tp_sndnxt, cong_win, highseq);
510	}
511#endif
512#ifdef TPPT
513	if (tp_traceflags[D_DATA]) {
514		tptraceTPCB(TPPTmisc, "tp_send sndnew snduna",
515			    tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
516	tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
517	    tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
518	}
519#endif
520#ifdef TPPT
521		if (tp_traceflags[D_DATA]) {
522		tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin",
523		      tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
524	}
525#endif
526
527		if (tpcb->tp_sndnxt_m)
528		m = tpcb->tp_sndnxt_m;
529	else {
530		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
531		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
532			off--;
533	}
534	/*
535	 * Avoid silly window syndrome here . . . figure out how!
536	 */
537	checkseq = tpcb->tp_sndnum;
538	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
539		checkseq = highseq;	/* i.e. DON'T retain highest assigned
540					 * packet */
541
542	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
543
544		eotsdu = (m->m_flags & M_EOR) != 0;
545		len = m->m_pkthdr.len;
546		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
547		    len < (tpcb->tp_l_tpdusize / 2))
548			break;	/* Nagle . . . . . */
549		cong_win -= len;
550		/*
551		 * make a copy - mb goes into the retransmission list while m
552		 * gets emitted.  m_copy won't copy a zero-length mbuf.
553		 */
554		mb = m;
555		m = m_copy(mb, 0, M_COPYALL);
556		if (m == NULL)
557			break;
558#ifdef TPPT
559		if (tp_traceflags[D_STASH]) {
560			tptraceTPCB(TPPTmisc,
561				    "tp_send mcopy nxt high eotsdu len",
562				    tpcb->tp_sndnxt, highseq, eotsdu, len);
563		}
564#endif
565
566#ifdef ARGO_DEBUG
567			if (argo_debug[D_DATA]) {
568			printf("tp_sending tpcb %p nxt 0x%x\n",
569			       tpcb, tpcb->tp_sndnxt);
570		}
571#endif
572		/*
573		 * when headers are precomputed, may need to fill in checksum
574		 * here
575		 */
576		tpcb->tp_sock->so_error =
577		      tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m);
578		if (tpcb->tp_sock->so_error != 0)
579			/* error */
580			break;
581		m = mb->m_nextpkt;
582		tpcb->tp_sndnxt_m = m;
583		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
584			SEQ_INC(tpcb, tpcb->tp_sndnew);
585			/*
586			 * Time this transmission if not a retransmission and
587			 * not currently timing anything.
588			 */
589			if (tpcb->tp_rttemit == 0) {
590				tpcb->tp_rttemit = hardclock_ticks;
591				tpcb->tp_rttseq = tpcb->tp_sndnxt;
592			}
593			tpcb->tp_sndnxt = tpcb->tp_sndnew;
594		} else
595			SEQ_INC(tpcb, tpcb->tp_sndnxt);
596		/*
597		 * Set retransmit timer if not currently set.
598		 * Initial value for retransmit timer is smoothed
599		 * round-trip time + 2 * round-trip time variance.
600		 * Initialize shift counter which is used for backoff
601		 * of retransmit time.
602		 */
603		if (tpcb->tp_timer[TM_data_retrans] == 0 &&
604		    tpcb->tp_class != TP_CLASS_0) {
605			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
606			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
607			tpcb->tp_rxtshift = 0;
608		}
609	}
610	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
611		tpcb->tp_oktonagle = 0;
612#ifdef TP_PERF_MEAS
613	if (DOPERF(tpcb)) {
614		int    npkts;
615		int             s, elapsed, *t;
616		struct timeval  now;
617
618		elapsed = hardclock_ticks - send_start_time;
619
620		npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
621
622		if (npkts > 0)
623			tpcb->tp_Nwindow++;
624
625		if (npkts > TP_PM_MAX)
626			npkts = TP_PM_MAX;
627
628		t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
629		*t += (t - elapsed) >> TP_RTT_ALPHA;
630
631		if (mb == 0) {
632			IncPStat(tpcb, tps_win_lim_by_data[npkts]);
633		} else {
634			IncPStat(tpcb, tps_win_lim_by_cdt[npkts]);
635			/* not true with congestion-window being used */
636		}
637		now.tv_sec = elapsed / hz;
638		now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
639		tpmeas(tpcb->tp_lref,
640		       TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
641	}
642#endif				/* TP_PERF_MEAS */
643
644
645#ifdef TPPT
646	if (tp_traceflags[D_DATA]) {
647		tptraceTPCB(TPPTmisc,
648			    "tp_send at end: new nxt eotsdu error",
649			    tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu,
650			    tpcb->tp_sock->so_error);
651
652	}
653#endif
654}
655
/* Event counters bumped by tp_packetize(). */
int             TPNagleok;	/* calls without eotsdu (tp_oktonagle left set) */
int             TPNagled;	/* calls whose data was merged into the last queued packet */
658
659int
660tp_packetize(struct tp_pcb *tpcb, struct mbuf *m, int eotsdu)
661{
662	struct mbuf *n = NULL;
663	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
664	int             maxsize = tpcb->tp_l_tpdusize
665			    - tp_headersize(DT_TPDU_type, tpcb)
666			    - (tpcb->tp_use_checksum ? 4 : 0);
667	int             totlen = m->m_pkthdr.len;
668
669	/*
670	 * Pre-packetize the data in the sockbuf
671	 * according to negotiated mtu.  Do it here
672	 * where we can safely wait for mbufs.
673	 *
674	 * This presumes knowledge of sockbuf conventions.
675	 * TODO: allocate space for header and fill it in (once!).
676	 */
677#ifdef ARGO_DEBUG
678	if (argo_debug[D_DATA]) {
679		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
680		       maxsize, totlen, eotsdu, tpcb->tp_sndnum);
681	}
682#endif
683	if (tpcb->tp_oktonagle) {
684		if ((n = sb->sb_mb) == 0)
685			panic("tp_packetize");
686		while (n->m_nextpkt)
687			n = n->m_nextpkt;
688		if (n->m_flags & M_EOR)
689			panic("tp_packetize 2");
690		SEQ_INC(tpcb, tpcb->tp_sndnum);
691		if (totlen + n->m_pkthdr.len < maxsize) {
692			/*
693			 * There is an unsent packet with space,
694			 * combine data
695			 */
696			struct mbuf    *old_n = n;
697			tpsbcheck(tpcb, 3);
698			n->m_pkthdr.len += totlen;
699			while (n->m_next)
700				n = n->m_next;
701			sbcompress(sb, m, n);
702			tpsbcheck(tpcb, 4);
703			n = old_n;
704			TPNagled++;
705			goto out;
706		}
707	}
708
709	while (m) {
710		n = m;
711		if (totlen > maxsize) {
712			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
713				panic("tp_packetize");
714		} else
715			m = 0;
716		totlen -= maxsize;
717		tpsbcheck(tpcb, 5);
718		sbappendrecord(sb, n);
719		tpsbcheck(tpcb, 6);
720		SEQ_INC(tpcb, tpcb->tp_sndnum);
721	}
722out:
723	if (eotsdu) {
724		n->m_flags |= M_EOR;	/* XXX belongs at end */
725		tpcb->tp_oktonagle = 0;
726	} else {
727		SEQ_DEC(tpcb, tpcb->tp_sndnum);
728		tpcb->tp_oktonagle = 1;
729		TPNagleok++;
730	}
731
732#ifdef ARGO_DEBUG
733	if (argo_debug[D_DATA]) {
734		printf("SEND out: oktonagle %d sndnum 0x%x\n",
735		       tpcb->tp_oktonagle, tpcb->tp_sndnum);
736	}
737#endif
738	return 0;
739}
740
741
/*
 * NAME: tp_stash()
 * CALLED FROM:
 *	tp.trans on arrival of a DT tpdu
 * FUNCTION, ARGUMENTS, and RETURN VALUE:
 *	Takes the DT tpdu described by event (e).  An in-sequence tpdu is
 *	appended to the receive socket buffer, followed by any tpdus on
 *	the reassembly queue that have become contiguous.  An
 *	out-of-sequence tpdu inside the receive window is parked on the
 *	reassembly queue; anything else is freed.
 * 	Returns 1 if
 *	a) something new arrived and it's got eotsdu_reached bit on,
 * 	b) this arrival caused other out-of-sequence things to be
 *    	accepted, or
 * 	c) this arrival is the highest seq # for which we last gave credit
 *   	(sender just sent a whole window)
 *  In other words, returns 1 if tp should send an ack immediately, 0 if
 *  the ack can wait a while.
 *
 * Note: this implementation no longer renegs on credit, (except
 * when debugging option D_RENEG is on, for the purpose of testing
 * ack subsequencing), so we don't  need to check for incoming tpdus
 * being in a reneged portion of the window.
 */

int
tp_stash(struct tp_pcb *tpcb, struct tp_event *e)
{
	int    ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH;
	/* 0--> delay acks until full window */
	/* 1--> ack each tpdu */
/* Shorthand for the DT attributes of the event; not #undef'd afterwards. */
#define E e->TPDU_ATTR(DT)

	/* End of TSDU: flag the data chain and detach it from any next packet. */
	if (E.e_eot) {
		struct mbuf *n = E.e_data;
		n->m_flags |= M_EOR;
		n->m_nextpkt = 0;
	}
#ifdef ARGO_DEBUG
	if (argo_debug[D_STASH]) {
		dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
			  "stash: so_rcv before appending");
		dump_mbuf(E.e_data,
			  "stash: e_data before appending");
	}
#endif

#ifdef TP_PERF_MEAS
	if (DOPERF(tpcb)) {
		PStat(tpcb, Nb_from_ll) += E.e_datalen;
		tpmeas(tpcb->tp_lref, TPtime_from_ll,
		       &e->e_time, E.e_seq,
		       (u_int) PStat(tpcb, Nb_from_ll),
		       (u_int) E.e_datalen);
	}
#endif

	/* The tpdu is exactly the next one expected. */
	if (E.e_seq == tpcb->tp_rcvnxt) {

#ifdef ARGO_DEBUG
		if (argo_debug[D_STASH]) {
			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
			     E.e_seq, E.e_datalen, E.e_eot);
		}
#endif

#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
			  E.e_seq, E.e_datalen, E.e_eot, 0);
		}
#endif

		SET_DELACK(tpcb);

		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);

		SEQ_INC(tpcb, tpcb->tp_rcvnxt);
		/*
		 * move chains from the reassembly queue to the socket buffer
		 */
		if (tpcb->tp_rsycnt) {
			struct mbuf **mp;
			struct mbuf   **mplim;

			/*
			 * The queue is an array of tp_maxlcredit slots
			 * indexed by sequence number modulo its size;
			 * start at the slot for the new tp_rcvnxt.
			 */
			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt %
					      tpcb->tp_maxlcredit);
			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;

			/* Drain consecutive occupied slots, wrapping at the end. */
			while (tpcb->tp_rsycnt && *mp) {
				sbappend(&tpcb->tp_sock->so_rcv, *mp);
				tpcb->tp_rsycnt--;
				*mp = 0;
				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
				ack_reason |= ACK_REORDER;
				if (++mp == mplim)
					mp = tpcb->tp_rsyq;
			}
		}
#ifdef ARGO_DEBUG
		if (argo_debug[D_STASH]) {
			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
			   "stash: so_rcv after appending");
		}
#endif

	} else {
		/* Out of sequence: park it, or drop it. */
		struct mbuf **mp;
		SeqNum          uwe;

#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
				    E.e_seq, tpcb->tp_rcvnxt,
				    tpcb->tp_lcredit, 0);
		}
#endif

		/* The reassembly queue is allocated on first use. */
		if (tpcb->tp_rsyq == 0)
			tp_rsyset(tpcb);
		/* Upper window edge: tp_maxlcredit beyond the next expected. */
		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
		if (tpcb->tp_rsyq == 0 ||
		    !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
			/* No queue (allocation failed) or outside the window. */
			ack_reason = ACK_DONT;
			m_freem(E.e_data);
		} else if (*(mp = tpcb->tp_rsyq +
			     (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) {
#ifdef ARGO_DEBUG
			if (argo_debug[D_STASH]) {
				printf("tp_stash - drop & ack\n");
			}
#endif

			/*
			 * retransmission - drop it and force
			 * an ack
			 */
			IncStat(ts_dt_dup);
#ifdef TP_PERF_MEAS
			if (DOPERF(tpcb)) {
				IncPStat(tpcb, tps_n_ack_cuz_dup);
			}
#endif

				m_freem(E.e_data);
			ack_reason |= ACK_DUP;
		} else {
			/* Slot is free: hold the tpdu until the gap is filled. */
			*mp = E.e_data;
			tpcb->tp_rsycnt++;
			ack_reason = ACK_DONT;
		}
	}
	/*
	 * there were some comments of historical interest
	 * here.
	 */
	{
		/* NOTE(review): LOCAL_CREDIT is a macro defined elsewhere. */
		LOCAL_CREDIT(tpcb);

		/* The sender has reached the upper window edge we last sent. */
		if (E.e_seq == tpcb->tp_sent_uwe)
			ack_reason |= ACK_STRAT_FULLWIN;

#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc,
		 "end of stash, eot, ack_reason, sent_uwe ",
		 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
		}
#endif

		if (ack_reason == ACK_DONT) {
			IncStat(ts_ackreason[ACK_DONT]);
			return 0;
		} else {
#ifdef TP_PERF_MEAS
			if (DOPERF(tpcb)) {
				/* Only the first matching reason is counted here. */
				if (ack_reason & ACK_STRAT_EACH) {
				IncPStat(tpcb, tps_n_ack_cuz_strat);
			} else if (ack_reason & ACK_STRAT_FULLWIN) {
				IncPStat(tpcb, tps_n_ack_cuz_fullwin);
			} else if (ack_reason & ACK_REORDER) {
				IncPStat(tpcb, tps_n_ack_cuz_reorder);
			}
			tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
			   SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
			}
#endif
			{
				int    i;

				/*
				 * keep track of all reasons
				 * that apply
				 */
				/* Reason bits are tested from bit 1 upward. */
				for (i = 1; i < _ACK_NUM_REASONS_; i++) {
					if (ack_reason & (1 << i))
						IncStat(ts_ackreason[i]);
				}
			}
			return 1;
		}
	}
}
940
941/*
942 * tp_rsyflush - drop all the packets on the reassembly queue.
943 * Do this when closing the socket, or when somebody has changed
944 * the space avaible in the receive socket (XXX).
945 */
946void
947tp_rsyflush(struct tp_pcb *tpcb)
948{
949	struct mbuf **mp;
950	if (tpcb->tp_rsycnt) {
951		for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
952		     --mp >= tpcb->tp_rsyq;)
953			if (*mp) {
954				tpcb->tp_rsycnt--;
955				m_freem(*mp);
956			}
957		if (tpcb->tp_rsycnt) {
958			printf("tp_rsyflush %p\n", tpcb);
959			tpcb->tp_rsycnt = 0;
960		}
961	}
962	free((void *) tpcb->tp_rsyq, M_PCB);
963	tpcb->tp_rsyq = 0;
964}
965
966void
967tp_rsyset(struct tp_pcb *tpcb)
968{
969	struct socket *so = tpcb->tp_sock;
970	int             maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
971	int             old_credit = tpcb->tp_maxlcredit;
972	void *        rsyq;
973
974	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
975					      (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);
976
977	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
978		return;
979	maxcredit *= sizeof(struct mbuf *);
980	if (tpcb->tp_rsyq)
981		tp_rsyflush(tpcb);
982	rsyq = malloc(maxcredit, M_PCB, M_NOWAIT|M_ZERO);
983	tpcb->tp_rsyq = (struct mbuf **) rsyq;
984}
985
986
987void
988tpsbcheck(struct tp_pcb *tpcb, int i)
989{
990	struct mbuf *n, *m;
991	int    len = 0, mbcnt = 0, pktlen;
992	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
993
994	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
995		if ((n->m_flags & M_PKTHDR) == 0)
996			panic("tpsbcheck nohdr");
997		pktlen = len + n->m_pkthdr.len;
998		for (m = n; m; m = m->m_next) {
999			len += m->m_len;
1000			mbcnt += MSIZE;
1001			if (m->m_flags & M_EXT)
1002				mbcnt += m->m_ext.ext_size;
1003		}
1004		if (len != pktlen) {
1005			printf("test %d; len %d != pktlen %d on mbuf %p\n",
1006			       i, len, pktlen, n);
1007			panic("tpsbcheck short");
1008		}
1009	}
1010	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1011		printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc,
1012		       mbcnt, sb->sb_mbcnt);
1013		panic("tpsbcheck");
1014	}
1015}
1016