spx_reass.c revision 192753
1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2009 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * Copyright (c) 1995, Mike Mitchell
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)spx_usrreq.h
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/netipx/spx_reass.c 192753 2009-05-25 10:25:41Z rwatson $");
67
68#include <sys/param.h>
69#include <sys/lock.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/protosw.h>
75#include <sys/signalvar.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/sx.h>
79#include <sys/systm.h>
80
81#include <net/route.h>
82#include <netinet/tcp_fsm.h>
83
84#include <netipx/ipx.h>
85#include <netipx/ipx_pcb.h>
86#include <netipx/ipx_var.h>
87#include <netipx/spx.h>
88#include <netipx/spx_debug.h>
89#include <netipx/spx_timer.h>
90#include <netipx/spx_var.h>
91
/*
 * Tunables private to the reassembly/input path.
 *
 * spx_use_delack:  when non-zero, a peer's SPX_SA request is answered by
 *                  setting SF_DELACK instead of SF_ACKNOW.
 * spxrexmtthresh:  number of consecutive duplicate acks after which the
 *                  segment at the acknowledged sequence is retransmitted.
 */
static int	spx_use_delack = 0;
static int	spxrexmtthresh = 3;
94
95static __inline void
96spx_insque(struct spx_q *element, struct spx_q *head)
97{
98
99	element->si_next = head->si_next;
100	element->si_prev = head;
101	head->si_next = element;
102	element->si_next->si_prev = element;
103}
104
105void
106spx_remque(struct spx_q *element)
107{
108
109	element->si_next->si_prev = element->si_prev;
110	element->si_prev->si_next = element->si_next;
111	element->si_prev = NULL;
112}
113
114/*
115 * Flesh pending queued segments on SPX close.
116 */
117void
118spx_reass_flush(struct spxpcb *cb)
119{
120	struct spx_q *s;
121	struct mbuf *m;
122
123	s = cb->s_q.si_next;
124	while (s != &(cb->s_q)) {
125		s = s->si_next;
126		spx_remque(s);
127		m = dtom(s);
128		m_freem(m);
129	}
130}
131
132/*
133 * Initialize SPX segment reassembly queue on SPX socket open.
134 */
135void
136spx_reass_init(struct spxpcb *cb)
137{
138
139	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
140}
141
142/*
143 * This is structurally similar to the tcp reassembly routine but its
144 * function is somewhat different: it merely queues packets up, and
145 * suppresses duplicates.
146 */
147int
148spx_reass(struct spxpcb *cb, struct spx *si)
149{
150	struct spx_q *q;
151	struct mbuf *m;
152	struct socket *so = cb->s_ipxpcb->ipxp_socket;
153	char packetp = cb->s_flags & SF_HI;
154	int incr;
155	char wakeup = 0;
156
157	IPX_LOCK_ASSERT(cb->s_ipxpcb);
158
159	if (si == SI(0))
160		goto present;
161
162	/*
163	 * Update our news from them.
164	 */
165	if (si->si_cc & SPX_SA)
166		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
167	if (SSEQ_GT(si->si_alo, cb->s_ralo))
168		cb->s_flags |= SF_WIN;
169	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
170		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
171			spxstat.spxs_rcvdupack++;
172
173			/*
174			 * If this is a completely duplicate ack and other
175			 * conditions hold, we assume a packet has been
176			 * dropped and retransmit it exactly as in
177			 * tcp_input().
178			 */
179			if (si->si_ack != cb->s_rack ||
180			    si->si_alo != cb->s_ralo)
181				cb->s_dupacks = 0;
182			else if (++cb->s_dupacks == spxrexmtthresh) {
183				u_short onxt = cb->s_snxt;
184				int cwnd = cb->s_cwnd;
185
186				cb->s_snxt = si->si_ack;
187				cb->s_cwnd = CUNIT;
188				cb->s_force = 1 + SPXT_REXMT;
189				spx_output(cb, NULL);
190				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
191				cb->s_rtt = 0;
192				if (cwnd >= 4 * CUNIT)
193					cb->s_cwnd = cwnd / 2;
194				if (SSEQ_GT(onxt, cb->s_snxt))
195					cb->s_snxt = onxt;
196				return (1);
197			}
198		} else
199			cb->s_dupacks = 0;
200		goto update_window;
201	}
202	cb->s_dupacks = 0;
203
204	/*
205	 * If our correspondent acknowledges data we haven't sent TCP would
206	 * drop the packet after acking.  We'll be a little more permissive.
207	 */
208	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
209		spxstat.spxs_rcvacktoomuch++;
210		si->si_ack = cb->s_smax + 1;
211	}
212	spxstat.spxs_rcvackpack++;
213
214	/*
215	 * If transmit timer is running and timed sequence number was acked,
216	 * update smoothed round trip time.  See discussion of algorithm in
217	 * tcp_input.c
218	 */
219	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
220		spxstat.spxs_rttupdated++;
221		if (cb->s_srtt != 0) {
222			short delta;
223			delta = cb->s_rtt - (cb->s_srtt >> 3);
224			if ((cb->s_srtt += delta) <= 0)
225				cb->s_srtt = 1;
226			if (delta < 0)
227				delta = -delta;
228			delta -= (cb->s_rttvar >> 2);
229			if ((cb->s_rttvar += delta) <= 0)
230				cb->s_rttvar = 1;
231		} else {
232			/*
233			 * No rtt measurement yet.
234			 */
235			cb->s_srtt = cb->s_rtt << 3;
236			cb->s_rttvar = cb->s_rtt << 1;
237		}
238		cb->s_rtt = 0;
239		cb->s_rxtshift = 0;
240		SPXT_RANGESET(cb->s_rxtcur,
241			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
242			SPXTV_MIN, SPXTV_REXMTMAX);
243	}
244
245	/*
246	 * If all outstanding data is acked, stop retransmit timer and
247	 * remember to restart (more output or persist).  If there is more
248	 * data to be acked, restart retransmit timer, using current
249	 * (possibly backed-off) value;
250	 */
251	if (si->si_ack == cb->s_smax + 1) {
252		cb->s_timer[SPXT_REXMT] = 0;
253		cb->s_flags |= SF_RXT;
254	} else if (cb->s_timer[SPXT_PERSIST] == 0)
255		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
256
257	/*
258	 * When new data is acked, open the congestion window.  If the window
259	 * gives us less than ssthresh packets in flight, open exponentially
260	 * (maxseg at a time).  Otherwise open linearly (maxseg^2 / cwnd at a
261	 * time).
262	 */
263	incr = CUNIT;
264	if (cb->s_cwnd > cb->s_ssthresh)
265		incr = max(incr * incr / cb->s_cwnd, 1);
266	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
267
268	/*
269	 * Trim Acked data from output queue.
270	 */
271	SOCKBUF_LOCK(&so->so_snd);
272	while ((m = so->so_snd.sb_mb) != NULL) {
273		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
274			sbdroprecord_locked(&so->so_snd);
275		else
276			break;
277	}
278	sowwakeup_locked(so);
279	cb->s_rack = si->si_ack;
280update_window:
281	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
282		cb->s_snxt = cb->s_rack;
283	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
284	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
285	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
286		/* keep track of pure window updates */
287		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
288		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
289			spxstat.spxs_rcvwinupd++;
290			spxstat.spxs_rcvdupack--;
291		}
292		cb->s_ralo = si->si_alo;
293		cb->s_swl1 = si->si_seq;
294		cb->s_swl2 = si->si_ack;
295		cb->s_swnd = (1 + si->si_alo - si->si_ack);
296		if (cb->s_swnd > cb->s_smxw)
297			cb->s_smxw = cb->s_swnd;
298		cb->s_flags |= SF_WIN;
299	}
300
301	/*
302	 * If this packet number is higher than that which we have allocated
303	 * refuse it, unless urgent.
304	 */
305	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
306		if (si->si_cc & SPX_SP) {
307			spxstat.spxs_rcvwinprobe++;
308			return (1);
309		} else
310			spxstat.spxs_rcvpackafterwin++;
311		if (si->si_cc & SPX_OB) {
312			if (SSEQ_GT(si->si_seq, cb->s_alo + 60))
313				return (1); /* else queue this packet; */
314		} else {
315#ifdef BROKEN
316			/*
317			 * XXXRW: This is broken on at least one count:
318			 * spx_close() will free the ipxp and related parts,
319			 * which are then touched by spx_input() after the
320			 * return from spx_reass().
321			 */
322			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
323			if (so->so_state && SS_NOFDREF) {
324				spx_close(cb);
325			} else
326				       would crash system*/
327#endif
328			spx_istat.notyet++;
329			return (1);
330		}
331	}
332
333	/*
334	 * If this is a system packet, we don't need to queue it up, and
335	 * won't update acknowledge #.
336	 */
337	if (si->si_cc & SPX_SP)
338		return (1);
339
340	/*
341	 * We have already seen this packet, so drop.
342	 */
343	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
344		spx_istat.bdreas++;
345		spxstat.spxs_rcvduppack++;
346		if (si->si_seq == cb->s_ack - 1)
347			spx_istat.lstdup++;
348		return (1);
349	}
350
351	/*
352	 * Loop through all packets queued up to insert in appropriate
353	 * sequence.
354	 */
355	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
356		if (si->si_seq == SI(q)->si_seq) {
357			spxstat.spxs_rcvduppack++;
358			return (1);
359		}
360		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
361			spxstat.spxs_rcvoopack++;
362			break;
363		}
364	}
365	spx_insque((struct spx_q *)si, q->si_prev);
366
367	/*
368	 * If this packet is urgent, inform process
369	 */
370	if (si->si_cc & SPX_OB) {
371		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
372		sohasoutofband(so);
373		cb->s_oobflags |= SF_IOOB;
374	}
375present:
376#define SPINC sizeof(struct spxhdr)
377	SOCKBUF_LOCK(&so->so_rcv);
378
379	/*
380	 * Loop through all packets queued up to update acknowledge number,
381	 * and present all acknowledged data to user; if in packet interface
382	 * mode, show packet headers.
383	 */
384	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
385		  if (SI(q)->si_seq == cb->s_ack) {
386			cb->s_ack++;
387			m = dtom(q);
388			if (SI(q)->si_cc & SPX_OB) {
389				cb->s_oobflags &= ~SF_IOOB;
390				if (so->so_rcv.sb_cc)
391					so->so_oobmark = so->so_rcv.sb_cc;
392				else
393					so->so_rcv.sb_state |= SBS_RCVATMARK;
394			}
395			q = q->si_prev;
396			spx_remque(q->si_next);
397			wakeup = 1;
398			spxstat.spxs_rcvpack++;
399#ifdef SF_NEWCALL
400			if (cb->s_flags2 & SF_NEWCALL) {
401				struct spxhdr *sp = mtod(m, struct spxhdr *);
402				u_char dt = sp->spx_dt;
403				spx_newchecks[4]++;
404				if (dt != cb->s_rhdr.spx_dt) {
405					struct mbuf *mm =
406					   m_getclr(M_DONTWAIT, MT_CONTROL);
407					spx_newchecks[0]++;
408					if (mm != NULL) {
409						u_short *s =
410							mtod(mm, u_short *);
411						cb->s_rhdr.spx_dt = dt;
412						mm->m_len = 5; /*XXX*/
413						s[0] = 5;
414						s[1] = 1;
415						*(u_char *)(&s[2]) = dt;
416						sbappend_locked(&so->so_rcv, mm);
417					}
418				}
419				if (sp->spx_cc & SPX_OB) {
420					MCHTYPE(m, MT_OOBDATA);
421					spx_newchecks[1]++;
422					so->so_oobmark = 0;
423					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
424				}
425				if (packetp == 0) {
426					m->m_data += SPINC;
427					m->m_len -= SPINC;
428					m->m_pkthdr.len -= SPINC;
429				}
430				if ((sp->spx_cc & SPX_EM) || packetp) {
431					sbappendrecord_locked(&so->so_rcv, m);
432					spx_newchecks[9]++;
433				} else
434					sbappend_locked(&so->so_rcv, m);
435			} else
436#endif
437			if (packetp)
438				sbappendrecord_locked(&so->so_rcv, m);
439			else {
440				cb->s_rhdr = *mtod(m, struct spxhdr *);
441				m->m_data += SPINC;
442				m->m_len -= SPINC;
443				m->m_pkthdr.len -= SPINC;
444				sbappend_locked(&so->so_rcv, m);
445			}
446		  } else
447			break;
448	}
449	if (wakeup)
450		sorwakeup_locked(so);
451	else
452		SOCKBUF_UNLOCK(&so->so_rcv);
453	return (0);
454}
455