spx_usrreq.c revision 184205
1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2006 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * Copyright (c) 1995, Mike Mitchell
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)spx_usrreq.h
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 184205 2008-10-23 15:53:51Z des $");
67
68#include <sys/param.h>
69#include <sys/lock.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/protosw.h>
75#include <sys/signalvar.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/sx.h>
79#include <sys/systm.h>
80
81#include <net/route.h>
82#include <netinet/tcp_fsm.h>
83
84#include <netipx/ipx.h>
85#include <netipx/ipx_pcb.h>
86#include <netipx/ipx_var.h>
87#include <netipx/spx.h>
88#include <netipx/spx_debug.h>
89#include <netipx/spx_timer.h>
90#include <netipx/spx_var.h>
91
92/*
93 * SPX protocol implementation.
94 */
95static struct	mtx spx_mtx;			/* Protects only spx_iss. */
96static u_short 	spx_iss;
97static u_short	spx_newchecks[50];
98static int	spx_hardnosed;
99static int	spx_use_delack = 0;
100static int	traceallspxs = 0;
101static struct	spx_istat spx_istat;
102static int	spxrexmtthresh = 3;
103
/*
 * Wrappers around the mutex that guards spx_iss.
 */
#define	SPX_LOCK_INIT()							\
	mtx_init(&spx_mtx, "spx_mtx", NULL, MTX_DEF)
#define	SPX_LOCK()							\
	mtx_lock(&spx_mtx)
#define	SPX_UNLOCK()							\
	mtx_unlock(&spx_mtx)

/*
 * "spxstat" used to be a standalone struct spxstat; it is now an alias
 * for the newstats member of spx_istat unless something already defined
 * it.
 */
#if !defined(spxstat)
#define	spxstat	spx_istat.newstats
#endif
112
113static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
114    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
115
/*
 * Internal helpers that operate on an SPX control block.
 */
static void	spx_close(struct spxpcb *cb);
static void	spx_disconnect(struct spxpcb *cb);
static void	spx_drop(struct spxpcb *cb, int errno);
static int	spx_output(struct spxpcb *cb, struct mbuf *m0);
static int	spx_reass(struct spxpcb *cb, struct spx *si);
static void	spx_setpersist(struct spxpcb *cb);
static void	spx_template(struct spxpcb *cb);
static void	spx_timers(struct spxpcb *cb, int timer);
static void	spx_usrclosed(struct spxpcb *cb);

/*
 * Detach of the protocol state hanging off an IPX pcb.
 */
static void	spx_pcbdetach(struct ipxpcb *ipxp);

/*
 * Socket-layer entry points referenced from the pr_usrreqs tables.
 */
static void	spx_usr_abort(struct socket *so);
static int	spx_accept(struct socket *so, struct sockaddr **nam);
static int	spx_attach(struct socket *so, int proto, struct thread *td);
static int	spx_sp_attach(struct socket *so, int proto,
		    struct thread *td);
static int	spx_bind(struct socket *so, struct sockaddr *nam,
		    struct thread *td);
static void	spx_usr_close(struct socket *so);
static int	spx_connect(struct socket *so, struct sockaddr *nam,
		    struct thread *td);
static void	spx_detach(struct socket *so);
static int	spx_usr_disconnect(struct socket *so);
static int	spx_listen(struct socket *so, int backlog,
		    struct thread *td);
static int	spx_rcvd(struct socket *so, int flags);
static int	spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static int	spx_send(struct socket *so, int flags, struct mbuf *m,
		    struct sockaddr *addr, struct mbuf *control,
		    struct thread *td);
static int	spx_shutdown(struct socket *so);
144
145struct	pr_usrreqs spx_usrreqs = {
146	.pru_abort =		spx_usr_abort,
147	.pru_accept =		spx_accept,
148	.pru_attach =		spx_attach,
149	.pru_bind =		spx_bind,
150	.pru_connect =		spx_connect,
151	.pru_control =		ipx_control,
152	.pru_detach =		spx_detach,
153	.pru_disconnect =	spx_usr_disconnect,
154	.pru_listen =		spx_listen,
155	.pru_peeraddr =		ipx_peeraddr,
156	.pru_rcvd =		spx_rcvd,
157	.pru_rcvoob =		spx_rcvoob,
158	.pru_send =		spx_send,
159	.pru_shutdown =		spx_shutdown,
160	.pru_sockaddr =		ipx_sockaddr,
161	.pru_close =		spx_usr_close,
162};
163
164struct	pr_usrreqs spx_usrreq_sps = {
165	.pru_abort =		spx_usr_abort,
166	.pru_accept =		spx_accept,
167	.pru_attach =		spx_sp_attach,
168	.pru_bind =		spx_bind,
169	.pru_connect =		spx_connect,
170	.pru_control =		ipx_control,
171	.pru_detach =		spx_detach,
172	.pru_disconnect =	spx_usr_disconnect,
173	.pru_listen =		spx_listen,
174	.pru_peeraddr =		ipx_peeraddr,
175	.pru_rcvd =		spx_rcvd,
176	.pru_rcvoob =		spx_rcvoob,
177	.pru_send =		spx_send,
178	.pru_shutdown =		spx_shutdown,
179	.pru_sockaddr =		ipx_sockaddr,
180	.pru_close =		spx_usr_close,
181};
182
183void
184spx_init(void)
185{
186
187	SPX_LOCK_INIT();
188	spx_iss = 1; /* WRONG !! should fish it out of TODR */
189}
190
191void
192spx_input(struct mbuf *m, struct ipxpcb *ipxp)
193{
194	struct spxpcb *cb;
195	struct spx *si = mtod(m, struct spx *);
196	struct socket *so;
197	struct spx spx_savesi;
198	int dropsocket = 0;
199	short ostate = 0;
200
201	spxstat.spxs_rcvtotal++;
202	KASSERT(ipxp != NULL, ("spx_input: ipxpcb == NULL"));
203
204	/*
205	 * spx_input() assumes that the caller will hold both the pcb list
206	 * lock and also the ipxp lock.  spx_input() will release both before
207	 * returning, and may in fact trade in the ipxp lock for another pcb
208	 * lock following sonewconn().
209	 */
210	IPX_LIST_LOCK_ASSERT();
211	IPX_LOCK_ASSERT(ipxp);
212
213	cb = ipxtospxpcb(ipxp);
214	KASSERT(cb != NULL, ("spx_input: cb == NULL"));
215
216	if (ipxp->ipxp_flags & IPXP_DROPPED)
217		goto drop;
218
219	if (m->m_len < sizeof(*si)) {
220		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
221			IPX_UNLOCK(ipxp);
222			IPX_LIST_UNLOCK();
223			spxstat.spxs_rcvshort++;
224			return;
225		}
226		si = mtod(m, struct spx *);
227	}
228	si->si_seq = ntohs(si->si_seq);
229	si->si_ack = ntohs(si->si_ack);
230	si->si_alo = ntohs(si->si_alo);
231
232	so = ipxp->ipxp_socket;
233	KASSERT(so != NULL, ("spx_input: so == NULL"));
234
235	if (so->so_options & SO_DEBUG || traceallspxs) {
236		ostate = cb->s_state;
237		spx_savesi = *si;
238	}
239	if (so->so_options & SO_ACCEPTCONN) {
240		struct spxpcb *ocb = cb;
241
242		so = sonewconn(so, 0);
243		if (so == NULL)
244			goto drop;
245
246		/*
247		 * This is ugly, but ....
248		 *
249		 * Mark socket as temporary until we're committed to keeping
250		 * it.  The code at ``drop'' and ``dropwithreset'' check the
251		 * flag dropsocket to see if the temporary socket created
252		 * here should be discarded.  We mark the socket as
253		 * discardable until we're committed to it below in
254		 * TCPS_LISTEN.
255		 *
256		 * XXXRW: In the new world order of real kernel parallelism,
257		 * temporarily allocating the socket when we're "not sure"
258		 * seems like a bad idea, as we might race to remove it if
259		 * the listen socket is closed...?
260		 *
261		 * We drop the lock of the listen socket ipxp, and acquire
262		 * the lock of the new socket ippx.
263		 */
264		dropsocket++;
265		IPX_UNLOCK(ipxp);
266		ipxp = (struct ipxpcb *)so->so_pcb;
267		IPX_LOCK(ipxp);
268		ipxp->ipxp_laddr = si->si_dna;
269		cb = ipxtospxpcb(ipxp);
270		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
271		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
272		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
273		cb->s_state = TCPS_LISTEN;
274	}
275	IPX_LOCK_ASSERT(ipxp);
276
277	/*
278	 * Packet received on connection.  Reset idle time and keep-alive
279	 * timer.
280	 */
281	cb->s_idle = 0;
282	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
283
284	switch (cb->s_state) {
285	case TCPS_LISTEN:{
286		struct sockaddr_ipx *sipx, ssipx;
287		struct ipx_addr laddr;
288
289		/*
290		 * If somebody here was carying on a conversation and went
291		 * away, and his pen pal thinks he can still talk, we get the
292		 * misdirected packet.
293		 */
294		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
295			spx_istat.gonawy++;
296			goto dropwithreset;
297		}
298		sipx = &ssipx;
299		bzero(sipx, sizeof *sipx);
300		sipx->sipx_len = sizeof(*sipx);
301		sipx->sipx_family = AF_IPX;
302		sipx->sipx_addr = si->si_sna;
303		laddr = ipxp->ipxp_laddr;
304		if (ipx_nullhost(laddr))
305			ipxp->ipxp_laddr = si->si_dna;
306		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
307			ipxp->ipxp_laddr = laddr;
308			spx_istat.noconn++;
309			goto drop;
310		}
311		spx_template(cb);
312		dropsocket = 0;		/* committed to socket */
313		cb->s_did = si->si_sid;
314		cb->s_rack = si->si_ack;
315		cb->s_ralo = si->si_alo;
316#define THREEWAYSHAKE
317#ifdef THREEWAYSHAKE
318		cb->s_state = TCPS_SYN_RECEIVED;
319		cb->s_force = 1 + SPXT_KEEP;
320		spxstat.spxs_accepts++;
321		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
322		}
323		break;
324
325	 case TCPS_SYN_RECEIVED: {
326		/*
327		 * This state means that we have heard a response to our
328		 * acceptance of their connection.  It is probably logically
329		 * unnecessary in this implementation.
330		 */
331		if (si->si_did != cb->s_sid) {
332			spx_istat.wrncon++;
333			goto drop;
334		}
335#endif
336		ipxp->ipxp_fport =  si->si_sport;
337		cb->s_timer[SPXT_REXMT] = 0;
338		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
339		soisconnected(so);
340		cb->s_state = TCPS_ESTABLISHED;
341		spxstat.spxs_accepts++;
342		}
343		break;
344
345	case TCPS_SYN_SENT:
346		/*
347		 * This state means that we have gotten a response to our
348		 * attempt to establish a connection.  We fill in the data
349		 * from the other side, telling us which port to respond to,
350		 * instead of the well-known one we might have sent to in the
351		 * first place.  We also require that this is a response to
352		 * our connection id.
353		 */
354		if (si->si_did != cb->s_sid) {
355			spx_istat.notme++;
356			goto drop;
357		}
358		spxstat.spxs_connects++;
359		cb->s_did = si->si_sid;
360		cb->s_rack = si->si_ack;
361		cb->s_ralo = si->si_alo;
362		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
363		cb->s_timer[SPXT_REXMT] = 0;
364		cb->s_flags |= SF_ACKNOW;
365		soisconnected(so);
366		cb->s_state = TCPS_ESTABLISHED;
367
368		/*
369		 * Use roundtrip time of connection request for initial rtt.
370		 */
371		if (cb->s_rtt) {
372			cb->s_srtt = cb->s_rtt << 3;
373			cb->s_rttvar = cb->s_rtt << 1;
374			SPXT_RANGESET(cb->s_rxtcur,
375			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
376			    SPXTV_MIN, SPXTV_REXMTMAX);
377			    cb->s_rtt = 0;
378		}
379	}
380
381	if (so->so_options & SO_DEBUG || traceallspxs)
382		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
383
384	m->m_len -= sizeof(struct ipx);
385	m->m_pkthdr.len -= sizeof(struct ipx);
386	m->m_data += sizeof(struct ipx);
387
388	if (spx_reass(cb, si))
389		m_freem(m);
390	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
391		spx_output(cb, NULL);
392	cb->s_flags &= ~(SF_WIN|SF_RXT);
393	IPX_UNLOCK(ipxp);
394	IPX_LIST_UNLOCK();
395	return;
396
397dropwithreset:
398	IPX_LOCK_ASSERT(ipxp);
399	if (cb == NULL || (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
400	    traceallspxs))
401		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
402	IPX_UNLOCK(ipxp);
403	if (dropsocket) {
404		struct socket *head;
405		ACCEPT_LOCK();
406		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
407		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
408		head = so->so_head;
409		TAILQ_REMOVE(&head->so_incomp, so, so_list);
410		head->so_incqlen--;
411		so->so_qstate &= ~SQ_INCOMP;
412		so->so_head = NULL;
413		ACCEPT_UNLOCK();
414		soabort(so);
415	}
416	IPX_LIST_UNLOCK();
417	m_freem(m);
418	return;
419
420drop:
421	IPX_LOCK_ASSERT(ipxp);
422	if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
423		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
424	IPX_UNLOCK(ipxp);
425	IPX_LIST_UNLOCK();
426	m_freem(m);
427}
428
429/*
430 * This is structurally similar to the tcp reassembly routine but its
431 * function is somewhat different: it merely queues packets up, and
432 * suppresses duplicates.
433 */
434static int
435spx_reass(struct spxpcb *cb, struct spx *si)
436{
437	struct spx_q *q;
438	struct mbuf *m;
439	struct socket *so = cb->s_ipxpcb->ipxp_socket;
440	char packetp = cb->s_flags & SF_HI;
441	int incr;
442	char wakeup = 0;
443
444	IPX_LOCK_ASSERT(cb->s_ipxpcb);
445
446	if (si == SI(0))
447		goto present;
448
449	/*
450	 * Update our news from them.
451	 */
452	if (si->si_cc & SPX_SA)
453		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
454	if (SSEQ_GT(si->si_alo, cb->s_ralo))
455		cb->s_flags |= SF_WIN;
456	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
457		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
458			spxstat.spxs_rcvdupack++;
459
460			/*
461			 * If this is a completely duplicate ack and other
462			 * conditions hold, we assume a packet has been
463			 * dropped and retransmit it exactly as in
464			 * tcp_input().
465			 */
466			if (si->si_ack != cb->s_rack ||
467			    si->si_alo != cb->s_ralo)
468				cb->s_dupacks = 0;
469			else if (++cb->s_dupacks == spxrexmtthresh) {
470				u_short onxt = cb->s_snxt;
471				int cwnd = cb->s_cwnd;
472
473				cb->s_snxt = si->si_ack;
474				cb->s_cwnd = CUNIT;
475				cb->s_force = 1 + SPXT_REXMT;
476				spx_output(cb, NULL);
477				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
478				cb->s_rtt = 0;
479				if (cwnd >= 4 * CUNIT)
480					cb->s_cwnd = cwnd / 2;
481				if (SSEQ_GT(onxt, cb->s_snxt))
482					cb->s_snxt = onxt;
483				return (1);
484			}
485		} else
486			cb->s_dupacks = 0;
487		goto update_window;
488	}
489	cb->s_dupacks = 0;
490
491	/*
492	 * If our correspondent acknowledges data we haven't sent TCP would
493	 * drop the packet after acking.  We'll be a little more permissive.
494	 */
495	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
496		spxstat.spxs_rcvacktoomuch++;
497		si->si_ack = cb->s_smax + 1;
498	}
499	spxstat.spxs_rcvackpack++;
500
501	/*
502	 * If transmit timer is running and timed sequence number was acked,
503	 * update smoothed round trip time.  See discussion of algorithm in
504	 * tcp_input.c
505	 */
506	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
507		spxstat.spxs_rttupdated++;
508		if (cb->s_srtt != 0) {
509			short delta;
510			delta = cb->s_rtt - (cb->s_srtt >> 3);
511			if ((cb->s_srtt += delta) <= 0)
512				cb->s_srtt = 1;
513			if (delta < 0)
514				delta = -delta;
515			delta -= (cb->s_rttvar >> 2);
516			if ((cb->s_rttvar += delta) <= 0)
517				cb->s_rttvar = 1;
518		} else {
519			/*
520			 * No rtt measurement yet.
521			 */
522			cb->s_srtt = cb->s_rtt << 3;
523			cb->s_rttvar = cb->s_rtt << 1;
524		}
525		cb->s_rtt = 0;
526		cb->s_rxtshift = 0;
527		SPXT_RANGESET(cb->s_rxtcur,
528			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
529			SPXTV_MIN, SPXTV_REXMTMAX);
530	}
531
532	/*
533	 * If all outstanding data is acked, stop retransmit timer and
534	 * remember to restart (more output or persist).  If there is more
535	 * data to be acked, restart retransmit timer, using current
536	 * (possibly backed-off) value;
537	 */
538	if (si->si_ack == cb->s_smax + 1) {
539		cb->s_timer[SPXT_REXMT] = 0;
540		cb->s_flags |= SF_RXT;
541	} else if (cb->s_timer[SPXT_PERSIST] == 0)
542		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
543
544	/*
545	 * When new data is acked, open the congestion window.  If the window
546	 * gives us less than ssthresh packets in flight, open exponentially
547	 * (maxseg at a time).  Otherwise open linearly (maxseg^2 / cwnd at a
548	 * time).
549	 */
550	incr = CUNIT;
551	if (cb->s_cwnd > cb->s_ssthresh)
552		incr = max(incr * incr / cb->s_cwnd, 1);
553	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
554
555	/*
556	 * Trim Acked data from output queue.
557	 */
558	SOCKBUF_LOCK(&so->so_snd);
559	while ((m = so->so_snd.sb_mb) != NULL) {
560		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
561			sbdroprecord_locked(&so->so_snd);
562		else
563			break;
564	}
565	sowwakeup_locked(so);
566	cb->s_rack = si->si_ack;
567update_window:
568	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
569		cb->s_snxt = cb->s_rack;
570	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
571	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
572	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
573		/* keep track of pure window updates */
574		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
575		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
576			spxstat.spxs_rcvwinupd++;
577			spxstat.spxs_rcvdupack--;
578		}
579		cb->s_ralo = si->si_alo;
580		cb->s_swl1 = si->si_seq;
581		cb->s_swl2 = si->si_ack;
582		cb->s_swnd = (1 + si->si_alo - si->si_ack);
583		if (cb->s_swnd > cb->s_smxw)
584			cb->s_smxw = cb->s_swnd;
585		cb->s_flags |= SF_WIN;
586	}
587
588	/*
589	 * If this packet number is higher than that which we have allocated
590	 * refuse it, unless urgent.
591	 */
592	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
593		if (si->si_cc & SPX_SP) {
594			spxstat.spxs_rcvwinprobe++;
595			return (1);
596		} else
597			spxstat.spxs_rcvpackafterwin++;
598		if (si->si_cc & SPX_OB) {
599			if (SSEQ_GT(si->si_seq, cb->s_alo + 60))
600				return (1); /* else queue this packet; */
601		} else {
602#ifdef BROKEN
603			/*
604			 * XXXRW: This is broken on at least one count:
605			 * spx_close() will free the ipxp and related parts,
606			 * which are then touched by spx_input() after the
607			 * return from spx_reass().
608			 */
609			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
610			if (so->so_state && SS_NOFDREF) {
611				spx_close(cb);
612			} else
613				       would crash system*/
614#endif
615			spx_istat.notyet++;
616			return (1);
617		}
618	}
619
620	/*
621	 * If this is a system packet, we don't need to queue it up, and
622	 * won't update acknowledge #.
623	 */
624	if (si->si_cc & SPX_SP)
625		return (1);
626
627	/*
628	 * We have already seen this packet, so drop.
629	 */
630	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
631		spx_istat.bdreas++;
632		spxstat.spxs_rcvduppack++;
633		if (si->si_seq == cb->s_ack - 1)
634			spx_istat.lstdup++;
635		return (1);
636	}
637
638	/*
639	 * Loop through all packets queued up to insert in appropriate
640	 * sequence.
641	 */
642	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
643		if (si->si_seq == SI(q)->si_seq) {
644			spxstat.spxs_rcvduppack++;
645			return (1);
646		}
647		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
648			spxstat.spxs_rcvoopack++;
649			break;
650		}
651	}
652	insque(si, q->si_prev);
653
654	/*
655	 * If this packet is urgent, inform process
656	 */
657	if (si->si_cc & SPX_OB) {
658		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
659		sohasoutofband(so);
660		cb->s_oobflags |= SF_IOOB;
661	}
662present:
663#define SPINC sizeof(struct spxhdr)
664	SOCKBUF_LOCK(&so->so_rcv);
665
666	/*
667	 * Loop through all packets queued up to update acknowledge number,
668	 * and present all acknowledged data to user; if in packet interface
669	 * mode, show packet headers.
670	 */
671	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
672		  if (SI(q)->si_seq == cb->s_ack) {
673			cb->s_ack++;
674			m = dtom(q);
675			if (SI(q)->si_cc & SPX_OB) {
676				cb->s_oobflags &= ~SF_IOOB;
677				if (so->so_rcv.sb_cc)
678					so->so_oobmark = so->so_rcv.sb_cc;
679				else
680					so->so_rcv.sb_state |= SBS_RCVATMARK;
681			}
682			q = q->si_prev;
683			remque(q->si_next);
684			wakeup = 1;
685			spxstat.spxs_rcvpack++;
686#ifdef SF_NEWCALL
687			if (cb->s_flags2 & SF_NEWCALL) {
688				struct spxhdr *sp = mtod(m, struct spxhdr *);
689				u_char dt = sp->spx_dt;
690				spx_newchecks[4]++;
691				if (dt != cb->s_rhdr.spx_dt) {
692					struct mbuf *mm =
693					   m_getclr(M_DONTWAIT, MT_CONTROL);
694					spx_newchecks[0]++;
695					if (mm != NULL) {
696						u_short *s =
697							mtod(mm, u_short *);
698						cb->s_rhdr.spx_dt = dt;
699						mm->m_len = 5; /*XXX*/
700						s[0] = 5;
701						s[1] = 1;
702						*(u_char *)(&s[2]) = dt;
703						sbappend_locked(&so->so_rcv, mm);
704					}
705				}
706				if (sp->spx_cc & SPX_OB) {
707					MCHTYPE(m, MT_OOBDATA);
708					spx_newchecks[1]++;
709					so->so_oobmark = 0;
710					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
711				}
712				if (packetp == 0) {
713					m->m_data += SPINC;
714					m->m_len -= SPINC;
715					m->m_pkthdr.len -= SPINC;
716				}
717				if ((sp->spx_cc & SPX_EM) || packetp) {
718					sbappendrecord_locked(&so->so_rcv, m);
719					spx_newchecks[9]++;
720				} else
721					sbappend_locked(&so->so_rcv, m);
722			} else
723#endif
724			if (packetp)
725				sbappendrecord_locked(&so->so_rcv, m);
726			else {
727				cb->s_rhdr = *mtod(m, struct spxhdr *);
728				m->m_data += SPINC;
729				m->m_len -= SPINC;
730				m->m_pkthdr.len -= SPINC;
731				sbappend_locked(&so->so_rcv, m);
732			}
733		  } else
734			break;
735	}
736	if (wakeup)
737		sorwakeup_locked(so);
738	else
739		SOCKBUF_UNLOCK(&so->so_rcv);
740	return (0);
741}
742
/*
 * Control-input hook for SPX.  All three arguments are ignored and no
 * action is taken.
 */
void
spx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
{

	/* Currently, nothing. */
	return;
}
749
750static int
751spx_output(struct spxpcb *cb, struct mbuf *m0)
752{
753	struct socket *so = cb->s_ipxpcb->ipxp_socket;
754	struct mbuf *m;
755	struct spx *si = NULL;
756	struct sockbuf *sb = &so->so_snd;
757	int len = 0, win, rcv_win;
758	short span, off, recordp = 0;
759	u_short alo;
760	int error = 0, sendalot;
761#ifdef notdef
762	int idle;
763#endif
764	struct mbuf *mprev;
765
766	IPX_LOCK_ASSERT(cb->s_ipxpcb);
767
768	if (m0 != NULL) {
769		int mtu = cb->s_mtu;
770		int datalen;
771
772		/*
773		 * Make sure that packet isn't too big.
774		 */
775		for (m = m0; m != NULL; m = m->m_next) {
776			mprev = m;
777			len += m->m_len;
778			if (m->m_flags & M_EOR)
779				recordp = 1;
780		}
781		datalen = (cb->s_flags & SF_HO) ?
782				len - sizeof(struct spxhdr) : len;
783		if (datalen > mtu) {
784			if (cb->s_flags & SF_PI) {
785				m_freem(m0);
786				return (EMSGSIZE);
787			} else {
788				int oldEM = cb->s_cc & SPX_EM;
789
790				cb->s_cc &= ~SPX_EM;
791				while (len > mtu) {
792					m = m_copym(m0, 0, mtu, M_DONTWAIT);
793					if (m == NULL) {
794					    cb->s_cc |= oldEM;
795					    m_freem(m0);
796					    return (ENOBUFS);
797					}
798					if (cb->s_flags & SF_NEWCALL) {
799					    struct mbuf *mm = m;
800					    spx_newchecks[7]++;
801					    while (mm != NULL) {
802						mm->m_flags &= ~M_EOR;
803						mm = mm->m_next;
804					    }
805					}
806					error = spx_output(cb, m);
807					if (error) {
808						cb->s_cc |= oldEM;
809						m_freem(m0);
810						return (error);
811					}
812					m_adj(m0, mtu);
813					len -= mtu;
814				}
815				cb->s_cc |= oldEM;
816			}
817		}
818
819		/*
820		 * Force length even, by adding a "garbage byte" if
821		 * necessary.
822		 */
823		if (len & 1) {
824			m = mprev;
825			if (M_TRAILINGSPACE(m) >= 1)
826				m->m_len++;
827			else {
828				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
829
830				if (m1 == NULL) {
831					m_freem(m0);
832					return (ENOBUFS);
833				}
834				m1->m_len = 1;
835				*(mtod(m1, u_char *)) = 0;
836				m->m_next = m1;
837			}
838		}
839		m = m_gethdr(M_DONTWAIT, MT_DATA);
840		if (m == NULL) {
841			m_freem(m0);
842			return (ENOBUFS);
843		}
844
845		/*
846		 * Fill in mbuf with extended SP header and addresses and
847		 * length put into network format.
848		 */
849		MH_ALIGN(m, sizeof(struct spx));
850		m->m_len = sizeof(struct spx);
851		m->m_next = m0;
852		si = mtod(m, struct spx *);
853		si->si_i = *cb->s_ipx;
854		si->si_s = cb->s_shdr;
855		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
856			struct spxhdr *sh;
857			if (m0->m_len < sizeof(*sh)) {
858				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
859					m_free(m);
860					m_freem(m0);
861					return (EINVAL);
862				}
863				m->m_next = m0;
864			}
865			sh = mtod(m0, struct spxhdr *);
866			si->si_dt = sh->spx_dt;
867			si->si_cc |= sh->spx_cc & SPX_EM;
868			m0->m_len -= sizeof(*sh);
869			m0->m_data += sizeof(*sh);
870			len -= sizeof(*sh);
871		}
872		len += sizeof(*si);
873		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
874			si->si_cc |= SPX_EM;
875			spx_newchecks[8]++;
876		}
877		if (cb->s_oobflags & SF_SOOB) {
878			/*
879			 * Per jqj@cornell: Make sure OB packets convey
880			 * exactly 1 byte.  If the packet is 1 byte or
881			 * larger, we have already guaranted there to be at
882			 * least one garbage byte for the checksum, and extra
883			 * bytes shouldn't hurt!
884			 */
885			if (len > sizeof(*si)) {
886				si->si_cc |= SPX_OB;
887				len = (1 + sizeof(*si));
888			}
889		}
890		si->si_len = htons((u_short)len);
891		m->m_pkthdr.len = ((len - 1) | 1) + 1;
892
893		/*
894		 * Queue stuff up for output.
895		 */
896		sbappendrecord(sb, m);
897		cb->s_seq++;
898	}
899#ifdef notdef
900	idle = (cb->s_smax == (cb->s_rack - 1));
901#endif
902again:
903	sendalot = 0;
904	off = cb->s_snxt - cb->s_rack;
905	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
906
907	/*
908	 * If in persist timeout with window of 0, send a probe.  Otherwise,
909	 * if window is small but non-zero and timer expired, send what we
910	 * can and go into transmit state.
911	 */
912	if (cb->s_force == 1 + SPXT_PERSIST) {
913		if (win != 0) {
914			cb->s_timer[SPXT_PERSIST] = 0;
915			cb->s_rxtshift = 0;
916		}
917	}
918	span = cb->s_seq - cb->s_rack;
919	len = min(span, win) - off;
920
921	if (len < 0) {
922		/*
923		 * Window shrank after we went into it.  If window shrank to
924		 * 0, cancel pending restransmission and pull s_snxt back to
925		 * (closed) window.  We will enter persist state below.  If
926		 * the widndow didn't close completely, just wait for an ACK.
927		 */
928		len = 0;
929		if (win == 0) {
930			cb->s_timer[SPXT_REXMT] = 0;
931			cb->s_snxt = cb->s_rack;
932		}
933	}
934	if (len > 1)
935		sendalot = 1;
936	rcv_win = sbspace(&so->so_rcv);
937
938	/*
939	 * Send if we owe peer an ACK.
940	 */
941	if (cb->s_oobflags & SF_SOOB) {
942		/*
943		 * Must transmit this out of band packet.
944		 */
945		cb->s_oobflags &= ~ SF_SOOB;
946		sendalot = 1;
947		spxstat.spxs_sndurg++;
948		goto found;
949	}
950	if (cb->s_flags & SF_ACKNOW)
951		goto send;
952	if (cb->s_state < TCPS_ESTABLISHED)
953		goto send;
954
955	/*
956	 * Silly window can't happen in spx.  Code from TCP deleted.
957	 */
958	if (len)
959		goto send;
960
961	/*
962	 * Compare available window to amount of window known to peer (as
963	 * advertised window less next expected input.)  If the difference is
964	 * at least two packets or at least 35% of the mximum possible
965	 * window, then want to send a window update to peer.
966	 */
967	if (rcv_win > 0) {
968		u_short delta =  1 + cb->s_alo - cb->s_ack;
969		int adv = rcv_win - (delta * cb->s_mtu);
970
971		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
972		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
973			spxstat.spxs_sndwinup++;
974			cb->s_flags |= SF_ACKNOW;
975			goto send;
976		}
977
978	}
979
980	/*
981	 * Many comments from tcp_output.c are appropriate here including ...
982	 * If send window is too small, there is data to transmit, and no
983	 * retransmit or persist is pending, then go to persist state.  If
984	 * nothing happens soon, send when timer expires: if window is
985	 * non-zero, transmit what we can, otherwise send a probe.
986	 */
987	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
988	    cb->s_timer[SPXT_PERSIST] == 0) {
989		cb->s_rxtshift = 0;
990		spx_setpersist(cb);
991	}
992
993	/*
994	 * No reason to send a packet, just return.
995	 */
996	cb->s_outx = 1;
997	return (0);
998
999send:
1000	/*
1001	 * Find requested packet.
1002	 */
1003	si = 0;
1004	if (len > 0) {
1005		cb->s_want = cb->s_snxt;
1006		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
1007			si = mtod(m, struct spx *);
1008			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
1009				break;
1010		}
1011	found:
1012		if (si != NULL) {
1013			if (si->si_seq == cb->s_snxt)
1014					cb->s_snxt++;
1015				else
1016					spxstat.spxs_sndvoid++, si = 0;
1017		}
1018	}
1019
1020	/*
1021	 * Update window.
1022	 */
1023	if (rcv_win < 0)
1024		rcv_win = 0;
1025	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
1026	if (SSEQ_LT(alo, cb->s_alo))
1027		alo = cb->s_alo;
1028
1029	if (si != NULL) {
1030		/*
1031		 * Must make a copy of this packet for ipx_output to monkey
1032		 * with.
1033		 */
1034		m = m_copy(dtom(si), 0, (int)M_COPYALL);
1035		if (m == NULL)
1036			return (ENOBUFS);
1037		si = mtod(m, struct spx *);
1038		if (SSEQ_LT(si->si_seq, cb->s_smax))
1039			spxstat.spxs_sndrexmitpack++;
1040		else
1041			spxstat.spxs_sndpack++;
1042	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1043		/*
1044		 * Must send an acknowledgement or a probe.
1045		 */
1046		if (cb->s_force)
1047			spxstat.spxs_sndprobe++;
1048		if (cb->s_flags & SF_ACKNOW)
1049			spxstat.spxs_sndacks++;
1050		m = m_gethdr(M_DONTWAIT, MT_DATA);
1051		if (m == NULL)
1052			return (ENOBUFS);
1053
1054		/*
1055		 * Fill in mbuf with extended SP header and addresses and
1056		 * length put into network format.
1057		 */
1058		MH_ALIGN(m, sizeof(struct spx));
1059		m->m_len = sizeof(*si);
1060		m->m_pkthdr.len = sizeof(*si);
1061		si = mtod(m, struct spx *);
1062		si->si_i = *cb->s_ipx;
1063		si->si_s = cb->s_shdr;
1064		si->si_seq = cb->s_smax + 1;
1065		si->si_len = htons(sizeof(*si));
1066		si->si_cc |= SPX_SP;
1067	} else {
1068		cb->s_outx = 3;
1069		if (so->so_options & SO_DEBUG || traceallspxs)
1070			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1071		return (0);
1072	}
1073
1074	/*
1075	 * Stuff checksum and output datagram.
1076	 */
1077	if ((si->si_cc & SPX_SP) == 0) {
1078		if (cb->s_force != (1 + SPXT_PERSIST) ||
1079		    cb->s_timer[SPXT_PERSIST] == 0) {
1080			/*
1081			 * If this is a new packet and we are not currently
1082			 * timing anything, time this one.
1083			 */
1084			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1085				cb->s_smax = si->si_seq;
1086				if (cb->s_rtt == 0) {
1087					spxstat.spxs_segstimed++;
1088					cb->s_rtseq = si->si_seq;
1089					cb->s_rtt = 1;
1090				}
1091			}
1092
1093			/*
1094			 * Set rexmt timer if not currently set, initial
1095			 * value for retransmit timer is smoothed round-trip
1096			 * time + 2 * round-trip time variance.  Initialize
1097			 * shift counter which is used for backoff of
1098			 * retransmit time.
1099			 */
1100			if (cb->s_timer[SPXT_REXMT] == 0 &&
1101			    cb->s_snxt != cb->s_rack) {
1102				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1103				if (cb->s_timer[SPXT_PERSIST]) {
1104					cb->s_timer[SPXT_PERSIST] = 0;
1105					cb->s_rxtshift = 0;
1106				}
1107			}
1108		} else if (SSEQ_LT(cb->s_smax, si->si_seq))
1109			cb->s_smax = si->si_seq;
1110	} else if (cb->s_state < TCPS_ESTABLISHED) {
1111		if (cb->s_rtt == 0)
1112			cb->s_rtt = 1; /* Time initial handshake */
1113		if (cb->s_timer[SPXT_REXMT] == 0)
1114			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1115	}
1116
1117	/*
1118	 * Do not request acks when we ack their data packets or when we do a
1119	 * gratuitous window update.
1120	 */
1121	if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1122		si->si_cc |= SPX_SA;
1123	si->si_seq = htons(si->si_seq);
1124	si->si_alo = htons(alo);
1125	si->si_ack = htons(cb->s_ack);
1126
1127	if (ipxcksum)
1128		si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1129	else
1130		si->si_sum = 0xffff;
1131
1132	cb->s_outx = 4;
1133	if (so->so_options & SO_DEBUG || traceallspxs)
1134		spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1135
1136	if (so->so_options & SO_DONTROUTE)
1137		error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1138	else
1139		error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1140	if (error)
1141		return (error);
1142	spxstat.spxs_sndtotal++;
1143
1144	/*
1145	 * Data sent (as far as we can tell).  If this advertises a larger
1146	 * window than any other segment, then remember the size of the
1147	 * advertized window.  Any pending ACK has now been sent.
1148	 */
1149	cb->s_force = 0;
1150	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1151	if (SSEQ_GT(alo, cb->s_alo))
1152		cb->s_alo = alo;
1153	if (sendalot)
1154		goto again;
1155	cb->s_outx = 5;
1156	return (0);
1157}
1158
1159static int spx_do_persist_panics = 0;
1160
1161static void
1162spx_setpersist(struct spxpcb *cb)
1163{
1164	int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1165
1166	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1167
1168	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1169		panic("spx_output REXMT");
1170
1171	/*
1172	 * Start/restart persistance timer.
1173	 */
1174	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1175	    t*spx_backoff[cb->s_rxtshift],
1176	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1177	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1178		cb->s_rxtshift++;
1179}
1180
1181int
1182spx_ctloutput(struct socket *so, struct sockopt *sopt)
1183{
1184	struct spxhdr spxhdr;
1185	struct ipxpcb *ipxp;
1186	struct spxpcb *cb;
1187	int mask, error;
1188	short soptval;
1189	u_short usoptval;
1190	int optval;
1191
1192	ipxp = sotoipxpcb(so);
1193	KASSERT(ipxp != NULL, ("spx_ctloutput: ipxp == NULL"));
1194
1195	/*
1196	 * This will have to be changed when we do more general stacking of
1197	 * protocols.
1198	 */
1199	if (sopt->sopt_level != IPXPROTO_SPX)
1200		return (ipx_ctloutput(so, sopt));
1201
1202	IPX_LOCK(ipxp);
1203	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1204		IPX_UNLOCK(ipxp);
1205		return (ECONNRESET);
1206	}
1207
1208	IPX_LOCK(ipxp);
1209	cb = ipxtospxpcb(ipxp);
1210	KASSERT(cb != NULL, ("spx_ctloutput: cb == NULL"));
1211
1212	error = 0;
1213	switch (sopt->sopt_dir) {
1214	case SOPT_GET:
1215		switch (sopt->sopt_name) {
1216		case SO_HEADERS_ON_INPUT:
1217			mask = SF_HI;
1218			goto get_flags;
1219
1220		case SO_HEADERS_ON_OUTPUT:
1221			mask = SF_HO;
1222		get_flags:
1223			soptval = cb->s_flags & mask;
1224			IPX_UNLOCK(ipxp);
1225			error = sooptcopyout(sopt, &soptval,
1226			    sizeof(soptval));
1227			break;
1228
1229		case SO_MTU:
1230			usoptval = cb->s_mtu;
1231			IPX_UNLOCK(ipxp);
1232			error = sooptcopyout(sopt, &usoptval,
1233			    sizeof(usoptval));
1234			break;
1235
1236		case SO_LAST_HEADER:
1237			spxhdr = cb->s_rhdr;
1238			IPX_UNLOCK(ipxp);
1239			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
1240			break;
1241
1242		case SO_DEFAULT_HEADERS:
1243			spxhdr = cb->s_shdr;
1244			IPX_UNLOCK(ipxp);
1245			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
1246			break;
1247
1248		default:
1249			IPX_UNLOCK(ipxp);
1250			error = ENOPROTOOPT;
1251		}
1252		break;
1253
1254	case SOPT_SET:
1255		/*
1256		 * XXX Why are these shorts on get and ints on set?  That
1257		 * doesn't make any sense...
1258		 *
1259		 * XXXRW: Note, when we re-acquire the ipxp lock, we should
1260		 * re-check that it's not dropped.
1261		 */
1262		IPX_UNLOCK(ipxp);
1263		switch (sopt->sopt_name) {
1264		case SO_HEADERS_ON_INPUT:
1265			mask = SF_HI;
1266			goto set_head;
1267
1268		case SO_HEADERS_ON_OUTPUT:
1269			mask = SF_HO;
1270		set_head:
1271			error = sooptcopyin(sopt, &optval, sizeof optval,
1272					    sizeof optval);
1273			if (error)
1274				break;
1275
1276			IPX_LOCK(ipxp);
1277			if (cb->s_flags & SF_PI) {
1278				if (optval)
1279					cb->s_flags |= mask;
1280				else
1281					cb->s_flags &= ~mask;
1282			} else error = EINVAL;
1283			IPX_UNLOCK(ipxp);
1284			break;
1285
1286		case SO_MTU:
1287			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1288					    sizeof usoptval);
1289			if (error)
1290				break;
1291			/* Unlocked write. */
1292			cb->s_mtu = usoptval;
1293			break;
1294
1295#ifdef SF_NEWCALL
1296		case SO_NEWCALL:
1297			error = sooptcopyin(sopt, &optval, sizeof optval,
1298					    sizeof optval);
1299			if (error)
1300				break;
1301			IPX_LOCK(ipxp);
1302			if (optval) {
1303				cb->s_flags2 |= SF_NEWCALL;
1304				spx_newchecks[5]++;
1305			} else {
1306				cb->s_flags2 &= ~SF_NEWCALL;
1307				spx_newchecks[6]++;
1308			}
1309			IPX_UNLOCK(ipxp);
1310			break;
1311#endif
1312
1313		case SO_DEFAULT_HEADERS:
1314			{
1315				struct spxhdr sp;
1316
1317				error = sooptcopyin(sopt, &sp, sizeof sp,
1318						    sizeof sp);
1319				if (error)
1320					break;
1321				IPX_LOCK(ipxp);
1322				cb->s_dt = sp.spx_dt;
1323				cb->s_cc = sp.spx_cc & SPX_EM;
1324				IPX_UNLOCK(ipxp);
1325			}
1326			break;
1327
1328		default:
1329			error = ENOPROTOOPT;
1330		}
1331		break;
1332
1333	default:
1334		panic("spx_ctloutput: bad socket option direction");
1335	}
1336	return (error);
1337}
1338
1339static void
1340spx_usr_abort(struct socket *so)
1341{
1342	struct ipxpcb *ipxp;
1343	struct spxpcb *cb;
1344
1345	ipxp = sotoipxpcb(so);
1346	KASSERT(ipxp != NULL, ("spx_usr_abort: ipxp == NULL"));
1347
1348	cb = ipxtospxpcb(ipxp);
1349	KASSERT(cb != NULL, ("spx_usr_abort: cb == NULL"));
1350
1351	IPX_LIST_LOCK();
1352	IPX_LOCK(ipxp);
1353	spx_drop(cb, ECONNABORTED);
1354	IPX_UNLOCK(ipxp);
1355	IPX_LIST_UNLOCK();
1356}
1357
1358/*
1359 * Accept a connection.  Essentially all the work is done at higher levels;
1360 * just return the address of the peer, storing through addr.
1361 */
1362static int
1363spx_accept(struct socket *so, struct sockaddr **nam)
1364{
1365	struct ipxpcb *ipxp;
1366	struct sockaddr_ipx *sipx, ssipx;
1367
1368	ipxp = sotoipxpcb(so);
1369	KASSERT(ipxp != NULL, ("spx_accept: ipxp == NULL"));
1370
1371	sipx = &ssipx;
1372	bzero(sipx, sizeof *sipx);
1373	sipx->sipx_len = sizeof *sipx;
1374	sipx->sipx_family = AF_IPX;
1375	IPX_LOCK(ipxp);
1376	sipx->sipx_addr = ipxp->ipxp_faddr;
1377	IPX_UNLOCK(ipxp);
1378	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1379	return (0);
1380}
1381
1382static int
1383spx_attach(struct socket *so, int proto, struct thread *td)
1384{
1385	struct ipxpcb *ipxp;
1386	struct spxpcb *cb;
1387	struct mbuf *mm;
1388	struct sockbuf *sb;
1389	int error;
1390
1391	ipxp = sotoipxpcb(so);
1392	KASSERT(ipxp == NULL, ("spx_attach: ipxp != NULL"));
1393
1394	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1395		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1396		if (error)
1397			return (error);
1398	}
1399
1400	cb = malloc(sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1401	if (cb == NULL)
1402		return (ENOBUFS);
1403	mm = m_getclr(M_DONTWAIT, MT_DATA);
1404	if (mm == NULL) {
1405		free(cb, M_PCB);
1406		return (ENOBUFS);
1407	}
1408
1409	IPX_LIST_LOCK();
1410	error = ipx_pcballoc(so, &ipxpcb_list, td);
1411	if (error) {
1412		IPX_LIST_UNLOCK();
1413		m_free(mm);
1414		free(cb, M_PCB);
1415		return (error);
1416	}
1417	ipxp = sotoipxpcb(so);
1418	ipxp->ipxp_flags |= IPXP_SPX;
1419
1420	cb->s_ipx = mtod(mm, struct ipx *);
1421	cb->s_state = TCPS_LISTEN;
1422	cb->s_smax = -1;
1423	cb->s_swl1 = -1;
1424	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1425	cb->s_ipxpcb = ipxp;
1426	cb->s_mtu = 576 - sizeof(struct spx);
1427	sb = &so->so_snd;
1428	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1429	cb->s_ssthresh = cb->s_cwnd;
1430	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1431
1432	/*
1433	 * Above is recomputed when connecting to account for changed
1434	 * buffering or mtu's.
1435	 */
1436	cb->s_rtt = SPXTV_SRTTBASE;
1437	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1438	SPXT_RANGESET(cb->s_rxtcur,
1439	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1440	    SPXTV_MIN, SPXTV_REXMTMAX);
1441	ipxp->ipxp_pcb = (caddr_t)cb;
1442	IPX_LIST_UNLOCK();
1443	return (0);
1444}
1445
1446static void
1447spx_pcbdetach(struct ipxpcb *ipxp)
1448{
1449	struct spxpcb *cb;
1450	struct spx_q *s;
1451	struct mbuf *m;
1452
1453	IPX_LOCK_ASSERT(ipxp);
1454
1455	cb = ipxtospxpcb(ipxp);
1456	KASSERT(cb != NULL, ("spx_pcbdetach: cb == NULL"));
1457
1458	s = cb->s_q.si_next;
1459	while (s != &(cb->s_q)) {
1460		s = s->si_next;
1461		remque(s);
1462		m = dtom(s);
1463		m_freem(m);
1464	}
1465	m_free(dtom(cb->s_ipx));
1466	free(cb, M_PCB);
1467	ipxp->ipxp_pcb = NULL;
1468}
1469
1470static int
1471spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1472{
1473	struct ipxpcb *ipxp;
1474	int error;
1475
1476	ipxp = sotoipxpcb(so);
1477	KASSERT(ipxp != NULL, ("spx_bind: ipxp == NULL"));
1478
1479	IPX_LIST_LOCK();
1480	IPX_LOCK(ipxp);
1481	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1482		error = EINVAL;
1483		goto out;
1484	}
1485	error = ipx_pcbbind(ipxp, nam, td);
1486out:
1487	IPX_UNLOCK(ipxp);
1488	IPX_LIST_UNLOCK();
1489	return (error);
1490}
1491
1492static void
1493spx_usr_close(struct socket *so)
1494{
1495	struct ipxpcb *ipxp;
1496	struct spxpcb *cb;
1497
1498	ipxp = sotoipxpcb(so);
1499	KASSERT(ipxp != NULL, ("spx_usr_close: ipxp == NULL"));
1500
1501	cb = ipxtospxpcb(ipxp);
1502	KASSERT(cb != NULL, ("spx_usr_close: cb == NULL"));
1503
1504	IPX_LIST_LOCK();
1505	IPX_LOCK(ipxp);
1506	if (cb->s_state > TCPS_LISTEN)
1507		spx_disconnect(cb);
1508	else
1509		spx_close(cb);
1510	IPX_UNLOCK(ipxp);
1511	IPX_LIST_UNLOCK();
1512}
1513
1514/*
1515 * Initiate connection to peer.  Enter SYN_SENT state, and mark socket as
1516 * connecting.  Start keep-alive timer, setup prototype header, send initial
1517 * system packet requesting connection.
1518 */
1519static int
1520spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1521{
1522	struct ipxpcb *ipxp;
1523	struct spxpcb *cb;
1524	int error;
1525
1526	ipxp = sotoipxpcb(so);
1527	KASSERT(ipxp != NULL, ("spx_connect: ipxp == NULL"));
1528
1529	cb = ipxtospxpcb(ipxp);
1530	KASSERT(cb != NULL, ("spx_connect: cb == NULL"));
1531
1532	IPX_LIST_LOCK();
1533	IPX_LOCK(ipxp);
1534	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1535		error = EINVAL;
1536		goto spx_connect_end;
1537	}
1538	if (ipxp->ipxp_lport == 0) {
1539		error = ipx_pcbbind(ipxp, NULL, td);
1540		if (error)
1541			goto spx_connect_end;
1542	}
1543	error = ipx_pcbconnect(ipxp, nam, td);
1544	if (error)
1545		goto spx_connect_end;
1546	soisconnecting(so);
1547	spxstat.spxs_connattempt++;
1548	cb->s_state = TCPS_SYN_SENT;
1549	cb->s_did = 0;
1550	spx_template(cb);
1551	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1552	cb->s_force = 1 + SPXTV_KEEP;
1553
1554	/*
1555	 * Other party is required to respond to the port I send from, but he
1556	 * is not required to answer from where I am sending to, so allow
1557	 * wildcarding.  Original port I am sending to is still saved in
1558	 * cb->s_dport.
1559	 */
1560	ipxp->ipxp_fport = 0;
1561	error = spx_output(cb, NULL);
1562spx_connect_end:
1563	IPX_UNLOCK(ipxp);
1564	IPX_LIST_UNLOCK();
1565	return (error);
1566}
1567
1568static void
1569spx_detach(struct socket *so)
1570{
1571	struct ipxpcb *ipxp;
1572	struct spxpcb *cb;
1573
1574	/*
1575	 * XXXRW: Should assert appropriately detached.
1576	 */
1577	ipxp = sotoipxpcb(so);
1578	KASSERT(ipxp != NULL, ("spx_detach: ipxp == NULL"));
1579
1580	cb = ipxtospxpcb(ipxp);
1581	KASSERT(cb != NULL, ("spx_detach: cb == NULL"));
1582
1583	IPX_LIST_LOCK();
1584	IPX_LOCK(ipxp);
1585	spx_pcbdetach(ipxp);
1586	ipx_pcbfree(ipxp);
1587	IPX_LIST_UNLOCK();
1588}
1589
1590/*
1591 * We may decide later to implement connection closing handshaking at the spx
1592 * level optionally.  Here is the hook to do it:
1593 */
1594static int
1595spx_usr_disconnect(struct socket *so)
1596{
1597	struct ipxpcb *ipxp;
1598	struct spxpcb *cb;
1599	int error;
1600
1601	ipxp = sotoipxpcb(so);
1602	KASSERT(ipxp != NULL, ("spx_usr_disconnect: ipxp == NULL"));
1603
1604	cb = ipxtospxpcb(ipxp);
1605	KASSERT(cb != NULL, ("spx_usr_disconnect: cb == NULL"));
1606
1607	IPX_LIST_LOCK();
1608	IPX_LOCK(ipxp);
1609	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1610		error = EINVAL;
1611		goto out;
1612	}
1613	spx_disconnect(cb);
1614	error = 0;
1615out:
1616	IPX_UNLOCK(ipxp);
1617	IPX_LIST_UNLOCK();
1618	return (error);
1619}
1620
1621static int
1622spx_listen(struct socket *so, int backlog, struct thread *td)
1623{
1624	int error;
1625	struct ipxpcb *ipxp;
1626	struct spxpcb *cb;
1627
1628	error = 0;
1629	ipxp = sotoipxpcb(so);
1630	KASSERT(ipxp != NULL, ("spx_listen: ipxp == NULL"));
1631
1632	cb = ipxtospxpcb(ipxp);
1633	KASSERT(cb != NULL, ("spx_listen: cb == NULL"));
1634
1635	IPX_LIST_LOCK();
1636	IPX_LOCK(ipxp);
1637	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1638		error = EINVAL;
1639		goto out;
1640	}
1641	SOCK_LOCK(so);
1642	error = solisten_proto_check(so);
1643	if (error == 0 && ipxp->ipxp_lport == 0)
1644		error = ipx_pcbbind(ipxp, NULL, td);
1645	if (error == 0) {
1646		cb->s_state = TCPS_LISTEN;
1647		solisten_proto(so, backlog);
1648	}
1649	SOCK_UNLOCK(so);
1650out:
1651	IPX_UNLOCK(ipxp);
1652	IPX_LIST_UNLOCK();
1653	return (error);
1654}
1655
1656/*
1657 * After a receive, possibly send acknowledgment updating allocation.
1658 */
1659static int
1660spx_rcvd(struct socket *so, int flags)
1661{
1662	struct ipxpcb *ipxp;
1663	struct spxpcb *cb;
1664	int error;
1665
1666	ipxp = sotoipxpcb(so);
1667	KASSERT(ipxp != NULL, ("spx_rcvd: ipxp == NULL"));
1668
1669	cb = ipxtospxpcb(ipxp);
1670	KASSERT(cb != NULL, ("spx_rcvd: cb == NULL"));
1671
1672	IPX_LOCK(ipxp);
1673	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1674		error = EINVAL;
1675		goto out;
1676	}
1677	cb->s_flags |= SF_RVD;
1678	spx_output(cb, NULL);
1679	cb->s_flags &= ~SF_RVD;
1680	error = 0;
1681out:
1682	IPX_UNLOCK(ipxp);
1683	return (error);
1684}
1685
1686static int
1687spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1688{
1689	struct ipxpcb *ipxp;
1690	struct spxpcb *cb;
1691	int error;
1692
1693	ipxp = sotoipxpcb(so);
1694	KASSERT(ipxp != NULL, ("spx_rcvoob: ipxp == NULL"));
1695
1696	cb = ipxtospxpcb(ipxp);
1697	KASSERT(cb != NULL, ("spx_rcvoob: cb == NULL"));
1698
1699	IPX_LOCK(ipxp);
1700	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1701		error = EINVAL;
1702		goto out;
1703	}
1704	SOCKBUF_LOCK(&so->so_rcv);
1705	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1706	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1707		SOCKBUF_UNLOCK(&so->so_rcv);
1708		m->m_len = 1;
1709		*mtod(m, caddr_t) = cb->s_iobc;
1710		error = 0;
1711		goto out;
1712	}
1713	SOCKBUF_UNLOCK(&so->so_rcv);
1714	error = EINVAL;
1715out:
1716	IPX_UNLOCK(ipxp);
1717	return (error);
1718}
1719
1720static int
1721spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1722    struct mbuf *controlp, struct thread *td)
1723{
1724	struct ipxpcb *ipxp;
1725	struct spxpcb *cb;
1726	int error;
1727
1728	ipxp = sotoipxpcb(so);
1729	KASSERT(ipxp != NULL, ("spx_send: ipxp == NULL"));
1730
1731	cb = ipxtospxpcb(ipxp);
1732	KASSERT(cb != NULL, ("spx_send: cb == NULL"));
1733
1734	error = 0;
1735	IPX_LOCK(ipxp);
1736	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1737		error = ECONNRESET;
1738		goto spx_send_end;
1739	}
1740	if (flags & PRUS_OOB) {
1741		if (sbspace(&so->so_snd) < -512) {
1742			error = ENOBUFS;
1743			goto spx_send_end;
1744		}
1745		cb->s_oobflags |= SF_SOOB;
1746	}
1747	if (controlp != NULL) {
1748		u_short *p = mtod(controlp, u_short *);
1749		spx_newchecks[2]++;
1750		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1751			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1752			spx_newchecks[3]++;
1753		}
1754		m_freem(controlp);
1755	}
1756	controlp = NULL;
1757	error = spx_output(cb, m);
1758	m = NULL;
1759spx_send_end:
1760	IPX_UNLOCK(ipxp);
1761	if (controlp != NULL)
1762		m_freem(controlp);
1763	if (m != NULL)
1764		m_freem(m);
1765	return (error);
1766}
1767
1768static int
1769spx_shutdown(struct socket *so)
1770{
1771	struct ipxpcb *ipxp;
1772	struct spxpcb *cb;
1773	int error;
1774
1775	ipxp = sotoipxpcb(so);
1776	KASSERT(ipxp != NULL, ("spx_shutdown: ipxp == NULL"));
1777
1778	cb = ipxtospxpcb(ipxp);
1779	KASSERT(cb != NULL, ("spx_shutdown: cb == NULL"));
1780
1781	socantsendmore(so);
1782	IPX_LIST_LOCK();
1783	IPX_LOCK(ipxp);
1784	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1785		error = EINVAL;
1786		goto out;
1787	}
1788	spx_usrclosed(cb);
1789	error = 0;
1790out:
1791	IPX_UNLOCK(ipxp);
1792	IPX_LIST_UNLOCK();
1793	return (error);
1794}
1795
1796static int
1797spx_sp_attach(struct socket *so, int proto, struct thread *td)
1798{
1799	struct ipxpcb *ipxp;
1800	struct spxpcb *cb;
1801	int error;
1802
1803	KASSERT(so->so_pcb == NULL, ("spx_sp_attach: so_pcb != NULL"));
1804
1805	error = spx_attach(so, proto, td);
1806	if (error)
1807		return (error);
1808
1809	ipxp = sotoipxpcb(so);
1810	KASSERT(ipxp != NULL, ("spx_sp_attach: ipxp == NULL"));
1811
1812	cb = ipxtospxpcb(ipxp);
1813	KASSERT(cb != NULL, ("spx_sp_attach: cb == NULL"));
1814
1815	IPX_LOCK(ipxp);
1816	cb->s_flags |= (SF_HI | SF_HO | SF_PI);
1817	IPX_UNLOCK(ipxp);
1818	return (0);
1819}
1820
1821/*
1822 * Create template to be used to send spx packets on a connection.  Called
1823 * after host entry created, fills in a skeletal spx header (choosing
1824 * connection id), minimizing the amount of work necessary when the
1825 * connection is used.
1826 */
1827static void
1828spx_template(struct spxpcb *cb)
1829{
1830	struct ipxpcb *ipxp = cb->s_ipxpcb;
1831	struct ipx *ipx = cb->s_ipx;
1832	struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1833
1834	IPX_LOCK_ASSERT(ipxp);
1835
1836	ipx->ipx_pt = IPXPROTO_SPX;
1837	ipx->ipx_sna = ipxp->ipxp_laddr;
1838	ipx->ipx_dna = ipxp->ipxp_faddr;
1839	SPX_LOCK();
1840	cb->s_sid = htons(spx_iss);
1841	spx_iss += SPX_ISSINCR/2;
1842	SPX_UNLOCK();
1843	cb->s_alo = 1;
1844	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1845
1846	/*
1847	 * Try to expand fast to full complement of large packets.
1848	 */
1849	cb->s_ssthresh = cb->s_cwnd;
1850	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1851
1852	/*
1853	 * But allow for lots of little packets as well.
1854	 */
1855	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1856}
1857
1858/*
1859 * Close a SPIP control block.  Wake up any sleepers.  We used to free any
1860 * queued packets and cb->s_ipx here, but now we defer that until the pcb is
1861 * discarded.
1862 */
1863void
1864spx_close(struct spxpcb *cb)
1865{
1866	struct ipxpcb *ipxp = cb->s_ipxpcb;
1867	struct socket *so = ipxp->ipxp_socket;
1868
1869	KASSERT(ipxp != NULL, ("spx_close: ipxp == NULL"));
1870	IPX_LIST_LOCK_ASSERT();
1871	IPX_LOCK_ASSERT(ipxp);
1872
1873	ipxp->ipxp_flags |= IPXP_DROPPED;
1874	soisdisconnected(so);
1875	spxstat.spxs_closed++;
1876}
1877
1878/*
1879 * Someday we may do level 3 handshaking to close a connection or send a
1880 * xerox style error.  For now, just close.  cb will always be invalid after
1881 * this call.
1882 */
1883static void
1884spx_usrclosed(struct spxpcb *cb)
1885{
1886
1887	IPX_LIST_LOCK_ASSERT();
1888	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1889
1890	spx_close(cb);
1891}
1892
1893/*
1894 * cb will always be invalid after this call.
1895 */
1896static void
1897spx_disconnect(struct spxpcb *cb)
1898{
1899
1900	IPX_LIST_LOCK_ASSERT();
1901	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1902
1903	spx_close(cb);
1904}
1905
1906/*
1907 * Drop connection, reporting the specified error.  cb will always be invalid
1908 * after this call.
1909 */
1910static void
1911spx_drop(struct spxpcb *cb, int errno)
1912{
1913	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1914
1915	IPX_LIST_LOCK_ASSERT();
1916	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1917
1918	/*
1919	 * Someday, in the xerox world we will generate error protocol
1920	 * packets announcing that the socket has gone away.
1921	 */
1922	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1923		spxstat.spxs_drops++;
1924		cb->s_state = TCPS_CLOSED;
1925		/*tcp_output(cb);*/
1926	} else
1927		spxstat.spxs_conndrops++;
1928	so->so_error = errno;
1929	spx_close(cb);
1930}
1931
1932/*
1933 * Fast timeout routine for processing delayed acks.
1934 */
1935void
1936spx_fasttimo(void)
1937{
1938	struct ipxpcb *ipxp;
1939	struct spxpcb *cb;
1940
1941	IPX_LIST_LOCK();
1942	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1943		IPX_LOCK(ipxp);
1944		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1945		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1946			IPX_UNLOCK(ipxp);
1947			continue;
1948		}
1949		cb = ipxtospxpcb(ipxp);
1950		if (cb->s_flags & SF_DELACK) {
1951			cb->s_flags &= ~SF_DELACK;
1952			cb->s_flags |= SF_ACKNOW;
1953			spxstat.spxs_delack++;
1954			spx_output(cb, NULL);
1955		}
1956		IPX_UNLOCK(ipxp);
1957	}
1958	IPX_LIST_UNLOCK();
1959}
1960
1961/*
1962 * spx protocol timeout routine called every 500 ms.  Updates the timers in
1963 * all active pcb's and causes finite state machine actions if timers expire.
1964 */
1965void
1966spx_slowtimo(void)
1967{
1968	struct ipxpcb *ipxp;
1969	struct spxpcb *cb;
1970	int i;
1971
1972	/*
1973	 * Search through tcb's and update active timers.  Once, timers could
1974	 * free ipxp's, but now we do that only when detaching a socket.
1975	 */
1976	IPX_LIST_LOCK();
1977	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1978		IPX_LOCK(ipxp);
1979		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1980		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1981			IPX_UNLOCK(ipxp);
1982			continue;
1983		}
1984
1985		cb = (struct spxpcb *)ipxp->ipxp_pcb;
1986		KASSERT(cb != NULL, ("spx_slowtimo: cb == NULL"));
1987		for (i = 0; i < SPXT_NTIMERS; i++) {
1988			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1989				spx_timers(cb, i);
1990				if (ipxp->ipxp_flags & IPXP_DROPPED)
1991					break;
1992			}
1993		}
1994		if (!(ipxp->ipxp_flags & IPXP_DROPPED)) {
1995			cb->s_idle++;
1996			if (cb->s_rtt)
1997				cb->s_rtt++;
1998		}
1999		IPX_UNLOCK(ipxp);
2000	}
2001	IPX_LIST_UNLOCK();
2002	SPX_LOCK();
2003	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
2004	SPX_UNLOCK();
2005}
2006
2007/*
2008 * SPX timer processing.
2009 */
2010static void
2011spx_timers(struct spxpcb *cb, int timer)
2012{
2013	long rexmt;
2014	int win;
2015
2016	IPX_LIST_LOCK_ASSERT();
2017	IPX_LOCK_ASSERT(cb->s_ipxpcb);
2018
2019	cb->s_force = 1 + timer;
2020	switch (timer) {
2021	case SPXT_2MSL:
2022		/*
2023		 * 2 MSL timeout in shutdown went off.  TCP deletes
2024		 * connection control block.
2025		 */
2026		printf("spx: SPXT_2MSL went off for no reason\n");
2027		cb->s_timer[timer] = 0;
2028		break;
2029
2030	case SPXT_REXMT:
2031		/*
2032		 * Retransmission timer went off.  Message has not been acked
2033		 * within retransmit interval.  Back off to a longer
2034		 * retransmit interval and retransmit one packet.
2035		 */
2036		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
2037			cb->s_rxtshift = SPX_MAXRXTSHIFT;
2038			spxstat.spxs_timeoutdrop++;
2039			spx_drop(cb, ETIMEDOUT);
2040			break;
2041		}
2042		spxstat.spxs_rexmttimeo++;
2043		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
2044		rexmt *= spx_backoff[cb->s_rxtshift];
2045		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
2046		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
2047
2048		/*
2049		 * If we have backed off fairly far, our srtt estimate is
2050		 * probably bogus.  Clobber it so we'll take the next rtt
2051		 * measurement as our srtt; move the current srtt into rttvar
2052		 * to keep the current retransmit times until then.
2053		 */
2054		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
2055			cb->s_rttvar += (cb->s_srtt >> 2);
2056			cb->s_srtt = 0;
2057		}
2058		cb->s_snxt = cb->s_rack;
2059
2060		/*
2061		 * If timing a packet, stop the timer.
2062		 */
2063		cb->s_rtt = 0;
2064
2065		/*
2066		 * See very long discussion in tcp_timer.c about congestion
2067		 * window and sstrhesh.
2068		 */
2069		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
2070		if (win < 2)
2071			win = 2;
2072		cb->s_cwnd = CUNIT;
2073		cb->s_ssthresh = win * CUNIT;
2074		spx_output(cb, NULL);
2075		break;
2076
2077	case SPXT_PERSIST:
2078		/*
2079		 * Persistance timer into zero window.  Force a probe to be
2080		 * sent.
2081		 */
2082		spxstat.spxs_persisttimeo++;
2083		spx_setpersist(cb);
2084		spx_output(cb, NULL);
2085		break;
2086
2087	case SPXT_KEEP:
2088		/*
2089		 * Keep-alive timer went off; send something or drop
2090		 * connection if idle for too long.
2091		 */
2092		spxstat.spxs_keeptimeo++;
2093		if (cb->s_state < TCPS_ESTABLISHED)
2094			goto dropit;
2095		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
2096		    	if (cb->s_idle >= SPXTV_MAXIDLE)
2097				goto dropit;
2098			spxstat.spxs_keepprobe++;
2099			spx_output(cb, NULL);
2100		} else
2101			cb->s_idle = 0;
2102		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
2103		break;
2104
2105	dropit:
2106		spxstat.spxs_keepdrops++;
2107		spx_drop(cb, ETIMEDOUT);
2108		break;
2109
2110	default:
2111		panic("spx_timers: unknown timer %d", timer);
2112	}
2113}
2114