spx_usrreq.c revision 191533
1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2006 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * Copyright (c) 1995, Mike Mitchell
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)spx_usrreq.h
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 191533 2009-04-26 21:03:27Z ed $");
67
68#include <sys/param.h>
69#include <sys/lock.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/protosw.h>
75#include <sys/signalvar.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/sx.h>
79#include <sys/systm.h>
80
81#include <net/route.h>
82#include <netinet/tcp_fsm.h>
83
84#include <netipx/ipx.h>
85#include <netipx/ipx_pcb.h>
86#include <netipx/ipx_var.h>
87#include <netipx/spx.h>
88#include <netipx/spx_debug.h>
89#include <netipx/spx_timer.h>
90#include <netipx/spx_var.h>
91
92/*
93 * SPX protocol implementation.
94 */
/* Mutex used by the SPX_LOCK()/SPX_UNLOCK() macros below. */
95static struct	mtx spx_mtx;			/* Protects only spx_iss. */
/*
 * Set to 1 by spx_init().  Presumably the initial send sequence number
 * handed to new connections -- its consumer is not in this part of the file.
 */
96static u_short 	spx_iss;
/*
 * Ad-hoc event counters; individual slots are incremented at fixed indices
 * in the SF_NEWCALL code paths of spx_reass() and spx_output().
 */
97static u_short	spx_newchecks[50];
/*
 * When non-zero, spx_input() refuses a packet arriving in TCPS_LISTEN whose
 * destination connection id or sequence number is non-zero.
 */
98static int	spx_hardnosed;
/*
 * When non-zero, a peer's SPX_SA (send-ack) request sets SF_DELACK in
 * spx_reass() instead of SF_ACKNOW.
 */
99static int	spx_use_delack = 0;
/* When non-zero, spx_trace() is called even for sockets without SO_DEBUG. */
100static int	traceallspxs = 0;
/* Internal statistics; its newstats member is reachable as "spxstat". */
101static struct	spx_istat spx_istat;
/*
 * Count of consecutive identical duplicate acks at which spx_reass()
 * retransmits the packet the peer is asking for.
 */
102static int	spxrexmtthresh = 3;
103
/* Initialise, acquire and release spx_mtx. */
104#define	SPX_LOCK_INIT()	mtx_init(&spx_mtx, "spx_mtx", NULL, MTX_DEF)
105#define	SPX_LOCK()	mtx_lock(&spx_mtx)
106#define	SPX_UNLOCK()	mtx_unlock(&spx_mtx)
107
108/* Following was struct spxstat spxstat; */
109#ifndef spxstat
110#define spxstat spx_istat.newstats
111#endif
112
/*
 * Table with SPX_MAXRXTSHIFT + 1 entries that doubles up to 64 and then
 * stays flat.  Presumably the retransmit back-off multiplier indexed by
 * s_rxtshift -- its use is not visible in this part of the file.
 */
113static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
114    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
115
/* Protocol-internal helpers; all of them operate on the SPX control block. */
116static	void spx_close(struct spxpcb *cb);
117static	void spx_disconnect(struct spxpcb *cb);
118static	void spx_drop(struct spxpcb *cb, int errno);
119static	int spx_output(struct spxpcb *cb, struct mbuf *m0);
120static	int spx_reass(struct spxpcb *cb, struct spx *si);
121static	void spx_setpersist(struct spxpcb *cb);
122static	void spx_template(struct spxpcb *cb);
123static	void spx_timers(struct spxpcb *cb, int timer);
124static	void spx_usrclosed(struct spxpcb *cb);
125
/*
 * Socket-layer entry points.  Apart from spx_pcbdetach(), each of these is
 * installed in one or both of the pr_usrreqs tables defined below.
 */
126static	void spx_usr_abort(struct socket *so);
127static	int spx_accept(struct socket *so, struct sockaddr **nam);
128static	int spx_attach(struct socket *so, int proto, struct thread *td);
129static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
130static	void spx_usr_close(struct socket *so);
131static	int spx_connect(struct socket *so, struct sockaddr *nam,
132			struct thread *td);
133static	void spx_detach(struct socket *so);
134static	void spx_pcbdetach(struct ipxpcb *ipxp);
135static	int spx_usr_disconnect(struct socket *so);
136static	int spx_listen(struct socket *so, int backlog, struct thread *td);
137static	int spx_rcvd(struct socket *so, int flags);
138static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
139static	int spx_send(struct socket *so, int flags, struct mbuf *m,
140		     struct sockaddr *addr, struct mbuf *control,
141		     struct thread *td);
142static	int spx_shutdown(struct socket *so);
143static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
144
145struct	pr_usrreqs spx_usrreqs = {
146	.pru_abort =		spx_usr_abort,
147	.pru_accept =		spx_accept,
148	.pru_attach =		spx_attach,
149	.pru_bind =		spx_bind,
150	.pru_connect =		spx_connect,
151	.pru_control =		ipx_control,
152	.pru_detach =		spx_detach,
153	.pru_disconnect =	spx_usr_disconnect,
154	.pru_listen =		spx_listen,
155	.pru_peeraddr =		ipx_peeraddr,
156	.pru_rcvd =		spx_rcvd,
157	.pru_rcvoob =		spx_rcvoob,
158	.pru_send =		spx_send,
159	.pru_shutdown =		spx_shutdown,
160	.pru_sockaddr =		ipx_sockaddr,
161	.pru_close =		spx_usr_close,
162};
163
164struct	pr_usrreqs spx_usrreq_sps = {
165	.pru_abort =		spx_usr_abort,
166	.pru_accept =		spx_accept,
167	.pru_attach =		spx_sp_attach,
168	.pru_bind =		spx_bind,
169	.pru_connect =		spx_connect,
170	.pru_control =		ipx_control,
171	.pru_detach =		spx_detach,
172	.pru_disconnect =	spx_usr_disconnect,
173	.pru_listen =		spx_listen,
174	.pru_peeraddr =		ipx_peeraddr,
175	.pru_rcvd =		spx_rcvd,
176	.pru_rcvoob =		spx_rcvoob,
177	.pru_send =		spx_send,
178	.pru_shutdown =		spx_shutdown,
179	.pru_sockaddr =		ipx_sockaddr,
180	.pru_close =		spx_usr_close,
181};
182
183static __inline void
184spx_insque(struct spx_q *element, struct spx_q *head)
185{
186
187	element->si_next = head->si_next;
188	element->si_prev = head;
189	head->si_next = element;
190	element->si_next->si_prev = element;
191}
192
193static __inline void
194spx_remque(struct spx_q *element)
195{
196
197	element->si_next->si_prev = element->si_prev;
198	element->si_prev->si_next = element->si_next;
199	element->si_prev = NULL;
200}
201
202void
203spx_init(void)
204{
205
206	SPX_LOCK_INIT();
207	spx_iss = 1; /* WRONG !! should fish it out of TODR */
208}
209
210void
211spx_input(struct mbuf *m, struct ipxpcb *ipxp)
212{
213	struct spxpcb *cb;
214	struct spx *si = mtod(m, struct spx *);
215	struct socket *so;
216	struct spx spx_savesi;
217	int dropsocket = 0;
218	short ostate = 0;
219
220	spxstat.spxs_rcvtotal++;
221	KASSERT(ipxp != NULL, ("spx_input: ipxpcb == NULL"));
222
223	/*
224	 * spx_input() assumes that the caller will hold both the pcb list
225	 * lock and also the ipxp lock.  spx_input() will release both before
226	 * returning, and may in fact trade in the ipxp lock for another pcb
227	 * lock following sonewconn().
228	 */
229	IPX_LIST_LOCK_ASSERT();
230	IPX_LOCK_ASSERT(ipxp);
231
232	cb = ipxtospxpcb(ipxp);
233	KASSERT(cb != NULL, ("spx_input: cb == NULL"));
234
235	if (ipxp->ipxp_flags & IPXP_DROPPED)
236		goto drop;
237
238	if (m->m_len < sizeof(*si)) {
239		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
240			IPX_UNLOCK(ipxp);
241			IPX_LIST_UNLOCK();
242			spxstat.spxs_rcvshort++;
243			return;
244		}
245		si = mtod(m, struct spx *);
246	}
247	si->si_seq = ntohs(si->si_seq);
248	si->si_ack = ntohs(si->si_ack);
249	si->si_alo = ntohs(si->si_alo);
250
251	so = ipxp->ipxp_socket;
252	KASSERT(so != NULL, ("spx_input: so == NULL"));
253
254	if (so->so_options & SO_DEBUG || traceallspxs) {
255		ostate = cb->s_state;
256		spx_savesi = *si;
257	}
258	if (so->so_options & SO_ACCEPTCONN) {
259		struct spxpcb *ocb = cb;
260
261		so = sonewconn(so, 0);
262		if (so == NULL)
263			goto drop;
264
265		/*
266		 * This is ugly, but ....
267		 *
268		 * Mark socket as temporary until we're committed to keeping
269		 * it.  The code at ``drop'' and ``dropwithreset'' check the
270		 * flag dropsocket to see if the temporary socket created
271		 * here should be discarded.  We mark the socket as
272		 * discardable until we're committed to it below in
273		 * TCPS_LISTEN.
274		 *
275		 * XXXRW: In the new world order of real kernel parallelism,
276		 * temporarily allocating the socket when we're "not sure"
277		 * seems like a bad idea, as we might race to remove it if
278		 * the listen socket is closed...?
279		 *
280		 * We drop the lock of the listen socket ipxp, and acquire
281		 * the lock of the new socket ippx.
282		 */
283		dropsocket++;
284		IPX_UNLOCK(ipxp);
285		ipxp = (struct ipxpcb *)so->so_pcb;
286		IPX_LOCK(ipxp);
287		ipxp->ipxp_laddr = si->si_dna;
288		cb = ipxtospxpcb(ipxp);
289		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
290		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
291		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
292		cb->s_state = TCPS_LISTEN;
293	}
294	IPX_LOCK_ASSERT(ipxp);
295
296	/*
297	 * Packet received on connection.  Reset idle time and keep-alive
298	 * timer.
299	 */
300	cb->s_idle = 0;
301	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
302
303	switch (cb->s_state) {
304	case TCPS_LISTEN:{
305		struct sockaddr_ipx *sipx, ssipx;
306		struct ipx_addr laddr;
307
308		/*
309		 * If somebody here was carying on a conversation and went
310		 * away, and his pen pal thinks he can still talk, we get the
311		 * misdirected packet.
312		 */
313		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
314			spx_istat.gonawy++;
315			goto dropwithreset;
316		}
317		sipx = &ssipx;
318		bzero(sipx, sizeof *sipx);
319		sipx->sipx_len = sizeof(*sipx);
320		sipx->sipx_family = AF_IPX;
321		sipx->sipx_addr = si->si_sna;
322		laddr = ipxp->ipxp_laddr;
323		if (ipx_nullhost(laddr))
324			ipxp->ipxp_laddr = si->si_dna;
325		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
326			ipxp->ipxp_laddr = laddr;
327			spx_istat.noconn++;
328			goto drop;
329		}
330		spx_template(cb);
331		dropsocket = 0;		/* committed to socket */
332		cb->s_did = si->si_sid;
333		cb->s_rack = si->si_ack;
334		cb->s_ralo = si->si_alo;
335#define THREEWAYSHAKE
336#ifdef THREEWAYSHAKE
337		cb->s_state = TCPS_SYN_RECEIVED;
338		cb->s_force = 1 + SPXT_KEEP;
339		spxstat.spxs_accepts++;
340		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
341		}
342		break;
343
344	 case TCPS_SYN_RECEIVED: {
345		/*
346		 * This state means that we have heard a response to our
347		 * acceptance of their connection.  It is probably logically
348		 * unnecessary in this implementation.
349		 */
350		if (si->si_did != cb->s_sid) {
351			spx_istat.wrncon++;
352			goto drop;
353		}
354#endif
355		ipxp->ipxp_fport =  si->si_sport;
356		cb->s_timer[SPXT_REXMT] = 0;
357		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
358		soisconnected(so);
359		cb->s_state = TCPS_ESTABLISHED;
360		spxstat.spxs_accepts++;
361		}
362		break;
363
364	case TCPS_SYN_SENT:
365		/*
366		 * This state means that we have gotten a response to our
367		 * attempt to establish a connection.  We fill in the data
368		 * from the other side, telling us which port to respond to,
369		 * instead of the well-known one we might have sent to in the
370		 * first place.  We also require that this is a response to
371		 * our connection id.
372		 */
373		if (si->si_did != cb->s_sid) {
374			spx_istat.notme++;
375			goto drop;
376		}
377		spxstat.spxs_connects++;
378		cb->s_did = si->si_sid;
379		cb->s_rack = si->si_ack;
380		cb->s_ralo = si->si_alo;
381		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
382		cb->s_timer[SPXT_REXMT] = 0;
383		cb->s_flags |= SF_ACKNOW;
384		soisconnected(so);
385		cb->s_state = TCPS_ESTABLISHED;
386
387		/*
388		 * Use roundtrip time of connection request for initial rtt.
389		 */
390		if (cb->s_rtt) {
391			cb->s_srtt = cb->s_rtt << 3;
392			cb->s_rttvar = cb->s_rtt << 1;
393			SPXT_RANGESET(cb->s_rxtcur,
394			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
395			    SPXTV_MIN, SPXTV_REXMTMAX);
396			    cb->s_rtt = 0;
397		}
398	}
399
400	if (so->so_options & SO_DEBUG || traceallspxs)
401		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
402
403	m->m_len -= sizeof(struct ipx);
404	m->m_pkthdr.len -= sizeof(struct ipx);
405	m->m_data += sizeof(struct ipx);
406
407	if (spx_reass(cb, si))
408		m_freem(m);
409	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
410		spx_output(cb, NULL);
411	cb->s_flags &= ~(SF_WIN|SF_RXT);
412	IPX_UNLOCK(ipxp);
413	IPX_LIST_UNLOCK();
414	return;
415
416dropwithreset:
417	IPX_LOCK_ASSERT(ipxp);
418	if (cb == NULL || (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
419	    traceallspxs))
420		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
421	IPX_UNLOCK(ipxp);
422	if (dropsocket) {
423		struct socket *head;
424		ACCEPT_LOCK();
425		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
426		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
427		head = so->so_head;
428		TAILQ_REMOVE(&head->so_incomp, so, so_list);
429		head->so_incqlen--;
430		so->so_qstate &= ~SQ_INCOMP;
431		so->so_head = NULL;
432		ACCEPT_UNLOCK();
433		soabort(so);
434	}
435	IPX_LIST_UNLOCK();
436	m_freem(m);
437	return;
438
439drop:
440	IPX_LOCK_ASSERT(ipxp);
441	if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
442		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
443	IPX_UNLOCK(ipxp);
444	IPX_LIST_UNLOCK();
445	m_freem(m);
446}
447
448/*
449 * This is structurally similar to the tcp reassembly routine but its
450 * function is somewhat different: it merely queues packets up, and
451 * suppresses duplicates.
 *
 * The routine works in three stages:
 *   1. Use the acknowledge/allocation fields of `si' to update the sending
 *      side of `cb': duplicate-ack counting, round-trip estimate,
 *      congestion window, trimming of acknowledged records from the send
 *      buffer, and the send window.
 *   2. Unless the packet is a system packet, a duplicate, or beyond the
 *      allocation we granted, link it into the cb->s_q ring in sequence
 *      order.
 *   3. (label ``present'') Move every packet that is now in sequence from
 *      the ring into the socket receive buffer, advancing cb->s_ack.
 *
 * Passing si == SI(0) skips stages 1 and 2 and only runs stage 3.
 *
 * The pcb lock of cb->s_ipxpcb must be held (asserted below).
 *
 * Returns 1 when the packet was not retained -- spx_input() then frees the
 * mbuf -- and 0 once stage 3 has run.
452 */
453static int
454spx_reass(struct spxpcb *cb, struct spx *si)
455{
456	struct spx_q *q;
457	struct mbuf *m;
458	struct socket *so = cb->s_ipxpcb->ipxp_socket;
	/* Non-zero when SF_HI is set: records are then delivered with headers. */
459	char packetp = cb->s_flags & SF_HI;
460	int incr;
	/* Set once at least one packet has been moved to the receive buffer. */
461	char wakeup = 0;
462
463	IPX_LOCK_ASSERT(cb->s_ipxpcb);
464
	/* SI(0) is presumably a null struct spx pointer -- macro not shown here. */
465	if (si == SI(0))
466		goto present;
467
468	/*
469	 * Update our news from them.
470	 */
471	if (si->si_cc & SPX_SA)
472		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
473	if (SSEQ_GT(si->si_alo, cb->s_ralo))
474		cb->s_flags |= SF_WIN;
	/* The ack does not advance past what the peer already acknowledged. */
475	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
476		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
477			spxstat.spxs_rcvdupack++;
478
479			/*
480			 * If this is a completely duplicate ack and other
481			 * conditions hold, we assume a packet has been
482			 * dropped and retransmit it exactly as in
483			 * tcp_input().
484			 */
485			if (si->si_ack != cb->s_rack ||
486			    si->si_alo != cb->s_ralo)
487				cb->s_dupacks = 0;
488			else if (++cb->s_dupacks == spxrexmtthresh) {
489				u_short onxt = cb->s_snxt;
490				int cwnd = cb->s_cwnd;
491
				/*
				 * Temporarily pull s_snxt back to the packet
				 * the peer is waiting for and shrink the
				 * congestion window to CUNIT for the forced
				 * spx_output() call; afterwards s_snxt is
				 * restored if it was further ahead and the
				 * window is set to half its old value when
				 * that old value was at least 4 * CUNIT.
				 */
492				cb->s_snxt = si->si_ack;
493				cb->s_cwnd = CUNIT;
494				cb->s_force = 1 + SPXT_REXMT;
495				spx_output(cb, NULL);
496				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
497				cb->s_rtt = 0;
498				if (cwnd >= 4 * CUNIT)
499					cb->s_cwnd = cwnd / 2;
500				if (SSEQ_GT(onxt, cb->s_snxt))
501					cb->s_snxt = onxt;
502				return (1);
503			}
504		} else
505			cb->s_dupacks = 0;
506		goto update_window;
507	}
508	cb->s_dupacks = 0;
509
510	/*
511	 * If our correspondent acknowledges data we haven't sent TCP would
512	 * drop the packet after acking.  We'll be a little more permissive.
513	 */
514	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
515		spxstat.spxs_rcvacktoomuch++;
516		si->si_ack = cb->s_smax + 1;
517	}
518	spxstat.spxs_rcvackpack++;
519
520	/*
521	 * If transmit timer is running and timed sequence number was acked,
522	 * update smoothed round trip time.  See discussion of algorithm in
523	 * tcp_input.c
524	 */
525	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
526		spxstat.spxs_rttupdated++;
527		if (cb->s_srtt != 0) {
528			short delta;
529			delta = cb->s_rtt - (cb->s_srtt >> 3);
530			if ((cb->s_srtt += delta) <= 0)
531				cb->s_srtt = 1;
532			if (delta < 0)
533				delta = -delta;
534			delta -= (cb->s_rttvar >> 2);
535			if ((cb->s_rttvar += delta) <= 0)
536				cb->s_rttvar = 1;
537		} else {
538			/*
539			 * No rtt measurement yet.
540			 */
541			cb->s_srtt = cb->s_rtt << 3;
542			cb->s_rttvar = cb->s_rtt << 1;
543		}
544		cb->s_rtt = 0;
545		cb->s_rxtshift = 0;
546		SPXT_RANGESET(cb->s_rxtcur,
547			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
548			SPXTV_MIN, SPXTV_REXMTMAX);
549	}
550
551	/*
552	 * If all outstanding data is acked, stop retransmit timer and
553	 * remember to restart (more output or persist).  If there is more
554	 * data to be acked, restart retransmit timer, using current
555	 * (possibly backed-off) value;
556	 */
557	if (si->si_ack == cb->s_smax + 1) {
558		cb->s_timer[SPXT_REXMT] = 0;
559		cb->s_flags |= SF_RXT;
560	} else if (cb->s_timer[SPXT_PERSIST] == 0)
561		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
562
563	/*
564	 * When new data is acked, open the congestion window.  If the window
565	 * gives us less than ssthresh packets in flight, open exponentially
566	 * (maxseg at a time).  Otherwise open linearly (maxseg^2 / cwnd at a
567	 * time).
568	 */
569	incr = CUNIT;
570	if (cb->s_cwnd > cb->s_ssthresh)
571		incr = max(incr * incr / cb->s_cwnd, 1);
572	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
573
574	/*
575	 * Trim Acked data from output queue.
576	 */
577	SOCKBUF_LOCK(&so->so_snd);
578	while ((m = so->so_snd.sb_mb) != NULL) {
579		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
580			sbdroprecord_locked(&so->so_snd);
581		else
582			break;
583	}
	/*
	 * NOTE(review): no explicit SOCKBUF_UNLOCK(&so->so_snd) follows;
	 * presumably sowwakeup_locked() releases the lock taken above.
	 */
584	sowwakeup_locked(so);
585	cb->s_rack = si->si_ack;
586update_window:
587	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
588		cb->s_snxt = cb->s_rack;
	/*
	 * NOTE(review): as parenthesised, this condition reads
	 *   A || ((B && C) || (D && E))
	 * with A: swl1 < seq, B: swl1 == seq, C: swl2 < ack, D: swl2 == ack,
	 * E: ralo < alo.  The TCP window-update test it resembles nests as
	 *   A || (B && (C || (D && E)))
	 * Confirm which grouping is intended before touching it.
	 */
589	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
590	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
591	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
592		/* keep track of pure window updates */
		/*
		 * NOTE(review): the decrement below apparently compensates for
		 * the spxs_rcvdupack increment in the duplicate-ack branch
		 * above, which is not guaranteed to have run on this path.
		 */
593		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
594		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
595			spxstat.spxs_rcvwinupd++;
596			spxstat.spxs_rcvdupack--;
597		}
598		cb->s_ralo = si->si_alo;
599		cb->s_swl1 = si->si_seq;
600		cb->s_swl2 = si->si_ack;
601		cb->s_swnd = (1 + si->si_alo - si->si_ack);
602		if (cb->s_swnd > cb->s_smxw)
603			cb->s_smxw = cb->s_swnd;
604		cb->s_flags |= SF_WIN;
605	}
606
607	/*
608	 * If this packet number is higher than that which we have allocated
609	 * refuse it, unless urgent.
610	 */
611	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
612		if (si->si_cc & SPX_SP) {
613			spxstat.spxs_rcvwinprobe++;
614			return (1);
615		} else
616			spxstat.spxs_rcvpackafterwin++;
		/* Out-of-band packets are accepted up to 60 past the allocation. */
617		if (si->si_cc & SPX_OB) {
618			if (SSEQ_GT(si->si_seq, cb->s_alo + 60))
619				return (1); /* else queue this packet; */
620		} else {
621#ifdef BROKEN
622			/*
623			 * XXXRW: This is broken on at least one count:
624			 * spx_close() will free the ipxp and related parts,
625			 * which are then touched by spx_input() after the
626			 * return from spx_reass().
627			 */
628			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
629			if (so->so_state && SS_NOFDREF) {
630				spx_close(cb);
631			} else
632				       would crash system*/
633#endif
634			spx_istat.notyet++;
635			return (1);
636		}
637	}
638
639	/*
640	 * If this is a system packet, we don't need to queue it up, and
641	 * won't update acknowledge #.
642	 */
643	if (si->si_cc & SPX_SP)
644		return (1);
645
646	/*
647	 * We have already seen this packet, so drop.
648	 */
649	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
650		spx_istat.bdreas++;
651		spxstat.spxs_rcvduppack++;
652		if (si->si_seq == cb->s_ack - 1)
653			spx_istat.lstdup++;
654		return (1);
655	}
656
657	/*
658	 * Loop through all packets queued up to insert in appropriate
659	 * sequence.
660	 */
661	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
662		if (si->si_seq == SI(q)->si_seq) {
663			spxstat.spxs_rcvduppack++;
664			return (1);
665		}
666		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
667			spxstat.spxs_rcvoopack++;
668			break;
669		}
670	}
	/*
	 * q is now the first queued packet with a larger sequence number, or
	 * the ring head if there is none; link the new packet in before it.
	 * The packet header itself doubles as the queue element.
	 */
671	spx_insque((struct spx_q *)si, q->si_prev);
672
673	/*
674	 * If this packet is urgent, inform process
675	 */
676	if (si->si_cc & SPX_OB) {
677		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
678		sohasoutofband(so);
679		cb->s_oobflags |= SF_IOOB;
680	}
681present:
682#define SPINC sizeof(struct spxhdr)
683	SOCKBUF_LOCK(&so->so_rcv);
684
685	/*
686	 * Loop through all packets queued up to update acknowledge number,
687	 * and present all acknowledged data to user; if in packet interface
688	 * mode, show packet headers.
689	 */
690	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
691		  if (SI(q)->si_seq == cb->s_ack) {
692			cb->s_ack++;
			/* dtom() presumably maps the element back to its mbuf. */
693			m = dtom(q);
694			if (SI(q)->si_cc & SPX_OB) {
695				cb->s_oobflags &= ~SF_IOOB;
696				if (so->so_rcv.sb_cc)
697					so->so_oobmark = so->so_rcv.sb_cc;
698				else
699					so->so_rcv.sb_state |= SBS_RCVATMARK;
700			}
			/*
			 * Step back before unlinking so that the loop's
			 * q = q->si_next lands on the element that followed
			 * the one being removed.
			 */
701			q = q->si_prev;
702			spx_remque(q->si_next);
703			wakeup = 1;
704			spxstat.spxs_rcvpack++;
705#ifdef SF_NEWCALL
706			if (cb->s_flags2 & SF_NEWCALL) {
707				struct spxhdr *sp = mtod(m, struct spxhdr *);
708				u_char dt = sp->spx_dt;
709				spx_newchecks[4]++;
				/*
				 * When the datastream type changes, queue a
				 * small MT_CONTROL mbuf carrying the new type
				 * ahead of the data.
				 */
710				if (dt != cb->s_rhdr.spx_dt) {
711					struct mbuf *mm =
712					   m_getclr(M_DONTWAIT, MT_CONTROL);
713					spx_newchecks[0]++;
714					if (mm != NULL) {
715						u_short *s =
716							mtod(mm, u_short *);
717						cb->s_rhdr.spx_dt = dt;
718						mm->m_len = 5; /*XXX*/
719						s[0] = 5;
720						s[1] = 1;
721						*(u_char *)(&s[2]) = dt;
722						sbappend_locked(&so->so_rcv, mm);
723					}
724				}
725				if (sp->spx_cc & SPX_OB) {
726					MCHTYPE(m, MT_OOBDATA);
727					spx_newchecks[1]++;
728					so->so_oobmark = 0;
729					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
730				}
				/* Strip the SPX header unless headers are wanted. */
731				if (packetp == 0) {
732					m->m_data += SPINC;
733					m->m_len -= SPINC;
734					m->m_pkthdr.len -= SPINC;
735				}
736				if ((sp->spx_cc & SPX_EM) || packetp) {
737					sbappendrecord_locked(&so->so_rcv, m);
738					spx_newchecks[9]++;
739				} else
740					sbappend_locked(&so->so_rcv, m);
741			} else
742#endif
			/*
			 * Reached directly when SF_NEWCALL is not defined,
			 * otherwise as the else-branch of the test above.
			 */
743			if (packetp)
744				sbappendrecord_locked(&so->so_rcv, m);
745			else {
				/* Remember the header, then strip it from the data. */
746				cb->s_rhdr = *mtod(m, struct spxhdr *);
747				m->m_data += SPINC;
748				m->m_len -= SPINC;
749				m->m_pkthdr.len -= SPINC;
750				sbappend_locked(&so->so_rcv, m);
751			}
752		  } else
753			break;
754	}
	/*
	 * NOTE(review): only the else-branch unlocks so_rcv explicitly;
	 * presumably sorwakeup_locked() releases the lock itself.
	 */
755	if (wakeup)
756		sorwakeup_locked(so);
757	else
758		SOCKBUF_UNLOCK(&so->so_rcv);
759	return (0);
760}
761
/*
 * Control-input hook for SPX.  This is a placeholder: all three arguments
 * are ignored and nothing is done.
 */
void
spx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
{

	/* Nothing to do for now. */
}
768
769static int
770spx_output(struct spxpcb *cb, struct mbuf *m0)
771{
772	struct socket *so = cb->s_ipxpcb->ipxp_socket;
773	struct mbuf *m;
774	struct spx *si = NULL;
775	struct sockbuf *sb = &so->so_snd;
776	int len = 0, win, rcv_win;
777	short span, off, recordp = 0;
778	u_short alo;
779	int error = 0, sendalot;
780#ifdef notdef
781	int idle;
782#endif
783	struct mbuf *mprev;
784
785	IPX_LOCK_ASSERT(cb->s_ipxpcb);
786
787	if (m0 != NULL) {
788		int mtu = cb->s_mtu;
789		int datalen;
790
791		/*
792		 * Make sure that packet isn't too big.
793		 */
794		for (m = m0; m != NULL; m = m->m_next) {
795			mprev = m;
796			len += m->m_len;
797			if (m->m_flags & M_EOR)
798				recordp = 1;
799		}
800		datalen = (cb->s_flags & SF_HO) ?
801				len - sizeof(struct spxhdr) : len;
802		if (datalen > mtu) {
803			if (cb->s_flags & SF_PI) {
804				m_freem(m0);
805				return (EMSGSIZE);
806			} else {
807				int oldEM = cb->s_cc & SPX_EM;
808
809				cb->s_cc &= ~SPX_EM;
810				while (len > mtu) {
811					m = m_copym(m0, 0, mtu, M_DONTWAIT);
812					if (m == NULL) {
813					    cb->s_cc |= oldEM;
814					    m_freem(m0);
815					    return (ENOBUFS);
816					}
817					if (cb->s_flags & SF_NEWCALL) {
818					    struct mbuf *mm = m;
819					    spx_newchecks[7]++;
820					    while (mm != NULL) {
821						mm->m_flags &= ~M_EOR;
822						mm = mm->m_next;
823					    }
824					}
825					error = spx_output(cb, m);
826					if (error) {
827						cb->s_cc |= oldEM;
828						m_freem(m0);
829						return (error);
830					}
831					m_adj(m0, mtu);
832					len -= mtu;
833				}
834				cb->s_cc |= oldEM;
835			}
836		}
837
838		/*
839		 * Force length even, by adding a "garbage byte" if
840		 * necessary.
841		 */
842		if (len & 1) {
843			m = mprev;
844			if (M_TRAILINGSPACE(m) >= 1)
845				m->m_len++;
846			else {
847				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
848
849				if (m1 == NULL) {
850					m_freem(m0);
851					return (ENOBUFS);
852				}
853				m1->m_len = 1;
854				*(mtod(m1, u_char *)) = 0;
855				m->m_next = m1;
856			}
857		}
858		m = m_gethdr(M_DONTWAIT, MT_DATA);
859		if (m == NULL) {
860			m_freem(m0);
861			return (ENOBUFS);
862		}
863
864		/*
865		 * Fill in mbuf with extended SP header and addresses and
866		 * length put into network format.
867		 */
868		MH_ALIGN(m, sizeof(struct spx));
869		m->m_len = sizeof(struct spx);
870		m->m_next = m0;
871		si = mtod(m, struct spx *);
872		si->si_i = *cb->s_ipx;
873		si->si_s = cb->s_shdr;
874		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
875			struct spxhdr *sh;
876			if (m0->m_len < sizeof(*sh)) {
877				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
878					m_free(m);
879					m_freem(m0);
880					return (EINVAL);
881				}
882				m->m_next = m0;
883			}
884			sh = mtod(m0, struct spxhdr *);
885			si->si_dt = sh->spx_dt;
886			si->si_cc |= sh->spx_cc & SPX_EM;
887			m0->m_len -= sizeof(*sh);
888			m0->m_data += sizeof(*sh);
889			len -= sizeof(*sh);
890		}
891		len += sizeof(*si);
892		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
893			si->si_cc |= SPX_EM;
894			spx_newchecks[8]++;
895		}
896		if (cb->s_oobflags & SF_SOOB) {
897			/*
898			 * Per jqj@cornell: Make sure OB packets convey
899			 * exactly 1 byte.  If the packet is 1 byte or
900	 * larger, we have already guaranteed there to be at
901			 * least one garbage byte for the checksum, and extra
902			 * bytes shouldn't hurt!
903			 */
904			if (len > sizeof(*si)) {
905				si->si_cc |= SPX_OB;
906				len = (1 + sizeof(*si));
907			}
908		}
909		si->si_len = htons((u_short)len);
910		m->m_pkthdr.len = ((len - 1) | 1) + 1;
911
912		/*
913		 * Queue stuff up for output.
914		 */
915		sbappendrecord(sb, m);
916		cb->s_seq++;
917	}
918#ifdef notdef
919	idle = (cb->s_smax == (cb->s_rack - 1));
920#endif
921again:
922	sendalot = 0;
923	off = cb->s_snxt - cb->s_rack;
924	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
925
926	/*
927	 * If in persist timeout with window of 0, send a probe.  Otherwise,
928	 * if window is small but non-zero and timer expired, send what we
929	 * can and go into transmit state.
930	 */
931	if (cb->s_force == 1 + SPXT_PERSIST) {
932		if (win != 0) {
933			cb->s_timer[SPXT_PERSIST] = 0;
934			cb->s_rxtshift = 0;
935		}
936	}
937	span = cb->s_seq - cb->s_rack;
938	len = min(span, win) - off;
939
940	if (len < 0) {
941		/*
942		 * Window shrank after we went into it.  If window shrank to
943	 * 0, cancel pending retransmission and pull s_snxt back to
944	 * (closed) window.  We will enter persist state below.  If
945	 * the window didn't close completely, just wait for an ACK.
946		 */
947		len = 0;
948		if (win == 0) {
949			cb->s_timer[SPXT_REXMT] = 0;
950			cb->s_snxt = cb->s_rack;
951		}
952	}
953	if (len > 1)
954		sendalot = 1;
955	rcv_win = sbspace(&so->so_rcv);
956
957	/*
958	 * Send if we owe peer an ACK.
959	 */
960	if (cb->s_oobflags & SF_SOOB) {
961		/*
962		 * Must transmit this out of band packet.
963		 */
964		cb->s_oobflags &= ~ SF_SOOB;
965		sendalot = 1;
966		spxstat.spxs_sndurg++;
967		goto found;
968	}
969	if (cb->s_flags & SF_ACKNOW)
970		goto send;
971	if (cb->s_state < TCPS_ESTABLISHED)
972		goto send;
973
974	/*
975	 * Silly window can't happen in spx.  Code from TCP deleted.
976	 */
977	if (len)
978		goto send;
979
980	/*
981	 * Compare available window to amount of window known to peer (as
982	 * advertised window less next expected input.)  If the difference is
983	 * at least two packets or at least 35% of the maximum possible
984	 * window, then want to send a window update to peer.
985	 */
986	if (rcv_win > 0) {
987		u_short delta =  1 + cb->s_alo - cb->s_ack;
988		int adv = rcv_win - (delta * cb->s_mtu);
989
990		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
991		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
992			spxstat.spxs_sndwinup++;
993			cb->s_flags |= SF_ACKNOW;
994			goto send;
995		}
996
997	}
998
999	/*
1000	 * Many comments from tcp_output.c are appropriate here including ...
1001	 * If send window is too small, there is data to transmit, and no
1002	 * retransmit or persist is pending, then go to persist state.  If
1003	 * nothing happens soon, send when timer expires: if window is
1004	 * non-zero, transmit what we can, otherwise send a probe.
1005	 */
1006	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
1007	    cb->s_timer[SPXT_PERSIST] == 0) {
1008		cb->s_rxtshift = 0;
1009		spx_setpersist(cb);
1010	}
1011
1012	/*
1013	 * No reason to send a packet, just return.
1014	 */
1015	cb->s_outx = 1;
1016	return (0);
1017
1018send:
1019	/*
1020	 * Find requested packet.
1021	 */
1022	si = 0;
1023	if (len > 0) {
1024		cb->s_want = cb->s_snxt;
1025		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
1026			si = mtod(m, struct spx *);
1027			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
1028				break;
1029		}
1030	found:
1031		if (si != NULL) {
1032			if (si->si_seq == cb->s_snxt)
1033					cb->s_snxt++;
1034				else
1035					spxstat.spxs_sndvoid++, si = 0;
1036		}
1037	}
1038
1039	/*
1040	 * Update window.
1041	 */
1042	if (rcv_win < 0)
1043		rcv_win = 0;
1044	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
1045	if (SSEQ_LT(alo, cb->s_alo))
1046		alo = cb->s_alo;
1047
1048	if (si != NULL) {
1049		/*
1050		 * Must make a copy of this packet for ipx_output to monkey
1051		 * with.
1052		 */
1053		m = m_copy(dtom(si), 0, (int)M_COPYALL);
1054		if (m == NULL)
1055			return (ENOBUFS);
1056		si = mtod(m, struct spx *);
1057		if (SSEQ_LT(si->si_seq, cb->s_smax))
1058			spxstat.spxs_sndrexmitpack++;
1059		else
1060			spxstat.spxs_sndpack++;
1061	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1062		/*
1063		 * Must send an acknowledgement or a probe.
1064		 */
1065		if (cb->s_force)
1066			spxstat.spxs_sndprobe++;
1067		if (cb->s_flags & SF_ACKNOW)
1068			spxstat.spxs_sndacks++;
1069		m = m_gethdr(M_DONTWAIT, MT_DATA);
1070		if (m == NULL)
1071			return (ENOBUFS);
1072
1073		/*
1074		 * Fill in mbuf with extended SP header and addresses and
1075		 * length put into network format.
1076		 */
1077		MH_ALIGN(m, sizeof(struct spx));
1078		m->m_len = sizeof(*si);
1079		m->m_pkthdr.len = sizeof(*si);
1080		si = mtod(m, struct spx *);
1081		si->si_i = *cb->s_ipx;
1082		si->si_s = cb->s_shdr;
1083		si->si_seq = cb->s_smax + 1;
1084		si->si_len = htons(sizeof(*si));
1085		si->si_cc |= SPX_SP;
1086	} else {
1087		cb->s_outx = 3;
1088		if (so->so_options & SO_DEBUG || traceallspxs)
1089			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1090		return (0);
1091	}
1092
1093	/*
1094	 * Stuff checksum and output datagram.
1095	 */
1096	if ((si->si_cc & SPX_SP) == 0) {
1097		if (cb->s_force != (1 + SPXT_PERSIST) ||
1098		    cb->s_timer[SPXT_PERSIST] == 0) {
1099			/*
1100			 * If this is a new packet and we are not currently
1101			 * timing anything, time this one.
1102			 */
1103			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1104				cb->s_smax = si->si_seq;
1105				if (cb->s_rtt == 0) {
1106					spxstat.spxs_segstimed++;
1107					cb->s_rtseq = si->si_seq;
1108					cb->s_rtt = 1;
1109				}
1110			}
1111
1112			/*
1113			 * Set rexmt timer if not currently set, initial
1114			 * value for retransmit timer is smoothed round-trip
1115			 * time + 2 * round-trip time variance.  Initialize
1116			 * shift counter which is used for backoff of
1117			 * retransmit time.
1118			 */
1119			if (cb->s_timer[SPXT_REXMT] == 0 &&
1120			    cb->s_snxt != cb->s_rack) {
1121				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1122				if (cb->s_timer[SPXT_PERSIST]) {
1123					cb->s_timer[SPXT_PERSIST] = 0;
1124					cb->s_rxtshift = 0;
1125				}
1126			}
1127		} else if (SSEQ_LT(cb->s_smax, si->si_seq))
1128			cb->s_smax = si->si_seq;
1129	} else if (cb->s_state < TCPS_ESTABLISHED) {
1130		if (cb->s_rtt == 0)
1131			cb->s_rtt = 1; /* Time initial handshake */
1132		if (cb->s_timer[SPXT_REXMT] == 0)
1133			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1134	}
1135
1136	/*
1137	 * Do not request acks when we ack their data packets or when we do a
1138	 * gratuitous window update.
1139	 */
1140	if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1141		si->si_cc |= SPX_SA;
1142	si->si_seq = htons(si->si_seq);
1143	si->si_alo = htons(alo);
1144	si->si_ack = htons(cb->s_ack);
1145
1146	if (ipxcksum)
1147		si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1148	else
1149		si->si_sum = 0xffff;
1150
1151	cb->s_outx = 4;
1152	if (so->so_options & SO_DEBUG || traceallspxs)
1153		spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1154
1155	if (so->so_options & SO_DONTROUTE)
1156		error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1157	else
1158		error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1159	if (error)
1160		return (error);
1161	spxstat.spxs_sndtotal++;
1162
1163	/*
1164	 * Data sent (as far as we can tell).  If this advertises a larger
1165	 * window than any other segment, then remember the size of the
1166	 * advertized window.  Any pending ACK has now been sent.
1167	 */
1168	cb->s_force = 0;
1169	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1170	if (SSEQ_GT(alo, cb->s_alo))
1171		cb->s_alo = alo;
1172	if (sendalot)
1173		goto again;
1174	cb->s_outx = 5;
1175	return (0);
1176}
1177
1178static int spx_do_persist_panics = 0;
1179
1180static void
1181spx_setpersist(struct spxpcb *cb)
1182{
1183	int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1184
1185	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1186
1187	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1188		panic("spx_output REXMT");
1189
1190	/*
1191	 * Start/restart persistance timer.
1192	 */
1193	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1194	    t*spx_backoff[cb->s_rxtshift],
1195	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1196	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1197		cb->s_rxtshift++;
1198}
1199
1200int
1201spx_ctloutput(struct socket *so, struct sockopt *sopt)
1202{
1203	struct spxhdr spxhdr;
1204	struct ipxpcb *ipxp;
1205	struct spxpcb *cb;
1206	int mask, error;
1207	short soptval;
1208	u_short usoptval;
1209	int optval;
1210
1211	ipxp = sotoipxpcb(so);
1212	KASSERT(ipxp != NULL, ("spx_ctloutput: ipxp == NULL"));
1213
1214	/*
1215	 * This will have to be changed when we do more general stacking of
1216	 * protocols.
1217	 */
1218	if (sopt->sopt_level != IPXPROTO_SPX)
1219		return (ipx_ctloutput(so, sopt));
1220
1221	IPX_LOCK(ipxp);
1222	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1223		IPX_UNLOCK(ipxp);
1224		return (ECONNRESET);
1225	}
1226
1227	IPX_LOCK(ipxp);
1228	cb = ipxtospxpcb(ipxp);
1229	KASSERT(cb != NULL, ("spx_ctloutput: cb == NULL"));
1230
1231	error = 0;
1232	switch (sopt->sopt_dir) {
1233	case SOPT_GET:
1234		switch (sopt->sopt_name) {
1235		case SO_HEADERS_ON_INPUT:
1236			mask = SF_HI;
1237			goto get_flags;
1238
1239		case SO_HEADERS_ON_OUTPUT:
1240			mask = SF_HO;
1241		get_flags:
1242			soptval = cb->s_flags & mask;
1243			IPX_UNLOCK(ipxp);
1244			error = sooptcopyout(sopt, &soptval,
1245			    sizeof(soptval));
1246			break;
1247
1248		case SO_MTU:
1249			usoptval = cb->s_mtu;
1250			IPX_UNLOCK(ipxp);
1251			error = sooptcopyout(sopt, &usoptval,
1252			    sizeof(usoptval));
1253			break;
1254
1255		case SO_LAST_HEADER:
1256			spxhdr = cb->s_rhdr;
1257			IPX_UNLOCK(ipxp);
1258			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
1259			break;
1260
1261		case SO_DEFAULT_HEADERS:
1262			spxhdr = cb->s_shdr;
1263			IPX_UNLOCK(ipxp);
1264			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
1265			break;
1266
1267		default:
1268			IPX_UNLOCK(ipxp);
1269			error = ENOPROTOOPT;
1270		}
1271		break;
1272
1273	case SOPT_SET:
1274		/*
1275		 * XXX Why are these shorts on get and ints on set?  That
1276		 * doesn't make any sense...
1277		 *
1278		 * XXXRW: Note, when we re-acquire the ipxp lock, we should
1279		 * re-check that it's not dropped.
1280		 */
1281		IPX_UNLOCK(ipxp);
1282		switch (sopt->sopt_name) {
1283		case SO_HEADERS_ON_INPUT:
1284			mask = SF_HI;
1285			goto set_head;
1286
1287		case SO_HEADERS_ON_OUTPUT:
1288			mask = SF_HO;
1289		set_head:
1290			error = sooptcopyin(sopt, &optval, sizeof optval,
1291					    sizeof optval);
1292			if (error)
1293				break;
1294
1295			IPX_LOCK(ipxp);
1296			if (cb->s_flags & SF_PI) {
1297				if (optval)
1298					cb->s_flags |= mask;
1299				else
1300					cb->s_flags &= ~mask;
1301			} else error = EINVAL;
1302			IPX_UNLOCK(ipxp);
1303			break;
1304
1305		case SO_MTU:
1306			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1307					    sizeof usoptval);
1308			if (error)
1309				break;
1310			/* Unlocked write. */
1311			cb->s_mtu = usoptval;
1312			break;
1313
1314#ifdef SF_NEWCALL
1315		case SO_NEWCALL:
1316			error = sooptcopyin(sopt, &optval, sizeof optval,
1317					    sizeof optval);
1318			if (error)
1319				break;
1320			IPX_LOCK(ipxp);
1321			if (optval) {
1322				cb->s_flags2 |= SF_NEWCALL;
1323				spx_newchecks[5]++;
1324			} else {
1325				cb->s_flags2 &= ~SF_NEWCALL;
1326				spx_newchecks[6]++;
1327			}
1328			IPX_UNLOCK(ipxp);
1329			break;
1330#endif
1331
1332		case SO_DEFAULT_HEADERS:
1333			{
1334				struct spxhdr sp;
1335
1336				error = sooptcopyin(sopt, &sp, sizeof sp,
1337						    sizeof sp);
1338				if (error)
1339					break;
1340				IPX_LOCK(ipxp);
1341				cb->s_dt = sp.spx_dt;
1342				cb->s_cc = sp.spx_cc & SPX_EM;
1343				IPX_UNLOCK(ipxp);
1344			}
1345			break;
1346
1347		default:
1348			error = ENOPROTOOPT;
1349		}
1350		break;
1351
1352	default:
1353		panic("spx_ctloutput: bad socket option direction");
1354	}
1355	return (error);
1356}
1357
1358static void
1359spx_usr_abort(struct socket *so)
1360{
1361	struct ipxpcb *ipxp;
1362	struct spxpcb *cb;
1363
1364	ipxp = sotoipxpcb(so);
1365	KASSERT(ipxp != NULL, ("spx_usr_abort: ipxp == NULL"));
1366
1367	cb = ipxtospxpcb(ipxp);
1368	KASSERT(cb != NULL, ("spx_usr_abort: cb == NULL"));
1369
1370	IPX_LIST_LOCK();
1371	IPX_LOCK(ipxp);
1372	spx_drop(cb, ECONNABORTED);
1373	IPX_UNLOCK(ipxp);
1374	IPX_LIST_UNLOCK();
1375}
1376
1377/*
1378 * Accept a connection.  Essentially all the work is done at higher levels;
1379 * just return the address of the peer, storing through addr.
1380 */
1381static int
1382spx_accept(struct socket *so, struct sockaddr **nam)
1383{
1384	struct ipxpcb *ipxp;
1385	struct sockaddr_ipx *sipx, ssipx;
1386
1387	ipxp = sotoipxpcb(so);
1388	KASSERT(ipxp != NULL, ("spx_accept: ipxp == NULL"));
1389
1390	sipx = &ssipx;
1391	bzero(sipx, sizeof *sipx);
1392	sipx->sipx_len = sizeof *sipx;
1393	sipx->sipx_family = AF_IPX;
1394	IPX_LOCK(ipxp);
1395	sipx->sipx_addr = ipxp->ipxp_faddr;
1396	IPX_UNLOCK(ipxp);
1397	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1398	return (0);
1399}
1400
1401static int
1402spx_attach(struct socket *so, int proto, struct thread *td)
1403{
1404	struct ipxpcb *ipxp;
1405	struct spxpcb *cb;
1406	struct mbuf *mm;
1407	struct sockbuf *sb;
1408	int error;
1409
1410	ipxp = sotoipxpcb(so);
1411	KASSERT(ipxp == NULL, ("spx_attach: ipxp != NULL"));
1412
1413	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1414		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1415		if (error)
1416			return (error);
1417	}
1418
1419	cb = malloc(sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1420	if (cb == NULL)
1421		return (ENOBUFS);
1422	mm = m_getclr(M_DONTWAIT, MT_DATA);
1423	if (mm == NULL) {
1424		free(cb, M_PCB);
1425		return (ENOBUFS);
1426	}
1427
1428	IPX_LIST_LOCK();
1429	error = ipx_pcballoc(so, &ipxpcb_list, td);
1430	if (error) {
1431		IPX_LIST_UNLOCK();
1432		m_free(mm);
1433		free(cb, M_PCB);
1434		return (error);
1435	}
1436	ipxp = sotoipxpcb(so);
1437	ipxp->ipxp_flags |= IPXP_SPX;
1438
1439	cb->s_ipx = mtod(mm, struct ipx *);
1440	cb->s_state = TCPS_LISTEN;
1441	cb->s_smax = -1;
1442	cb->s_swl1 = -1;
1443	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1444	cb->s_ipxpcb = ipxp;
1445	cb->s_mtu = 576 - sizeof(struct spx);
1446	sb = &so->so_snd;
1447	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1448	cb->s_ssthresh = cb->s_cwnd;
1449	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1450
1451	/*
1452	 * Above is recomputed when connecting to account for changed
1453	 * buffering or mtu's.
1454	 */
1455	cb->s_rtt = SPXTV_SRTTBASE;
1456	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1457	SPXT_RANGESET(cb->s_rxtcur,
1458	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1459	    SPXTV_MIN, SPXTV_REXMTMAX);
1460	ipxp->ipxp_pcb = (caddr_t)cb;
1461	IPX_LIST_UNLOCK();
1462	return (0);
1463}
1464
1465static void
1466spx_pcbdetach(struct ipxpcb *ipxp)
1467{
1468	struct spxpcb *cb;
1469	struct spx_q *s;
1470	struct mbuf *m;
1471
1472	IPX_LOCK_ASSERT(ipxp);
1473
1474	cb = ipxtospxpcb(ipxp);
1475	KASSERT(cb != NULL, ("spx_pcbdetach: cb == NULL"));
1476
1477	s = cb->s_q.si_next;
1478	while (s != &(cb->s_q)) {
1479		s = s->si_next;
1480		spx_remque(s);
1481		m = dtom(s);
1482		m_freem(m);
1483	}
1484	m_free(dtom(cb->s_ipx));
1485	free(cb, M_PCB);
1486	ipxp->ipxp_pcb = NULL;
1487}
1488
1489static int
1490spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1491{
1492	struct ipxpcb *ipxp;
1493	int error;
1494
1495	ipxp = sotoipxpcb(so);
1496	KASSERT(ipxp != NULL, ("spx_bind: ipxp == NULL"));
1497
1498	IPX_LIST_LOCK();
1499	IPX_LOCK(ipxp);
1500	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1501		error = EINVAL;
1502		goto out;
1503	}
1504	error = ipx_pcbbind(ipxp, nam, td);
1505out:
1506	IPX_UNLOCK(ipxp);
1507	IPX_LIST_UNLOCK();
1508	return (error);
1509}
1510
1511static void
1512spx_usr_close(struct socket *so)
1513{
1514	struct ipxpcb *ipxp;
1515	struct spxpcb *cb;
1516
1517	ipxp = sotoipxpcb(so);
1518	KASSERT(ipxp != NULL, ("spx_usr_close: ipxp == NULL"));
1519
1520	cb = ipxtospxpcb(ipxp);
1521	KASSERT(cb != NULL, ("spx_usr_close: cb == NULL"));
1522
1523	IPX_LIST_LOCK();
1524	IPX_LOCK(ipxp);
1525	if (cb->s_state > TCPS_LISTEN)
1526		spx_disconnect(cb);
1527	else
1528		spx_close(cb);
1529	IPX_UNLOCK(ipxp);
1530	IPX_LIST_UNLOCK();
1531}
1532
1533/*
1534 * Initiate connection to peer.  Enter SYN_SENT state, and mark socket as
1535 * connecting.  Start keep-alive timer, setup prototype header, send initial
1536 * system packet requesting connection.
1537 */
1538static int
1539spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1540{
1541	struct ipxpcb *ipxp;
1542	struct spxpcb *cb;
1543	int error;
1544
1545	ipxp = sotoipxpcb(so);
1546	KASSERT(ipxp != NULL, ("spx_connect: ipxp == NULL"));
1547
1548	cb = ipxtospxpcb(ipxp);
1549	KASSERT(cb != NULL, ("spx_connect: cb == NULL"));
1550
1551	IPX_LIST_LOCK();
1552	IPX_LOCK(ipxp);
1553	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1554		error = EINVAL;
1555		goto spx_connect_end;
1556	}
1557	if (ipxp->ipxp_lport == 0) {
1558		error = ipx_pcbbind(ipxp, NULL, td);
1559		if (error)
1560			goto spx_connect_end;
1561	}
1562	error = ipx_pcbconnect(ipxp, nam, td);
1563	if (error)
1564		goto spx_connect_end;
1565	soisconnecting(so);
1566	spxstat.spxs_connattempt++;
1567	cb->s_state = TCPS_SYN_SENT;
1568	cb->s_did = 0;
1569	spx_template(cb);
1570	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1571	cb->s_force = 1 + SPXTV_KEEP;
1572
1573	/*
1574	 * Other party is required to respond to the port I send from, but he
1575	 * is not required to answer from where I am sending to, so allow
1576	 * wildcarding.  Original port I am sending to is still saved in
1577	 * cb->s_dport.
1578	 */
1579	ipxp->ipxp_fport = 0;
1580	error = spx_output(cb, NULL);
1581spx_connect_end:
1582	IPX_UNLOCK(ipxp);
1583	IPX_LIST_UNLOCK();
1584	return (error);
1585}
1586
1587static void
1588spx_detach(struct socket *so)
1589{
1590	struct ipxpcb *ipxp;
1591	struct spxpcb *cb;
1592
1593	/*
1594	 * XXXRW: Should assert appropriately detached.
1595	 */
1596	ipxp = sotoipxpcb(so);
1597	KASSERT(ipxp != NULL, ("spx_detach: ipxp == NULL"));
1598
1599	cb = ipxtospxpcb(ipxp);
1600	KASSERT(cb != NULL, ("spx_detach: cb == NULL"));
1601
1602	IPX_LIST_LOCK();
1603	IPX_LOCK(ipxp);
1604	spx_pcbdetach(ipxp);
1605	ipx_pcbfree(ipxp);
1606	IPX_LIST_UNLOCK();
1607}
1608
1609/*
1610 * We may decide later to implement connection closing handshaking at the spx
1611 * level optionally.  Here is the hook to do it:
1612 */
1613static int
1614spx_usr_disconnect(struct socket *so)
1615{
1616	struct ipxpcb *ipxp;
1617	struct spxpcb *cb;
1618	int error;
1619
1620	ipxp = sotoipxpcb(so);
1621	KASSERT(ipxp != NULL, ("spx_usr_disconnect: ipxp == NULL"));
1622
1623	cb = ipxtospxpcb(ipxp);
1624	KASSERT(cb != NULL, ("spx_usr_disconnect: cb == NULL"));
1625
1626	IPX_LIST_LOCK();
1627	IPX_LOCK(ipxp);
1628	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1629		error = EINVAL;
1630		goto out;
1631	}
1632	spx_disconnect(cb);
1633	error = 0;
1634out:
1635	IPX_UNLOCK(ipxp);
1636	IPX_LIST_UNLOCK();
1637	return (error);
1638}
1639
1640static int
1641spx_listen(struct socket *so, int backlog, struct thread *td)
1642{
1643	int error;
1644	struct ipxpcb *ipxp;
1645	struct spxpcb *cb;
1646
1647	error = 0;
1648	ipxp = sotoipxpcb(so);
1649	KASSERT(ipxp != NULL, ("spx_listen: ipxp == NULL"));
1650
1651	cb = ipxtospxpcb(ipxp);
1652	KASSERT(cb != NULL, ("spx_listen: cb == NULL"));
1653
1654	IPX_LIST_LOCK();
1655	IPX_LOCK(ipxp);
1656	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1657		error = EINVAL;
1658		goto out;
1659	}
1660	SOCK_LOCK(so);
1661	error = solisten_proto_check(so);
1662	if (error == 0 && ipxp->ipxp_lport == 0)
1663		error = ipx_pcbbind(ipxp, NULL, td);
1664	if (error == 0) {
1665		cb->s_state = TCPS_LISTEN;
1666		solisten_proto(so, backlog);
1667	}
1668	SOCK_UNLOCK(so);
1669out:
1670	IPX_UNLOCK(ipxp);
1671	IPX_LIST_UNLOCK();
1672	return (error);
1673}
1674
1675/*
1676 * After a receive, possibly send acknowledgment updating allocation.
1677 */
1678static int
1679spx_rcvd(struct socket *so, int flags)
1680{
1681	struct ipxpcb *ipxp;
1682	struct spxpcb *cb;
1683	int error;
1684
1685	ipxp = sotoipxpcb(so);
1686	KASSERT(ipxp != NULL, ("spx_rcvd: ipxp == NULL"));
1687
1688	cb = ipxtospxpcb(ipxp);
1689	KASSERT(cb != NULL, ("spx_rcvd: cb == NULL"));
1690
1691	IPX_LOCK(ipxp);
1692	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1693		error = EINVAL;
1694		goto out;
1695	}
1696	cb->s_flags |= SF_RVD;
1697	spx_output(cb, NULL);
1698	cb->s_flags &= ~SF_RVD;
1699	error = 0;
1700out:
1701	IPX_UNLOCK(ipxp);
1702	return (error);
1703}
1704
1705static int
1706spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1707{
1708	struct ipxpcb *ipxp;
1709	struct spxpcb *cb;
1710	int error;
1711
1712	ipxp = sotoipxpcb(so);
1713	KASSERT(ipxp != NULL, ("spx_rcvoob: ipxp == NULL"));
1714
1715	cb = ipxtospxpcb(ipxp);
1716	KASSERT(cb != NULL, ("spx_rcvoob: cb == NULL"));
1717
1718	IPX_LOCK(ipxp);
1719	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1720		error = EINVAL;
1721		goto out;
1722	}
1723	SOCKBUF_LOCK(&so->so_rcv);
1724	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1725	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1726		SOCKBUF_UNLOCK(&so->so_rcv);
1727		m->m_len = 1;
1728		*mtod(m, caddr_t) = cb->s_iobc;
1729		error = 0;
1730		goto out;
1731	}
1732	SOCKBUF_UNLOCK(&so->so_rcv);
1733	error = EINVAL;
1734out:
1735	IPX_UNLOCK(ipxp);
1736	return (error);
1737}
1738
1739static int
1740spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1741    struct mbuf *controlp, struct thread *td)
1742{
1743	struct ipxpcb *ipxp;
1744	struct spxpcb *cb;
1745	int error;
1746
1747	ipxp = sotoipxpcb(so);
1748	KASSERT(ipxp != NULL, ("spx_send: ipxp == NULL"));
1749
1750	cb = ipxtospxpcb(ipxp);
1751	KASSERT(cb != NULL, ("spx_send: cb == NULL"));
1752
1753	error = 0;
1754	IPX_LOCK(ipxp);
1755	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1756		error = ECONNRESET;
1757		goto spx_send_end;
1758	}
1759	if (flags & PRUS_OOB) {
1760		if (sbspace(&so->so_snd) < -512) {
1761			error = ENOBUFS;
1762			goto spx_send_end;
1763		}
1764		cb->s_oobflags |= SF_SOOB;
1765	}
1766	if (controlp != NULL) {
1767		u_short *p = mtod(controlp, u_short *);
1768		spx_newchecks[2]++;
1769		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1770			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1771			spx_newchecks[3]++;
1772		}
1773		m_freem(controlp);
1774	}
1775	controlp = NULL;
1776	error = spx_output(cb, m);
1777	m = NULL;
1778spx_send_end:
1779	IPX_UNLOCK(ipxp);
1780	if (controlp != NULL)
1781		m_freem(controlp);
1782	if (m != NULL)
1783		m_freem(m);
1784	return (error);
1785}
1786
1787static int
1788spx_shutdown(struct socket *so)
1789{
1790	struct ipxpcb *ipxp;
1791	struct spxpcb *cb;
1792	int error;
1793
1794	ipxp = sotoipxpcb(so);
1795	KASSERT(ipxp != NULL, ("spx_shutdown: ipxp == NULL"));
1796
1797	cb = ipxtospxpcb(ipxp);
1798	KASSERT(cb != NULL, ("spx_shutdown: cb == NULL"));
1799
1800	socantsendmore(so);
1801	IPX_LIST_LOCK();
1802	IPX_LOCK(ipxp);
1803	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1804		error = EINVAL;
1805		goto out;
1806	}
1807	spx_usrclosed(cb);
1808	error = 0;
1809out:
1810	IPX_UNLOCK(ipxp);
1811	IPX_LIST_UNLOCK();
1812	return (error);
1813}
1814
1815static int
1816spx_sp_attach(struct socket *so, int proto, struct thread *td)
1817{
1818	struct ipxpcb *ipxp;
1819	struct spxpcb *cb;
1820	int error;
1821
1822	KASSERT(so->so_pcb == NULL, ("spx_sp_attach: so_pcb != NULL"));
1823
1824	error = spx_attach(so, proto, td);
1825	if (error)
1826		return (error);
1827
1828	ipxp = sotoipxpcb(so);
1829	KASSERT(ipxp != NULL, ("spx_sp_attach: ipxp == NULL"));
1830
1831	cb = ipxtospxpcb(ipxp);
1832	KASSERT(cb != NULL, ("spx_sp_attach: cb == NULL"));
1833
1834	IPX_LOCK(ipxp);
1835	cb->s_flags |= (SF_HI | SF_HO | SF_PI);
1836	IPX_UNLOCK(ipxp);
1837	return (0);
1838}
1839
1840/*
1841 * Create template to be used to send spx packets on a connection.  Called
1842 * after host entry created, fills in a skeletal spx header (choosing
1843 * connection id), minimizing the amount of work necessary when the
1844 * connection is used.
1845 */
1846static void
1847spx_template(struct spxpcb *cb)
1848{
1849	struct ipxpcb *ipxp = cb->s_ipxpcb;
1850	struct ipx *ipx = cb->s_ipx;
1851	struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1852
1853	IPX_LOCK_ASSERT(ipxp);
1854
1855	ipx->ipx_pt = IPXPROTO_SPX;
1856	ipx->ipx_sna = ipxp->ipxp_laddr;
1857	ipx->ipx_dna = ipxp->ipxp_faddr;
1858	SPX_LOCK();
1859	cb->s_sid = htons(spx_iss);
1860	spx_iss += SPX_ISSINCR/2;
1861	SPX_UNLOCK();
1862	cb->s_alo = 1;
1863	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1864
1865	/*
1866	 * Try to expand fast to full complement of large packets.
1867	 */
1868	cb->s_ssthresh = cb->s_cwnd;
1869	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1870
1871	/*
1872	 * But allow for lots of little packets as well.
1873	 */
1874	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1875}
1876
1877/*
1878 * Close a SPIP control block.  Wake up any sleepers.  We used to free any
1879 * queued packets and cb->s_ipx here, but now we defer that until the pcb is
1880 * discarded.
1881 */
1882void
1883spx_close(struct spxpcb *cb)
1884{
1885	struct ipxpcb *ipxp = cb->s_ipxpcb;
1886	struct socket *so = ipxp->ipxp_socket;
1887
1888	KASSERT(ipxp != NULL, ("spx_close: ipxp == NULL"));
1889	IPX_LIST_LOCK_ASSERT();
1890	IPX_LOCK_ASSERT(ipxp);
1891
1892	ipxp->ipxp_flags |= IPXP_DROPPED;
1893	soisdisconnected(so);
1894	spxstat.spxs_closed++;
1895}
1896
1897/*
1898 * Someday we may do level 3 handshaking to close a connection or send a
1899 * xerox style error.  For now, just close.  cb will always be invalid after
1900 * this call.
1901 */
1902static void
1903spx_usrclosed(struct spxpcb *cb)
1904{
1905
1906	IPX_LIST_LOCK_ASSERT();
1907	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1908
1909	spx_close(cb);
1910}
1911
1912/*
1913 * cb will always be invalid after this call.
1914 */
1915static void
1916spx_disconnect(struct spxpcb *cb)
1917{
1918
1919	IPX_LIST_LOCK_ASSERT();
1920	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1921
1922	spx_close(cb);
1923}
1924
1925/*
1926 * Drop connection, reporting the specified error.  cb will always be invalid
1927 * after this call.
1928 */
1929static void
1930spx_drop(struct spxpcb *cb, int errno)
1931{
1932	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1933
1934	IPX_LIST_LOCK_ASSERT();
1935	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1936
1937	/*
1938	 * Someday, in the xerox world we will generate error protocol
1939	 * packets announcing that the socket has gone away.
1940	 */
1941	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1942		spxstat.spxs_drops++;
1943		cb->s_state = TCPS_CLOSED;
1944		/*tcp_output(cb);*/
1945	} else
1946		spxstat.spxs_conndrops++;
1947	so->so_error = errno;
1948	spx_close(cb);
1949}
1950
1951/*
1952 * Fast timeout routine for processing delayed acks.
1953 */
1954void
1955spx_fasttimo(void)
1956{
1957	struct ipxpcb *ipxp;
1958	struct spxpcb *cb;
1959
1960	IPX_LIST_LOCK();
1961	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1962		IPX_LOCK(ipxp);
1963		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1964		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1965			IPX_UNLOCK(ipxp);
1966			continue;
1967		}
1968		cb = ipxtospxpcb(ipxp);
1969		if (cb->s_flags & SF_DELACK) {
1970			cb->s_flags &= ~SF_DELACK;
1971			cb->s_flags |= SF_ACKNOW;
1972			spxstat.spxs_delack++;
1973			spx_output(cb, NULL);
1974		}
1975		IPX_UNLOCK(ipxp);
1976	}
1977	IPX_LIST_UNLOCK();
1978}
1979
1980/*
1981 * spx protocol timeout routine called every 500 ms.  Updates the timers in
1982 * all active pcb's and causes finite state machine actions if timers expire.
1983 */
1984void
1985spx_slowtimo(void)
1986{
1987	struct ipxpcb *ipxp;
1988	struct spxpcb *cb;
1989	int i;
1990
1991	/*
1992	 * Search through tcb's and update active timers.  Once, timers could
1993	 * free ipxp's, but now we do that only when detaching a socket.
1994	 */
1995	IPX_LIST_LOCK();
1996	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1997		IPX_LOCK(ipxp);
1998		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1999		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
2000			IPX_UNLOCK(ipxp);
2001			continue;
2002		}
2003
2004		cb = (struct spxpcb *)ipxp->ipxp_pcb;
2005		KASSERT(cb != NULL, ("spx_slowtimo: cb == NULL"));
2006		for (i = 0; i < SPXT_NTIMERS; i++) {
2007			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
2008				spx_timers(cb, i);
2009				if (ipxp->ipxp_flags & IPXP_DROPPED)
2010					break;
2011			}
2012		}
2013		if (!(ipxp->ipxp_flags & IPXP_DROPPED)) {
2014			cb->s_idle++;
2015			if (cb->s_rtt)
2016				cb->s_rtt++;
2017		}
2018		IPX_UNLOCK(ipxp);
2019	}
2020	IPX_LIST_UNLOCK();
2021	SPX_LOCK();
2022	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
2023	SPX_UNLOCK();
2024}
2025
2026/*
2027 * SPX timer processing.
2028 */
2029static void
2030spx_timers(struct spxpcb *cb, int timer)
2031{
2032	long rexmt;
2033	int win;
2034
2035	IPX_LIST_LOCK_ASSERT();
2036	IPX_LOCK_ASSERT(cb->s_ipxpcb);
2037
2038	cb->s_force = 1 + timer;
2039	switch (timer) {
2040	case SPXT_2MSL:
2041		/*
2042		 * 2 MSL timeout in shutdown went off.  TCP deletes
2043		 * connection control block.
2044		 */
2045		printf("spx: SPXT_2MSL went off for no reason\n");
2046		cb->s_timer[timer] = 0;
2047		break;
2048
2049	case SPXT_REXMT:
2050		/*
2051		 * Retransmission timer went off.  Message has not been acked
2052		 * within retransmit interval.  Back off to a longer
2053		 * retransmit interval and retransmit one packet.
2054		 */
2055		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
2056			cb->s_rxtshift = SPX_MAXRXTSHIFT;
2057			spxstat.spxs_timeoutdrop++;
2058			spx_drop(cb, ETIMEDOUT);
2059			break;
2060		}
2061		spxstat.spxs_rexmttimeo++;
2062		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
2063		rexmt *= spx_backoff[cb->s_rxtshift];
2064		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
2065		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
2066
2067		/*
2068		 * If we have backed off fairly far, our srtt estimate is
2069		 * probably bogus.  Clobber it so we'll take the next rtt
2070		 * measurement as our srtt; move the current srtt into rttvar
2071		 * to keep the current retransmit times until then.
2072		 */
2073		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
2074			cb->s_rttvar += (cb->s_srtt >> 2);
2075			cb->s_srtt = 0;
2076		}
2077		cb->s_snxt = cb->s_rack;
2078
2079		/*
2080		 * If timing a packet, stop the timer.
2081		 */
2082		cb->s_rtt = 0;
2083
2084		/*
2085		 * See very long discussion in tcp_timer.c about congestion
2086		 * window and sstrhesh.
2087		 */
2088		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
2089		if (win < 2)
2090			win = 2;
2091		cb->s_cwnd = CUNIT;
2092		cb->s_ssthresh = win * CUNIT;
2093		spx_output(cb, NULL);
2094		break;
2095
2096	case SPXT_PERSIST:
2097		/*
2098		 * Persistance timer into zero window.  Force a probe to be
2099		 * sent.
2100		 */
2101		spxstat.spxs_persisttimeo++;
2102		spx_setpersist(cb);
2103		spx_output(cb, NULL);
2104		break;
2105
2106	case SPXT_KEEP:
2107		/*
2108		 * Keep-alive timer went off; send something or drop
2109		 * connection if idle for too long.
2110		 */
2111		spxstat.spxs_keeptimeo++;
2112		if (cb->s_state < TCPS_ESTABLISHED)
2113			goto dropit;
2114		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
2115		    	if (cb->s_idle >= SPXTV_MAXIDLE)
2116				goto dropit;
2117			spxstat.spxs_keepprobe++;
2118			spx_output(cb, NULL);
2119		} else
2120			cb->s_idle = 0;
2121		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
2122		break;
2123
2124	dropit:
2125		spxstat.spxs_keepdrops++;
2126		spx_drop(cb, ETIMEDOUT);
2127		break;
2128
2129	default:
2130		panic("spx_timers: unknown timer %d", timer);
2131	}
2132}
2133