spx_usrreq.c revision 192748
1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2006 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * Copyright (c) 1995, Mike Mitchell
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)spx_usrreq.h
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 192748 2009-05-25 09:57:18Z rwatson $");
67
68#include <sys/param.h>
69#include <sys/lock.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/protosw.h>
75#include <sys/signalvar.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/sx.h>
79#include <sys/systm.h>
80
81#include <net/route.h>
82#include <netinet/tcp_fsm.h>
83
84#include <netipx/ipx.h>
85#include <netipx/ipx_pcb.h>
86#include <netipx/ipx_var.h>
87#include <netipx/spx.h>
88#include <netipx/spx_debug.h>
89#include <netipx/spx_timer.h>
90#include <netipx/spx_var.h>
91
92/*
93 * SPX protocol implementation.
94 */
95static struct	mtx spx_mtx;			/* Protects only spx_iss. */
96static u_short 	spx_iss;
97u_short		spx_newchecks[50];
98static int	spx_hardnosed;
99static int	traceallspxs = 0;
100struct	spx_istat spx_istat;
101
102#define	SPX_LOCK_INIT()	mtx_init(&spx_mtx, "spx_mtx", NULL, MTX_DEF)
103#define	SPX_LOCK()	mtx_lock(&spx_mtx)
104#define	SPX_UNLOCK()	mtx_unlock(&spx_mtx)
105
106static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
107    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
108
/* Internal connection-management helpers. */
static void	spx_close(struct spxpcb *cb);
static void	spx_disconnect(struct spxpcb *cb);
static void	spx_drop(struct spxpcb *cb, int errno);
static void	spx_pcbdetach(struct ipxpcb *ipxp);
static void	spx_setpersist(struct spxpcb *cb);
static void	spx_template(struct spxpcb *cb);
static void	spx_timers(struct spxpcb *cb, int timer);
static void	spx_usrclosed(struct spxpcb *cb);

/* Socket-layer (pr_usrreqs) entry points. */
static void	spx_usr_abort(struct socket *so);
static int	spx_accept(struct socket *so, struct sockaddr **nam);
static int	spx_attach(struct socket *so, int proto, struct thread *td);
static int	spx_bind(struct socket *so, struct sockaddr *nam,
		    struct thread *td);
static void	spx_usr_close(struct socket *so);
static int	spx_connect(struct socket *so, struct sockaddr *nam,
		    struct thread *td);
static void	spx_detach(struct socket *so);
static int	spx_usr_disconnect(struct socket *so);
static int	spx_listen(struct socket *so, int backlog, struct thread *td);
static int	spx_rcvd(struct socket *so, int flags);
static int	spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static int	spx_send(struct socket *so, int flags, struct mbuf *m,
		    struct sockaddr *addr, struct mbuf *control,
		    struct thread *td);
static int	spx_shutdown(struct socket *so);
static int	spx_sp_attach(struct socket *so, int proto, struct thread *td);
135
136struct	pr_usrreqs spx_usrreqs = {
137	.pru_abort =		spx_usr_abort,
138	.pru_accept =		spx_accept,
139	.pru_attach =		spx_attach,
140	.pru_bind =		spx_bind,
141	.pru_connect =		spx_connect,
142	.pru_control =		ipx_control,
143	.pru_detach =		spx_detach,
144	.pru_disconnect =	spx_usr_disconnect,
145	.pru_listen =		spx_listen,
146	.pru_peeraddr =		ipx_peeraddr,
147	.pru_rcvd =		spx_rcvd,
148	.pru_rcvoob =		spx_rcvoob,
149	.pru_send =		spx_send,
150	.pru_shutdown =		spx_shutdown,
151	.pru_sockaddr =		ipx_sockaddr,
152	.pru_close =		spx_usr_close,
153};
154
155struct	pr_usrreqs spx_usrreq_sps = {
156	.pru_abort =		spx_usr_abort,
157	.pru_accept =		spx_accept,
158	.pru_attach =		spx_sp_attach,
159	.pru_bind =		spx_bind,
160	.pru_connect =		spx_connect,
161	.pru_control =		ipx_control,
162	.pru_detach =		spx_detach,
163	.pru_disconnect =	spx_usr_disconnect,
164	.pru_listen =		spx_listen,
165	.pru_peeraddr =		ipx_peeraddr,
166	.pru_rcvd =		spx_rcvd,
167	.pru_rcvoob =		spx_rcvoob,
168	.pru_send =		spx_send,
169	.pru_shutdown =		spx_shutdown,
170	.pru_sockaddr =		ipx_sockaddr,
171	.pru_close =		spx_usr_close,
172};
173
174void
175spx_init(void)
176{
177
178	SPX_LOCK_INIT();
179	spx_iss = 1; /* WRONG !! should fish it out of TODR */
180}
181
182void
183spx_input(struct mbuf *m, struct ipxpcb *ipxp)
184{
185	struct spxpcb *cb;
186	struct spx *si = mtod(m, struct spx *);
187	struct socket *so;
188	struct spx spx_savesi;
189	int dropsocket = 0;
190	short ostate = 0;
191
192	spxstat.spxs_rcvtotal++;
193	KASSERT(ipxp != NULL, ("spx_input: ipxpcb == NULL"));
194
195	/*
196	 * spx_input() assumes that the caller will hold both the pcb list
197	 * lock and also the ipxp lock.  spx_input() will release both before
198	 * returning, and may in fact trade in the ipxp lock for another pcb
199	 * lock following sonewconn().
200	 */
201	IPX_LIST_LOCK_ASSERT();
202	IPX_LOCK_ASSERT(ipxp);
203
204	cb = ipxtospxpcb(ipxp);
205	KASSERT(cb != NULL, ("spx_input: cb == NULL"));
206
207	if (ipxp->ipxp_flags & IPXP_DROPPED)
208		goto drop;
209
210	if (m->m_len < sizeof(*si)) {
211		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
212			IPX_UNLOCK(ipxp);
213			IPX_LIST_UNLOCK();
214			spxstat.spxs_rcvshort++;
215			return;
216		}
217		si = mtod(m, struct spx *);
218	}
219	si->si_seq = ntohs(si->si_seq);
220	si->si_ack = ntohs(si->si_ack);
221	si->si_alo = ntohs(si->si_alo);
222
223	so = ipxp->ipxp_socket;
224	KASSERT(so != NULL, ("spx_input: so == NULL"));
225
226	if (so->so_options & SO_DEBUG || traceallspxs) {
227		ostate = cb->s_state;
228		spx_savesi = *si;
229	}
230	if (so->so_options & SO_ACCEPTCONN) {
231		struct spxpcb *ocb = cb;
232
233		so = sonewconn(so, 0);
234		if (so == NULL)
235			goto drop;
236
237		/*
238		 * This is ugly, but ....
239		 *
240		 * Mark socket as temporary until we're committed to keeping
241		 * it.  The code at ``drop'' and ``dropwithreset'' check the
242		 * flag dropsocket to see if the temporary socket created
243		 * here should be discarded.  We mark the socket as
244		 * discardable until we're committed to it below in
245		 * TCPS_LISTEN.
246		 *
247		 * XXXRW: In the new world order of real kernel parallelism,
248		 * temporarily allocating the socket when we're "not sure"
249		 * seems like a bad idea, as we might race to remove it if
250		 * the listen socket is closed...?
251		 *
252		 * We drop the lock of the listen socket ipxp, and acquire
253		 * the lock of the new socket ippx.
254		 */
255		dropsocket++;
256		IPX_UNLOCK(ipxp);
257		ipxp = (struct ipxpcb *)so->so_pcb;
258		IPX_LOCK(ipxp);
259		ipxp->ipxp_laddr = si->si_dna;
260		cb = ipxtospxpcb(ipxp);
261		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
262		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
263		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
264		cb->s_state = TCPS_LISTEN;
265	}
266	IPX_LOCK_ASSERT(ipxp);
267
268	/*
269	 * Packet received on connection.  Reset idle time and keep-alive
270	 * timer.
271	 */
272	cb->s_idle = 0;
273	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
274
275	switch (cb->s_state) {
276	case TCPS_LISTEN:{
277		struct sockaddr_ipx *sipx, ssipx;
278		struct ipx_addr laddr;
279
280		/*
281		 * If somebody here was carying on a conversation and went
282		 * away, and his pen pal thinks he can still talk, we get the
283		 * misdirected packet.
284		 */
285		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
286			spx_istat.gonawy++;
287			goto dropwithreset;
288		}
289		sipx = &ssipx;
290		bzero(sipx, sizeof *sipx);
291		sipx->sipx_len = sizeof(*sipx);
292		sipx->sipx_family = AF_IPX;
293		sipx->sipx_addr = si->si_sna;
294		laddr = ipxp->ipxp_laddr;
295		if (ipx_nullhost(laddr))
296			ipxp->ipxp_laddr = si->si_dna;
297		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
298			ipxp->ipxp_laddr = laddr;
299			spx_istat.noconn++;
300			goto drop;
301		}
302		spx_template(cb);
303		dropsocket = 0;		/* committed to socket */
304		cb->s_did = si->si_sid;
305		cb->s_rack = si->si_ack;
306		cb->s_ralo = si->si_alo;
307#define THREEWAYSHAKE
308#ifdef THREEWAYSHAKE
309		cb->s_state = TCPS_SYN_RECEIVED;
310		cb->s_force = 1 + SPXT_KEEP;
311		spxstat.spxs_accepts++;
312		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
313		}
314		break;
315
316	 case TCPS_SYN_RECEIVED: {
317		/*
318		 * This state means that we have heard a response to our
319		 * acceptance of their connection.  It is probably logically
320		 * unnecessary in this implementation.
321		 */
322		if (si->si_did != cb->s_sid) {
323			spx_istat.wrncon++;
324			goto drop;
325		}
326#endif
327		ipxp->ipxp_fport =  si->si_sport;
328		cb->s_timer[SPXT_REXMT] = 0;
329		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
330		soisconnected(so);
331		cb->s_state = TCPS_ESTABLISHED;
332		spxstat.spxs_accepts++;
333		}
334		break;
335
336	case TCPS_SYN_SENT:
337		/*
338		 * This state means that we have gotten a response to our
339		 * attempt to establish a connection.  We fill in the data
340		 * from the other side, telling us which port to respond to,
341		 * instead of the well-known one we might have sent to in the
342		 * first place.  We also require that this is a response to
343		 * our connection id.
344		 */
345		if (si->si_did != cb->s_sid) {
346			spx_istat.notme++;
347			goto drop;
348		}
349		spxstat.spxs_connects++;
350		cb->s_did = si->si_sid;
351		cb->s_rack = si->si_ack;
352		cb->s_ralo = si->si_alo;
353		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
354		cb->s_timer[SPXT_REXMT] = 0;
355		cb->s_flags |= SF_ACKNOW;
356		soisconnected(so);
357		cb->s_state = TCPS_ESTABLISHED;
358
359		/*
360		 * Use roundtrip time of connection request for initial rtt.
361		 */
362		if (cb->s_rtt) {
363			cb->s_srtt = cb->s_rtt << 3;
364			cb->s_rttvar = cb->s_rtt << 1;
365			SPXT_RANGESET(cb->s_rxtcur,
366			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
367			    SPXTV_MIN, SPXTV_REXMTMAX);
368			    cb->s_rtt = 0;
369		}
370	}
371
372	if (so->so_options & SO_DEBUG || traceallspxs)
373		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
374
375	m->m_len -= sizeof(struct ipx);
376	m->m_pkthdr.len -= sizeof(struct ipx);
377	m->m_data += sizeof(struct ipx);
378
379	if (spx_reass(cb, si))
380		m_freem(m);
381	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
382		spx_output(cb, NULL);
383	cb->s_flags &= ~(SF_WIN|SF_RXT);
384	IPX_UNLOCK(ipxp);
385	IPX_LIST_UNLOCK();
386	return;
387
388dropwithreset:
389	IPX_LOCK_ASSERT(ipxp);
390	if (cb == NULL || (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
391	    traceallspxs))
392		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
393	IPX_UNLOCK(ipxp);
394	if (dropsocket) {
395		struct socket *head;
396		ACCEPT_LOCK();
397		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
398		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
399		head = so->so_head;
400		TAILQ_REMOVE(&head->so_incomp, so, so_list);
401		head->so_incqlen--;
402		so->so_qstate &= ~SQ_INCOMP;
403		so->so_head = NULL;
404		ACCEPT_UNLOCK();
405		soabort(so);
406	}
407	IPX_LIST_UNLOCK();
408	m_freem(m);
409	return;
410
411drop:
412	IPX_LOCK_ASSERT(ipxp);
413	if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
414		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
415	IPX_UNLOCK(ipxp);
416	IPX_LIST_UNLOCK();
417	m_freem(m);
418}
419
420int
421spx_output(struct spxpcb *cb, struct mbuf *m0)
422{
423	struct socket *so = cb->s_ipxpcb->ipxp_socket;
424	struct mbuf *m;
425	struct spx *si = NULL;
426	struct sockbuf *sb = &so->so_snd;
427	int len = 0, win, rcv_win;
428	short span, off, recordp = 0;
429	u_short alo;
430	int error = 0, sendalot;
431#ifdef notdef
432	int idle;
433#endif
434	struct mbuf *mprev;
435
436	IPX_LOCK_ASSERT(cb->s_ipxpcb);
437
438	if (m0 != NULL) {
439		int mtu = cb->s_mtu;
440		int datalen;
441
442		/*
443		 * Make sure that packet isn't too big.
444		 */
445		for (m = m0; m != NULL; m = m->m_next) {
446			mprev = m;
447			len += m->m_len;
448			if (m->m_flags & M_EOR)
449				recordp = 1;
450		}
451		datalen = (cb->s_flags & SF_HO) ?
452				len - sizeof(struct spxhdr) : len;
453		if (datalen > mtu) {
454			if (cb->s_flags & SF_PI) {
455				m_freem(m0);
456				return (EMSGSIZE);
457			} else {
458				int oldEM = cb->s_cc & SPX_EM;
459
460				cb->s_cc &= ~SPX_EM;
461				while (len > mtu) {
462					m = m_copym(m0, 0, mtu, M_DONTWAIT);
463					if (m == NULL) {
464					    cb->s_cc |= oldEM;
465					    m_freem(m0);
466					    return (ENOBUFS);
467					}
468					if (cb->s_flags & SF_NEWCALL) {
469					    struct mbuf *mm = m;
470					    spx_newchecks[7]++;
471					    while (mm != NULL) {
472						mm->m_flags &= ~M_EOR;
473						mm = mm->m_next;
474					    }
475					}
476					error = spx_output(cb, m);
477					if (error) {
478						cb->s_cc |= oldEM;
479						m_freem(m0);
480						return (error);
481					}
482					m_adj(m0, mtu);
483					len -= mtu;
484				}
485				cb->s_cc |= oldEM;
486			}
487		}
488
489		/*
490		 * Force length even, by adding a "garbage byte" if
491		 * necessary.
492		 */
493		if (len & 1) {
494			m = mprev;
495			if (M_TRAILINGSPACE(m) >= 1)
496				m->m_len++;
497			else {
498				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
499
500				if (m1 == NULL) {
501					m_freem(m0);
502					return (ENOBUFS);
503				}
504				m1->m_len = 1;
505				*(mtod(m1, u_char *)) = 0;
506				m->m_next = m1;
507			}
508		}
509		m = m_gethdr(M_DONTWAIT, MT_DATA);
510		if (m == NULL) {
511			m_freem(m0);
512			return (ENOBUFS);
513		}
514
515		/*
516		 * Fill in mbuf with extended SP header and addresses and
517		 * length put into network format.
518		 */
519		MH_ALIGN(m, sizeof(struct spx));
520		m->m_len = sizeof(struct spx);
521		m->m_next = m0;
522		si = mtod(m, struct spx *);
523		si->si_i = *cb->s_ipx;
524		si->si_s = cb->s_shdr;
525		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
526			struct spxhdr *sh;
527			if (m0->m_len < sizeof(*sh)) {
528				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
529					m_free(m);
530					m_freem(m0);
531					return (EINVAL);
532				}
533				m->m_next = m0;
534			}
535			sh = mtod(m0, struct spxhdr *);
536			si->si_dt = sh->spx_dt;
537			si->si_cc |= sh->spx_cc & SPX_EM;
538			m0->m_len -= sizeof(*sh);
539			m0->m_data += sizeof(*sh);
540			len -= sizeof(*sh);
541		}
542		len += sizeof(*si);
543		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
544			si->si_cc |= SPX_EM;
545			spx_newchecks[8]++;
546		}
547		if (cb->s_oobflags & SF_SOOB) {
548			/*
549			 * Per jqj@cornell: Make sure OB packets convey
550			 * exactly 1 byte.  If the packet is 1 byte or
551			 * larger, we have already guaranted there to be at
552			 * least one garbage byte for the checksum, and extra
553			 * bytes shouldn't hurt!
554			 */
555			if (len > sizeof(*si)) {
556				si->si_cc |= SPX_OB;
557				len = (1 + sizeof(*si));
558			}
559		}
560		si->si_len = htons((u_short)len);
561		m->m_pkthdr.len = ((len - 1) | 1) + 1;
562
563		/*
564		 * Queue stuff up for output.
565		 */
566		sbappendrecord(sb, m);
567		cb->s_seq++;
568	}
569#ifdef notdef
570	idle = (cb->s_smax == (cb->s_rack - 1));
571#endif
572again:
573	sendalot = 0;
574	off = cb->s_snxt - cb->s_rack;
575	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
576
577	/*
578	 * If in persist timeout with window of 0, send a probe.  Otherwise,
579	 * if window is small but non-zero and timer expired, send what we
580	 * can and go into transmit state.
581	 */
582	if (cb->s_force == 1 + SPXT_PERSIST) {
583		if (win != 0) {
584			cb->s_timer[SPXT_PERSIST] = 0;
585			cb->s_rxtshift = 0;
586		}
587	}
588	span = cb->s_seq - cb->s_rack;
589	len = min(span, win) - off;
590
591	if (len < 0) {
592		/*
593		 * Window shrank after we went into it.  If window shrank to
594		 * 0, cancel pending restransmission and pull s_snxt back to
595		 * (closed) window.  We will enter persist state below.  If
596		 * the widndow didn't close completely, just wait for an ACK.
597		 */
598		len = 0;
599		if (win == 0) {
600			cb->s_timer[SPXT_REXMT] = 0;
601			cb->s_snxt = cb->s_rack;
602		}
603	}
604	if (len > 1)
605		sendalot = 1;
606	rcv_win = sbspace(&so->so_rcv);
607
608	/*
609	 * Send if we owe peer an ACK.
610	 */
611	if (cb->s_oobflags & SF_SOOB) {
612		/*
613		 * Must transmit this out of band packet.
614		 */
615		cb->s_oobflags &= ~ SF_SOOB;
616		sendalot = 1;
617		spxstat.spxs_sndurg++;
618		goto found;
619	}
620	if (cb->s_flags & SF_ACKNOW)
621		goto send;
622	if (cb->s_state < TCPS_ESTABLISHED)
623		goto send;
624
625	/*
626	 * Silly window can't happen in spx.  Code from TCP deleted.
627	 */
628	if (len)
629		goto send;
630
631	/*
632	 * Compare available window to amount of window known to peer (as
633	 * advertised window less next expected input.)  If the difference is
634	 * at least two packets or at least 35% of the mximum possible
635	 * window, then want to send a window update to peer.
636	 */
637	if (rcv_win > 0) {
638		u_short delta =  1 + cb->s_alo - cb->s_ack;
639		int adv = rcv_win - (delta * cb->s_mtu);
640
641		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
642		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
643			spxstat.spxs_sndwinup++;
644			cb->s_flags |= SF_ACKNOW;
645			goto send;
646		}
647
648	}
649
650	/*
651	 * Many comments from tcp_output.c are appropriate here including ...
652	 * If send window is too small, there is data to transmit, and no
653	 * retransmit or persist is pending, then go to persist state.  If
654	 * nothing happens soon, send when timer expires: if window is
655	 * non-zero, transmit what we can, otherwise send a probe.
656	 */
657	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
658	    cb->s_timer[SPXT_PERSIST] == 0) {
659		cb->s_rxtshift = 0;
660		spx_setpersist(cb);
661	}
662
663	/*
664	 * No reason to send a packet, just return.
665	 */
666	cb->s_outx = 1;
667	return (0);
668
669send:
670	/*
671	 * Find requested packet.
672	 */
673	si = 0;
674	if (len > 0) {
675		cb->s_want = cb->s_snxt;
676		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
677			si = mtod(m, struct spx *);
678			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
679				break;
680		}
681	found:
682		if (si != NULL) {
683			if (si->si_seq == cb->s_snxt)
684					cb->s_snxt++;
685				else
686					spxstat.spxs_sndvoid++, si = 0;
687		}
688	}
689
690	/*
691	 * Update window.
692	 */
693	if (rcv_win < 0)
694		rcv_win = 0;
695	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
696	if (SSEQ_LT(alo, cb->s_alo))
697		alo = cb->s_alo;
698
699	if (si != NULL) {
700		/*
701		 * Must make a copy of this packet for ipx_output to monkey
702		 * with.
703		 */
704		m = m_copy(dtom(si), 0, (int)M_COPYALL);
705		if (m == NULL)
706			return (ENOBUFS);
707		si = mtod(m, struct spx *);
708		if (SSEQ_LT(si->si_seq, cb->s_smax))
709			spxstat.spxs_sndrexmitpack++;
710		else
711			spxstat.spxs_sndpack++;
712	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
713		/*
714		 * Must send an acknowledgement or a probe.
715		 */
716		if (cb->s_force)
717			spxstat.spxs_sndprobe++;
718		if (cb->s_flags & SF_ACKNOW)
719			spxstat.spxs_sndacks++;
720		m = m_gethdr(M_DONTWAIT, MT_DATA);
721		if (m == NULL)
722			return (ENOBUFS);
723
724		/*
725		 * Fill in mbuf with extended SP header and addresses and
726		 * length put into network format.
727		 */
728		MH_ALIGN(m, sizeof(struct spx));
729		m->m_len = sizeof(*si);
730		m->m_pkthdr.len = sizeof(*si);
731		si = mtod(m, struct spx *);
732		si->si_i = *cb->s_ipx;
733		si->si_s = cb->s_shdr;
734		si->si_seq = cb->s_smax + 1;
735		si->si_len = htons(sizeof(*si));
736		si->si_cc |= SPX_SP;
737	} else {
738		cb->s_outx = 3;
739		if (so->so_options & SO_DEBUG || traceallspxs)
740			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
741		return (0);
742	}
743
744	/*
745	 * Stuff checksum and output datagram.
746	 */
747	if ((si->si_cc & SPX_SP) == 0) {
748		if (cb->s_force != (1 + SPXT_PERSIST) ||
749		    cb->s_timer[SPXT_PERSIST] == 0) {
750			/*
751			 * If this is a new packet and we are not currently
752			 * timing anything, time this one.
753			 */
754			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
755				cb->s_smax = si->si_seq;
756				if (cb->s_rtt == 0) {
757					spxstat.spxs_segstimed++;
758					cb->s_rtseq = si->si_seq;
759					cb->s_rtt = 1;
760				}
761			}
762
763			/*
764			 * Set rexmt timer if not currently set, initial
765			 * value for retransmit timer is smoothed round-trip
766			 * time + 2 * round-trip time variance.  Initialize
767			 * shift counter which is used for backoff of
768			 * retransmit time.
769			 */
770			if (cb->s_timer[SPXT_REXMT] == 0 &&
771			    cb->s_snxt != cb->s_rack) {
772				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
773				if (cb->s_timer[SPXT_PERSIST]) {
774					cb->s_timer[SPXT_PERSIST] = 0;
775					cb->s_rxtshift = 0;
776				}
777			}
778		} else if (SSEQ_LT(cb->s_smax, si->si_seq))
779			cb->s_smax = si->si_seq;
780	} else if (cb->s_state < TCPS_ESTABLISHED) {
781		if (cb->s_rtt == 0)
782			cb->s_rtt = 1; /* Time initial handshake */
783		if (cb->s_timer[SPXT_REXMT] == 0)
784			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
785	}
786
787	/*
788	 * Do not request acks when we ack their data packets or when we do a
789	 * gratuitous window update.
790	 */
791	if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
792		si->si_cc |= SPX_SA;
793	si->si_seq = htons(si->si_seq);
794	si->si_alo = htons(alo);
795	si->si_ack = htons(cb->s_ack);
796
797	if (ipxcksum)
798		si->si_sum = ipx_cksum(m, ntohs(si->si_len));
799	else
800		si->si_sum = 0xffff;
801
802	cb->s_outx = 4;
803	if (so->so_options & SO_DEBUG || traceallspxs)
804		spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
805
806	if (so->so_options & SO_DONTROUTE)
807		error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
808	else
809		error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
810	if (error)
811		return (error);
812	spxstat.spxs_sndtotal++;
813
814	/*
815	 * Data sent (as far as we can tell).  If this advertises a larger
816	 * window than any other segment, then remember the size of the
817	 * advertized window.  Any pending ACK has now been sent.
818	 */
819	cb->s_force = 0;
820	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
821	if (SSEQ_GT(alo, cb->s_alo))
822		cb->s_alo = alo;
823	if (sendalot)
824		goto again;
825	cb->s_outx = 5;
826	return (0);
827}
828
829static int spx_do_persist_panics = 0;
830
831static void
832spx_setpersist(struct spxpcb *cb)
833{
834	int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
835
836	IPX_LOCK_ASSERT(cb->s_ipxpcb);
837
838	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
839		panic("spx_output REXMT");
840
841	/*
842	 * Start/restart persistance timer.
843	 */
844	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
845	    t*spx_backoff[cb->s_rxtshift],
846	    SPXTV_PERSMIN, SPXTV_PERSMAX);
847	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
848		cb->s_rxtshift++;
849}
850
851int
852spx_ctloutput(struct socket *so, struct sockopt *sopt)
853{
854	struct spxhdr spxhdr;
855	struct ipxpcb *ipxp;
856	struct spxpcb *cb;
857	int mask, error;
858	short soptval;
859	u_short usoptval;
860	int optval;
861
862	ipxp = sotoipxpcb(so);
863	KASSERT(ipxp != NULL, ("spx_ctloutput: ipxp == NULL"));
864
865	/*
866	 * This will have to be changed when we do more general stacking of
867	 * protocols.
868	 */
869	if (sopt->sopt_level != IPXPROTO_SPX)
870		return (ipx_ctloutput(so, sopt));
871
872	IPX_LOCK(ipxp);
873	if (ipxp->ipxp_flags & IPXP_DROPPED) {
874		IPX_UNLOCK(ipxp);
875		return (ECONNRESET);
876	}
877
878	IPX_LOCK(ipxp);
879	cb = ipxtospxpcb(ipxp);
880	KASSERT(cb != NULL, ("spx_ctloutput: cb == NULL"));
881
882	error = 0;
883	switch (sopt->sopt_dir) {
884	case SOPT_GET:
885		switch (sopt->sopt_name) {
886		case SO_HEADERS_ON_INPUT:
887			mask = SF_HI;
888			goto get_flags;
889
890		case SO_HEADERS_ON_OUTPUT:
891			mask = SF_HO;
892		get_flags:
893			soptval = cb->s_flags & mask;
894			IPX_UNLOCK(ipxp);
895			error = sooptcopyout(sopt, &soptval,
896			    sizeof(soptval));
897			break;
898
899		case SO_MTU:
900			usoptval = cb->s_mtu;
901			IPX_UNLOCK(ipxp);
902			error = sooptcopyout(sopt, &usoptval,
903			    sizeof(usoptval));
904			break;
905
906		case SO_LAST_HEADER:
907			spxhdr = cb->s_rhdr;
908			IPX_UNLOCK(ipxp);
909			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
910			break;
911
912		case SO_DEFAULT_HEADERS:
913			spxhdr = cb->s_shdr;
914			IPX_UNLOCK(ipxp);
915			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
916			break;
917
918		default:
919			IPX_UNLOCK(ipxp);
920			error = ENOPROTOOPT;
921		}
922		break;
923
924	case SOPT_SET:
925		/*
926		 * XXX Why are these shorts on get and ints on set?  That
927		 * doesn't make any sense...
928		 *
929		 * XXXRW: Note, when we re-acquire the ipxp lock, we should
930		 * re-check that it's not dropped.
931		 */
932		IPX_UNLOCK(ipxp);
933		switch (sopt->sopt_name) {
934		case SO_HEADERS_ON_INPUT:
935			mask = SF_HI;
936			goto set_head;
937
938		case SO_HEADERS_ON_OUTPUT:
939			mask = SF_HO;
940		set_head:
941			error = sooptcopyin(sopt, &optval, sizeof optval,
942					    sizeof optval);
943			if (error)
944				break;
945
946			IPX_LOCK(ipxp);
947			if (cb->s_flags & SF_PI) {
948				if (optval)
949					cb->s_flags |= mask;
950				else
951					cb->s_flags &= ~mask;
952			} else error = EINVAL;
953			IPX_UNLOCK(ipxp);
954			break;
955
956		case SO_MTU:
957			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
958					    sizeof usoptval);
959			if (error)
960				break;
961			/* Unlocked write. */
962			cb->s_mtu = usoptval;
963			break;
964
965#ifdef SF_NEWCALL
966		case SO_NEWCALL:
967			error = sooptcopyin(sopt, &optval, sizeof optval,
968					    sizeof optval);
969			if (error)
970				break;
971			IPX_LOCK(ipxp);
972			if (optval) {
973				cb->s_flags2 |= SF_NEWCALL;
974				spx_newchecks[5]++;
975			} else {
976				cb->s_flags2 &= ~SF_NEWCALL;
977				spx_newchecks[6]++;
978			}
979			IPX_UNLOCK(ipxp);
980			break;
981#endif
982
983		case SO_DEFAULT_HEADERS:
984			{
985				struct spxhdr sp;
986
987				error = sooptcopyin(sopt, &sp, sizeof sp,
988						    sizeof sp);
989				if (error)
990					break;
991				IPX_LOCK(ipxp);
992				cb->s_dt = sp.spx_dt;
993				cb->s_cc = sp.spx_cc & SPX_EM;
994				IPX_UNLOCK(ipxp);
995			}
996			break;
997
998		default:
999			error = ENOPROTOOPT;
1000		}
1001		break;
1002
1003	default:
1004		panic("spx_ctloutput: bad socket option direction");
1005	}
1006	return (error);
1007}
1008
1009static void
1010spx_usr_abort(struct socket *so)
1011{
1012	struct ipxpcb *ipxp;
1013	struct spxpcb *cb;
1014
1015	ipxp = sotoipxpcb(so);
1016	KASSERT(ipxp != NULL, ("spx_usr_abort: ipxp == NULL"));
1017
1018	cb = ipxtospxpcb(ipxp);
1019	KASSERT(cb != NULL, ("spx_usr_abort: cb == NULL"));
1020
1021	IPX_LIST_LOCK();
1022	IPX_LOCK(ipxp);
1023	spx_drop(cb, ECONNABORTED);
1024	IPX_UNLOCK(ipxp);
1025	IPX_LIST_UNLOCK();
1026}
1027
1028/*
1029 * Accept a connection.  Essentially all the work is done at higher levels;
1030 * just return the address of the peer, storing through addr.
1031 */
1032static int
1033spx_accept(struct socket *so, struct sockaddr **nam)
1034{
1035	struct ipxpcb *ipxp;
1036	struct sockaddr_ipx *sipx, ssipx;
1037
1038	ipxp = sotoipxpcb(so);
1039	KASSERT(ipxp != NULL, ("spx_accept: ipxp == NULL"));
1040
1041	sipx = &ssipx;
1042	bzero(sipx, sizeof *sipx);
1043	sipx->sipx_len = sizeof *sipx;
1044	sipx->sipx_family = AF_IPX;
1045	IPX_LOCK(ipxp);
1046	sipx->sipx_addr = ipxp->ipxp_faddr;
1047	IPX_UNLOCK(ipxp);
1048	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1049	return (0);
1050}
1051
1052static int
1053spx_attach(struct socket *so, int proto, struct thread *td)
1054{
1055	struct ipxpcb *ipxp;
1056	struct spxpcb *cb;
1057	struct mbuf *mm;
1058	struct sockbuf *sb;
1059	int error;
1060
1061	ipxp = sotoipxpcb(so);
1062	KASSERT(ipxp == NULL, ("spx_attach: ipxp != NULL"));
1063
1064	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1065		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1066		if (error)
1067			return (error);
1068	}
1069
1070	cb = malloc(sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1071	if (cb == NULL)
1072		return (ENOBUFS);
1073	mm = m_getclr(M_DONTWAIT, MT_DATA);
1074	if (mm == NULL) {
1075		free(cb, M_PCB);
1076		return (ENOBUFS);
1077	}
1078
1079	IPX_LIST_LOCK();
1080	error = ipx_pcballoc(so, &ipxpcb_list, td);
1081	if (error) {
1082		IPX_LIST_UNLOCK();
1083		m_free(mm);
1084		free(cb, M_PCB);
1085		return (error);
1086	}
1087	ipxp = sotoipxpcb(so);
1088	ipxp->ipxp_flags |= IPXP_SPX;
1089
1090	cb->s_ipx = mtod(mm, struct ipx *);
1091	cb->s_state = TCPS_LISTEN;
1092	cb->s_smax = -1;
1093	cb->s_swl1 = -1;
1094	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1095	cb->s_ipxpcb = ipxp;
1096	cb->s_mtu = 576 - sizeof(struct spx);
1097	sb = &so->so_snd;
1098	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1099	cb->s_ssthresh = cb->s_cwnd;
1100	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1101
1102	/*
1103	 * Above is recomputed when connecting to account for changed
1104	 * buffering or mtu's.
1105	 */
1106	cb->s_rtt = SPXTV_SRTTBASE;
1107	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1108	SPXT_RANGESET(cb->s_rxtcur,
1109	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1110	    SPXTV_MIN, SPXTV_REXMTMAX);
1111	ipxp->ipxp_pcb = (caddr_t)cb;
1112	IPX_LIST_UNLOCK();
1113	return (0);
1114}
1115
1116static void
1117spx_pcbdetach(struct ipxpcb *ipxp)
1118{
1119	struct spxpcb *cb;
1120	struct spx_q *s;
1121	struct mbuf *m;
1122
1123	IPX_LOCK_ASSERT(ipxp);
1124
1125	cb = ipxtospxpcb(ipxp);
1126	KASSERT(cb != NULL, ("spx_pcbdetach: cb == NULL"));
1127
1128	s = cb->s_q.si_next;
1129	while (s != &(cb->s_q)) {
1130		s = s->si_next;
1131		spx_remque(s);
1132		m = dtom(s);
1133		m_freem(m);
1134	}
1135	m_free(dtom(cb->s_ipx));
1136	free(cb, M_PCB);
1137	ipxp->ipxp_pcb = NULL;
1138}
1139
1140static int
1141spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1142{
1143	struct ipxpcb *ipxp;
1144	int error;
1145
1146	ipxp = sotoipxpcb(so);
1147	KASSERT(ipxp != NULL, ("spx_bind: ipxp == NULL"));
1148
1149	IPX_LIST_LOCK();
1150	IPX_LOCK(ipxp);
1151	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1152		error = EINVAL;
1153		goto out;
1154	}
1155	error = ipx_pcbbind(ipxp, nam, td);
1156out:
1157	IPX_UNLOCK(ipxp);
1158	IPX_LIST_UNLOCK();
1159	return (error);
1160}
1161
1162static void
1163spx_usr_close(struct socket *so)
1164{
1165	struct ipxpcb *ipxp;
1166	struct spxpcb *cb;
1167
1168	ipxp = sotoipxpcb(so);
1169	KASSERT(ipxp != NULL, ("spx_usr_close: ipxp == NULL"));
1170
1171	cb = ipxtospxpcb(ipxp);
1172	KASSERT(cb != NULL, ("spx_usr_close: cb == NULL"));
1173
1174	IPX_LIST_LOCK();
1175	IPX_LOCK(ipxp);
1176	if (cb->s_state > TCPS_LISTEN)
1177		spx_disconnect(cb);
1178	else
1179		spx_close(cb);
1180	IPX_UNLOCK(ipxp);
1181	IPX_LIST_UNLOCK();
1182}
1183
1184/*
1185 * Initiate connection to peer.  Enter SYN_SENT state, and mark socket as
1186 * connecting.  Start keep-alive timer, setup prototype header, send initial
1187 * system packet requesting connection.
1188 */
1189static int
1190spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1191{
1192	struct ipxpcb *ipxp;
1193	struct spxpcb *cb;
1194	int error;
1195
1196	ipxp = sotoipxpcb(so);
1197	KASSERT(ipxp != NULL, ("spx_connect: ipxp == NULL"));
1198
1199	cb = ipxtospxpcb(ipxp);
1200	KASSERT(cb != NULL, ("spx_connect: cb == NULL"));
1201
1202	IPX_LIST_LOCK();
1203	IPX_LOCK(ipxp);
1204	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1205		error = EINVAL;
1206		goto spx_connect_end;
1207	}
1208	if (ipxp->ipxp_lport == 0) {
1209		error = ipx_pcbbind(ipxp, NULL, td);
1210		if (error)
1211			goto spx_connect_end;
1212	}
1213	error = ipx_pcbconnect(ipxp, nam, td);
1214	if (error)
1215		goto spx_connect_end;
1216	soisconnecting(so);
1217	spxstat.spxs_connattempt++;
1218	cb->s_state = TCPS_SYN_SENT;
1219	cb->s_did = 0;
1220	spx_template(cb);
1221	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1222	cb->s_force = 1 + SPXTV_KEEP;
1223
1224	/*
1225	 * Other party is required to respond to the port I send from, but he
1226	 * is not required to answer from where I am sending to, so allow
1227	 * wildcarding.  Original port I am sending to is still saved in
1228	 * cb->s_dport.
1229	 */
1230	ipxp->ipxp_fport = 0;
1231	error = spx_output(cb, NULL);
1232spx_connect_end:
1233	IPX_UNLOCK(ipxp);
1234	IPX_LIST_UNLOCK();
1235	return (error);
1236}
1237
1238static void
1239spx_detach(struct socket *so)
1240{
1241	struct ipxpcb *ipxp;
1242	struct spxpcb *cb;
1243
1244	/*
1245	 * XXXRW: Should assert appropriately detached.
1246	 */
1247	ipxp = sotoipxpcb(so);
1248	KASSERT(ipxp != NULL, ("spx_detach: ipxp == NULL"));
1249
1250	cb = ipxtospxpcb(ipxp);
1251	KASSERT(cb != NULL, ("spx_detach: cb == NULL"));
1252
1253	IPX_LIST_LOCK();
1254	IPX_LOCK(ipxp);
1255	spx_pcbdetach(ipxp);
1256	ipx_pcbfree(ipxp);
1257	IPX_LIST_UNLOCK();
1258}
1259
1260/*
1261 * We may decide later to implement connection closing handshaking at the spx
1262 * level optionally.  Here is the hook to do it:
1263 */
1264static int
1265spx_usr_disconnect(struct socket *so)
1266{
1267	struct ipxpcb *ipxp;
1268	struct spxpcb *cb;
1269	int error;
1270
1271	ipxp = sotoipxpcb(so);
1272	KASSERT(ipxp != NULL, ("spx_usr_disconnect: ipxp == NULL"));
1273
1274	cb = ipxtospxpcb(ipxp);
1275	KASSERT(cb != NULL, ("spx_usr_disconnect: cb == NULL"));
1276
1277	IPX_LIST_LOCK();
1278	IPX_LOCK(ipxp);
1279	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1280		error = EINVAL;
1281		goto out;
1282	}
1283	spx_disconnect(cb);
1284	error = 0;
1285out:
1286	IPX_UNLOCK(ipxp);
1287	IPX_LIST_UNLOCK();
1288	return (error);
1289}
1290
1291static int
1292spx_listen(struct socket *so, int backlog, struct thread *td)
1293{
1294	int error;
1295	struct ipxpcb *ipxp;
1296	struct spxpcb *cb;
1297
1298	error = 0;
1299	ipxp = sotoipxpcb(so);
1300	KASSERT(ipxp != NULL, ("spx_listen: ipxp == NULL"));
1301
1302	cb = ipxtospxpcb(ipxp);
1303	KASSERT(cb != NULL, ("spx_listen: cb == NULL"));
1304
1305	IPX_LIST_LOCK();
1306	IPX_LOCK(ipxp);
1307	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1308		error = EINVAL;
1309		goto out;
1310	}
1311	SOCK_LOCK(so);
1312	error = solisten_proto_check(so);
1313	if (error == 0 && ipxp->ipxp_lport == 0)
1314		error = ipx_pcbbind(ipxp, NULL, td);
1315	if (error == 0) {
1316		cb->s_state = TCPS_LISTEN;
1317		solisten_proto(so, backlog);
1318	}
1319	SOCK_UNLOCK(so);
1320out:
1321	IPX_UNLOCK(ipxp);
1322	IPX_LIST_UNLOCK();
1323	return (error);
1324}
1325
1326/*
1327 * After a receive, possibly send acknowledgment updating allocation.
1328 */
1329static int
1330spx_rcvd(struct socket *so, int flags)
1331{
1332	struct ipxpcb *ipxp;
1333	struct spxpcb *cb;
1334	int error;
1335
1336	ipxp = sotoipxpcb(so);
1337	KASSERT(ipxp != NULL, ("spx_rcvd: ipxp == NULL"));
1338
1339	cb = ipxtospxpcb(ipxp);
1340	KASSERT(cb != NULL, ("spx_rcvd: cb == NULL"));
1341
1342	IPX_LOCK(ipxp);
1343	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1344		error = EINVAL;
1345		goto out;
1346	}
1347	cb->s_flags |= SF_RVD;
1348	spx_output(cb, NULL);
1349	cb->s_flags &= ~SF_RVD;
1350	error = 0;
1351out:
1352	IPX_UNLOCK(ipxp);
1353	return (error);
1354}
1355
1356static int
1357spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1358{
1359	struct ipxpcb *ipxp;
1360	struct spxpcb *cb;
1361	int error;
1362
1363	ipxp = sotoipxpcb(so);
1364	KASSERT(ipxp != NULL, ("spx_rcvoob: ipxp == NULL"));
1365
1366	cb = ipxtospxpcb(ipxp);
1367	KASSERT(cb != NULL, ("spx_rcvoob: cb == NULL"));
1368
1369	IPX_LOCK(ipxp);
1370	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1371		error = EINVAL;
1372		goto out;
1373	}
1374	SOCKBUF_LOCK(&so->so_rcv);
1375	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1376	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1377		SOCKBUF_UNLOCK(&so->so_rcv);
1378		m->m_len = 1;
1379		*mtod(m, caddr_t) = cb->s_iobc;
1380		error = 0;
1381		goto out;
1382	}
1383	SOCKBUF_UNLOCK(&so->so_rcv);
1384	error = EINVAL;
1385out:
1386	IPX_UNLOCK(ipxp);
1387	return (error);
1388}
1389
1390static int
1391spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1392    struct mbuf *controlp, struct thread *td)
1393{
1394	struct ipxpcb *ipxp;
1395	struct spxpcb *cb;
1396	int error;
1397
1398	ipxp = sotoipxpcb(so);
1399	KASSERT(ipxp != NULL, ("spx_send: ipxp == NULL"));
1400
1401	cb = ipxtospxpcb(ipxp);
1402	KASSERT(cb != NULL, ("spx_send: cb == NULL"));
1403
1404	error = 0;
1405	IPX_LOCK(ipxp);
1406	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1407		error = ECONNRESET;
1408		goto spx_send_end;
1409	}
1410	if (flags & PRUS_OOB) {
1411		if (sbspace(&so->so_snd) < -512) {
1412			error = ENOBUFS;
1413			goto spx_send_end;
1414		}
1415		cb->s_oobflags |= SF_SOOB;
1416	}
1417	if (controlp != NULL) {
1418		u_short *p = mtod(controlp, u_short *);
1419		spx_newchecks[2]++;
1420		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1421			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1422			spx_newchecks[3]++;
1423		}
1424		m_freem(controlp);
1425	}
1426	controlp = NULL;
1427	error = spx_output(cb, m);
1428	m = NULL;
1429spx_send_end:
1430	IPX_UNLOCK(ipxp);
1431	if (controlp != NULL)
1432		m_freem(controlp);
1433	if (m != NULL)
1434		m_freem(m);
1435	return (error);
1436}
1437
1438static int
1439spx_shutdown(struct socket *so)
1440{
1441	struct ipxpcb *ipxp;
1442	struct spxpcb *cb;
1443	int error;
1444
1445	ipxp = sotoipxpcb(so);
1446	KASSERT(ipxp != NULL, ("spx_shutdown: ipxp == NULL"));
1447
1448	cb = ipxtospxpcb(ipxp);
1449	KASSERT(cb != NULL, ("spx_shutdown: cb == NULL"));
1450
1451	socantsendmore(so);
1452	IPX_LIST_LOCK();
1453	IPX_LOCK(ipxp);
1454	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1455		error = EINVAL;
1456		goto out;
1457	}
1458	spx_usrclosed(cb);
1459	error = 0;
1460out:
1461	IPX_UNLOCK(ipxp);
1462	IPX_LIST_UNLOCK();
1463	return (error);
1464}
1465
1466static int
1467spx_sp_attach(struct socket *so, int proto, struct thread *td)
1468{
1469	struct ipxpcb *ipxp;
1470	struct spxpcb *cb;
1471	int error;
1472
1473	KASSERT(so->so_pcb == NULL, ("spx_sp_attach: so_pcb != NULL"));
1474
1475	error = spx_attach(so, proto, td);
1476	if (error)
1477		return (error);
1478
1479	ipxp = sotoipxpcb(so);
1480	KASSERT(ipxp != NULL, ("spx_sp_attach: ipxp == NULL"));
1481
1482	cb = ipxtospxpcb(ipxp);
1483	KASSERT(cb != NULL, ("spx_sp_attach: cb == NULL"));
1484
1485	IPX_LOCK(ipxp);
1486	cb->s_flags |= (SF_HI | SF_HO | SF_PI);
1487	IPX_UNLOCK(ipxp);
1488	return (0);
1489}
1490
1491/*
1492 * Create template to be used to send spx packets on a connection.  Called
1493 * after host entry created, fills in a skeletal spx header (choosing
1494 * connection id), minimizing the amount of work necessary when the
1495 * connection is used.
1496 */
1497static void
1498spx_template(struct spxpcb *cb)
1499{
1500	struct ipxpcb *ipxp = cb->s_ipxpcb;
1501	struct ipx *ipx = cb->s_ipx;
1502	struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1503
1504	IPX_LOCK_ASSERT(ipxp);
1505
1506	ipx->ipx_pt = IPXPROTO_SPX;
1507	ipx->ipx_sna = ipxp->ipxp_laddr;
1508	ipx->ipx_dna = ipxp->ipxp_faddr;
1509	SPX_LOCK();
1510	cb->s_sid = htons(spx_iss);
1511	spx_iss += SPX_ISSINCR/2;
1512	SPX_UNLOCK();
1513	cb->s_alo = 1;
1514	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1515
1516	/*
1517	 * Try to expand fast to full complement of large packets.
1518	 */
1519	cb->s_ssthresh = cb->s_cwnd;
1520	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1521
1522	/*
1523	 * But allow for lots of little packets as well.
1524	 */
1525	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1526}
1527
1528/*
1529 * Close a SPIP control block.  Wake up any sleepers.  We used to free any
1530 * queued packets and cb->s_ipx here, but now we defer that until the pcb is
1531 * discarded.
1532 */
1533void
1534spx_close(struct spxpcb *cb)
1535{
1536	struct ipxpcb *ipxp = cb->s_ipxpcb;
1537	struct socket *so = ipxp->ipxp_socket;
1538
1539	KASSERT(ipxp != NULL, ("spx_close: ipxp == NULL"));
1540	IPX_LIST_LOCK_ASSERT();
1541	IPX_LOCK_ASSERT(ipxp);
1542
1543	ipxp->ipxp_flags |= IPXP_DROPPED;
1544	soisdisconnected(so);
1545	spxstat.spxs_closed++;
1546}
1547
1548/*
1549 * Someday we may do level 3 handshaking to close a connection or send a
1550 * xerox style error.  For now, just close.  cb will always be invalid after
1551 * this call.
1552 */
1553static void
1554spx_usrclosed(struct spxpcb *cb)
1555{
1556
1557	IPX_LIST_LOCK_ASSERT();
1558	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1559
1560	spx_close(cb);
1561}
1562
1563/*
1564 * cb will always be invalid after this call.
1565 */
1566static void
1567spx_disconnect(struct spxpcb *cb)
1568{
1569
1570	IPX_LIST_LOCK_ASSERT();
1571	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1572
1573	spx_close(cb);
1574}
1575
1576/*
1577 * Drop connection, reporting the specified error.  cb will always be invalid
1578 * after this call.
1579 */
1580static void
1581spx_drop(struct spxpcb *cb, int errno)
1582{
1583	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1584
1585	IPX_LIST_LOCK_ASSERT();
1586	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1587
1588	/*
1589	 * Someday, in the xerox world we will generate error protocol
1590	 * packets announcing that the socket has gone away.
1591	 */
1592	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1593		spxstat.spxs_drops++;
1594		cb->s_state = TCPS_CLOSED;
1595		/*tcp_output(cb);*/
1596	} else
1597		spxstat.spxs_conndrops++;
1598	so->so_error = errno;
1599	spx_close(cb);
1600}
1601
1602/*
1603 * Fast timeout routine for processing delayed acks.
1604 */
1605void
1606spx_fasttimo(void)
1607{
1608	struct ipxpcb *ipxp;
1609	struct spxpcb *cb;
1610
1611	IPX_LIST_LOCK();
1612	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1613		IPX_LOCK(ipxp);
1614		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1615		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1616			IPX_UNLOCK(ipxp);
1617			continue;
1618		}
1619		cb = ipxtospxpcb(ipxp);
1620		if (cb->s_flags & SF_DELACK) {
1621			cb->s_flags &= ~SF_DELACK;
1622			cb->s_flags |= SF_ACKNOW;
1623			spxstat.spxs_delack++;
1624			spx_output(cb, NULL);
1625		}
1626		IPX_UNLOCK(ipxp);
1627	}
1628	IPX_LIST_UNLOCK();
1629}
1630
1631/*
1632 * spx protocol timeout routine called every 500 ms.  Updates the timers in
1633 * all active pcb's and causes finite state machine actions if timers expire.
1634 */
1635void
1636spx_slowtimo(void)
1637{
1638	struct ipxpcb *ipxp;
1639	struct spxpcb *cb;
1640	int i;
1641
1642	/*
1643	 * Search through tcb's and update active timers.  Once, timers could
1644	 * free ipxp's, but now we do that only when detaching a socket.
1645	 */
1646	IPX_LIST_LOCK();
1647	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1648		IPX_LOCK(ipxp);
1649		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1650		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1651			IPX_UNLOCK(ipxp);
1652			continue;
1653		}
1654
1655		cb = (struct spxpcb *)ipxp->ipxp_pcb;
1656		KASSERT(cb != NULL, ("spx_slowtimo: cb == NULL"));
1657		for (i = 0; i < SPXT_NTIMERS; i++) {
1658			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1659				spx_timers(cb, i);
1660				if (ipxp->ipxp_flags & IPXP_DROPPED)
1661					break;
1662			}
1663		}
1664		if (!(ipxp->ipxp_flags & IPXP_DROPPED)) {
1665			cb->s_idle++;
1666			if (cb->s_rtt)
1667				cb->s_rtt++;
1668		}
1669		IPX_UNLOCK(ipxp);
1670	}
1671	IPX_LIST_UNLOCK();
1672	SPX_LOCK();
1673	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1674	SPX_UNLOCK();
1675}
1676
1677/*
1678 * SPX timer processing.
1679 */
1680static void
1681spx_timers(struct spxpcb *cb, int timer)
1682{
1683	long rexmt;
1684	int win;
1685
1686	IPX_LIST_LOCK_ASSERT();
1687	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1688
1689	cb->s_force = 1 + timer;
1690	switch (timer) {
1691	case SPXT_2MSL:
1692		/*
1693		 * 2 MSL timeout in shutdown went off.  TCP deletes
1694		 * connection control block.
1695		 */
1696		printf("spx: SPXT_2MSL went off for no reason\n");
1697		cb->s_timer[timer] = 0;
1698		break;
1699
1700	case SPXT_REXMT:
1701		/*
1702		 * Retransmission timer went off.  Message has not been acked
1703		 * within retransmit interval.  Back off to a longer
1704		 * retransmit interval and retransmit one packet.
1705		 */
1706		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1707			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1708			spxstat.spxs_timeoutdrop++;
1709			spx_drop(cb, ETIMEDOUT);
1710			break;
1711		}
1712		spxstat.spxs_rexmttimeo++;
1713		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1714		rexmt *= spx_backoff[cb->s_rxtshift];
1715		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1716		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1717
1718		/*
1719		 * If we have backed off fairly far, our srtt estimate is
1720		 * probably bogus.  Clobber it so we'll take the next rtt
1721		 * measurement as our srtt; move the current srtt into rttvar
1722		 * to keep the current retransmit times until then.
1723		 */
1724		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1725			cb->s_rttvar += (cb->s_srtt >> 2);
1726			cb->s_srtt = 0;
1727		}
1728		cb->s_snxt = cb->s_rack;
1729
1730		/*
1731		 * If timing a packet, stop the timer.
1732		 */
1733		cb->s_rtt = 0;
1734
1735		/*
1736		 * See very long discussion in tcp_timer.c about congestion
1737		 * window and sstrhesh.
1738		 */
1739		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1740		if (win < 2)
1741			win = 2;
1742		cb->s_cwnd = CUNIT;
1743		cb->s_ssthresh = win * CUNIT;
1744		spx_output(cb, NULL);
1745		break;
1746
1747	case SPXT_PERSIST:
1748		/*
1749		 * Persistance timer into zero window.  Force a probe to be
1750		 * sent.
1751		 */
1752		spxstat.spxs_persisttimeo++;
1753		spx_setpersist(cb);
1754		spx_output(cb, NULL);
1755		break;
1756
1757	case SPXT_KEEP:
1758		/*
1759		 * Keep-alive timer went off; send something or drop
1760		 * connection if idle for too long.
1761		 */
1762		spxstat.spxs_keeptimeo++;
1763		if (cb->s_state < TCPS_ESTABLISHED)
1764			goto dropit;
1765		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1766		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1767				goto dropit;
1768			spxstat.spxs_keepprobe++;
1769			spx_output(cb, NULL);
1770		} else
1771			cb->s_idle = 0;
1772		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1773		break;
1774
1775	dropit:
1776		spxstat.spxs_keepdrops++;
1777		spx_drop(cb, ETIMEDOUT);
1778		break;
1779
1780	default:
1781		panic("spx_timers: unknown timer %d", timer);
1782	}
1783}
1784