1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2009 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * Copyright (c) 1995, Mike Mitchell
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)spx_usrreq.h
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD$");
67
68#include <sys/param.h>
69#include <sys/lock.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/protosw.h>
75#include <sys/signalvar.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/sx.h>
79#include <sys/systm.h>
80
81#include <net/route.h>
82#include <netinet/tcp_fsm.h>
83
84#include <netipx/ipx.h>
85#include <netipx/ipx_pcb.h>
86#include <netipx/ipx_var.h>
87#include <netipx/spx.h>
88#include <netipx/spx_debug.h>
89#include <netipx/spx_timer.h>
90#include <netipx/spx_var.h>
91
92#include <security/mac/mac_framework.h>
93
94/*
95 * SPX protocol implementation.
96 */
static struct	mtx spx_mtx;			/* Protects only spx_iss. */
static u_short 	spx_iss;			/* Seeded to 1 by spx_init(). */
/*
 * Event counters; slots 5 and 6 are bumped when SF_NEWCALL is set/cleared in
 * spx_ctloutput(), slots 7 and 8 by the SF_NEWCALL paths in spx_output().
 */
u_short		spx_newchecks[50];
/*
 * When non-zero, spx_input() sends LISTEN-state packets that carry a
 * non-zero si_did or si_seq down its dropwithreset path.
 */
static int	spx_hardnosed;
/* When non-zero, spx_trace() is invoked even for sockets without SO_DEBUG. */
static int	traceallspxs = 0;
/* Input anomaly counters (gonawy, noconn, wrncon, notme) used by spx_input(). */
struct	spx_istat spx_istat;

/* Wrappers around the mutex that guards spx_iss. */
#define	SPX_LOCK_INIT()	mtx_init(&spx_mtx, "spx_mtx", NULL, MTX_DEF)
#define	SPX_LOCK()	mtx_lock(&spx_mtx)
#define	SPX_UNLOCK()	mtx_unlock(&spx_mtx)

/*
 * Multipliers applied to the persist timer base in spx_setpersist(), indexed
 * by cb->s_rxtshift (which that function caps at SPX_MAXRXTSHIFT).
 */
static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
110
/* Helpers that operate directly on an SPX control block. */
static	void spx_close(struct spxpcb *cb);
static	void spx_disconnect(struct spxpcb *cb);
static	void spx_drop(struct spxpcb *cb, int errno);
static	void spx_setpersist(struct spxpcb *cb);
static	void spx_template(struct spxpcb *cb);
static	void spx_timers(struct spxpcb *cb, int timer);
static	void spx_usrclosed(struct spxpcb *cb);

/*
 * Socket-layer entry points installed in the pr_usrreqs tables below, plus
 * the pcb teardown helper spx_pcbdetach().
 */
static	void spx_usr_abort(struct socket *so);
static	int spx_accept(struct socket *so, struct sockaddr **nam);
static	int spx_attach(struct socket *so, int proto, struct thread *td);
static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
static	void spx_usr_close(struct socket *so);
static	int spx_connect(struct socket *so, struct sockaddr *nam,
			struct thread *td);
static	void spx_detach(struct socket *so);
static	void spx_pcbdetach(struct ipxpcb *ipxp);
static	int spx_usr_disconnect(struct socket *so);
static	int spx_listen(struct socket *so, int backlog, struct thread *td);
static	int spx_rcvd(struct socket *so, int flags);
static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static	int spx_send(struct socket *so, int flags, struct mbuf *m,
		     struct sockaddr *addr, struct mbuf *control,
		     struct thread *td);
static	int spx_shutdown(struct socket *so);
static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
137
138struct	pr_usrreqs spx_usrreqs = {
139	.pru_abort =		spx_usr_abort,
140	.pru_accept =		spx_accept,
141	.pru_attach =		spx_attach,
142	.pru_bind =		spx_bind,
143	.pru_connect =		spx_connect,
144	.pru_control =		ipx_control,
145	.pru_detach =		spx_detach,
146	.pru_disconnect =	spx_usr_disconnect,
147	.pru_listen =		spx_listen,
148	.pru_peeraddr =		ipx_peeraddr,
149	.pru_rcvd =		spx_rcvd,
150	.pru_rcvoob =		spx_rcvoob,
151	.pru_send =		spx_send,
152	.pru_shutdown =		spx_shutdown,
153	.pru_sockaddr =		ipx_sockaddr,
154	.pru_close =		spx_usr_close,
155};
156
157struct	pr_usrreqs spx_usrreq_sps = {
158	.pru_abort =		spx_usr_abort,
159	.pru_accept =		spx_accept,
160	.pru_attach =		spx_sp_attach,
161	.pru_bind =		spx_bind,
162	.pru_connect =		spx_connect,
163	.pru_control =		ipx_control,
164	.pru_detach =		spx_detach,
165	.pru_disconnect =	spx_usr_disconnect,
166	.pru_listen =		spx_listen,
167	.pru_peeraddr =		ipx_peeraddr,
168	.pru_rcvd =		spx_rcvd,
169	.pru_rcvoob =		spx_rcvoob,
170	.pru_send =		spx_send,
171	.pru_shutdown =		spx_shutdown,
172	.pru_sockaddr =		ipx_sockaddr,
173	.pru_close =		spx_usr_close,
174};
175
176void
177spx_init(void)
178{
179
180	SPX_LOCK_INIT();
181	spx_iss = 1; /* WRONG !! should fish it out of TODR */
182}
183
/*
 * Process one received SPX packet for the pcb it was matched to.
 *
 * m	packet mbuf chain; it is either handed to spx_reass() or freed on the
 *	drop paths.
 * ipxp	pcb the packet belongs to.  The caller must hold the pcb list lock
 *	and the ipxp lock; both are released before this function returns.
 *
 * For a listening socket a new socket is created with sonewconn() and the
 * remainder of the processing is done on the new socket's pcb.  The
 * connection state machine (LISTEN, SYN_RECEIVED, SYN_SENT) is advanced,
 * the packet is passed to spx_reass(), and spx_output() is invoked when an
 * ACK, window update or retransmission has been flagged.
 */
void
spx_input(struct mbuf *m, struct ipxpcb *ipxp)
{
	struct spxpcb *cb;
	struct spx *si = mtod(m, struct spx *);
	struct socket *so;
	struct spx spx_savesi;
	int dropsocket = 0;
	short ostate = 0;

	spxstat.spxs_rcvtotal++;
	KASSERT(ipxp != NULL, ("spx_input: ipxpcb == NULL"));

	/*
	 * spx_input() assumes that the caller will hold both the pcb list
	 * lock and also the ipxp lock.  spx_input() will release both before
	 * returning, and may in fact trade in the ipxp lock for another pcb
	 * lock following sonewconn().
	 */
	IPX_LIST_LOCK_ASSERT();
	IPX_LOCK_ASSERT(ipxp);

	cb = ipxtospxpcb(ipxp);
	KASSERT(cb != NULL, ("spx_input: cb == NULL"));

	if (ipxp->ipxp_flags & IPXP_DROPPED)
		goto drop;

	/* Make sure the whole SPX header is contiguous in the first mbuf. */
	if (m->m_len < sizeof(*si)) {
		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
			IPX_UNLOCK(ipxp);
			IPX_LIST_UNLOCK();
			spxstat.spxs_rcvshort++;
			return;
		}
		si = mtod(m, struct spx *);
	}
	/* Convert the sequencing fields to host byte order in place. */
	si->si_seq = ntohs(si->si_seq);
	si->si_ack = ntohs(si->si_ack);
	si->si_alo = ntohs(si->si_alo);

	so = ipxp->ipxp_socket;
	KASSERT(so != NULL, ("spx_input: so == NULL"));

#ifdef MAC
	if (mac_socket_check_deliver(so, m) != 0)
		goto drop;
#endif

	/* Snapshot the pre-processing state for the trace calls below. */
	if (so->so_options & SO_DEBUG || traceallspxs) {
		ostate = cb->s_state;
		spx_savesi = *si;
	}
	if (so->so_options & SO_ACCEPTCONN) {
		struct spxpcb *ocb = cb;

		so = sonewconn(so, 0);
		if (so == NULL)
			goto drop;

		/*
		 * This is ugly, but ....
		 *
		 * Mark socket as temporary until we're committed to keeping
		 * it.  The code at ``drop'' and ``dropwithreset'' check the
		 * flag dropsocket to see if the temporary socket created
		 * here should be discarded.  We mark the socket as
		 * discardable until we're committed to it below in
		 * TCPS_LISTEN.
		 *
		 * XXXRW: In the new world order of real kernel parallelism,
		 * temporarily allocating the socket when we're "not sure"
		 * seems like a bad idea, as we might race to remove it if
		 * the listen socket is closed...?
		 *
		 * We drop the lock of the listen socket ipxp, and acquire
		 * the lock of the new socket ipxp.
		 */
		dropsocket++;
		IPX_UNLOCK(ipxp);
		ipxp = (struct ipxpcb *)so->so_pcb;
		IPX_LOCK(ipxp);
		ipxp->ipxp_laddr = si->si_dna;
		cb = ipxtospxpcb(ipxp);
		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
		cb->s_state = TCPS_LISTEN;
	}
	IPX_LOCK_ASSERT(ipxp);

	/*
	 * Packet received on connection.  Reset idle time and keep-alive
	 * timer.
	 */
	cb->s_idle = 0;
	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;

	/* States other than the three handled here need no work in the switch. */
	switch (cb->s_state) {
	case TCPS_LISTEN:{
		struct sockaddr_ipx *sipx, ssipx;
		struct ipx_addr laddr;

		/*
		 * If somebody here was carrying on a conversation and went
		 * away, and his pen pal thinks he can still talk, we get the
		 * misdirected packet.
		 */
		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
			spx_istat.gonawy++;
			goto dropwithreset;
		}
		sipx = &ssipx;
		bzero(sipx, sizeof *sipx);
		sipx->sipx_len = sizeof(*sipx);
		sipx->sipx_family = AF_IPX;
		sipx->sipx_addr = si->si_sna;
		/* Remember the local address so it can be restored on failure. */
		laddr = ipxp->ipxp_laddr;
		if (ipx_nullhost(laddr))
			ipxp->ipxp_laddr = si->si_dna;
		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
			ipxp->ipxp_laddr = laddr;
			spx_istat.noconn++;
			goto drop;
		}
		spx_template(cb);
		dropsocket = 0;		/* committed to socket */
		cb->s_did = si->si_sid;
		cb->s_rack = si->si_ack;
		cb->s_ralo = si->si_alo;
		/*
		 * THREEWAYSHAKE is defined unconditionally right here, so the
		 * LISTEN case ends at the break below and TCPS_SYN_RECEIVED
		 * is a case of its own.  Were it not defined, the
		 * #ifdef/#endif pair would splice the LISTEN case directly
		 * into the code following the #endif.
		 */
#define THREEWAYSHAKE
#ifdef THREEWAYSHAKE
		cb->s_state = TCPS_SYN_RECEIVED;
		/* A non-zero s_force makes the code below call spx_output(). */
		cb->s_force = 1 + SPXT_KEEP;
		spxstat.spxs_accepts++;
		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
		}
		break;

	 case TCPS_SYN_RECEIVED: {
		/*
		 * This state means that we have heard a response to our
		 * acceptance of their connection.  It is probably logically
		 * unnecessary in this implementation.
		 */
		if (si->si_did != cb->s_sid) {
			spx_istat.wrncon++;
			goto drop;
		}
#endif
		ipxp->ipxp_fport =  si->si_sport;
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
		soisconnected(so);
		cb->s_state = TCPS_ESTABLISHED;
		spxstat.spxs_accepts++;
		}
		break;

	case TCPS_SYN_SENT:
		/*
		 * This state means that we have gotten a response to our
		 * attempt to establish a connection.  We fill in the data
		 * from the other side, telling us which port to respond to,
		 * instead of the well-known one we might have sent to in the
		 * first place.  We also require that this is a response to
		 * our connection id.
		 */
		if (si->si_did != cb->s_sid) {
			spx_istat.notme++;
			goto drop;
		}
		spxstat.spxs_connects++;
		cb->s_did = si->si_sid;
		cb->s_rack = si->si_ack;
		cb->s_ralo = si->si_alo;
		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_flags |= SF_ACKNOW;
		soisconnected(so);
		cb->s_state = TCPS_ESTABLISHED;

		/*
		 * Use roundtrip time of connection request for initial rtt.
		 */
		if (cb->s_rtt) {
			cb->s_srtt = cb->s_rtt << 3;
			cb->s_rttvar = cb->s_rtt << 1;
			SPXT_RANGESET(cb->s_rxtcur,
			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
			    SPXTV_MIN, SPXTV_REXMTMAX);
			    cb->s_rtt = 0;
		}
	}

	if (so->so_options & SO_DEBUG || traceallspxs)
		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);

	/* Step past the IPX header before handing the packet to reassembly. */
	m->m_len -= sizeof(struct ipx);
	m->m_pkthdr.len -= sizeof(struct ipx);
	m->m_data += sizeof(struct ipx);

	/* A non-zero return from spx_reass() leaves the mbuf for us to free. */
	if (spx_reass(cb, m, si))
		m_freem(m);
	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
		spx_output(cb, NULL);
	cb->s_flags &= ~(SF_WIN|SF_RXT);
	IPX_UNLOCK(ipxp);
	IPX_LIST_UNLOCK();
	return;

dropwithreset:
	IPX_LOCK_ASSERT(ipxp);
	if (cb == NULL || (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
	    traceallspxs))
		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
	IPX_UNLOCK(ipxp);
	if (dropsocket) {
		struct socket *head;
		/*
		 * Unhook the not-yet-committed socket from the listen
		 * socket's incomplete queue, then abort it.
		 */
		ACCEPT_LOCK();
		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
		head = so->so_head;
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		so->so_qstate &= ~SQ_INCOMP;
		so->so_head = NULL;
		ACCEPT_UNLOCK();
		soabort(so);
	}
	IPX_LIST_UNLOCK();
	m_freem(m);
	return;

drop:
	/*
	 * NOTE(review): unlike dropwithreset, this path never looks at
	 * dropsocket, although the comment next to sonewconn() says both do;
	 * a failed ipx_pcbconnect() arrives here with dropsocket still set.
	 * Also, the IPXP_DROPPED check jumps here before spx_savesi has been
	 * filled in.  Confirm both are intended.
	 */
	IPX_LOCK_ASSERT(ipxp);
	if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
	IPX_UNLOCK(ipxp);
	IPX_LIST_UNLOCK();
	m_freem(m);
}
426
/*
 * Control-input hook for SPX.  All three arguments are ignored and no
 * action is taken.
 */
void
spx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
{
	/* Intentionally empty: nothing is done here at present. */
}
433
/*
 * Queue new data for transmission (if any) and send whatever the current
 * connection state calls for.
 *
 * cb	SPX control block; the caller must hold the lock of cb->s_ipxpcb.
 * m0	optional chain of user data.  When non-NULL it is prefixed with a
 *	struct spx header and appended to the socket's send buffer, or freed
 *	on error.  Data larger than cb->s_mtu is rejected with EMSGSIZE when
 *	SF_PI is set and is otherwise sent in mtu-sized pieces by calling
 *	this function recursively.
 *
 * Returns 0 on success or when there is nothing to send; otherwise
 * EMSGSIZE, ENOBUFS, EINVAL or the error returned by ipx_outputfl().
 */
int
spx_output(struct spxpcb *cb, struct mbuf *m0)
{
	struct socket *so = cb->s_ipxpcb->ipxp_socket;
	struct mbuf *m = NULL;
	struct spx *si = NULL;
	struct sockbuf *sb = &so->so_snd;
	int len = 0, win, rcv_win;
	short span, off, recordp = 0;
	u_short alo;
	int error = 0, sendalot;
#ifdef notdef
	int idle;
#endif
	struct mbuf *mprev;

	IPX_LOCK_ASSERT(cb->s_ipxpcb);

	if (m0 != NULL) {
		int mtu = cb->s_mtu;
		int datalen;

		/*
		 * Make sure that packet isn't too big.
		 */
		/* Also remembers the last mbuf (mprev) and any M_EOR mark. */
		for (m = m0; m != NULL; m = m->m_next) {
			mprev = m;
			len += m->m_len;
			if (m->m_flags & M_EOR)
				recordp = 1;
		}
		/* With SF_HO the chain starts with a caller-supplied header. */
		datalen = (cb->s_flags & SF_HO) ?
				len - sizeof(struct spxhdr) : len;
		if (datalen > mtu) {
			if (cb->s_flags & SF_PI) {
				m_freem(m0);
				return (EMSGSIZE);
			} else {
				/*
				 * Send the leading mtu-sized pieces with
				 * SPX_EM cleared; the saved bit is restored
				 * on every exit from this branch.
				 */
				int oldEM = cb->s_cc & SPX_EM;

				cb->s_cc &= ~SPX_EM;
				while (len > mtu) {
					m = m_copym(m0, 0, mtu, M_DONTWAIT);
					if (m == NULL) {
					    cb->s_cc |= oldEM;
					    m_freem(m0);
					    return (ENOBUFS);
					}
					/*
					 * NOTE(review): SF_NEWCALL is tested
					 * against s_flags here but against
					 * s_flags2 further down and in
					 * spx_ctloutput() -- confirm which
					 * field is intended.
					 */
					if (cb->s_flags & SF_NEWCALL) {
					    struct mbuf *mm = m;
					    spx_newchecks[7]++;
					    while (mm != NULL) {
						mm->m_flags &= ~M_EOR;
						mm = mm->m_next;
					    }
					}
					error = spx_output(cb, m);
					if (error) {
						cb->s_cc |= oldEM;
						m_freem(m0);
						return (error);
					}
					m_adj(m0, mtu);
					len -= mtu;
				}
				cb->s_cc |= oldEM;
			}
		}

		/*
		 * Force length even, by adding a "garbage byte" if
		 * necessary.
		 */
		if (len & 1) {
			m = mprev;
			if (M_TRAILINGSPACE(m) >= 1)
				m->m_len++;
			else {
				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);

				if (m1 == NULL) {
					m_freem(m0);
					return (ENOBUFS);
				}
				m1->m_len = 1;
				*(mtod(m1, u_char *)) = 0;
				m->m_next = m1;
			}
		}
		m = m_gethdr(M_DONTWAIT, MT_DATA);
		if (m == NULL) {
			m_freem(m0);
			return (ENOBUFS);
		}

		/*
		 * Fill in mbuf with extended SP header and addresses and
		 * length put into network format.
		 */
		MH_ALIGN(m, sizeof(struct spx));
		m->m_len = sizeof(struct spx);
		m->m_next = m0;
		si = mtod(m, struct spx *);
		si->si_i = cb->s_ipx;
		si->si_s = cb->s_shdr;
		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
			/*
			 * Take the datastream type and end-of-message bit
			 * from the caller-supplied header, then strip that
			 * header from the data.
			 */
			struct spxhdr *sh;
			if (m0->m_len < sizeof(*sh)) {
				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
					m_free(m);
					m_freem(m0);
					return (EINVAL);
				}
				m->m_next = m0;
			}
			sh = mtod(m0, struct spxhdr *);
			si->si_dt = sh->spx_dt;
			si->si_cc |= sh->spx_cc & SPX_EM;
			m0->m_len -= sizeof(*sh);
			m0->m_data += sizeof(*sh);
			len -= sizeof(*sh);
		}
		len += sizeof(*si);
		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
			si->si_cc |= SPX_EM;
			spx_newchecks[8]++;
		}
		if (cb->s_oobflags & SF_SOOB) {
			/*
			 * Per jqj@cornell: Make sure OB packets convey
			 * exactly 1 byte.  If the packet is 1 byte or
			 * larger, we have already guaranteed there to be at
			 * least one garbage byte for the checksum, and extra
			 * bytes shouldn't hurt!
			 */
			if (len > sizeof(*si)) {
				si->si_cc |= SPX_OB;
				len = (1 + sizeof(*si));
			}
		}
		si->si_len = htons((u_short)len);
		/* Packet header length is len rounded up to an even value. */
		m->m_pkthdr.len = ((len - 1) | 1) + 1;

		/*
		 * Queue stuff up for output.
		 */
		sbappendrecord(sb, m);
		cb->s_seq++;
	}
#ifdef notdef
	idle = (cb->s_smax == (cb->s_rack - 1));
#endif
again:
	sendalot = 0;
	off = cb->s_snxt - cb->s_rack;
	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));

	/*
	 * If in persist timeout with window of 0, send a probe.  Otherwise,
	 * if window is small but non-zero and timer expired, send what we
	 * can and go into transmit state.
	 */
	if (cb->s_force == 1 + SPXT_PERSIST) {
		if (win != 0) {
			cb->s_timer[SPXT_PERSIST] = 0;
			cb->s_rxtshift = 0;
		}
	}
	span = cb->s_seq - cb->s_rack;
	len = min(span, win) - off;

	if (len < 0) {
		/*
		 * Window shrank after we went into it.  If window shrank to
		 * 0, cancel pending retransmission and pull s_snxt back to
		 * (closed) window.  We will enter persist state below.  If
		 * the window didn't close completely, just wait for an ACK.
		 */
		len = 0;
		if (win == 0) {
			cb->s_timer[SPXT_REXMT] = 0;
			cb->s_snxt = cb->s_rack;
		}
	}
	/* More than one packet is eligible: loop back to "again" after sending. */
	if (len > 1)
		sendalot = 1;
	rcv_win = sbspace(&so->so_rcv);

	/*
	 * Send if we owe peer an ACK.
	 */
	if (cb->s_oobflags & SF_SOOB) {
		/*
		 * Must transmit this out of band packet.
		 */
		cb->s_oobflags &= ~ SF_SOOB;
		sendalot = 1;
		spxstat.spxs_sndurg++;
		goto found;
	}
	if (cb->s_flags & SF_ACKNOW)
		goto send;
	if (cb->s_state < TCPS_ESTABLISHED)
		goto send;

	/*
	 * Silly window can't happen in spx.  Code from TCP deleted.
	 */
	if (len)
		goto send;

	/*
	 * Compare available window to amount of window known to peer (as
	 * advertised window less next expected input.)  If the difference is
	 * at least two packets or at least 35% of the maximum possible
	 * window, then want to send a window update to peer.
	 */
	if (rcv_win > 0) {
		u_short delta =  1 + cb->s_alo - cb->s_ack;
		int adv = rcv_win - (delta * cb->s_mtu);

		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
			spxstat.spxs_sndwinup++;
			cb->s_flags |= SF_ACKNOW;
			goto send;
		}

	}

	/*
	 * Many comments from tcp_output.c are appropriate here including ...
	 * If send window is too small, there is data to transmit, and no
	 * retransmit or persist is pending, then go to persist state.  If
	 * nothing happens soon, send when timer expires: if window is
	 * non-zero, transmit what we can, otherwise send a probe.
	 */
	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
	    cb->s_timer[SPXT_PERSIST] == 0) {
		cb->s_rxtshift = 0;
		spx_setpersist(cb);
	}

	/*
	 * No reason to send a packet, just return.
	 */
	cb->s_outx = 1;
	return (0);

send:
	/*
	 * Find requested packet.
	 */
	si = NULL;
	m = NULL;
	if (len > 0) {
		cb->s_want = cb->s_snxt;
		/* Walk the queued records for the first one at or past s_snxt. */
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			si = mtod(m, struct spx *);
			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
				break;
		}
	found:
		if (si != NULL) {
			if (si->si_seq != cb->s_snxt) {
				spxstat.spxs_sndvoid++;
				si = NULL;
				m = NULL;
			} else
				cb->s_snxt++;
		}
	}

	/*
	 * Update window.
	 */
	if (rcv_win < 0)
		rcv_win = 0;
	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
	/* Never advertise less than what was advertised before. */
	if (SSEQ_LT(alo, cb->s_alo))
		alo = cb->s_alo;

	if (m != NULL) {
		/*
		 * Must make a copy of this packet for ipx_output to monkey
		 * with.
		 */
		m = m_copy(m, 0, M_COPYALL);
		if (m == NULL)
			return (ENOBUFS);
		si = mtod(m, struct spx *);
		if (SSEQ_LT(si->si_seq, cb->s_smax))
			spxstat.spxs_sndrexmitpack++;
		else
			spxstat.spxs_sndpack++;
	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
		/*
		 * Must send an acknowledgement or a probe.
		 */
		if (cb->s_force)
			spxstat.spxs_sndprobe++;
		if (cb->s_flags & SF_ACKNOW)
			spxstat.spxs_sndacks++;
		m = m_gethdr(M_DONTWAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);

		/*
		 * Fill in mbuf with extended SP header and addresses and
		 * length put into network format.
		 */
		MH_ALIGN(m, sizeof(struct spx));
		m->m_len = sizeof(*si);
		m->m_pkthdr.len = sizeof(*si);
		si = mtod(m, struct spx *);
		si->si_i = cb->s_ipx;
		si->si_s = cb->s_shdr;
		si->si_seq = cb->s_smax + 1;
		si->si_len = htons(sizeof(*si));
		/* Header-only packet: flagged with SPX_SP. */
		si->si_cc |= SPX_SP;
	} else {
		cb->s_outx = 3;
		if (so->so_options & SO_DEBUG || traceallspxs)
			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
		return (0);
	}

	/*
	 * Stuff checksum and output datagram.
	 */
	if ((si->si_cc & SPX_SP) == 0) {
		if (cb->s_force != (1 + SPXT_PERSIST) ||
		    cb->s_timer[SPXT_PERSIST] == 0) {
			/*
			 * If this is a new packet and we are not currently
			 * timing anything, time this one.
			 */
			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
				cb->s_smax = si->si_seq;
				if (cb->s_rtt == 0) {
					spxstat.spxs_segstimed++;
					cb->s_rtseq = si->si_seq;
					cb->s_rtt = 1;
				}
			}

			/*
			 * Set rexmt timer if not currently set, initial
			 * value for retransmit timer is smoothed round-trip
			 * time + 2 * round-trip time variance.  Initialize
			 * shift counter which is used for backoff of
			 * retransmit time.
			 */
			if (cb->s_timer[SPXT_REXMT] == 0 &&
			    cb->s_snxt != cb->s_rack) {
				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
				if (cb->s_timer[SPXT_PERSIST]) {
					cb->s_timer[SPXT_PERSIST] = 0;
					cb->s_rxtshift = 0;
				}
			}
		} else if (SSEQ_LT(cb->s_smax, si->si_seq))
			cb->s_smax = si->si_seq;
	} else if (cb->s_state < TCPS_ESTABLISHED) {
		if (cb->s_rtt == 0)
			cb->s_rtt = 1; /* Time initial handshake */
		if (cb->s_timer[SPXT_REXMT] == 0)
			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
	}

	/*
	 * Do not request acks when we ack their data packets or when we do a
	 * gratuitous window update.
	 */
	if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
		si->si_cc |= SPX_SA;
	/* Put the sequencing fields into network byte order for the wire. */
	si->si_seq = htons(si->si_seq);
	si->si_alo = htons(alo);
	si->si_ack = htons(cb->s_ack);

	if (ipxcksum)
		si->si_sum = ipx_cksum(m, ntohs(si->si_len));
	else
		si->si_sum = 0xffff;

	cb->s_outx = 4;
	if (so->so_options & SO_DEBUG || traceallspxs)
		spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);

#ifdef MAC
	mac_socket_create_mbuf(so, m);
#endif

	if (so->so_options & SO_DONTROUTE)
		error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
	else
		error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
	if (error)
		return (error);
	spxstat.spxs_sndtotal++;

	/*
	 * Data sent (as far as we can tell).  If this advertises a larger
	 * window than any other segment, then remember the size of the
	 * advertised window.  Any pending ACK has now been sent.
	 */
	cb->s_force = 0;
	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
	if (SSEQ_GT(alo, cb->s_alo))
		cb->s_alo = alo;
	if (sendalot)
		goto again;
	cb->s_outx = 5;
	return (0);
}
849
850static int spx_do_persist_panics = 0;
851
852static void
853spx_setpersist(struct spxpcb *cb)
854{
855	int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
856
857	IPX_LOCK_ASSERT(cb->s_ipxpcb);
858
859	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
860		panic("spx_output REXMT");
861
862	/*
863	 * Start/restart persistance timer.
864	 */
865	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
866	    t*spx_backoff[cb->s_rxtshift],
867	    SPXTV_PERSMIN, SPXTV_PERSMAX);
868	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
869		cb->s_rxtshift++;
870}
871
872int
873spx_ctloutput(struct socket *so, struct sockopt *sopt)
874{
875	struct spxhdr spxhdr;
876	struct ipxpcb *ipxp;
877	struct spxpcb *cb;
878	int mask, error;
879	short soptval;
880	u_short usoptval;
881	int optval;
882
883	ipxp = sotoipxpcb(so);
884	KASSERT(ipxp != NULL, ("spx_ctloutput: ipxp == NULL"));
885
886	/*
887	 * This will have to be changed when we do more general stacking of
888	 * protocols.
889	 */
890	if (sopt->sopt_level != IPXPROTO_SPX)
891		return (ipx_ctloutput(so, sopt));
892
893	IPX_LOCK(ipxp);
894	if (ipxp->ipxp_flags & IPXP_DROPPED) {
895		IPX_UNLOCK(ipxp);
896		return (ECONNRESET);
897	}
898
899	IPX_LOCK(ipxp);
900	cb = ipxtospxpcb(ipxp);
901	KASSERT(cb != NULL, ("spx_ctloutput: cb == NULL"));
902
903	error = 0;
904	switch (sopt->sopt_dir) {
905	case SOPT_GET:
906		switch (sopt->sopt_name) {
907		case SO_HEADERS_ON_INPUT:
908			mask = SF_HI;
909			goto get_flags;
910
911		case SO_HEADERS_ON_OUTPUT:
912			mask = SF_HO;
913		get_flags:
914			soptval = cb->s_flags & mask;
915			IPX_UNLOCK(ipxp);
916			error = sooptcopyout(sopt, &soptval,
917			    sizeof(soptval));
918			break;
919
920		case SO_MTU:
921			usoptval = cb->s_mtu;
922			IPX_UNLOCK(ipxp);
923			error = sooptcopyout(sopt, &usoptval,
924			    sizeof(usoptval));
925			break;
926
927		case SO_LAST_HEADER:
928			spxhdr = cb->s_rhdr;
929			IPX_UNLOCK(ipxp);
930			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
931			break;
932
933		case SO_DEFAULT_HEADERS:
934			spxhdr = cb->s_shdr;
935			IPX_UNLOCK(ipxp);
936			error = sooptcopyout(sopt, &spxhdr, sizeof(spxhdr));
937			break;
938
939		default:
940			IPX_UNLOCK(ipxp);
941			error = ENOPROTOOPT;
942		}
943		break;
944
945	case SOPT_SET:
946		/*
947		 * XXX Why are these shorts on get and ints on set?  That
948		 * doesn't make any sense...
949		 *
950		 * XXXRW: Note, when we re-acquire the ipxp lock, we should
951		 * re-check that it's not dropped.
952		 */
953		IPX_UNLOCK(ipxp);
954		switch (sopt->sopt_name) {
955		case SO_HEADERS_ON_INPUT:
956			mask = SF_HI;
957			goto set_head;
958
959		case SO_HEADERS_ON_OUTPUT:
960			mask = SF_HO;
961		set_head:
962			error = sooptcopyin(sopt, &optval, sizeof optval,
963					    sizeof optval);
964			if (error)
965				break;
966
967			IPX_LOCK(ipxp);
968			if (cb->s_flags & SF_PI) {
969				if (optval)
970					cb->s_flags |= mask;
971				else
972					cb->s_flags &= ~mask;
973			} else error = EINVAL;
974			IPX_UNLOCK(ipxp);
975			break;
976
977		case SO_MTU:
978			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
979					    sizeof usoptval);
980			if (error)
981				break;
982			/* Unlocked write. */
983			cb->s_mtu = usoptval;
984			break;
985
986#ifdef SF_NEWCALL
987		case SO_NEWCALL:
988			error = sooptcopyin(sopt, &optval, sizeof optval,
989					    sizeof optval);
990			if (error)
991				break;
992			IPX_LOCK(ipxp);
993			if (optval) {
994				cb->s_flags2 |= SF_NEWCALL;
995				spx_newchecks[5]++;
996			} else {
997				cb->s_flags2 &= ~SF_NEWCALL;
998				spx_newchecks[6]++;
999			}
1000			IPX_UNLOCK(ipxp);
1001			break;
1002#endif
1003
1004		case SO_DEFAULT_HEADERS:
1005			{
1006				struct spxhdr sp;
1007
1008				error = sooptcopyin(sopt, &sp, sizeof sp,
1009						    sizeof sp);
1010				if (error)
1011					break;
1012				IPX_LOCK(ipxp);
1013				cb->s_dt = sp.spx_dt;
1014				cb->s_cc = sp.spx_cc & SPX_EM;
1015				IPX_UNLOCK(ipxp);
1016			}
1017			break;
1018
1019		default:
1020			error = ENOPROTOOPT;
1021		}
1022		break;
1023
1024	default:
1025		panic("spx_ctloutput: bad socket option direction");
1026	}
1027	return (error);
1028}
1029
1030static void
1031spx_usr_abort(struct socket *so)
1032{
1033	struct ipxpcb *ipxp;
1034	struct spxpcb *cb;
1035
1036	ipxp = sotoipxpcb(so);
1037	KASSERT(ipxp != NULL, ("spx_usr_abort: ipxp == NULL"));
1038
1039	cb = ipxtospxpcb(ipxp);
1040	KASSERT(cb != NULL, ("spx_usr_abort: cb == NULL"));
1041
1042	IPX_LIST_LOCK();
1043	IPX_LOCK(ipxp);
1044	spx_drop(cb, ECONNABORTED);
1045	IPX_UNLOCK(ipxp);
1046	IPX_LIST_UNLOCK();
1047}
1048
1049/*
1050 * Accept a connection.  Essentially all the work is done at higher levels;
1051 * just return the address of the peer, storing through addr.
1052 */
1053static int
1054spx_accept(struct socket *so, struct sockaddr **nam)
1055{
1056	struct ipxpcb *ipxp;
1057	struct sockaddr_ipx *sipx, ssipx;
1058
1059	ipxp = sotoipxpcb(so);
1060	KASSERT(ipxp != NULL, ("spx_accept: ipxp == NULL"));
1061
1062	sipx = &ssipx;
1063	bzero(sipx, sizeof *sipx);
1064	sipx->sipx_len = sizeof *sipx;
1065	sipx->sipx_family = AF_IPX;
1066	IPX_LOCK(ipxp);
1067	sipx->sipx_addr = ipxp->ipxp_faddr;
1068	IPX_UNLOCK(ipxp);
1069	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1070	return (0);
1071}
1072
1073static int
1074spx_attach(struct socket *so, int proto, struct thread *td)
1075{
1076	struct ipxpcb *ipxp;
1077	struct spxpcb *cb;
1078	struct mbuf *mm;
1079	struct sockbuf *sb;
1080	int error;
1081
1082	ipxp = sotoipxpcb(so);
1083	KASSERT(ipxp == NULL, ("spx_attach: ipxp != NULL"));
1084
1085	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1086		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1087		if (error)
1088			return (error);
1089	}
1090
1091	cb = malloc(sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1092	if (cb == NULL)
1093		return (ENOBUFS);
1094	mm = m_getclr(M_DONTWAIT, MT_DATA);
1095	if (mm == NULL) {
1096		free(cb, M_PCB);
1097		return (ENOBUFS);
1098	}
1099
1100	IPX_LIST_LOCK();
1101	error = ipx_pcballoc(so, &ipxpcb_list, td);
1102	if (error) {
1103		IPX_LIST_UNLOCK();
1104		m_free(mm);
1105		free(cb, M_PCB);
1106		return (error);
1107	}
1108	ipxp = sotoipxpcb(so);
1109	ipxp->ipxp_flags |= IPXP_SPX;
1110
1111	cb->s_state = TCPS_LISTEN;
1112	cb->s_smax = -1;
1113	cb->s_swl1 = -1;
1114	spx_reass_init(cb);
1115	cb->s_ipxpcb = ipxp;
1116	cb->s_mtu = 576 - sizeof(struct spx);
1117	sb = &so->so_snd;
1118	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1119	cb->s_ssthresh = cb->s_cwnd;
1120	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1121
1122	/*
1123	 * Above is recomputed when connecting to account for changed
1124	 * buffering or mtu's.
1125	 */
1126	cb->s_rtt = SPXTV_SRTTBASE;
1127	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1128	SPXT_RANGESET(cb->s_rxtcur,
1129	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1130	    SPXTV_MIN, SPXTV_REXMTMAX);
1131	ipxp->ipxp_pcb = (caddr_t)cb;
1132	IPX_LIST_UNLOCK();
1133	return (0);
1134}
1135
1136static void
1137spx_pcbdetach(struct ipxpcb *ipxp)
1138{
1139	struct spxpcb *cb;
1140
1141	IPX_LOCK_ASSERT(ipxp);
1142
1143	cb = ipxtospxpcb(ipxp);
1144	KASSERT(cb != NULL, ("spx_pcbdetach: cb == NULL"));
1145
1146	spx_reass_flush(cb);
1147	free(cb, M_PCB);
1148	ipxp->ipxp_pcb = NULL;
1149}
1150
1151static int
1152spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1153{
1154	struct ipxpcb *ipxp;
1155	int error;
1156
1157	ipxp = sotoipxpcb(so);
1158	KASSERT(ipxp != NULL, ("spx_bind: ipxp == NULL"));
1159
1160	IPX_LIST_LOCK();
1161	IPX_LOCK(ipxp);
1162	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1163		error = EINVAL;
1164		goto out;
1165	}
1166	error = ipx_pcbbind(ipxp, nam, td);
1167out:
1168	IPX_UNLOCK(ipxp);
1169	IPX_LIST_UNLOCK();
1170	return (error);
1171}
1172
1173static void
1174spx_usr_close(struct socket *so)
1175{
1176	struct ipxpcb *ipxp;
1177	struct spxpcb *cb;
1178
1179	ipxp = sotoipxpcb(so);
1180	KASSERT(ipxp != NULL, ("spx_usr_close: ipxp == NULL"));
1181
1182	cb = ipxtospxpcb(ipxp);
1183	KASSERT(cb != NULL, ("spx_usr_close: cb == NULL"));
1184
1185	IPX_LIST_LOCK();
1186	IPX_LOCK(ipxp);
1187	if (cb->s_state > TCPS_LISTEN)
1188		spx_disconnect(cb);
1189	else
1190		spx_close(cb);
1191	IPX_UNLOCK(ipxp);
1192	IPX_LIST_UNLOCK();
1193}
1194
1195/*
1196 * Initiate connection to peer.  Enter SYN_SENT state, and mark socket as
1197 * connecting.  Start keep-alive timer, setup prototype header, send initial
1198 * system packet requesting connection.
1199 */
1200static int
1201spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1202{
1203	struct ipxpcb *ipxp;
1204	struct spxpcb *cb;
1205	int error;
1206
1207	ipxp = sotoipxpcb(so);
1208	KASSERT(ipxp != NULL, ("spx_connect: ipxp == NULL"));
1209
1210	cb = ipxtospxpcb(ipxp);
1211	KASSERT(cb != NULL, ("spx_connect: cb == NULL"));
1212
1213	IPX_LIST_LOCK();
1214	IPX_LOCK(ipxp);
1215	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1216		error = EINVAL;
1217		goto spx_connect_end;
1218	}
1219	if (ipxp->ipxp_lport == 0) {
1220		error = ipx_pcbbind(ipxp, NULL, td);
1221		if (error)
1222			goto spx_connect_end;
1223	}
1224	error = ipx_pcbconnect(ipxp, nam, td);
1225	if (error)
1226		goto spx_connect_end;
1227	soisconnecting(so);
1228	spxstat.spxs_connattempt++;
1229	cb->s_state = TCPS_SYN_SENT;
1230	cb->s_did = 0;
1231	spx_template(cb);
1232	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1233	cb->s_force = 1 + SPXTV_KEEP;
1234
1235	/*
1236	 * Other party is required to respond to the port I send from, but he
1237	 * is not required to answer from where I am sending to, so allow
1238	 * wildcarding.  Original port I am sending to is still saved in
1239	 * cb->s_dport.
1240	 */
1241	ipxp->ipxp_fport = 0;
1242	error = spx_output(cb, NULL);
1243spx_connect_end:
1244	IPX_UNLOCK(ipxp);
1245	IPX_LIST_UNLOCK();
1246	return (error);
1247}
1248
1249static void
1250spx_detach(struct socket *so)
1251{
1252	struct ipxpcb *ipxp;
1253	struct spxpcb *cb;
1254
1255	/*
1256	 * XXXRW: Should assert appropriately detached.
1257	 */
1258	ipxp = sotoipxpcb(so);
1259	KASSERT(ipxp != NULL, ("spx_detach: ipxp == NULL"));
1260
1261	cb = ipxtospxpcb(ipxp);
1262	KASSERT(cb != NULL, ("spx_detach: cb == NULL"));
1263
1264	IPX_LIST_LOCK();
1265	IPX_LOCK(ipxp);
1266	spx_pcbdetach(ipxp);
1267	ipx_pcbdetach(ipxp);
1268	ipx_pcbfree(ipxp);
1269	IPX_LIST_UNLOCK();
1270}
1271
1272/*
1273 * We may decide later to implement connection closing handshaking at the spx
1274 * level optionally.  Here is the hook to do it:
1275 */
1276static int
1277spx_usr_disconnect(struct socket *so)
1278{
1279	struct ipxpcb *ipxp;
1280	struct spxpcb *cb;
1281	int error;
1282
1283	ipxp = sotoipxpcb(so);
1284	KASSERT(ipxp != NULL, ("spx_usr_disconnect: ipxp == NULL"));
1285
1286	cb = ipxtospxpcb(ipxp);
1287	KASSERT(cb != NULL, ("spx_usr_disconnect: cb == NULL"));
1288
1289	IPX_LIST_LOCK();
1290	IPX_LOCK(ipxp);
1291	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1292		error = EINVAL;
1293		goto out;
1294	}
1295	spx_disconnect(cb);
1296	error = 0;
1297out:
1298	IPX_UNLOCK(ipxp);
1299	IPX_LIST_UNLOCK();
1300	return (error);
1301}
1302
1303static int
1304spx_listen(struct socket *so, int backlog, struct thread *td)
1305{
1306	int error;
1307	struct ipxpcb *ipxp;
1308	struct spxpcb *cb;
1309
1310	error = 0;
1311	ipxp = sotoipxpcb(so);
1312	KASSERT(ipxp != NULL, ("spx_listen: ipxp == NULL"));
1313
1314	cb = ipxtospxpcb(ipxp);
1315	KASSERT(cb != NULL, ("spx_listen: cb == NULL"));
1316
1317	IPX_LIST_LOCK();
1318	IPX_LOCK(ipxp);
1319	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1320		error = EINVAL;
1321		goto out;
1322	}
1323	SOCK_LOCK(so);
1324	error = solisten_proto_check(so);
1325	if (error == 0 && ipxp->ipxp_lport == 0)
1326		error = ipx_pcbbind(ipxp, NULL, td);
1327	if (error == 0) {
1328		cb->s_state = TCPS_LISTEN;
1329		solisten_proto(so, backlog);
1330	}
1331	SOCK_UNLOCK(so);
1332out:
1333	IPX_UNLOCK(ipxp);
1334	IPX_LIST_UNLOCK();
1335	return (error);
1336}
1337
1338/*
1339 * After a receive, possibly send acknowledgment updating allocation.
1340 */
1341static int
1342spx_rcvd(struct socket *so, int flags)
1343{
1344	struct ipxpcb *ipxp;
1345	struct spxpcb *cb;
1346	int error;
1347
1348	ipxp = sotoipxpcb(so);
1349	KASSERT(ipxp != NULL, ("spx_rcvd: ipxp == NULL"));
1350
1351	cb = ipxtospxpcb(ipxp);
1352	KASSERT(cb != NULL, ("spx_rcvd: cb == NULL"));
1353
1354	IPX_LOCK(ipxp);
1355	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1356		error = EINVAL;
1357		goto out;
1358	}
1359	cb->s_flags |= SF_RVD;
1360	spx_output(cb, NULL);
1361	cb->s_flags &= ~SF_RVD;
1362	error = 0;
1363out:
1364	IPX_UNLOCK(ipxp);
1365	return (error);
1366}
1367
1368static int
1369spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1370{
1371	struct ipxpcb *ipxp;
1372	struct spxpcb *cb;
1373	int error;
1374
1375	ipxp = sotoipxpcb(so);
1376	KASSERT(ipxp != NULL, ("spx_rcvoob: ipxp == NULL"));
1377
1378	cb = ipxtospxpcb(ipxp);
1379	KASSERT(cb != NULL, ("spx_rcvoob: cb == NULL"));
1380
1381	IPX_LOCK(ipxp);
1382	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1383		error = EINVAL;
1384		goto out;
1385	}
1386	SOCKBUF_LOCK(&so->so_rcv);
1387	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1388	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1389		SOCKBUF_UNLOCK(&so->so_rcv);
1390		m->m_len = 1;
1391		*mtod(m, caddr_t) = cb->s_iobc;
1392		error = 0;
1393		goto out;
1394	}
1395	SOCKBUF_UNLOCK(&so->so_rcv);
1396	error = EINVAL;
1397out:
1398	IPX_UNLOCK(ipxp);
1399	return (error);
1400}
1401
1402static int
1403spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1404    struct mbuf *controlp, struct thread *td)
1405{
1406	struct ipxpcb *ipxp;
1407	struct spxpcb *cb;
1408	int error;
1409
1410	ipxp = sotoipxpcb(so);
1411	KASSERT(ipxp != NULL, ("spx_send: ipxp == NULL"));
1412
1413	cb = ipxtospxpcb(ipxp);
1414	KASSERT(cb != NULL, ("spx_send: cb == NULL"));
1415
1416	error = 0;
1417	IPX_LOCK(ipxp);
1418	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1419		error = ECONNRESET;
1420		goto spx_send_end;
1421	}
1422	if (flags & PRUS_OOB) {
1423		if (sbspace(&so->so_snd) < -512) {
1424			error = ENOBUFS;
1425			goto spx_send_end;
1426		}
1427		cb->s_oobflags |= SF_SOOB;
1428	}
1429	if (controlp != NULL) {
1430		u_short *p = mtod(controlp, u_short *);
1431		spx_newchecks[2]++;
1432		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1433			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1434			spx_newchecks[3]++;
1435		}
1436		m_freem(controlp);
1437	}
1438	controlp = NULL;
1439	error = spx_output(cb, m);
1440	m = NULL;
1441spx_send_end:
1442	IPX_UNLOCK(ipxp);
1443	if (controlp != NULL)
1444		m_freem(controlp);
1445	if (m != NULL)
1446		m_freem(m);
1447	return (error);
1448}
1449
1450static int
1451spx_shutdown(struct socket *so)
1452{
1453	struct ipxpcb *ipxp;
1454	struct spxpcb *cb;
1455	int error;
1456
1457	ipxp = sotoipxpcb(so);
1458	KASSERT(ipxp != NULL, ("spx_shutdown: ipxp == NULL"));
1459
1460	cb = ipxtospxpcb(ipxp);
1461	KASSERT(cb != NULL, ("spx_shutdown: cb == NULL"));
1462
1463	socantsendmore(so);
1464	IPX_LIST_LOCK();
1465	IPX_LOCK(ipxp);
1466	if (ipxp->ipxp_flags & IPXP_DROPPED) {
1467		error = EINVAL;
1468		goto out;
1469	}
1470	spx_usrclosed(cb);
1471	error = 0;
1472out:
1473	IPX_UNLOCK(ipxp);
1474	IPX_LIST_UNLOCK();
1475	return (error);
1476}
1477
1478static int
1479spx_sp_attach(struct socket *so, int proto, struct thread *td)
1480{
1481	struct ipxpcb *ipxp;
1482	struct spxpcb *cb;
1483	int error;
1484
1485	KASSERT(so->so_pcb == NULL, ("spx_sp_attach: so_pcb != NULL"));
1486
1487	error = spx_attach(so, proto, td);
1488	if (error)
1489		return (error);
1490
1491	ipxp = sotoipxpcb(so);
1492	KASSERT(ipxp != NULL, ("spx_sp_attach: ipxp == NULL"));
1493
1494	cb = ipxtospxpcb(ipxp);
1495	KASSERT(cb != NULL, ("spx_sp_attach: cb == NULL"));
1496
1497	IPX_LOCK(ipxp);
1498	cb->s_flags |= (SF_HI | SF_HO | SF_PI);
1499	IPX_UNLOCK(ipxp);
1500	return (0);
1501}
1502
1503/*
1504 * Create template to be used to send spx packets on a connection.  Called
1505 * after host entry created, fills in a skeletal spx header (choosing
1506 * connection id), minimizing the amount of work necessary when the
1507 * connection is used.
1508 */
1509static void
1510spx_template(struct spxpcb *cb)
1511{
1512	struct ipxpcb *ipxp = cb->s_ipxpcb;
1513	struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1514
1515	IPX_LOCK_ASSERT(ipxp);
1516
1517	cb->s_ipx.ipx_pt = IPXPROTO_SPX;
1518	cb->s_ipx.ipx_sna = ipxp->ipxp_laddr;
1519	cb->s_ipx.ipx_dna = ipxp->ipxp_faddr;
1520	SPX_LOCK();
1521	cb->s_sid = htons(spx_iss);
1522	spx_iss += SPX_ISSINCR/2;
1523	SPX_UNLOCK();
1524	cb->s_alo = 1;
1525	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1526
1527	/*
1528	 * Try to expand fast to full complement of large packets.
1529	 */
1530	cb->s_ssthresh = cb->s_cwnd;
1531	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1532
1533	/*
1534	 * But allow for lots of little packets as well.
1535	 */
1536	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1537}
1538
1539/*
1540 * Close a SPIP control block.  Wake up any sleepers.  We used to free any
1541 * queued packets, but now we defer that until the pcb is discarded.
1542 */
1543void
1544spx_close(struct spxpcb *cb)
1545{
1546	struct ipxpcb *ipxp = cb->s_ipxpcb;
1547	struct socket *so = ipxp->ipxp_socket;
1548
1549	KASSERT(ipxp != NULL, ("spx_close: ipxp == NULL"));
1550	IPX_LIST_LOCK_ASSERT();
1551	IPX_LOCK_ASSERT(ipxp);
1552
1553	ipxp->ipxp_flags |= IPXP_DROPPED;
1554	soisdisconnected(so);
1555	spxstat.spxs_closed++;
1556}
1557
1558/*
1559 * Someday we may do level 3 handshaking to close a connection or send a
1560 * xerox style error.  For now, just close.  cb will always be invalid after
1561 * this call.
1562 */
1563static void
1564spx_usrclosed(struct spxpcb *cb)
1565{
1566
1567	IPX_LIST_LOCK_ASSERT();
1568	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1569
1570	spx_close(cb);
1571}
1572
1573/*
1574 * cb will always be invalid after this call.
1575 */
1576static void
1577spx_disconnect(struct spxpcb *cb)
1578{
1579
1580	IPX_LIST_LOCK_ASSERT();
1581	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1582
1583	spx_close(cb);
1584}
1585
1586/*
1587 * Drop connection, reporting the specified error.  cb will always be invalid
1588 * after this call.
1589 */
1590static void
1591spx_drop(struct spxpcb *cb, int errno)
1592{
1593	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1594
1595	IPX_LIST_LOCK_ASSERT();
1596	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1597
1598	/*
1599	 * Someday, in the xerox world we will generate error protocol
1600	 * packets announcing that the socket has gone away.
1601	 */
1602	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1603		spxstat.spxs_drops++;
1604		cb->s_state = TCPS_CLOSED;
1605		/*tcp_output(cb);*/
1606	} else
1607		spxstat.spxs_conndrops++;
1608	so->so_error = errno;
1609	spx_close(cb);
1610}
1611
1612/*
1613 * Fast timeout routine for processing delayed acks.
1614 */
1615void
1616spx_fasttimo(void)
1617{
1618	struct ipxpcb *ipxp;
1619	struct spxpcb *cb;
1620
1621	IPX_LIST_LOCK();
1622	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1623		IPX_LOCK(ipxp);
1624		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1625		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1626			IPX_UNLOCK(ipxp);
1627			continue;
1628		}
1629		cb = ipxtospxpcb(ipxp);
1630		if (cb->s_flags & SF_DELACK) {
1631			cb->s_flags &= ~SF_DELACK;
1632			cb->s_flags |= SF_ACKNOW;
1633			spxstat.spxs_delack++;
1634			spx_output(cb, NULL);
1635		}
1636		IPX_UNLOCK(ipxp);
1637	}
1638	IPX_LIST_UNLOCK();
1639}
1640
1641/*
1642 * spx protocol timeout routine called every 500 ms.  Updates the timers in
1643 * all active pcb's and causes finite state machine actions if timers expire.
1644 */
1645void
1646spx_slowtimo(void)
1647{
1648	struct ipxpcb *ipxp;
1649	struct spxpcb *cb;
1650	int i;
1651
1652	/*
1653	 * Search through tcb's and update active timers.  Once, timers could
1654	 * free ipxp's, but now we do that only when detaching a socket.
1655	 */
1656	IPX_LIST_LOCK();
1657	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1658		IPX_LOCK(ipxp);
1659		if (!(ipxp->ipxp_flags & IPXP_SPX) ||
1660		    (ipxp->ipxp_flags & IPXP_DROPPED)) {
1661			IPX_UNLOCK(ipxp);
1662			continue;
1663		}
1664
1665		cb = (struct spxpcb *)ipxp->ipxp_pcb;
1666		KASSERT(cb != NULL, ("spx_slowtimo: cb == NULL"));
1667		for (i = 0; i < SPXT_NTIMERS; i++) {
1668			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1669				spx_timers(cb, i);
1670				if (ipxp->ipxp_flags & IPXP_DROPPED)
1671					break;
1672			}
1673		}
1674		if (!(ipxp->ipxp_flags & IPXP_DROPPED)) {
1675			cb->s_idle++;
1676			if (cb->s_rtt)
1677				cb->s_rtt++;
1678		}
1679		IPX_UNLOCK(ipxp);
1680	}
1681	IPX_LIST_UNLOCK();
1682	SPX_LOCK();
1683	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1684	SPX_UNLOCK();
1685}
1686
1687/*
1688 * SPX timer processing.
1689 */
1690static void
1691spx_timers(struct spxpcb *cb, int timer)
1692{
1693	long rexmt;
1694	int win;
1695
1696	IPX_LIST_LOCK_ASSERT();
1697	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1698
1699	cb->s_force = 1 + timer;
1700	switch (timer) {
1701	case SPXT_2MSL:
1702		/*
1703		 * 2 MSL timeout in shutdown went off.  TCP deletes
1704		 * connection control block.
1705		 */
1706		printf("spx: SPXT_2MSL went off for no reason\n");
1707		cb->s_timer[timer] = 0;
1708		break;
1709
1710	case SPXT_REXMT:
1711		/*
1712		 * Retransmission timer went off.  Message has not been acked
1713		 * within retransmit interval.  Back off to a longer
1714		 * retransmit interval and retransmit one packet.
1715		 */
1716		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1717			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1718			spxstat.spxs_timeoutdrop++;
1719			spx_drop(cb, ETIMEDOUT);
1720			break;
1721		}
1722		spxstat.spxs_rexmttimeo++;
1723		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1724		rexmt *= spx_backoff[cb->s_rxtshift];
1725		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1726		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1727
1728		/*
1729		 * If we have backed off fairly far, our srtt estimate is
1730		 * probably bogus.  Clobber it so we'll take the next rtt
1731		 * measurement as our srtt; move the current srtt into rttvar
1732		 * to keep the current retransmit times until then.
1733		 */
1734		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1735			cb->s_rttvar += (cb->s_srtt >> 2);
1736			cb->s_srtt = 0;
1737		}
1738		cb->s_snxt = cb->s_rack;
1739
1740		/*
1741		 * If timing a packet, stop the timer.
1742		 */
1743		cb->s_rtt = 0;
1744
1745		/*
1746		 * See very long discussion in tcp_timer.c about congestion
1747		 * window and sstrhesh.
1748		 */
1749		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1750		if (win < 2)
1751			win = 2;
1752		cb->s_cwnd = CUNIT;
1753		cb->s_ssthresh = win * CUNIT;
1754		spx_output(cb, NULL);
1755		break;
1756
1757	case SPXT_PERSIST:
1758		/*
1759		 * Persistance timer into zero window.  Force a probe to be
1760		 * sent.
1761		 */
1762		spxstat.spxs_persisttimeo++;
1763		spx_setpersist(cb);
1764		spx_output(cb, NULL);
1765		break;
1766
1767	case SPXT_KEEP:
1768		/*
1769		 * Keep-alive timer went off; send something or drop
1770		 * connection if idle for too long.
1771		 */
1772		spxstat.spxs_keeptimeo++;
1773		if (cb->s_state < TCPS_ESTABLISHED)
1774			goto dropit;
1775		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1776		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1777				goto dropit;
1778			spxstat.spxs_keepprobe++;
1779			spx_output(cb, NULL);
1780		} else
1781			cb->s_idle = 0;
1782		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1783		break;
1784
1785	dropit:
1786		spxstat.spxs_keepdrops++;
1787		spx_drop(cb, ETIMEDOUT);
1788		break;
1789
1790	default:
1791		panic("spx_timers: unknown timer %d", timer);
1792	}
1793}
1794