spx_usrreq.c revision 157067
1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 1995, Mike Mitchell
5 * Copyright (c) 2004-2006 Robert N. M. Watson
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)spx_usrreq.h
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 157067 2006-03-24 00:15:58Z rwatson $");
41
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mbuf.h>
46#include <sys/mutex.h>
47#include <sys/proc.h>
48#include <sys/protosw.h>
49#include <sys/signalvar.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/sx.h>
53#include <sys/systm.h>
54
55#include <net/route.h>
56#include <netinet/tcp_fsm.h>
57
58#include <netipx/ipx.h>
59#include <netipx/ipx_pcb.h>
60#include <netipx/ipx_var.h>
61#include <netipx/spx.h>
62#include <netipx/spx_debug.h>
63#include <netipx/spx_timer.h>
64#include <netipx/spx_var.h>
65
66/*
67 * SPX protocol implementation.
68 */
/* Sequence-number seed; assigned in spx_init() (presumably the initial send sequence -- TODO confirm). */
static u_short 	spx_iss;
/* Event counters incremented on the SF_NEWCALL code paths of spx_reass() and spx_output(). */
static u_short	spx_newchecks[50];
/* When nonzero, a listener answers packets carrying a nonzero did/seq via the dropwithreset path. */
static int	spx_hardnosed;
/* When nonzero, an SPX_SA request sets SF_DELACK instead of SF_ACKNOW in spx_reass(). */
static int	spx_use_delack = 0;
/* When nonzero, spx_trace() is invoked regardless of the socket's SO_DEBUG option. */
static int	traceallspxs = 0;
/* Statistics counters; the newstats member is reached through the spxstat macro. */
static struct	spx_istat spx_istat;
75
/*
 * Following was struct spxstat spxstat;
 * Unless spxstat is already defined, every spxstat.<field> reference in
 * this file expands to the newstats member of spx_istat.
 */
#ifndef spxstat
#define spxstat spx_istat.newstats
#endif
80
81static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
82    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
83
/* Internal protocol helpers operating on an SPX control block. */
static	void spx_close(struct spxpcb *cb);
static	void spx_disconnect(struct spxpcb *cb);
static	void spx_drop(struct spxpcb *cb, int errno);
static	int spx_output(struct spxpcb *cb, struct mbuf *m0);
static	int spx_reass(struct spxpcb *cb, struct spx *si);
static	void spx_setpersist(struct spxpcb *cb);
static	void spx_template(struct spxpcb *cb);
static	struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static	void spx_usrclosed(struct spxpcb *cb);

/* Socket-layer entry points installed in the pr_usrreqs tables below. */
static	int spx_usr_abort(struct socket *so);
static	int spx_accept(struct socket *so, struct sockaddr **nam);
static	int spx_attach(struct socket *so, int proto, struct thread *td);
static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
static	int spx_connect(struct socket *so, struct sockaddr *nam,
			struct thread *td);
static	int spx_detach(struct socket *so);
static	int spx_usr_disconnect(struct socket *so);
static	int spx_listen(struct socket *so, int backlog, struct thread *td);
static	int spx_rcvd(struct socket *so, int flags);
static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static	int spx_send(struct socket *so, int flags, struct mbuf *m,
		     struct sockaddr *addr, struct mbuf *control,
		     struct thread *td);
static	int spx_shutdown(struct socket *so);
static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
110
111struct	pr_usrreqs spx_usrreqs = {
112	.pru_abort =		spx_usr_abort,
113	.pru_accept =		spx_accept,
114	.pru_attach =		spx_attach,
115	.pru_bind =		spx_bind,
116	.pru_connect =		spx_connect,
117	.pru_control =		ipx_control,
118	.pru_detach =		spx_detach,
119	.pru_disconnect =	spx_usr_disconnect,
120	.pru_listen =		spx_listen,
121	.pru_peeraddr =		ipx_peeraddr,
122	.pru_rcvd =		spx_rcvd,
123	.pru_rcvoob =		spx_rcvoob,
124	.pru_send =		spx_send,
125	.pru_shutdown =		spx_shutdown,
126	.pru_sockaddr =		ipx_sockaddr,
127};
128
129struct	pr_usrreqs spx_usrreq_sps = {
130	.pru_abort =		spx_usr_abort,
131	.pru_accept =		spx_accept,
132	.pru_attach =		spx_sp_attach,
133	.pru_bind =		spx_bind,
134	.pru_connect =		spx_connect,
135	.pru_control =		ipx_control,
136	.pru_detach =		spx_detach,
137	.pru_disconnect =	spx_usr_disconnect,
138	.pru_listen =		spx_listen,
139	.pru_peeraddr =		ipx_peeraddr,
140	.pru_rcvd =		spx_rcvd,
141	.pru_rcvoob =		spx_rcvoob,
142	.pru_send =		spx_send,
143	.pru_shutdown =		spx_shutdown,
144	.pru_sockaddr =		ipx_sockaddr,
145};
146
147void
148spx_init(void)
149{
150
151	spx_iss = 1; /* WRONG !! should fish it out of TODR */
152}
153
/*
 * Process an inbound SPX packet that has been matched to the pcb ipxp.
 *
 * m	 received mbuf chain, starting with the combined IPX/SPX header
 *	 (struct spx).
 * ipxp	 pcb the packet was delivered to; must not be NULL.
 *
 * The caller holds the pcb list lock and the ipxp lock; every return path
 * releases both.  The seq/ack/alo header fields are converted to host
 * order in place.  For a socket with SO_ACCEPTCONN set, a new socket is
 * created with sonewconn() and processing continues on that socket's pcb.
 * After the connection-state switch the packet is handed to spx_reass(),
 * and spx_output() is called if a forced send, ack, window update or
 * retransmit flag is pending.
 */
void
spx_input(struct mbuf *m, struct ipxpcb *ipxp)
{
	struct spxpcb *cb;
	struct spx *si = mtod(m, struct spx *);
	struct socket *so;
	struct spx spx_savesi;
	int dropsocket = 0;
	short ostate = 0;

	spxstat.spxs_rcvtotal++;
	KASSERT(ipxp != NULL, ("spx_input: NULL ipxpcb"));

	/*
	 * spx_input() assumes that the caller will hold both the pcb list
	 * lock and also the ipxp lock.  spx_input() will release both before
	 * returning, and may in fact trade in the ipxp lock for another pcb
	 * lock following sonewconn().
	 */
	IPX_LIST_LOCK_ASSERT();
	IPX_LOCK_ASSERT(ipxp);

	cb = ipxtospxpcb(ipxp);
	if (cb == NULL)
		goto bad;

	/* Make the whole header contiguous before touching its fields. */
	if (m->m_len < sizeof(*si)) {
		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
			IPX_UNLOCK(ipxp);
			IPX_LIST_UNLOCK();
			spxstat.spxs_rcvshort++;
			return;
		}
		si = mtod(m, struct spx *);
	}
	/* Convert the sequencing fields to host order in place. */
	si->si_seq = ntohs(si->si_seq);
	si->si_ack = ntohs(si->si_ack);
	si->si_alo = ntohs(si->si_alo);

	so = ipxp->ipxp_socket;

	/* Snapshot state and header for the spx_trace() calls below. */
	if (so->so_options & SO_DEBUG || traceallspxs) {
		ostate = cb->s_state;
		spx_savesi = *si;
	}
	if (so->so_options & SO_ACCEPTCONN) {
		struct spxpcb *ocb = cb;

		so = sonewconn(so, 0);
		if (so == NULL) {
			goto drop;
		}
		/*
		 * This is ugly, but ....
		 *
		 * Mark socket as temporary until we're
		 * committed to keeping it.  The code at
		 * ``drop'' and ``dropwithreset'' check the
		 * flag dropsocket to see if the temporary
		 * socket created here should be discarded.
		 * We mark the socket as discardable until
		 * we're committed to it below in TCPS_LISTEN.
		 *
		 * NOTE(review): in the code as written only
		 * ``dropwithreset'' examines dropsocket; ``drop''
		 * does not.
		 */
		dropsocket++;
		/* Trade the listener's pcb lock for the new socket's pcb lock. */
		IPX_UNLOCK(ipxp);
		ipxp = (struct ipxpcb *)so->so_pcb;
		IPX_LOCK(ipxp);
		ipxp->ipxp_laddr = si->si_dna;
		cb = ipxtospxpcb(ipxp);
		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
		cb->s_state = TCPS_LISTEN;
	}

	/*
	 * Packet received on connection.
	 * reset idle time and keep-alive timer;
	 */
	cb->s_idle = 0;
	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;

	switch (cb->s_state) {

	case TCPS_LISTEN:{
		struct sockaddr_ipx *sipx, ssipx;
		struct ipx_addr laddr;

		/*
		 * If somebody here was carrying on a conversation
		 * and went away, and his pen pal thinks he can
		 * still talk, we get the misdirected packet.
		 */
		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
			spx_istat.gonawy++;
			goto dropwithreset;
		}
		/* Build the peer address from the packet's source address. */
		sipx = &ssipx;
		bzero(sipx, sizeof *sipx);
		sipx->sipx_len = sizeof(*sipx);
		sipx->sipx_family = AF_IPX;
		sipx->sipx_addr = si->si_sna;
		/* Remember the local address so it can be restored on failure. */
		laddr = ipxp->ipxp_laddr;
		if (ipx_nullhost(laddr))
			ipxp->ipxp_laddr = si->si_dna;
		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
			ipxp->ipxp_laddr = laddr;
			spx_istat.noconn++;
			goto drop;
		}
		spx_template(cb);
		dropsocket = 0;		/* committed to socket */
		cb->s_did = si->si_sid;
		cb->s_rack = si->si_ack;
		cb->s_ralo = si->si_alo;
/*
 * THREEWAYSHAKE is defined unconditionally here, so the #ifdef section
 * below (end of the LISTEN case plus the start of the SYN_RECEIVED case)
 * is always compiled in.
 */
#define THREEWAYSHAKE
#ifdef THREEWAYSHAKE
		cb->s_state = TCPS_SYN_RECEIVED;
		cb->s_force = 1 + SPXT_KEEP;
		spxstat.spxs_accepts++;
		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
		}
		break;
	/*
	 * This state means that we have heard a response
	 * to our acceptance of their connection
	 * It is probably logically unnecessary in this
	 * implementation.
	 */
	 case TCPS_SYN_RECEIVED: {
		if (si->si_did != cb->s_sid) {
			spx_istat.wrncon++;
			goto drop;
		}
#endif
		ipxp->ipxp_fport =  si->si_sport;
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
		soisconnected(so);
		cb->s_state = TCPS_ESTABLISHED;
		spxstat.spxs_accepts++;
		}
		break;

	/*
	 * This state means that we have gotten a response
	 * to our attempt to establish a connection.
	 * We fill in the data from the other side,
	 * telling us which port to respond to, instead of the well-
	 * known one we might have sent to in the first place.
	 * We also require that this is a response to our
	 * connection id.
	 */
	case TCPS_SYN_SENT:
		if (si->si_did != cb->s_sid) {
			spx_istat.notme++;
			goto drop;
		}
		spxstat.spxs_connects++;
		cb->s_did = si->si_sid;
		cb->s_rack = si->si_ack;
		cb->s_ralo = si->si_alo;
		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_flags |= SF_ACKNOW;
		soisconnected(so);
		cb->s_state = TCPS_ESTABLISHED;
		/* Use roundtrip time of connection request for initial rtt */
		if (cb->s_rtt) {
			cb->s_srtt = cb->s_rtt << 3;
			cb->s_rttvar = cb->s_rtt << 1;
			SPXT_RANGESET(cb->s_rxtcur,
			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
			    SPXTV_MIN, SPXTV_REXMTMAX);
			    cb->s_rtt = 0;
		}
	}
	if (so->so_options & SO_DEBUG || traceallspxs)
		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);

	/*
	 * Advance the mbuf past the IPX header.  si still points at the
	 * start of the combined header.
	 */
	m->m_len -= sizeof(struct ipx);
	m->m_pkthdr.len -= sizeof(struct ipx);
	m->m_data += sizeof(struct ipx);

	/* A nonzero return from spx_reass() means the packet was not kept. */
	if (spx_reass(cb, si)) {
		m_freem(m);
	}
	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
		spx_output(cb, NULL);
	cb->s_flags &= ~(SF_WIN|SF_RXT);
	IPX_UNLOCK(ipxp);
	IPX_LIST_UNLOCK();
	return;

dropwithreset:
	/*
	 * Discard the packet and, if a nascent socket was created above,
	 * unlink it from the listener's incomplete queue and abort it.
	 * No reset packet is transmitted by the code below.
	 */
	IPX_UNLOCK(ipxp);
	if (dropsocket) {
		struct socket *head;
		ACCEPT_LOCK();
		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
		head = so->so_head;
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		so->so_qstate &= ~SQ_INCOMP;
		so->so_head = NULL;
		ACCEPT_UNLOCK();
		soabort(so);
		cb = NULL;
	}
	IPX_LIST_UNLOCK();
	/* Swap the header fields back; the mbuf is freed immediately after. */
	si->si_seq = ntohs(si->si_seq);
	si->si_ack = ntohs(si->si_ack);
	si->si_alo = ntohs(si->si_alo);
	m_freem(dtom(si));
	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
	    traceallspxs)
		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
	return;

drop:
bad:
	/*
	 * NOTE(review): when reached via ``bad'' with cb == NULL, spx_savesi
	 * has not been filled in yet but is still passed to spx_trace().
	 */
	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
            traceallspxs)
		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
	IPX_UNLOCK(ipxp);
	IPX_LIST_UNLOCK();
	m_freem(m);
}
383
/* Count of consecutive duplicate acks at which spx_reass() forces a retransmit. */
static int spxrexmtthresh = 3;
385
/*
 * This is structurally similar to the tcp reassembly routine
 * but its function is somewhat different:  It merely queues
 * packets up, and suppresses duplicates.
 *
 * cb	connection control block; its ipxpcb lock must be held.
 * si	header of the received packet (sequencing fields already in host
 *	order), or SI(0) to skip straight to delivering queued packets.
 *
 * Processes the peer's ack and allocation numbers (duplicate-ack
 * retransmit, round-trip estimate, congestion window, trimming acked
 * records from the send buffer, send-window update), then queues the
 * packet in sequence order on cb->s_q and appends every in-order queued
 * packet to the socket receive buffer.
 *
 * Returns 1 when the packet was not kept and the caller should free it
 * (spx_input() does so); returns 0 when the packet was queued or was
 * already freed here.
 */
static int
spx_reass(struct spxpcb *cb, struct spx *si)
{
	struct spx_q *q;
	struct mbuf *m;
	struct socket *so = cb->s_ipxpcb->ipxp_socket;
	char packetp = cb->s_flags & SF_HI;	/* nonzero: deliver headers to user */
	int incr;
	char wakeup = 0;	/* set once something was appended to so_rcv */

	IPX_LOCK_ASSERT(cb->s_ipxpcb);

	if (si == SI(0))
		goto present;
	/*
	 * Update our news from them.
	 */
	if (si->si_cc & SPX_SA)
		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
	if (SSEQ_GT(si->si_alo, cb->s_ralo))
		cb->s_flags |= SF_WIN;
	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
		/* The ack does not advance: possibly a duplicate ack. */
		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
			spxstat.spxs_rcvdupack++;
			/*
			 * If this is a completely duplicate ack
			 * and other conditions hold, we assume
			 * a packet has been dropped and retransmit
			 * it exactly as in tcp_input().
			 */
			if (si->si_ack != cb->s_rack ||
			    si->si_alo != cb->s_ralo)
				cb->s_dupacks = 0;
			else if (++cb->s_dupacks == spxrexmtthresh) {
				u_short onxt = cb->s_snxt;
				int cwnd = cb->s_cwnd;

				/*
				 * Resend from the acked point with a
				 * one-unit congestion window, then restore
				 * s_snxt and halve the saved window if it
				 * was at least 4 units.
				 */
				cb->s_snxt = si->si_ack;
				cb->s_cwnd = CUNIT;
				cb->s_force = 1 + SPXT_REXMT;
				spx_output(cb, NULL);
				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
				cb->s_rtt = 0;
				if (cwnd >= 4 * CUNIT)
					cb->s_cwnd = cwnd / 2;
				if (SSEQ_GT(onxt, cb->s_snxt))
					cb->s_snxt = onxt;
				return (1);
			}
		} else
			cb->s_dupacks = 0;
		goto update_window;
	}
	cb->s_dupacks = 0;
	/*
	 * If our correspondent acknowledges data we haven't sent
	 * TCP would drop the packet after acking.  We'll be a little
	 * more permissive
	 */
	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
		spxstat.spxs_rcvacktoomuch++;
		si->si_ack = cb->s_smax + 1;
	}
	spxstat.spxs_rcvackpack++;
	/*
	 * If transmit timer is running and timed sequence
	 * number was acked, update smoothed round trip time.
	 * See discussion of algorithm in tcp_input.c
	 */
	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
		spxstat.spxs_rttupdated++;
		if (cb->s_srtt != 0) {
			short delta;
			delta = cb->s_rtt - (cb->s_srtt >> 3);
			if ((cb->s_srtt += delta) <= 0)
				cb->s_srtt = 1;
			if (delta < 0)
				delta = -delta;
			delta -= (cb->s_rttvar >> 2);
			if ((cb->s_rttvar += delta) <= 0)
				cb->s_rttvar = 1;
		} else {
			/*
			 * No rtt measurement yet
			 */
			cb->s_srtt = cb->s_rtt << 3;
			cb->s_rttvar = cb->s_rtt << 1;
		}
		cb->s_rtt = 0;
		cb->s_rxtshift = 0;
		SPXT_RANGESET(cb->s_rxtcur,
			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
			SPXTV_MIN, SPXTV_REXMTMAX);
	}
	/*
	 * If all outstanding data is acked, stop retransmit
	 * timer and remember to restart (more output or persist).
	 * If there is more data to be acked, restart retransmit
	 * timer, using current (possibly backed-off) value;
	 */
	if (si->si_ack == cb->s_smax + 1) {
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_flags |= SF_RXT;
	} else if (cb->s_timer[SPXT_PERSIST] == 0)
		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
	/*
	 * When new data is acked, open the congestion window.
	 * If the window gives us less than ssthresh packets
	 * in flight, open exponentially (maxseg at a time).
	 * Otherwise open linearly (maxseg^2 / cwnd at a time).
	 */
	incr = CUNIT;
	if (cb->s_cwnd > cb->s_ssthresh)
		incr = max(incr * incr / cb->s_cwnd, 1);
	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
	/*
	 * Trim Acked data from output queue.
	 */
	SOCKBUF_LOCK(&so->so_snd);
	while ((m = so->so_snd.sb_mb) != NULL) {
		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
			sbdroprecord_locked(&so->so_snd);
		else
			break;
	}
	/* Presumably releases the so_snd lock taken above -- TODO confirm. */
	sowwakeup_locked(so);
	cb->s_rack = si->si_ack;
update_window:
	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
		cb->s_snxt = cb->s_rack;
	/*
	 * Accept the advertised window if the packet is newer than the one
	 * used for the last update (by seq, then by ack), or if it carries
	 * the same ack as the last update and a larger allocation.
	 */
	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
		/* keep track of pure window updates */
		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
			spxstat.spxs_rcvwinupd++;
			spxstat.spxs_rcvdupack--;
		}
		cb->s_ralo = si->si_alo;
		cb->s_swl1 = si->si_seq;
		cb->s_swl2 = si->si_ack;
		cb->s_swnd = (1 + si->si_alo - si->si_ack);
		if (cb->s_swnd > cb->s_smxw)
			cb->s_smxw = cb->s_swnd;
		cb->s_flags |= SF_WIN;
	}
	/*
	 * If this packet number is higher than that which
	 * we have allocated refuse it, unless urgent
	 */
	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
		if (si->si_cc & SPX_SP) {
			spxstat.spxs_rcvwinprobe++;
			return (1);
		} else
			spxstat.spxs_rcvpackafterwin++;
		if (si->si_cc & SPX_OB) {
			/* Out-of-band data is tolerated up to 60 past the window. */
			if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
				m_freem(dtom(si));
				return (0);
			} /* else queue this packet; */
		} else {
#ifdef BROKEN
			/*
			 * XXXRW: This is broken on at least one count:
			 * spx_close() will free the ipxp and related parts,
			 * which are then touched by spx_input() after the
			 * return from spx_reass().
			 */
			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
			if (so->so_state && SS_NOFDREF) {
				spx_close(cb);
			} else
				       would crash system*/
#endif
			/* Packet freed here, so report 0 to avoid a second free. */
			spx_istat.notyet++;
			m_freem(dtom(si));
			return (0);
		}
	}
	/*
	 * If this is a system packet, we don't need to
	 * queue it up, and won't update acknowledge #
	 */
	if (si->si_cc & SPX_SP) {
		return (1);
	}
	/*
	 * We have already seen this packet, so drop.
	 */
	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
		spx_istat.bdreas++;
		spxstat.spxs_rcvduppack++;
		if (si->si_seq == cb->s_ack - 1)
			spx_istat.lstdup++;
		return (1);
	}
	/*
	 * Loop through all packets queued up to insert in
	 * appropriate sequence.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		if (si->si_seq == SI(q)->si_seq) {
			spxstat.spxs_rcvduppack++;
			return (1);
		}
		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
			spxstat.spxs_rcvoopack++;
			break;
		}
	}
	/* Link the packet in ahead of q (at the tail if the loop ran out). */
	insque(si, q->si_prev);
	/*
	 * If this packet is urgent, inform process
	 */
	if (si->si_cc & SPX_OB) {
		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
		sohasoutofband(so);
		cb->s_oobflags |= SF_IOOB;
	}
present:
#define SPINC sizeof(struct spxhdr)
	SOCKBUF_LOCK(&so->so_rcv);
	/*
	 * Loop through all packets queued up to update acknowledge
	 * number, and present all acknowledged data to user;
	 * If in packet interface mode, show packet headers.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		  if (SI(q)->si_seq == cb->s_ack) {
			cb->s_ack++;
			m = dtom(q);
			if (SI(q)->si_cc & SPX_OB) {
				cb->s_oobflags &= ~SF_IOOB;
				if (so->so_rcv.sb_cc)
					so->so_oobmark = so->so_rcv.sb_cc;
				else
					so->so_rcv.sb_state |= SBS_RCVATMARK;
			}
			/*
			 * Step back before unlinking so the loop's
			 * q = q->si_next lands on the following entry.
			 */
			q = q->si_prev;
			remque(q->si_next);
			wakeup = 1;
			spxstat.spxs_rcvpack++;
#ifdef SF_NEWCALL
			if (cb->s_flags2 & SF_NEWCALL) {
				struct spxhdr *sp = mtod(m, struct spxhdr *);
				u_char dt = sp->spx_dt;
				spx_newchecks[4]++;
				/*
				 * When the datastream type changes, queue a
				 * small MT_CONTROL mbuf carrying the new
				 * type ahead of the data.
				 */
				if (dt != cb->s_rhdr.spx_dt) {
					struct mbuf *mm =
					   m_getclr(M_DONTWAIT, MT_CONTROL);
					spx_newchecks[0]++;
					if (mm != NULL) {
						u_short *s =
							mtod(mm, u_short *);
						cb->s_rhdr.spx_dt = dt;
						mm->m_len = 5; /*XXX*/
						s[0] = 5;
						s[1] = 1;
						*(u_char *)(&s[2]) = dt;
						sbappend_locked(&so->so_rcv, mm);
					}
				}
				if (sp->spx_cc & SPX_OB) {
					MCHTYPE(m, MT_OOBDATA);
					spx_newchecks[1]++;
					so->so_oobmark = 0;
					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
				}
				/* Strip the SPX header unless the user asked for it. */
				if (packetp == 0) {
					m->m_data += SPINC;
					m->m_len -= SPINC;
					m->m_pkthdr.len -= SPINC;
				}
				if ((sp->spx_cc & SPX_EM) || packetp) {
					sbappendrecord_locked(&so->so_rcv, m);
					spx_newchecks[9]++;
				} else
					sbappend_locked(&so->so_rcv, m);
			} else
#endif
			if (packetp) {
				sbappendrecord_locked(&so->so_rcv, m);
			} else {
				/* Remember the last header, then strip it. */
				cb->s_rhdr = *mtod(m, struct spxhdr *);
				m->m_data += SPINC;
				m->m_len -= SPINC;
				m->m_pkthdr.len -= SPINC;
				sbappend_locked(&so->so_rcv, m);
			}
		  } else
			break;
	}
	/*
	 * Presumably sorwakeup_locked() drops the so_rcv lock itself, since
	 * only the no-wakeup branch unlocks explicitly -- TODO confirm.
	 */
	if (wakeup)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (0);
}
691
/*
 * Control-input hook for SPX.  All three arguments are ignored and the
 * function has no effect.
 */
void
spx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
{
	/* Currently, nothing. */
	(void)cmd;
	(void)arg_as_sa;
	(void)dummy;
}
698
699static int
700spx_output(struct spxpcb *cb, struct mbuf *m0)
701{
702	struct socket *so = cb->s_ipxpcb->ipxp_socket;
703	struct mbuf *m;
704	struct spx *si = NULL;
705	struct sockbuf *sb = &so->so_snd;
706	int len = 0, win, rcv_win;
707	short span, off, recordp = 0;
708	u_short alo;
709	int error = 0, sendalot;
710#ifdef notdef
711	int idle;
712#endif
713	struct mbuf *mprev;
714
715	IPX_LOCK_ASSERT(cb->s_ipxpcb);
716
717	if (m0 != NULL) {
718		int mtu = cb->s_mtu;
719		int datalen;
720		/*
721		 * Make sure that packet isn't too big.
722		 */
723		for (m = m0; m != NULL; m = m->m_next) {
724			mprev = m;
725			len += m->m_len;
726			if (m->m_flags & M_EOR)
727				recordp = 1;
728		}
729		datalen = (cb->s_flags & SF_HO) ?
730				len - sizeof(struct spxhdr) : len;
731		if (datalen > mtu) {
732			if (cb->s_flags & SF_PI) {
733				m_freem(m0);
734				return (EMSGSIZE);
735			} else {
736				int oldEM = cb->s_cc & SPX_EM;
737
738				cb->s_cc &= ~SPX_EM;
739				while (len > mtu) {
740					/*
741					 * Here we are only being called
742					 * from usrreq(), so it is OK to
743					 * block.
744					 */
745					m = m_copym(m0, 0, mtu, M_TRYWAIT);
746					if (cb->s_flags & SF_NEWCALL) {
747					    struct mbuf *mm = m;
748					    spx_newchecks[7]++;
749					    while (mm != NULL) {
750						mm->m_flags &= ~M_EOR;
751						mm = mm->m_next;
752					    }
753					}
754					error = spx_output(cb, m);
755					if (error) {
756						cb->s_cc |= oldEM;
757						m_freem(m0);
758						return (error);
759					}
760					m_adj(m0, mtu);
761					len -= mtu;
762				}
763				cb->s_cc |= oldEM;
764			}
765		}
766		/*
767		 * Force length even, by adding a "garbage byte" if
768		 * necessary.
769		 */
770		if (len & 1) {
771			m = mprev;
772			if (M_TRAILINGSPACE(m) >= 1)
773				m->m_len++;
774			else {
775				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
776
777				if (m1 == NULL) {
778					m_freem(m0);
779					return (ENOBUFS);
780				}
781				m1->m_len = 1;
782				*(mtod(m1, u_char *)) = 0;
783				m->m_next = m1;
784			}
785		}
786		m = m_gethdr(M_DONTWAIT, MT_DATA);
787		if (m == NULL) {
788			m_freem(m0);
789			return (ENOBUFS);
790		}
791		/*
792		 * Fill in mbuf with extended SP header
793		 * and addresses and length put into network format.
794		 */
795		MH_ALIGN(m, sizeof(struct spx));
796		m->m_len = sizeof(struct spx);
797		m->m_next = m0;
798		si = mtod(m, struct spx *);
799		si->si_i = *cb->s_ipx;
800		si->si_s = cb->s_shdr;
801		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
802			struct spxhdr *sh;
803			if (m0->m_len < sizeof(*sh)) {
804				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
805					m_free(m);
806					m_freem(m0);
807					return (EINVAL);
808				}
809				m->m_next = m0;
810			}
811			sh = mtod(m0, struct spxhdr *);
812			si->si_dt = sh->spx_dt;
813			si->si_cc |= sh->spx_cc & SPX_EM;
814			m0->m_len -= sizeof(*sh);
815			m0->m_data += sizeof(*sh);
816			len -= sizeof(*sh);
817		}
818		len += sizeof(*si);
819		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
820			si->si_cc |= SPX_EM;
821			spx_newchecks[8]++;
822		}
823		if (cb->s_oobflags & SF_SOOB) {
824			/*
825			 * Per jqj@cornell:
826			 * make sure OB packets convey exactly 1 byte.
827			 * If the packet is 1 byte or larger, we
828			 * have already guaranted there to be at least
829			 * one garbage byte for the checksum, and
830			 * extra bytes shouldn't hurt!
831			 */
832			if (len > sizeof(*si)) {
833				si->si_cc |= SPX_OB;
834				len = (1 + sizeof(*si));
835			}
836		}
837		si->si_len = htons((u_short)len);
838		m->m_pkthdr.len = ((len - 1) | 1) + 1;
839		/*
840		 * queue stuff up for output
841		 */
842		sbappendrecord(sb, m);
843		cb->s_seq++;
844	}
845#ifdef notdef
846	idle = (cb->s_smax == (cb->s_rack - 1));
847#endif
848again:
849	sendalot = 0;
850	off = cb->s_snxt - cb->s_rack;
851	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
852
853	/*
854	 * If in persist timeout with window of 0, send a probe.
855	 * Otherwise, if window is small but nonzero
856	 * and timer expired, send what we can and go into
857	 * transmit state.
858	 */
859	if (cb->s_force == 1 + SPXT_PERSIST) {
860		if (win != 0) {
861			cb->s_timer[SPXT_PERSIST] = 0;
862			cb->s_rxtshift = 0;
863		}
864	}
865	span = cb->s_seq - cb->s_rack;
866	len = min(span, win) - off;
867
868	if (len < 0) {
869		/*
870		 * Window shrank after we went into it.
871		 * If window shrank to 0, cancel pending
872		 * restransmission and pull s_snxt back
873		 * to (closed) window.  We will enter persist
874		 * state below.  If the widndow didn't close completely,
875		 * just wait for an ACK.
876		 */
877		len = 0;
878		if (win == 0) {
879			cb->s_timer[SPXT_REXMT] = 0;
880			cb->s_snxt = cb->s_rack;
881		}
882	}
883	if (len > 1)
884		sendalot = 1;
885	rcv_win = sbspace(&so->so_rcv);
886
887	/*
888	 * Send if we owe peer an ACK.
889	 */
890	if (cb->s_oobflags & SF_SOOB) {
891		/*
892		 * must transmit this out of band packet
893		 */
894		cb->s_oobflags &= ~ SF_SOOB;
895		sendalot = 1;
896		spxstat.spxs_sndurg++;
897		goto found;
898	}
899	if (cb->s_flags & SF_ACKNOW)
900		goto send;
901	if (cb->s_state < TCPS_ESTABLISHED)
902		goto send;
903	/*
904	 * Silly window can't happen in spx.
905	 * Code from tcp deleted.
906	 */
907	if (len)
908		goto send;
909	/*
910	 * Compare available window to amount of window
911	 * known to peer (as advertised window less
912	 * next expected input.)  If the difference is at least two
913	 * packets or at least 35% of the mximum possible window,
914	 * then want to send a window update to peer.
915	 */
916	if (rcv_win > 0) {
917		u_short delta =  1 + cb->s_alo - cb->s_ack;
918		int adv = rcv_win - (delta * cb->s_mtu);
919
920		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
921		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
922			spxstat.spxs_sndwinup++;
923			cb->s_flags |= SF_ACKNOW;
924			goto send;
925		}
926
927	}
928	/*
929	 * Many comments from tcp_output.c are appropriate here
930	 * including . . .
931	 * If send window is too small, there is data to transmit, and no
932	 * retransmit or persist is pending, then go to persist state.
933	 * If nothing happens soon, send when timer expires:
934	 * if window is nonzero, transmit what we can,
935	 * otherwise send a probe.
936	 */
937	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
938		cb->s_timer[SPXT_PERSIST] == 0) {
939			cb->s_rxtshift = 0;
940			spx_setpersist(cb);
941	}
942	/*
943	 * No reason to send a packet, just return.
944	 */
945	cb->s_outx = 1;
946	return (0);
947
948send:
949	/*
950	 * Find requested packet.
951	 */
952	si = 0;
953	if (len > 0) {
954		cb->s_want = cb->s_snxt;
955		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
956			si = mtod(m, struct spx *);
957			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
958				break;
959		}
960	found:
961		if (si != NULL) {
962			if (si->si_seq == cb->s_snxt)
963					cb->s_snxt++;
964				else
965					spxstat.spxs_sndvoid++, si = 0;
966		}
967	}
968	/*
969	 * update window
970	 */
971	if (rcv_win < 0)
972		rcv_win = 0;
973	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
974	if (SSEQ_LT(alo, cb->s_alo))
975		alo = cb->s_alo;
976
977	if (si != NULL) {
978		/*
979		 * must make a copy of this packet for
980		 * ipx_output to monkey with
981		 */
982		m = m_copy(dtom(si), 0, (int)M_COPYALL);
983		if (m == NULL) {
984			return (ENOBUFS);
985		}
986		si = mtod(m, struct spx *);
987		if (SSEQ_LT(si->si_seq, cb->s_smax))
988			spxstat.spxs_sndrexmitpack++;
989		else
990			spxstat.spxs_sndpack++;
991	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
992		/*
993		 * Must send an acknowledgement or a probe
994		 */
995		if (cb->s_force)
996			spxstat.spxs_sndprobe++;
997		if (cb->s_flags & SF_ACKNOW)
998			spxstat.spxs_sndacks++;
999		m = m_gethdr(M_DONTWAIT, MT_DATA);
1000		if (m == NULL)
1001			return (ENOBUFS);
1002		/*
1003		 * Fill in mbuf with extended SP header
1004		 * and addresses and length put into network format.
1005		 */
1006		MH_ALIGN(m, sizeof(struct spx));
1007		m->m_len = sizeof(*si);
1008		m->m_pkthdr.len = sizeof(*si);
1009		si = mtod(m, struct spx *);
1010		si->si_i = *cb->s_ipx;
1011		si->si_s = cb->s_shdr;
1012		si->si_seq = cb->s_smax + 1;
1013		si->si_len = htons(sizeof(*si));
1014		si->si_cc |= SPX_SP;
1015	} else {
1016		cb->s_outx = 3;
1017		if (so->so_options & SO_DEBUG || traceallspxs)
1018			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1019		return (0);
1020	}
1021	/*
1022	 * Stuff checksum and output datagram.
1023	 */
1024	if ((si->si_cc & SPX_SP) == 0) {
1025		if (cb->s_force != (1 + SPXT_PERSIST) ||
1026		    cb->s_timer[SPXT_PERSIST] == 0) {
1027			/*
1028			 * If this is a new packet and we are not currently
1029			 * timing anything, time this one.
1030			 */
1031			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1032				cb->s_smax = si->si_seq;
1033				if (cb->s_rtt == 0) {
1034					spxstat.spxs_segstimed++;
1035					cb->s_rtseq = si->si_seq;
1036					cb->s_rtt = 1;
1037				}
1038			}
1039			/*
1040			 * Set rexmt timer if not currently set,
1041			 * Initial value for retransmit timer is smoothed
1042			 * round-trip time + 2 * round-trip time variance.
1043			 * Initialize shift counter which is used for backoff
1044			 * of retransmit time.
1045			 */
1046			if (cb->s_timer[SPXT_REXMT] == 0 &&
1047			    cb->s_snxt != cb->s_rack) {
1048				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1049				if (cb->s_timer[SPXT_PERSIST]) {
1050					cb->s_timer[SPXT_PERSIST] = 0;
1051					cb->s_rxtshift = 0;
1052				}
1053			}
1054		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1055			cb->s_smax = si->si_seq;
1056		}
1057	} else if (cb->s_state < TCPS_ESTABLISHED) {
1058		if (cb->s_rtt == 0)
1059			cb->s_rtt = 1; /* Time initial handshake */
1060		if (cb->s_timer[SPXT_REXMT] == 0)
1061			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1062	}
1063	{
1064		/*
1065		 * Do not request acks when we ack their data packets or
1066		 * when we do a gratuitous window update.
1067		 */
1068		if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1069				si->si_cc |= SPX_SA;
1070		si->si_seq = htons(si->si_seq);
1071		si->si_alo = htons(alo);
1072		si->si_ack = htons(cb->s_ack);
1073
1074		if (ipxcksum) {
1075			si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1076		} else
1077			si->si_sum = 0xffff;
1078
1079		cb->s_outx = 4;
1080		if (so->so_options & SO_DEBUG || traceallspxs)
1081			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1082
1083		if (so->so_options & SO_DONTROUTE)
1084			error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1085		else
1086			error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1087	}
1088	if (error) {
1089		return (error);
1090	}
1091	spxstat.spxs_sndtotal++;
1092	/*
1093	 * Data sent (as far as we can tell).
1094	 * If this advertises a larger window than any other segment,
1095	 * then remember the size of the advertized window.
1096	 * Any pending ACK has now been sent.
1097	 */
1098	cb->s_force = 0;
1099	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1100	if (SSEQ_GT(alo, cb->s_alo))
1101		cb->s_alo = alo;
1102	if (sendalot)
1103		goto again;
1104	cb->s_outx = 5;
1105	return (0);
1106}
1107
1108static int spx_do_persist_panics = 0;
1109
1110static void
1111spx_setpersist(struct spxpcb *cb)
1112{
1113	int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1114
1115	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1116
1117	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1118		panic("spx_output REXMT");
1119	/*
1120	 * Start/restart persistance timer.
1121	 */
1122	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1123	    t*spx_backoff[cb->s_rxtshift],
1124	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1125	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1126		cb->s_rxtshift++;
1127}
1128
1129int
1130spx_ctloutput(struct socket *so, struct sockopt *sopt)
1131{
1132	struct ipxpcb *ipxp = sotoipxpcb(so);
1133	struct spxpcb *cb;
1134	int mask, error;
1135	short soptval;
1136	u_short usoptval;
1137	int optval;
1138
1139	error = 0;
1140
1141	if (sopt->sopt_level != IPXPROTO_SPX) {
1142		/* This will have to be changed when we do more general
1143		   stacking of protocols */
1144		return (ipx_ctloutput(so, sopt));
1145	}
1146	if (ipxp == NULL)
1147		return (EINVAL);
1148	else
1149		cb = ipxtospxpcb(ipxp);
1150
1151	switch (sopt->sopt_dir) {
1152	case SOPT_GET:
1153		switch (sopt->sopt_name) {
1154		case SO_HEADERS_ON_INPUT:
1155			mask = SF_HI;
1156			goto get_flags;
1157
1158		case SO_HEADERS_ON_OUTPUT:
1159			mask = SF_HO;
1160		get_flags:
1161			/* Unlocked read. */
1162			soptval = cb->s_flags & mask;
1163			error = sooptcopyout(sopt, &soptval, sizeof soptval);
1164			break;
1165
1166		case SO_MTU:
1167			/* Unlocked read. */
1168			usoptval = cb->s_mtu;
1169			error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1170			break;
1171
1172		case SO_LAST_HEADER:
1173			/* Unlocked read. */
1174			error = sooptcopyout(sopt, &cb->s_rhdr,
1175					     sizeof cb->s_rhdr);
1176			break;
1177
1178		case SO_DEFAULT_HEADERS:
1179			/* Unlocked read. */
1180			error = sooptcopyout(sopt, &cb->s_shdr,
1181					     sizeof cb->s_shdr);
1182			break;
1183
1184		default:
1185			error = ENOPROTOOPT;
1186		}
1187		break;
1188
1189	case SOPT_SET:
1190		switch (sopt->sopt_name) {
1191			/* XXX why are these shorts on get and ints on set?
1192			   that doesn't make any sense... */
1193		case SO_HEADERS_ON_INPUT:
1194			mask = SF_HI;
1195			goto set_head;
1196
1197		case SO_HEADERS_ON_OUTPUT:
1198			mask = SF_HO;
1199		set_head:
1200			error = sooptcopyin(sopt, &optval, sizeof optval,
1201					    sizeof optval);
1202			if (error)
1203				break;
1204
1205			IPX_LOCK(ipxp);
1206			if (cb->s_flags & SF_PI) {
1207				if (optval)
1208					cb->s_flags |= mask;
1209				else
1210					cb->s_flags &= ~mask;
1211			} else error = EINVAL;
1212			IPX_UNLOCK(ipxp);
1213			break;
1214
1215		case SO_MTU:
1216			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1217					    sizeof usoptval);
1218			if (error)
1219				break;
1220			/* Unlocked write. */
1221			cb->s_mtu = usoptval;
1222			break;
1223
1224#ifdef SF_NEWCALL
1225		case SO_NEWCALL:
1226			error = sooptcopyin(sopt, &optval, sizeof optval,
1227					    sizeof optval);
1228			if (error)
1229				break;
1230			IPX_LOCK(ipxp);
1231			if (optval) {
1232				cb->s_flags2 |= SF_NEWCALL;
1233				spx_newchecks[5]++;
1234			} else {
1235				cb->s_flags2 &= ~SF_NEWCALL;
1236				spx_newchecks[6]++;
1237			}
1238			IPX_UNLOCK(ipxp);
1239			break;
1240#endif
1241
1242		case SO_DEFAULT_HEADERS:
1243			{
1244				struct spxhdr sp;
1245
1246				error = sooptcopyin(sopt, &sp, sizeof sp,
1247						    sizeof sp);
1248				if (error)
1249					break;
1250				IPX_LOCK(ipxp);
1251				cb->s_dt = sp.spx_dt;
1252				cb->s_cc = sp.spx_cc & SPX_EM;
1253				IPX_UNLOCK(ipxp);
1254			}
1255			break;
1256
1257		default:
1258			error = ENOPROTOOPT;
1259		}
1260		break;
1261	}
1262	return (error);
1263}
1264
1265static int
1266spx_usr_abort(struct socket *so)
1267{
1268	struct ipxpcb *ipxp;
1269	struct spxpcb *cb;
1270
1271	ipxp = sotoipxpcb(so);
1272	cb = ipxtospxpcb(ipxp);
1273
1274	IPX_LIST_LOCK();
1275	IPX_LOCK(ipxp);
1276	spx_drop(cb, ECONNABORTED);
1277	IPX_LIST_UNLOCK();
1278	return (0);
1279}
1280
1281/*
1282 * Accept a connection.  Essentially all the work is
1283 * done at higher levels; just return the address
1284 * of the peer, storing through addr.
1285 */
1286static int
1287spx_accept(struct socket *so, struct sockaddr **nam)
1288{
1289	struct ipxpcb *ipxp;
1290	struct sockaddr_ipx *sipx, ssipx;
1291
1292	ipxp = sotoipxpcb(so);
1293	sipx = &ssipx;
1294	bzero(sipx, sizeof *sipx);
1295	sipx->sipx_len = sizeof *sipx;
1296	sipx->sipx_family = AF_IPX;
1297	IPX_LOCK(ipxp);
1298	sipx->sipx_addr = ipxp->ipxp_faddr;
1299	IPX_UNLOCK(ipxp);
1300	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1301	return (0);
1302}
1303
1304static int
1305spx_attach(struct socket *so, int proto, struct thread *td)
1306{
1307	struct ipxpcb *ipxp;
1308	struct spxpcb *cb;
1309	struct mbuf *mm;
1310	struct sockbuf *sb;
1311	int error;
1312
1313	ipxp = sotoipxpcb(so);
1314	cb = ipxtospxpcb(ipxp);
1315
1316	IPX_LIST_LOCK();
1317	error = ipx_pcballoc(so, &ipxpcb_list, td);
1318	if (error)
1319		goto spx_attach_end;
1320	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1321		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1322		if (error)
1323			goto spx_attach_end;
1324	}
1325	ipxp = sotoipxpcb(so);
1326
1327	MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1328
1329	if (cb == NULL) {
1330		error = ENOBUFS;
1331		goto spx_attach_end;
1332	}
1333	sb = &so->so_snd;
1334
1335	mm = m_getclr(M_DONTWAIT, MT_DATA);
1336	if (mm == NULL) {
1337		FREE(cb, M_PCB);
1338		error = ENOBUFS;
1339		goto spx_attach_end;
1340	}
1341	cb->s_ipx = mtod(mm, struct ipx *);
1342	cb->s_state = TCPS_LISTEN;
1343	cb->s_smax = -1;
1344	cb->s_swl1 = -1;
1345	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1346	cb->s_ipxpcb = ipxp;
1347	cb->s_mtu = 576 - sizeof(struct spx);
1348	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1349	cb->s_ssthresh = cb->s_cwnd;
1350	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1351	/* Above is recomputed when connecting to account
1352	   for changed buffering or mtu's */
1353	cb->s_rtt = SPXTV_SRTTBASE;
1354	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1355	SPXT_RANGESET(cb->s_rxtcur,
1356	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1357	    SPXTV_MIN, SPXTV_REXMTMAX);
1358	ipxp->ipxp_pcb = (caddr_t)cb;
1359spx_attach_end:
1360	IPX_LIST_UNLOCK();
1361	return (error);
1362}
1363
/*
 * Bind a local address to the socket by delegating to ipx_pcbbind() with
 * the pcb list lock and the pcb lock held.  Returns ipx_pcbbind()'s result.
 */
static int
spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct ipxpcb *ipxp = sotoipxpcb(so);
	int rv;

	IPX_LIST_LOCK();
	IPX_LOCK(ipxp);
	rv = ipx_pcbbind(ipxp, nam, td);
	IPX_UNLOCK(ipxp);
	IPX_LIST_UNLOCK();

	return (rv);
}
1379
1380/*
1381 * Initiate connection to peer.
1382 * Enter SYN_SENT state, and mark socket as connecting.
1383 * Start keep-alive timer, setup prototype header,
1384 * Send initial system packet requesting connection.
1385 */
1386static int
1387spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1388{
1389	struct ipxpcb *ipxp;
1390	struct spxpcb *cb;
1391	int error;
1392
1393	ipxp = sotoipxpcb(so);
1394	cb = ipxtospxpcb(ipxp);
1395
1396	IPX_LIST_LOCK();
1397	IPX_LOCK(ipxp);
1398	if (ipxp->ipxp_lport == 0) {
1399		error = ipx_pcbbind(ipxp, NULL, td);
1400		if (error)
1401			goto spx_connect_end;
1402	}
1403	error = ipx_pcbconnect(ipxp, nam, td);
1404	if (error)
1405		goto spx_connect_end;
1406	soisconnecting(so);
1407	spxstat.spxs_connattempt++;
1408	cb->s_state = TCPS_SYN_SENT;
1409	cb->s_did = 0;
1410	spx_template(cb);
1411	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1412	cb->s_force = 1 + SPXTV_KEEP;
1413	/*
1414	 * Other party is required to respond to
1415	 * the port I send from, but he is not
1416	 * required to answer from where I am sending to,
1417	 * so allow wildcarding.
1418	 * original port I am sending to is still saved in
1419	 * cb->s_dport.
1420	 */
1421	ipxp->ipxp_fport = 0;
1422	error = spx_output(cb, NULL);
1423spx_connect_end:
1424	IPX_UNLOCK(ipxp);
1425	IPX_LIST_UNLOCK();
1426	return (error);
1427}
1428
1429static int
1430spx_detach(struct socket *so)
1431{
1432	struct ipxpcb *ipxp;
1433	struct spxpcb *cb;
1434
1435	ipxp = sotoipxpcb(so);
1436	cb = ipxtospxpcb(ipxp);
1437
1438	IPX_LIST_LOCK();
1439	IPX_LOCK(ipxp);
1440	if (cb->s_state > TCPS_LISTEN)
1441		spx_disconnect(cb);
1442	else
1443		spx_close(cb);
1444	IPX_LIST_UNLOCK();
1445	return (0);
1446}
1447
/*
 * We may later decide to optionally implement connection-closing
 * handshaking at the SPX level.  Here is the hook to do it:
 */
static int
spx_usr_disconnect(struct socket *so)
{
	struct ipxpcb *ipxp = sotoipxpcb(so);
	struct spxpcb *cb = ipxtospxpcb(ipxp);

	IPX_LIST_LOCK();
	IPX_LOCK(ipxp);
	/* cb is invalid afterwards; the pcb lock is not released here. */
	spx_disconnect(cb);
	IPX_LIST_UNLOCK();

	return (0);
}
1468
1469static int
1470spx_listen(struct socket *so, int backlog, struct thread *td)
1471{
1472	int error;
1473	struct ipxpcb *ipxp;
1474	struct spxpcb *cb;
1475
1476	error = 0;
1477	ipxp = sotoipxpcb(so);
1478	cb = ipxtospxpcb(ipxp);
1479
1480	IPX_LIST_LOCK();
1481	IPX_LOCK(ipxp);
1482	SOCK_LOCK(so);
1483	error = solisten_proto_check(so);
1484	if (error == 0 && ipxp->ipxp_lport == 0)
1485		error = ipx_pcbbind(ipxp, NULL, td);
1486	if (error == 0) {
1487		cb->s_state = TCPS_LISTEN;
1488		solisten_proto(so, backlog);
1489	}
1490	SOCK_UNLOCK(so);
1491	IPX_UNLOCK(ipxp);
1492	IPX_LIST_UNLOCK();
1493	return (error);
1494}
1495
1496/*
1497 * After a receive, possibly send acknowledgment
1498 * updating allocation.
1499 */
1500static int
1501spx_rcvd(struct socket *so, int flags)
1502{
1503	struct ipxpcb *ipxp;
1504	struct spxpcb *cb;
1505
1506	ipxp = sotoipxpcb(so);
1507	cb = ipxtospxpcb(ipxp);
1508
1509	IPX_LOCK(ipxp);
1510	cb->s_flags |= SF_RVD;
1511	spx_output(cb, NULL);
1512	cb->s_flags &= ~SF_RVD;
1513	IPX_UNLOCK(ipxp);
1514	return (0);
1515}
1516
1517static int
1518spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1519{
1520	struct ipxpcb *ipxp;
1521	struct spxpcb *cb;
1522
1523	ipxp = sotoipxpcb(so);
1524	cb = ipxtospxpcb(ipxp);
1525
1526	SOCKBUF_LOCK(&so->so_rcv);
1527	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1528	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1529		SOCKBUF_UNLOCK(&so->so_rcv);
1530		m->m_len = 1;
1531		/* Unlocked read. */
1532		*mtod(m, caddr_t) = cb->s_iobc;
1533		return (0);
1534	}
1535	SOCKBUF_UNLOCK(&so->so_rcv);
1536	return (EINVAL);
1537}
1538
1539static int
1540spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1541    struct mbuf *controlp, struct thread *td)
1542{
1543	int error;
1544	struct ipxpcb *ipxp;
1545	struct spxpcb *cb;
1546
1547	error = 0;
1548	ipxp = sotoipxpcb(so);
1549	cb = ipxtospxpcb(ipxp);
1550
1551	IPX_LOCK(ipxp);
1552	if (flags & PRUS_OOB) {
1553		if (sbspace(&so->so_snd) < -512) {
1554			error = ENOBUFS;
1555			goto spx_send_end;
1556		}
1557		cb->s_oobflags |= SF_SOOB;
1558	}
1559	if (controlp != NULL) {
1560		u_short *p = mtod(controlp, u_short *);
1561		spx_newchecks[2]++;
1562		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1563			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1564			spx_newchecks[3]++;
1565		}
1566		m_freem(controlp);
1567	}
1568	controlp = NULL;
1569	error = spx_output(cb, m);
1570	m = NULL;
1571spx_send_end:
1572	IPX_UNLOCK(ipxp);
1573	if (controlp != NULL)
1574		m_freem(controlp);
1575	if (m != NULL)
1576		m_freem(m);
1577	return (error);
1578}
1579
/*
 * Shut down the sending side of the socket: mark it as unable to send more
 * and run spx_usrclosed() on the control block.  Always returns 0.
 */
static int
spx_shutdown(struct socket *so)
{
	struct ipxpcb *ipxp = sotoipxpcb(so);
	struct spxpcb *cb = ipxtospxpcb(ipxp);

	socantsendmore(so);

	IPX_LIST_LOCK();
	IPX_LOCK(ipxp);
	/* cb is invalid afterwards; the pcb lock is not released here. */
	spx_usrclosed(cb);
	IPX_LIST_UNLOCK();

	return (0);
}
1596
1597static int
1598spx_sp_attach(struct socket *so, int proto, struct thread *td)
1599{
1600	int error;
1601	struct ipxpcb *ipxp;
1602
1603	error = spx_attach(so, proto, td);
1604	if (error == 0) {
1605		ipxp = sotoipxpcb(so);
1606		((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1607					(SF_HI | SF_HO | SF_PI);
1608	}
1609	return (error);
1610}
1611
1612/*
1613 * Create template to be used to send spx packets on a connection.
1614 * Called after host entry created, fills
1615 * in a skeletal spx header (choosing connection id),
1616 * minimizing the amount of work necessary when the connection is used.
1617 */
1618static void
1619spx_template(struct spxpcb *cb)
1620{
1621	struct ipxpcb *ipxp = cb->s_ipxpcb;
1622	struct ipx *ipx = cb->s_ipx;
1623	struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1624
1625	IPX_LOCK_ASSERT(ipxp);
1626
1627	ipx->ipx_pt = IPXPROTO_SPX;
1628	ipx->ipx_sna = ipxp->ipxp_laddr;
1629	ipx->ipx_dna = ipxp->ipxp_faddr;
1630	cb->s_sid = htons(spx_iss);
1631	spx_iss += SPX_ISSINCR/2;
1632	cb->s_alo = 1;
1633	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1634	cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1635					of large packets */
1636	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1637	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1638		/* But allow for lots of little packets as well */
1639}
1640
1641/*
1642 * Close a SPIP control block:
1643 *	discard spx control block itself
1644 *	discard ipx protocol control block
1645 *	wake up any sleepers
1646 * cb will always be invalid after this call.
1647 */
1648void
1649spx_close(struct spxpcb *cb)
1650{
1651	struct spx_q *s;
1652	struct ipxpcb *ipxp = cb->s_ipxpcb;
1653	struct socket *so = ipxp->ipxp_socket;
1654	struct mbuf *m;
1655
1656	IPX_LIST_LOCK_ASSERT();
1657	IPX_LOCK_ASSERT(ipxp);
1658
1659	s = cb->s_q.si_next;
1660	while (s != &(cb->s_q)) {
1661		s = s->si_next;
1662		m = dtom(s->si_prev);
1663		remque(s->si_prev);
1664		m_freem(m);
1665	}
1666	m_free(dtom(cb->s_ipx));
1667	FREE(cb, M_PCB);
1668	ipxp->ipxp_pcb = NULL;
1669	soisdisconnected(so);
1670	ipx_pcbdetach(ipxp);
1671	spxstat.spxs_closed++;
1672}
1673
1674/*
1675 *	Someday we may do level 3 handshaking
1676 *	to close a connection or send a xerox style error.
1677 *	For now, just close.
1678 * cb will always be invalid after this call.
1679 */
1680static void
1681spx_usrclosed(struct spxpcb *cb)
1682{
1683
1684	IPX_LIST_LOCK_ASSERT();
1685	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1686
1687	spx_close(cb);
1688}
1689
1690/*
1691 * cb will always be invalid after this call.
1692 */
1693static void
1694spx_disconnect(struct spxpcb *cb)
1695{
1696
1697	IPX_LIST_LOCK_ASSERT();
1698	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1699
1700	spx_close(cb);
1701}
1702
1703/*
1704 * Drop connection, reporting
1705 * the specified error.
1706 * cb will always be invalid after this call.
1707 */
1708static void
1709spx_drop(struct spxpcb *cb, int errno)
1710{
1711	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1712
1713	IPX_LIST_LOCK_ASSERT();
1714	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1715
1716	/*
1717	 * someday, in the xerox world
1718	 * we will generate error protocol packets
1719	 * announcing that the socket has gone away.
1720	 */
1721	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1722		spxstat.spxs_drops++;
1723		cb->s_state = TCPS_CLOSED;
1724		/*tcp_output(cb);*/
1725	} else
1726		spxstat.spxs_conndrops++;
1727	so->so_error = errno;
1728	spx_close(cb);
1729}
1730
1731/*
1732 * Fast timeout routine for processing delayed acks
1733 */
1734void
1735spx_fasttimo(void)
1736{
1737	struct ipxpcb *ipxp;
1738	struct spxpcb *cb;
1739
1740	IPX_LIST_LOCK();
1741	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1742		IPX_LOCK(ipxp);
1743		if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1744		    (cb->s_flags & SF_DELACK)) {
1745			cb->s_flags &= ~SF_DELACK;
1746			cb->s_flags |= SF_ACKNOW;
1747			spxstat.spxs_delack++;
1748			spx_output(cb, NULL);
1749		}
1750		IPX_UNLOCK(ipxp);
1751	}
1752	IPX_LIST_UNLOCK();
1753}
1754
1755/*
1756 * spx protocol timeout routine called every 500 ms.
1757 * Updates the timers in all active pcb's and
1758 * causes finite state machine actions if timers expire.
1759 */
1760void
1761spx_slowtimo(void)
1762{
1763	struct ipxpcb *ip, *ip_temp;
1764	struct spxpcb *cb;
1765	int i;
1766
1767	/*
1768	 * Search through tcb's and update active timers.  Note that timers
1769	 * may free the ipxpcb, so be sure to handle that case.
1770	 *
1771	 * spx_timers() may remove an ipxpcb entry, so we have to be ready to
1772	 * continue despite that.  The logic here is a bit obfuscated.
1773	 */
1774	IPX_LIST_LOCK();
1775	LIST_FOREACH_SAFE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1776		cb = ipxtospxpcb(ip);
1777		if (cb == NULL)
1778			continue;
1779		IPX_LOCK(cb->s_ipxpcb);
1780		for (i = 0; i < SPXT_NTIMERS; i++) {
1781			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1782				/*
1783				 * spx_timers() returns (NULL) if it free'd
1784				 * the pcb.
1785				 */
1786				cb = spx_timers(cb, i);
1787				if (cb == NULL)
1788					break;
1789			}
1790		}
1791		if (cb != NULL) {
1792			cb->s_idle++;
1793			if (cb->s_rtt)
1794				cb->s_rtt++;
1795			IPX_UNLOCK(cb->s_ipxpcb);
1796		}
1797	}
1798	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1799	IPX_LIST_UNLOCK();
1800}
1801
1802/*
1803 * SPX timer processing.
1804 */
1805static struct spxpcb *
1806spx_timers(struct spxpcb *cb, int timer)
1807{
1808	long rexmt;
1809	int win;
1810
1811	IPX_LIST_LOCK_ASSERT();
1812	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1813
1814	cb->s_force = 1 + timer;
1815	switch (timer) {
1816
1817	/*
1818	 * 2 MSL timeout in shutdown went off.  TCP deletes connection
1819	 * control block.
1820	 */
1821	case SPXT_2MSL:
1822		printf("spx: SPXT_2MSL went off for no reason\n");
1823		cb->s_timer[timer] = 0;
1824		break;
1825
1826	/*
1827	 * Retransmission timer went off.  Message has not
1828	 * been acked within retransmit interval.  Back off
1829	 * to a longer retransmit interval and retransmit one packet.
1830	 */
1831	case SPXT_REXMT:
1832		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1833			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1834			spxstat.spxs_timeoutdrop++;
1835			spx_drop(cb, ETIMEDOUT);
1836			cb = NULL;
1837			break;
1838		}
1839		spxstat.spxs_rexmttimeo++;
1840		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1841		rexmt *= spx_backoff[cb->s_rxtshift];
1842		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1843		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1844		/*
1845		 * If we have backed off fairly far, our srtt
1846		 * estimate is probably bogus.  Clobber it
1847		 * so we'll take the next rtt measurement as our srtt;
1848		 * move the current srtt into rttvar to keep the current
1849		 * retransmit times until then.
1850		 */
1851		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1852			cb->s_rttvar += (cb->s_srtt >> 2);
1853			cb->s_srtt = 0;
1854		}
1855		cb->s_snxt = cb->s_rack;
1856		/*
1857		 * If timing a packet, stop the timer.
1858		 */
1859		cb->s_rtt = 0;
1860		/*
1861		 * See very long discussion in tcp_timer.c about congestion
1862		 * window and sstrhesh
1863		 */
1864		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1865		if (win < 2)
1866			win = 2;
1867		cb->s_cwnd = CUNIT;
1868		cb->s_ssthresh = win * CUNIT;
1869		spx_output(cb, NULL);
1870		break;
1871
1872	/*
1873	 * Persistance timer into zero window.
1874	 * Force a probe to be sent.
1875	 */
1876	case SPXT_PERSIST:
1877		spxstat.spxs_persisttimeo++;
1878		spx_setpersist(cb);
1879		spx_output(cb, NULL);
1880		break;
1881
1882	/*
1883	 * Keep-alive timer went off; send something
1884	 * or drop connection if idle for too long.
1885	 */
1886	case SPXT_KEEP:
1887		spxstat.spxs_keeptimeo++;
1888		if (cb->s_state < TCPS_ESTABLISHED)
1889			goto dropit;
1890		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1891		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1892				goto dropit;
1893			spxstat.spxs_keepprobe++;
1894			spx_output(cb, NULL);
1895		} else
1896			cb->s_idle = 0;
1897		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1898		break;
1899	dropit:
1900		spxstat.spxs_keepdrops++;
1901		spx_drop(cb, ETIMEDOUT);
1902		cb = NULL;
1903		break;
1904	}
1905	return (cb);
1906}
1907