spx_usrreq.c revision 139586
/*
 * Copyright (c) 2004 Robert N. M. Watson
 * Copyright (c) 1995, Mike Mitchell
 * Copyright (c) 1984, 1985, 1986, 1987, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spx_usrreq.h
 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 139586 2005-01-02 15:19:24Z rwatson $");
40
41#include <sys/param.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/signalvar.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/sx.h>
52#include <sys/systm.h>
53
54#include <net/route.h>
55#include <netinet/tcp_fsm.h>
56
57#include <netipx/ipx.h>
58#include <netipx/ipx_pcb.h>
59#include <netipx/ipx_var.h>
60#include <netipx/spx.h>
61#include <netipx/spx_debug.h>
62#include <netipx/spx_timer.h>
63#include <netipx/spx_var.h>
64
65/*
66 * SPX protocol implementation.
67 */
68static u_short 	spx_iss;
69static u_short	spx_newchecks[50];
70static int	spx_hardnosed;
71static int	spx_use_delack = 0;
72static int	traceallspxs = 0;
73static struct	spx 	spx_savesi;
74static struct	spx_istat spx_istat;
75
76/* Following was struct spxstat spxstat; */
77#ifndef spxstat
78#define spxstat spx_istat.newstats
79#endif
80
81static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
82    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
83
/* Internal protocol machinery. */
static	struct spxpcb *spx_close(struct spxpcb *cb);
static	struct spxpcb *spx_disconnect(struct spxpcb *cb);
static	struct spxpcb *spx_drop(struct spxpcb *cb, int errno);
static	int spx_output(struct spxpcb *cb, struct mbuf *m0);
static	int spx_reass(struct spxpcb *cb, struct spx *si);
static	void spx_setpersist(struct spxpcb *cb);
static	void spx_template(struct spxpcb *cb);
static	struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static	struct spxpcb *spx_usrclosed(struct spxpcb *cb);

/* Socket-layer entry points referenced from the pr_usrreqs tables below. */
static	int spx_usr_abort(struct socket *so);
static	int spx_accept(struct socket *so, struct sockaddr **nam);
static	int spx_attach(struct socket *so, int proto, struct thread *td);
static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
static	int spx_connect(struct socket *so, struct sockaddr *nam,
			struct thread *td);
static	int spx_detach(struct socket *so);
static	int spx_usr_disconnect(struct socket *so);
static	int spx_listen(struct socket *so, struct thread *td);
static	int spx_rcvd(struct socket *so, int flags);
static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static	int spx_send(struct socket *so, int flags, struct mbuf *m,
		     struct sockaddr *addr, struct mbuf *control,
		     struct thread *td);
static	int spx_shutdown(struct socket *so);
static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
110
111struct	pr_usrreqs spx_usrreqs = {
112	.pru_abort =		spx_usr_abort,
113	.pru_accept =		spx_accept,
114	.pru_attach =		spx_attach,
115	.pru_bind =		spx_bind,
116	.pru_connect =		spx_connect,
117	.pru_control =		ipx_control,
118	.pru_detach =		spx_detach,
119	.pru_disconnect =	spx_usr_disconnect,
120	.pru_listen =		spx_listen,
121	.pru_peeraddr =		ipx_peeraddr,
122	.pru_rcvd =		spx_rcvd,
123	.pru_rcvoob =		spx_rcvoob,
124	.pru_send =		spx_send,
125	.pru_shutdown =		spx_shutdown,
126	.pru_sockaddr =		ipx_sockaddr,
127};
128
129struct	pr_usrreqs spx_usrreq_sps = {
130	.pru_abort =		spx_usr_abort,
131	.pru_accept =		spx_accept,
132	.pru_attach =		spx_sp_attach,
133	.pru_bind =		spx_bind,
134	.pru_connect =		spx_connect,
135	.pru_control =		ipx_control,
136	.pru_detach =		spx_detach,
137	.pru_disconnect =	spx_usr_disconnect,
138	.pru_listen =		spx_listen,
139	.pru_peeraddr =		ipx_peeraddr,
140	.pru_rcvd =		spx_rcvd,
141	.pru_rcvoob =		spx_rcvoob,
142	.pru_send =		spx_send,
143	.pru_shutdown =		spx_shutdown,
144	.pru_sockaddr =		ipx_sockaddr,
145};
146
147void
148spx_init()
149{
150
151	spx_iss = 1; /* WRONG !! should fish it out of TODR */
152}
153
154void
155spx_input(m, ipxp)
156	register struct mbuf *m;
157	register struct ipxpcb *ipxp;
158{
159	register struct spxpcb *cb;
160	register struct spx *si = mtod(m, struct spx *);
161	register struct socket *so;
162	int dropsocket = 0;
163	short ostate = 0;
164
165	spxstat.spxs_rcvtotal++;
166	KASSERT(ipxp != NULL, ("spx_input: NULL ipxpcb"));
167
168	cb = ipxtospxpcb(ipxp);
169	if (cb == NULL)
170		goto bad;
171
172	if (m->m_len < sizeof(*si)) {
173		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
174			spxstat.spxs_rcvshort++;
175			return;
176		}
177		si = mtod(m, struct spx *);
178	}
179	si->si_seq = ntohs(si->si_seq);
180	si->si_ack = ntohs(si->si_ack);
181	si->si_alo = ntohs(si->si_alo);
182
183	so = ipxp->ipxp_socket;
184
185	if (so->so_options & SO_DEBUG || traceallspxs) {
186		ostate = cb->s_state;
187		spx_savesi = *si;
188	}
189	if (so->so_options & SO_ACCEPTCONN) {
190		struct spxpcb *ocb = cb;
191
192		so = sonewconn(so, 0);
193		if (so == NULL) {
194			goto drop;
195		}
196		/*
197		 * This is ugly, but ....
198		 *
199		 * Mark socket as temporary until we're
200		 * committed to keeping it.  The code at
201		 * ``drop'' and ``dropwithreset'' check the
202		 * flag dropsocket to see if the temporary
203		 * socket created here should be discarded.
204		 * We mark the socket as discardable until
205		 * we're committed to it below in TCPS_LISTEN.
206		 */
207		dropsocket++;
208		ipxp = (struct ipxpcb *)so->so_pcb;
209		ipxp->ipxp_laddr = si->si_dna;
210		cb = ipxtospxpcb(ipxp);
211		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
212		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
213		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
214		cb->s_state = TCPS_LISTEN;
215	}
216
217	/*
218	 * Packet received on connection.
219	 * reset idle time and keep-alive timer;
220	 */
221	cb->s_idle = 0;
222	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
223
224	switch (cb->s_state) {
225
226	case TCPS_LISTEN:{
227		struct sockaddr_ipx *sipx, ssipx;
228		struct ipx_addr laddr;
229
230		/*
231		 * If somebody here was carying on a conversation
232		 * and went away, and his pen pal thinks he can
233		 * still talk, we get the misdirected packet.
234		 */
235		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
236			spx_istat.gonawy++;
237			goto dropwithreset;
238		}
239		sipx = &ssipx;
240		bzero(sipx, sizeof *sipx);
241		sipx->sipx_len = sizeof(*sipx);
242		sipx->sipx_family = AF_IPX;
243		sipx->sipx_addr = si->si_sna;
244		laddr = ipxp->ipxp_laddr;
245		if (ipx_nullhost(laddr))
246			ipxp->ipxp_laddr = si->si_dna;
247		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
248			ipxp->ipxp_laddr = laddr;
249			spx_istat.noconn++;
250			goto drop;
251		}
252		spx_template(cb);
253		dropsocket = 0;		/* committed to socket */
254		cb->s_did = si->si_sid;
255		cb->s_rack = si->si_ack;
256		cb->s_ralo = si->si_alo;
257#define THREEWAYSHAKE
258#ifdef THREEWAYSHAKE
259		cb->s_state = TCPS_SYN_RECEIVED;
260		cb->s_force = 1 + SPXT_KEEP;
261		spxstat.spxs_accepts++;
262		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
263		}
264		break;
265	/*
266	 * This state means that we have heard a response
267	 * to our acceptance of their connection
268	 * It is probably logically unnecessary in this
269	 * implementation.
270	 */
271	 case TCPS_SYN_RECEIVED: {
272		if (si->si_did != cb->s_sid) {
273			spx_istat.wrncon++;
274			goto drop;
275		}
276#endif
277		ipxp->ipxp_fport =  si->si_sport;
278		cb->s_timer[SPXT_REXMT] = 0;
279		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
280		soisconnected(so);
281		cb->s_state = TCPS_ESTABLISHED;
282		spxstat.spxs_accepts++;
283		}
284		break;
285
286	/*
287	 * This state means that we have gotten a response
288	 * to our attempt to establish a connection.
289	 * We fill in the data from the other side,
290	 * telling us which port to respond to, instead of the well-
291	 * known one we might have sent to in the first place.
292	 * We also require that this is a response to our
293	 * connection id.
294	 */
295	case TCPS_SYN_SENT:
296		if (si->si_did != cb->s_sid) {
297			spx_istat.notme++;
298			goto drop;
299		}
300		spxstat.spxs_connects++;
301		cb->s_did = si->si_sid;
302		cb->s_rack = si->si_ack;
303		cb->s_ralo = si->si_alo;
304		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
305		cb->s_timer[SPXT_REXMT] = 0;
306		cb->s_flags |= SF_ACKNOW;
307		soisconnected(so);
308		cb->s_state = TCPS_ESTABLISHED;
309		/* Use roundtrip time of connection request for initial rtt */
310		if (cb->s_rtt) {
311			cb->s_srtt = cb->s_rtt << 3;
312			cb->s_rttvar = cb->s_rtt << 1;
313			SPXT_RANGESET(cb->s_rxtcur,
314			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
315			    SPXTV_MIN, SPXTV_REXMTMAX);
316			    cb->s_rtt = 0;
317		}
318	}
319	if (so->so_options & SO_DEBUG || traceallspxs)
320		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
321
322	m->m_len -= sizeof(struct ipx);
323	m->m_pkthdr.len -= sizeof(struct ipx);
324	m->m_data += sizeof(struct ipx);
325
326	if (spx_reass(cb, si)) {
327		m_freem(m);
328	}
329	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
330		spx_output(cb, NULL);
331	cb->s_flags &= ~(SF_WIN|SF_RXT);
332	return;
333
334dropwithreset:
335	if (dropsocket) {
336		struct socket *head;
337		ACCEPT_LOCK();
338		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
339		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
340		head = so->so_head;
341		TAILQ_REMOVE(&head->so_incomp, so, so_list);
342		head->so_incqlen--;
343		so->so_qstate &= ~SQ_INCOMP;
344		so->so_head = NULL;
345		ACCEPT_UNLOCK();
346		soabort(so);
347	}
348	si->si_seq = ntohs(si->si_seq);
349	si->si_ack = ntohs(si->si_ack);
350	si->si_alo = ntohs(si->si_alo);
351	m_freem(dtom(si));
352	if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
353		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
354	return;
355
356drop:
357bad:
358	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
359            traceallspxs)
360		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
361	m_freem(m);
362}
363
364static int spxrexmtthresh = 3;
365
366/*
367 * This is structurally similar to the tcp reassembly routine
368 * but its function is somewhat different:  It merely queues
369 * packets up, and suppresses duplicates.
370 */
371static int
372spx_reass(cb, si)
373register struct spxpcb *cb;
374register struct spx *si;
375{
376	register struct spx_q *q;
377	register struct mbuf *m;
378	register struct socket *so = cb->s_ipxpcb->ipxp_socket;
379	char packetp = cb->s_flags & SF_HI;
380	int incr;
381	char wakeup = 0;
382
383	if (si == SI(0))
384		goto present;
385	/*
386	 * Update our news from them.
387	 */
388	if (si->si_cc & SPX_SA)
389		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
390	if (SSEQ_GT(si->si_alo, cb->s_ralo))
391		cb->s_flags |= SF_WIN;
392	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
393		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
394			spxstat.spxs_rcvdupack++;
395			/*
396			 * If this is a completely duplicate ack
397			 * and other conditions hold, we assume
398			 * a packet has been dropped and retransmit
399			 * it exactly as in tcp_input().
400			 */
401			if (si->si_ack != cb->s_rack ||
402			    si->si_alo != cb->s_ralo)
403				cb->s_dupacks = 0;
404			else if (++cb->s_dupacks == spxrexmtthresh) {
405				u_short onxt = cb->s_snxt;
406				int cwnd = cb->s_cwnd;
407
408				cb->s_snxt = si->si_ack;
409				cb->s_cwnd = CUNIT;
410				cb->s_force = 1 + SPXT_REXMT;
411				spx_output(cb, NULL);
412				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
413				cb->s_rtt = 0;
414				if (cwnd >= 4 * CUNIT)
415					cb->s_cwnd = cwnd / 2;
416				if (SSEQ_GT(onxt, cb->s_snxt))
417					cb->s_snxt = onxt;
418				return (1);
419			}
420		} else
421			cb->s_dupacks = 0;
422		goto update_window;
423	}
424	cb->s_dupacks = 0;
425	/*
426	 * If our correspondent acknowledges data we haven't sent
427	 * TCP would drop the packet after acking.  We'll be a little
428	 * more permissive
429	 */
430	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
431		spxstat.spxs_rcvacktoomuch++;
432		si->si_ack = cb->s_smax + 1;
433	}
434	spxstat.spxs_rcvackpack++;
435	/*
436	 * If transmit timer is running and timed sequence
437	 * number was acked, update smoothed round trip time.
438	 * See discussion of algorithm in tcp_input.c
439	 */
440	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
441		spxstat.spxs_rttupdated++;
442		if (cb->s_srtt != 0) {
443			register short delta;
444			delta = cb->s_rtt - (cb->s_srtt >> 3);
445			if ((cb->s_srtt += delta) <= 0)
446				cb->s_srtt = 1;
447			if (delta < 0)
448				delta = -delta;
449			delta -= (cb->s_rttvar >> 2);
450			if ((cb->s_rttvar += delta) <= 0)
451				cb->s_rttvar = 1;
452		} else {
453			/*
454			 * No rtt measurement yet
455			 */
456			cb->s_srtt = cb->s_rtt << 3;
457			cb->s_rttvar = cb->s_rtt << 1;
458		}
459		cb->s_rtt = 0;
460		cb->s_rxtshift = 0;
461		SPXT_RANGESET(cb->s_rxtcur,
462			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
463			SPXTV_MIN, SPXTV_REXMTMAX);
464	}
465	/*
466	 * If all outstanding data is acked, stop retransmit
467	 * timer and remember to restart (more output or persist).
468	 * If there is more data to be acked, restart retransmit
469	 * timer, using current (possibly backed-off) value;
470	 */
471	if (si->si_ack == cb->s_smax + 1) {
472		cb->s_timer[SPXT_REXMT] = 0;
473		cb->s_flags |= SF_RXT;
474	} else if (cb->s_timer[SPXT_PERSIST] == 0)
475		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
476	/*
477	 * When new data is acked, open the congestion window.
478	 * If the window gives us less than ssthresh packets
479	 * in flight, open exponentially (maxseg at a time).
480	 * Otherwise open linearly (maxseg^2 / cwnd at a time).
481	 */
482	incr = CUNIT;
483	if (cb->s_cwnd > cb->s_ssthresh)
484		incr = max(incr * incr / cb->s_cwnd, 1);
485	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
486	/*
487	 * Trim Acked data from output queue.
488	 */
489	while ((m = so->so_snd.sb_mb) != NULL) {
490		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
491			sbdroprecord(&so->so_snd);
492		else
493			break;
494	}
495	sowwakeup(so);
496	cb->s_rack = si->si_ack;
497update_window:
498	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
499		cb->s_snxt = cb->s_rack;
500	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
501	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
502	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
503		/* keep track of pure window updates */
504		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
505		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
506			spxstat.spxs_rcvwinupd++;
507			spxstat.spxs_rcvdupack--;
508		}
509		cb->s_ralo = si->si_alo;
510		cb->s_swl1 = si->si_seq;
511		cb->s_swl2 = si->si_ack;
512		cb->s_swnd = (1 + si->si_alo - si->si_ack);
513		if (cb->s_swnd > cb->s_smxw)
514			cb->s_smxw = cb->s_swnd;
515		cb->s_flags |= SF_WIN;
516	}
517	/*
518	 * If this packet number is higher than that which
519	 * we have allocated refuse it, unless urgent
520	 */
521	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
522		if (si->si_cc & SPX_SP) {
523			spxstat.spxs_rcvwinprobe++;
524			return (1);
525		} else
526			spxstat.spxs_rcvpackafterwin++;
527		if (si->si_cc & SPX_OB) {
528			if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
529				m_freem(dtom(si));
530				return (0);
531			} /* else queue this packet; */
532		} else {
533#ifdef BROKEN
534			/*
535			 * XXXRW: This is broken on at least one count:
536			 * spx_close() will free the ipxp and related parts,
537			 * which are then touched by spx_input() after the
538			 * return from spx_reass().
539			 */
540			/*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
541			if (so->so_state && SS_NOFDREF) {
542				spx_close(cb);
543			} else
544				       would crash system*/
545#endif
546			spx_istat.notyet++;
547			m_freem(dtom(si));
548			return (0);
549		}
550	}
551	/*
552	 * If this is a system packet, we don't need to
553	 * queue it up, and won't update acknowledge #
554	 */
555	if (si->si_cc & SPX_SP) {
556		return (1);
557	}
558	/*
559	 * We have already seen this packet, so drop.
560	 */
561	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
562		spx_istat.bdreas++;
563		spxstat.spxs_rcvduppack++;
564		if (si->si_seq == cb->s_ack - 1)
565			spx_istat.lstdup++;
566		return (1);
567	}
568	/*
569	 * Loop through all packets queued up to insert in
570	 * appropriate sequence.
571	 */
572	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
573		if (si->si_seq == SI(q)->si_seq) {
574			spxstat.spxs_rcvduppack++;
575			return (1);
576		}
577		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
578			spxstat.spxs_rcvoopack++;
579			break;
580		}
581	}
582	insque(si, q->si_prev);
583	/*
584	 * If this packet is urgent, inform process
585	 */
586	if (si->si_cc & SPX_OB) {
587		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
588		sohasoutofband(so);
589		cb->s_oobflags |= SF_IOOB;
590	}
591present:
592#define SPINC sizeof(struct spxhdr)
593	/*
594	 * Loop through all packets queued up to update acknowledge
595	 * number, and present all acknowledged data to user;
596	 * If in packet interface mode, show packet headers.
597	 */
598	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
599		  if (SI(q)->si_seq == cb->s_ack) {
600			cb->s_ack++;
601			m = dtom(q);
602			if (SI(q)->si_cc & SPX_OB) {
603				cb->s_oobflags &= ~SF_IOOB;
604				SOCKBUF_LOCK(&so->so_rcv);
605				if (so->so_rcv.sb_cc)
606					so->so_oobmark = so->so_rcv.sb_cc;
607				else
608					so->so_rcv.sb_state |= SBS_RCVATMARK;
609				SOCKBUF_UNLOCK(&so->so_rcv);
610			}
611			q = q->si_prev;
612			remque(q->si_next);
613			wakeup = 1;
614			spxstat.spxs_rcvpack++;
615#ifdef SF_NEWCALL
616			if (cb->s_flags2 & SF_NEWCALL) {
617				struct spxhdr *sp = mtod(m, struct spxhdr *);
618				u_char dt = sp->spx_dt;
619				spx_newchecks[4]++;
620				if (dt != cb->s_rhdr.spx_dt) {
621					struct mbuf *mm =
622					   m_getclr(M_DONTWAIT, MT_CONTROL);
623					spx_newchecks[0]++;
624					if (mm != NULL) {
625						u_short *s =
626							mtod(mm, u_short *);
627						cb->s_rhdr.spx_dt = dt;
628						mm->m_len = 5; /*XXX*/
629						s[0] = 5;
630						s[1] = 1;
631						*(u_char *)(&s[2]) = dt;
632						sbappend(&so->so_rcv, mm);
633					}
634				}
635				if (sp->spx_cc & SPX_OB) {
636					MCHTYPE(m, MT_OOBDATA);
637					spx_newchecks[1]++;
638					SOCKBUF_LOCK(&so->so_rcv);
639					so->so_oobmark = 0;
640					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
641					SOCKBUF_UNLOCK(&so->so_rcv);
642				}
643				if (packetp == 0) {
644					m->m_data += SPINC;
645					m->m_len -= SPINC;
646					m->m_pkthdr.len -= SPINC;
647				}
648				if ((sp->spx_cc & SPX_EM) || packetp) {
649					sbappendrecord(&so->so_rcv, m);
650					spx_newchecks[9]++;
651				} else
652					sbappend(&so->so_rcv, m);
653			} else
654#endif
655			if (packetp) {
656				sbappendrecord(&so->so_rcv, m);
657			} else {
658				cb->s_rhdr = *mtod(m, struct spxhdr *);
659				m->m_data += SPINC;
660				m->m_len -= SPINC;
661				m->m_pkthdr.len -= SPINC;
662				sbappend(&so->so_rcv, m);
663			}
664		  } else
665			break;
666	}
667	if (wakeup)
668		sorwakeup(so);
669	return (0);
670}
671
672void
673spx_ctlinput(cmd, arg_as_sa, dummy)
674	int cmd;
675	struct sockaddr *arg_as_sa;	/* XXX should be swapped with dummy */
676	void *dummy;
677{
678	caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
679	struct ipx_addr *na;
680	struct sockaddr_ipx *sipx;
681
682	if (cmd < 0 || cmd >= PRC_NCMDS)
683		return;
684
685	switch (cmd) {
686
687	case PRC_ROUTEDEAD:
688		return;
689
690	case PRC_IFDOWN:
691	case PRC_HOSTDEAD:
692	case PRC_HOSTUNREACH:
693		sipx = (struct sockaddr_ipx *)arg;
694		if (sipx->sipx_family != AF_IPX)
695			return;
696		na = &sipx->sipx_addr;
697		break;
698
699	default:
700		break;
701	}
702}
703
704static int
705spx_output(cb, m0)
706	register struct spxpcb *cb;
707	struct mbuf *m0;
708{
709	struct socket *so = cb->s_ipxpcb->ipxp_socket;
710	register struct mbuf *m;
711	register struct spx *si = NULL;
712	register struct sockbuf *sb = &so->so_snd;
713	int len = 0, win, rcv_win;
714	short span, off, recordp = 0;
715	u_short alo;
716	int error = 0, sendalot;
717#ifdef notdef
718	int idle;
719#endif
720	struct mbuf *mprev;
721
722	if (m0 != NULL) {
723		int mtu = cb->s_mtu;
724		int datalen;
725		/*
726		 * Make sure that packet isn't too big.
727		 */
728		for (m = m0; m != NULL; m = m->m_next) {
729			mprev = m;
730			len += m->m_len;
731			if (m->m_flags & M_EOR)
732				recordp = 1;
733		}
734		datalen = (cb->s_flags & SF_HO) ?
735				len - sizeof(struct spxhdr) : len;
736		if (datalen > mtu) {
737			if (cb->s_flags & SF_PI) {
738				m_freem(m0);
739				return (EMSGSIZE);
740			} else {
741				int oldEM = cb->s_cc & SPX_EM;
742
743				cb->s_cc &= ~SPX_EM;
744				while (len > mtu) {
745					/*
746					 * Here we are only being called
747					 * from usrreq(), so it is OK to
748					 * block.
749					 */
750					m = m_copym(m0, 0, mtu, M_TRYWAIT);
751					if (cb->s_flags & SF_NEWCALL) {
752					    struct mbuf *mm = m;
753					    spx_newchecks[7]++;
754					    while (mm != NULL) {
755						mm->m_flags &= ~M_EOR;
756						mm = mm->m_next;
757					    }
758					}
759					error = spx_output(cb, m);
760					if (error) {
761						cb->s_cc |= oldEM;
762						m_freem(m0);
763						return (error);
764					}
765					m_adj(m0, mtu);
766					len -= mtu;
767				}
768				cb->s_cc |= oldEM;
769			}
770		}
771		/*
772		 * Force length even, by adding a "garbage byte" if
773		 * necessary.
774		 */
775		if (len & 1) {
776			m = mprev;
777			if (M_TRAILINGSPACE(m) >= 1)
778				m->m_len++;
779			else {
780				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
781
782				if (m1 == NULL) {
783					m_freem(m0);
784					return (ENOBUFS);
785				}
786				m1->m_len = 1;
787				*(mtod(m1, u_char *)) = 0;
788				m->m_next = m1;
789			}
790		}
791		m = m_gethdr(M_DONTWAIT, MT_HEADER);
792		if (m == NULL) {
793			m_freem(m0);
794			return (ENOBUFS);
795		}
796		/*
797		 * Fill in mbuf with extended SP header
798		 * and addresses and length put into network format.
799		 */
800		MH_ALIGN(m, sizeof(struct spx));
801		m->m_len = sizeof(struct spx);
802		m->m_next = m0;
803		si = mtod(m, struct spx *);
804		si->si_i = *cb->s_ipx;
805		si->si_s = cb->s_shdr;
806		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
807			register struct spxhdr *sh;
808			if (m0->m_len < sizeof(*sh)) {
809				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
810					m_free(m);
811					m_freem(m0);
812					return (EINVAL);
813				}
814				m->m_next = m0;
815			}
816			sh = mtod(m0, struct spxhdr *);
817			si->si_dt = sh->spx_dt;
818			si->si_cc |= sh->spx_cc & SPX_EM;
819			m0->m_len -= sizeof(*sh);
820			m0->m_data += sizeof(*sh);
821			len -= sizeof(*sh);
822		}
823		len += sizeof(*si);
824		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
825			si->si_cc |= SPX_EM;
826			spx_newchecks[8]++;
827		}
828		if (cb->s_oobflags & SF_SOOB) {
829			/*
830			 * Per jqj@cornell:
831			 * make sure OB packets convey exactly 1 byte.
832			 * If the packet is 1 byte or larger, we
833			 * have already guaranted there to be at least
834			 * one garbage byte for the checksum, and
835			 * extra bytes shouldn't hurt!
836			 */
837			if (len > sizeof(*si)) {
838				si->si_cc |= SPX_OB;
839				len = (1 + sizeof(*si));
840			}
841		}
842		si->si_len = htons((u_short)len);
843		m->m_pkthdr.len = ((len - 1) | 1) + 1;
844		/*
845		 * queue stuff up for output
846		 */
847		sbappendrecord(sb, m);
848		cb->s_seq++;
849	}
850#ifdef notdef
851	idle = (cb->s_smax == (cb->s_rack - 1));
852#endif
853again:
854	sendalot = 0;
855	off = cb->s_snxt - cb->s_rack;
856	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
857
858	/*
859	 * If in persist timeout with window of 0, send a probe.
860	 * Otherwise, if window is small but nonzero
861	 * and timer expired, send what we can and go into
862	 * transmit state.
863	 */
864	if (cb->s_force == 1 + SPXT_PERSIST) {
865		if (win != 0) {
866			cb->s_timer[SPXT_PERSIST] = 0;
867			cb->s_rxtshift = 0;
868		}
869	}
870	span = cb->s_seq - cb->s_rack;
871	len = min(span, win) - off;
872
873	if (len < 0) {
874		/*
875		 * Window shrank after we went into it.
876		 * If window shrank to 0, cancel pending
877		 * restransmission and pull s_snxt back
878		 * to (closed) window.  We will enter persist
879		 * state below.  If the widndow didn't close completely,
880		 * just wait for an ACK.
881		 */
882		len = 0;
883		if (win == 0) {
884			cb->s_timer[SPXT_REXMT] = 0;
885			cb->s_snxt = cb->s_rack;
886		}
887	}
888	if (len > 1)
889		sendalot = 1;
890	rcv_win = sbspace(&so->so_rcv);
891
892	/*
893	 * Send if we owe peer an ACK.
894	 */
895	if (cb->s_oobflags & SF_SOOB) {
896		/*
897		 * must transmit this out of band packet
898		 */
899		cb->s_oobflags &= ~ SF_SOOB;
900		sendalot = 1;
901		spxstat.spxs_sndurg++;
902		goto found;
903	}
904	if (cb->s_flags & SF_ACKNOW)
905		goto send;
906	if (cb->s_state < TCPS_ESTABLISHED)
907		goto send;
908	/*
909	 * Silly window can't happen in spx.
910	 * Code from tcp deleted.
911	 */
912	if (len)
913		goto send;
914	/*
915	 * Compare available window to amount of window
916	 * known to peer (as advertised window less
917	 * next expected input.)  If the difference is at least two
918	 * packets or at least 35% of the mximum possible window,
919	 * then want to send a window update to peer.
920	 */
921	if (rcv_win > 0) {
922		u_short delta =  1 + cb->s_alo - cb->s_ack;
923		int adv = rcv_win - (delta * cb->s_mtu);
924
925		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
926		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
927			spxstat.spxs_sndwinup++;
928			cb->s_flags |= SF_ACKNOW;
929			goto send;
930		}
931
932	}
933	/*
934	 * Many comments from tcp_output.c are appropriate here
935	 * including . . .
936	 * If send window is too small, there is data to transmit, and no
937	 * retransmit or persist is pending, then go to persist state.
938	 * If nothing happens soon, send when timer expires:
939	 * if window is nonzero, transmit what we can,
940	 * otherwise send a probe.
941	 */
942	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
943		cb->s_timer[SPXT_PERSIST] == 0) {
944			cb->s_rxtshift = 0;
945			spx_setpersist(cb);
946	}
947	/*
948	 * No reason to send a packet, just return.
949	 */
950	cb->s_outx = 1;
951	return (0);
952
953send:
954	/*
955	 * Find requested packet.
956	 */
957	si = 0;
958	if (len > 0) {
959		cb->s_want = cb->s_snxt;
960		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
961			si = mtod(m, struct spx *);
962			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
963				break;
964		}
965	found:
966		if (si != NULL) {
967			if (si->si_seq == cb->s_snxt)
968					cb->s_snxt++;
969				else
970					spxstat.spxs_sndvoid++, si = 0;
971		}
972	}
973	/*
974	 * update window
975	 */
976	if (rcv_win < 0)
977		rcv_win = 0;
978	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
979	if (SSEQ_LT(alo, cb->s_alo))
980		alo = cb->s_alo;
981
982	if (si != NULL) {
983		/*
984		 * must make a copy of this packet for
985		 * ipx_output to monkey with
986		 */
987		m = m_copy(dtom(si), 0, (int)M_COPYALL);
988		if (m == NULL) {
989			return (ENOBUFS);
990		}
991		si = mtod(m, struct spx *);
992		if (SSEQ_LT(si->si_seq, cb->s_smax))
993			spxstat.spxs_sndrexmitpack++;
994		else
995			spxstat.spxs_sndpack++;
996	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
997		/*
998		 * Must send an acknowledgement or a probe
999		 */
1000		if (cb->s_force)
1001			spxstat.spxs_sndprobe++;
1002		if (cb->s_flags & SF_ACKNOW)
1003			spxstat.spxs_sndacks++;
1004		m = m_gethdr(M_DONTWAIT, MT_HEADER);
1005		if (m == NULL)
1006			return (ENOBUFS);
1007		/*
1008		 * Fill in mbuf with extended SP header
1009		 * and addresses and length put into network format.
1010		 */
1011		MH_ALIGN(m, sizeof(struct spx));
1012		m->m_len = sizeof(*si);
1013		m->m_pkthdr.len = sizeof(*si);
1014		si = mtod(m, struct spx *);
1015		si->si_i = *cb->s_ipx;
1016		si->si_s = cb->s_shdr;
1017		si->si_seq = cb->s_smax + 1;
1018		si->si_len = htons(sizeof(*si));
1019		si->si_cc |= SPX_SP;
1020	} else {
1021		cb->s_outx = 3;
1022		if (so->so_options & SO_DEBUG || traceallspxs)
1023			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1024		return (0);
1025	}
1026	/*
1027	 * Stuff checksum and output datagram.
1028	 */
1029	if ((si->si_cc & SPX_SP) == 0) {
1030		if (cb->s_force != (1 + SPXT_PERSIST) ||
1031		    cb->s_timer[SPXT_PERSIST] == 0) {
1032			/*
1033			 * If this is a new packet and we are not currently
1034			 * timing anything, time this one.
1035			 */
1036			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1037				cb->s_smax = si->si_seq;
1038				if (cb->s_rtt == 0) {
1039					spxstat.spxs_segstimed++;
1040					cb->s_rtseq = si->si_seq;
1041					cb->s_rtt = 1;
1042				}
1043			}
1044			/*
1045			 * Set rexmt timer if not currently set,
1046			 * Initial value for retransmit timer is smoothed
1047			 * round-trip time + 2 * round-trip time variance.
1048			 * Initialize shift counter which is used for backoff
1049			 * of retransmit time.
1050			 */
1051			if (cb->s_timer[SPXT_REXMT] == 0 &&
1052			    cb->s_snxt != cb->s_rack) {
1053				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1054				if (cb->s_timer[SPXT_PERSIST]) {
1055					cb->s_timer[SPXT_PERSIST] = 0;
1056					cb->s_rxtshift = 0;
1057				}
1058			}
1059		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1060			cb->s_smax = si->si_seq;
1061		}
1062	} else if (cb->s_state < TCPS_ESTABLISHED) {
1063		if (cb->s_rtt == 0)
1064			cb->s_rtt = 1; /* Time initial handshake */
1065		if (cb->s_timer[SPXT_REXMT] == 0)
1066			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1067	}
1068	{
1069		/*
1070		 * Do not request acks when we ack their data packets or
1071		 * when we do a gratuitous window update.
1072		 */
1073		if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1074				si->si_cc |= SPX_SA;
1075		si->si_seq = htons(si->si_seq);
1076		si->si_alo = htons(alo);
1077		si->si_ack = htons(cb->s_ack);
1078
1079		if (ipxcksum) {
1080			si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1081		} else
1082			si->si_sum = 0xffff;
1083
1084		cb->s_outx = 4;
1085		if (so->so_options & SO_DEBUG || traceallspxs)
1086			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1087
1088		if (so->so_options & SO_DONTROUTE)
1089			error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1090		else
1091			error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1092	}
1093	if (error) {
1094		return (error);
1095	}
1096	spxstat.spxs_sndtotal++;
1097	/*
1098	 * Data sent (as far as we can tell).
1099	 * If this advertises a larger window than any other segment,
1100	 * then remember the size of the advertized window.
1101	 * Any pending ACK has now been sent.
1102	 */
1103	cb->s_force = 0;
1104	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1105	if (SSEQ_GT(alo, cb->s_alo))
1106		cb->s_alo = alo;
1107	if (sendalot)
1108		goto again;
1109	cb->s_outx = 5;
1110	return (0);
1111}
1112
1113static int spx_do_persist_panics = 0;
1114
1115static void
1116spx_setpersist(cb)
1117	register struct spxpcb *cb;
1118{
1119	register int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1120
1121	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1122		panic("spx_output REXMT");
1123	/*
1124	 * Start/restart persistance timer.
1125	 */
1126	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1127	    t*spx_backoff[cb->s_rxtshift],
1128	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1129	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1130		cb->s_rxtshift++;
1131}
1132
1133int
1134spx_ctloutput(so, sopt)
1135	struct socket *so;
1136	struct sockopt *sopt;
1137{
1138	struct ipxpcb *ipxp = sotoipxpcb(so);
1139	register struct spxpcb *cb;
1140	int mask, error;
1141	short soptval;
1142	u_short usoptval;
1143	int optval;
1144
1145	error = 0;
1146
1147	if (sopt->sopt_level != IPXPROTO_SPX) {
1148		/* This will have to be changed when we do more general
1149		   stacking of protocols */
1150		return (ipx_ctloutput(so, sopt));
1151	}
1152	if (ipxp == NULL)
1153		return (EINVAL);
1154	else
1155		cb = ipxtospxpcb(ipxp);
1156
1157	switch (sopt->sopt_dir) {
1158	case SOPT_GET:
1159		switch (sopt->sopt_name) {
1160		case SO_HEADERS_ON_INPUT:
1161			mask = SF_HI;
1162			goto get_flags;
1163
1164		case SO_HEADERS_ON_OUTPUT:
1165			mask = SF_HO;
1166		get_flags:
1167			soptval = cb->s_flags & mask;
1168			error = sooptcopyout(sopt, &soptval, sizeof soptval);
1169			break;
1170
1171		case SO_MTU:
1172			usoptval = cb->s_mtu;
1173			error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1174			break;
1175
1176		case SO_LAST_HEADER:
1177			error = sooptcopyout(sopt, &cb->s_rhdr,
1178					     sizeof cb->s_rhdr);
1179			break;
1180
1181		case SO_DEFAULT_HEADERS:
1182			error = sooptcopyout(sopt, &cb->s_shdr,
1183					     sizeof cb->s_shdr);
1184			break;
1185
1186		default:
1187			error = ENOPROTOOPT;
1188		}
1189		break;
1190
1191	case SOPT_SET:
1192		switch (sopt->sopt_name) {
1193			/* XXX why are these shorts on get and ints on set?
1194			   that doesn't make any sense... */
1195		case SO_HEADERS_ON_INPUT:
1196			mask = SF_HI;
1197			goto set_head;
1198
1199		case SO_HEADERS_ON_OUTPUT:
1200			mask = SF_HO;
1201		set_head:
1202			error = sooptcopyin(sopt, &optval, sizeof optval,
1203					    sizeof optval);
1204			if (error)
1205				break;
1206
1207			if (cb->s_flags & SF_PI) {
1208				if (optval)
1209					cb->s_flags |= mask;
1210				else
1211					cb->s_flags &= ~mask;
1212			} else error = EINVAL;
1213			break;
1214
1215		case SO_MTU:
1216			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1217					    sizeof usoptval);
1218			if (error)
1219				break;
1220			cb->s_mtu = usoptval;
1221			break;
1222
1223#ifdef SF_NEWCALL
1224		case SO_NEWCALL:
1225			error = sooptcopyin(sopt, &optval, sizeof optval,
1226					    sizeof optval);
1227			if (error)
1228				break;
1229			if (optval) {
1230				cb->s_flags2 |= SF_NEWCALL;
1231				spx_newchecks[5]++;
1232			} else {
1233				cb->s_flags2 &= ~SF_NEWCALL;
1234				spx_newchecks[6]++;
1235			}
1236			break;
1237#endif
1238
1239		case SO_DEFAULT_HEADERS:
1240			{
1241				struct spxhdr sp;
1242
1243				error = sooptcopyin(sopt, &sp, sizeof sp,
1244						    sizeof sp);
1245				if (error)
1246					break;
1247				cb->s_dt = sp.spx_dt;
1248				cb->s_cc = sp.spx_cc & SPX_EM;
1249			}
1250			break;
1251
1252		default:
1253			error = ENOPROTOOPT;
1254		}
1255		break;
1256	}
1257	return (error);
1258}
1259
1260static int
1261spx_usr_abort(so)
1262	struct socket *so;
1263{
1264	int s;
1265	struct ipxpcb *ipxp;
1266	struct spxpcb *cb;
1267
1268	ipxp = sotoipxpcb(so);
1269	cb = ipxtospxpcb(ipxp);
1270
1271	s = splnet();
1272	spx_drop(cb, ECONNABORTED);
1273	splx(s);
1274	return (0);
1275}
1276
1277/*
1278 * Accept a connection.  Essentially all the work is
1279 * done at higher levels; just return the address
1280 * of the peer, storing through addr.
1281 */
1282static int
1283spx_accept(so, nam)
1284	struct socket *so;
1285	struct sockaddr **nam;
1286{
1287	struct ipxpcb *ipxp;
1288	struct sockaddr_ipx *sipx, ssipx;
1289
1290	ipxp = sotoipxpcb(so);
1291	sipx = &ssipx;
1292	bzero(sipx, sizeof *sipx);
1293	sipx->sipx_len = sizeof *sipx;
1294	sipx->sipx_family = AF_IPX;
1295	sipx->sipx_addr = ipxp->ipxp_faddr;
1296	*nam = sodupsockaddr((struct sockaddr *)sipx, M_NOWAIT);
1297	return (0);
1298}
1299
1300static int
1301spx_attach(so, proto, td)
1302	struct socket *so;
1303	int proto;
1304	struct thread *td;
1305{
1306	int error;
1307	int s;
1308	struct ipxpcb *ipxp;
1309	struct spxpcb *cb;
1310	struct mbuf *mm;
1311	struct sockbuf *sb;
1312
1313	ipxp = sotoipxpcb(so);
1314	cb = ipxtospxpcb(ipxp);
1315
1316	if (ipxp != NULL)
1317		return (EISCONN);
1318	s = splnet();
1319	error = ipx_pcballoc(so, &ipxpcb_list, td);
1320	if (error)
1321		goto spx_attach_end;
1322	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1323		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1324		if (error)
1325			goto spx_attach_end;
1326	}
1327	ipxp = sotoipxpcb(so);
1328
1329	MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1330
1331	if (cb == NULL) {
1332		error = ENOBUFS;
1333		goto spx_attach_end;
1334	}
1335	sb = &so->so_snd;
1336
1337	mm = m_getclr(M_DONTWAIT, MT_HEADER);
1338	if (mm == NULL) {
1339		FREE(cb, M_PCB);
1340		error = ENOBUFS;
1341		goto spx_attach_end;
1342	}
1343	cb->s_ipx = mtod(mm, struct ipx *);
1344	cb->s_state = TCPS_LISTEN;
1345	cb->s_smax = -1;
1346	cb->s_swl1 = -1;
1347	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1348	cb->s_ipxpcb = ipxp;
1349	cb->s_mtu = 576 - sizeof(struct spx);
1350	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1351	cb->s_ssthresh = cb->s_cwnd;
1352	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1353	/* Above is recomputed when connecting to account
1354	   for changed buffering or mtu's */
1355	cb->s_rtt = SPXTV_SRTTBASE;
1356	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1357	SPXT_RANGESET(cb->s_rxtcur,
1358	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1359	    SPXTV_MIN, SPXTV_REXMTMAX);
1360	ipxp->ipxp_pcb = (caddr_t)cb;
1361spx_attach_end:
1362	splx(s);
1363	return (error);
1364}
1365
/*
 * Bind a local address to the socket.  This is a thin wrapper that passes
 * the socket's IPX pcb, the address and the thread to ipx_pcbbind() and
 * returns its result.
 */
static int
spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (ipx_pcbbind(sotoipxpcb(so), nam, td));
}
1378
1379/*
1380 * Initiate connection to peer.
1381 * Enter SYN_SENT state, and mark socket as connecting.
1382 * Start keep-alive timer, setup prototype header,
1383 * Send initial system packet requesting connection.
1384 */
1385static int
1386spx_connect(so, nam, td)
1387	struct socket *so;
1388	struct sockaddr *nam;
1389	struct thread *td;
1390{
1391	int error;
1392	int s;
1393	struct ipxpcb *ipxp;
1394	struct spxpcb *cb;
1395
1396	ipxp = sotoipxpcb(so);
1397	cb = ipxtospxpcb(ipxp);
1398
1399	s = splnet();
1400	if (ipxp->ipxp_lport == 0) {
1401		error = ipx_pcbbind(ipxp, NULL, td);
1402		if (error)
1403			goto spx_connect_end;
1404	}
1405	error = ipx_pcbconnect(ipxp, nam, td);
1406	if (error)
1407		goto spx_connect_end;
1408	soisconnecting(so);
1409	spxstat.spxs_connattempt++;
1410	cb->s_state = TCPS_SYN_SENT;
1411	cb->s_did = 0;
1412	spx_template(cb);
1413	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1414	cb->s_force = 1 + SPXTV_KEEP;
1415	/*
1416	 * Other party is required to respond to
1417	 * the port I send from, but he is not
1418	 * required to answer from where I am sending to,
1419	 * so allow wildcarding.
1420	 * original port I am sending to is still saved in
1421	 * cb->s_dport.
1422	 */
1423	ipxp->ipxp_fport = 0;
1424	error = spx_output(cb, NULL);
1425spx_connect_end:
1426	splx(s);
1427	return (error);
1428}
1429
1430static int
1431spx_detach(so)
1432	struct socket *so;
1433{
1434	int s;
1435	struct ipxpcb *ipxp;
1436	struct spxpcb *cb;
1437
1438	ipxp = sotoipxpcb(so);
1439	cb = ipxtospxpcb(ipxp);
1440
1441	if (ipxp == NULL)
1442		return (ENOTCONN);
1443	s = splnet();
1444	if (cb->s_state > TCPS_LISTEN)
1445		spx_disconnect(cb);
1446	else
1447		spx_close(cb);
1448	splx(s);
1449	return (0);
1450}
1451
/*
 * User request to disconnect.
 *
 * A closing handshake at the SPX level may optionally be implemented
 * later; this is the hook where it would go.  For now the connection is
 * simply passed to spx_disconnect() between splnet() and splx(), and 0 is
 * returned.
 */
static int
spx_usr_disconnect(struct socket *so)
{
	struct ipxpcb *pcb = sotoipxpcb(so);
	struct spxpcb *conn = ipxtospxpcb(pcb);
	int ipl;

	ipl = splnet();
	spx_disconnect(conn);
	splx(ipl);
	return (0);
}
1473
1474static int
1475spx_listen(so, td)
1476	struct socket *so;
1477	struct thread *td;
1478{
1479	int error;
1480	struct ipxpcb *ipxp;
1481	struct spxpcb *cb;
1482
1483	error = 0;
1484	ipxp = sotoipxpcb(so);
1485	cb = ipxtospxpcb(ipxp);
1486
1487	if (ipxp->ipxp_lport == 0)
1488		error = ipx_pcbbind(ipxp, NULL, td);
1489	if (error == 0)
1490		cb->s_state = TCPS_LISTEN;
1491	return (error);
1492}
1493
1494/*
1495 * After a receive, possibly send acknowledgment
1496 * updating allocation.
1497 */
1498static int
1499spx_rcvd(so, flags)
1500	struct socket *so;
1501	int flags;
1502{
1503	int s;
1504	struct ipxpcb *ipxp;
1505	struct spxpcb *cb;
1506
1507	ipxp = sotoipxpcb(so);
1508	cb = ipxtospxpcb(ipxp);
1509
1510	s = splnet();
1511	cb->s_flags |= SF_RVD;
1512	spx_output(cb, NULL);
1513	cb->s_flags &= ~SF_RVD;
1514	splx(s);
1515	return (0);
1516}
1517
1518static int
1519spx_rcvoob(so, m, flags)
1520	struct socket *so;
1521	struct mbuf *m;
1522	int flags;
1523{
1524	struct ipxpcb *ipxp;
1525	struct spxpcb *cb;
1526
1527	ipxp = sotoipxpcb(so);
1528	cb = ipxtospxpcb(ipxp);
1529
1530	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1531	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1532		m->m_len = 1;
1533		*mtod(m, caddr_t) = cb->s_iobc;
1534		return (0);
1535	}
1536	return (EINVAL);
1537}
1538
1539static int
1540spx_send(so, flags, m, addr, controlp, td)
1541	struct socket *so;
1542	int flags;
1543	struct mbuf *m;
1544	struct sockaddr *addr;
1545	struct mbuf *controlp;
1546	struct thread *td;
1547{
1548	int error;
1549	int s;
1550	struct ipxpcb *ipxp;
1551	struct spxpcb *cb;
1552
1553	error = 0;
1554	ipxp = sotoipxpcb(so);
1555	cb = ipxtospxpcb(ipxp);
1556
1557	s = splnet();
1558	if (flags & PRUS_OOB) {
1559		if (sbspace(&so->so_snd) < -512) {
1560			error = ENOBUFS;
1561			goto spx_send_end;
1562		}
1563		cb->s_oobflags |= SF_SOOB;
1564	}
1565	if (controlp != NULL) {
1566		u_short *p = mtod(controlp, u_short *);
1567		spx_newchecks[2]++;
1568		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1569			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1570			spx_newchecks[3]++;
1571		}
1572		m_freem(controlp);
1573	}
1574	controlp = NULL;
1575	error = spx_output(cb, m);
1576	m = NULL;
1577spx_send_end:
1578	if (controlp != NULL)
1579		m_freem(controlp);
1580	if (m != NULL)
1581		m_freem(m);
1582	splx(s);
1583	return (error);
1584}
1585
1586static int
1587spx_shutdown(so)
1588	struct socket *so;
1589{
1590	int error;
1591	int s;
1592	struct ipxpcb *ipxp;
1593	struct spxpcb *cb;
1594
1595	error = 0;
1596	ipxp = sotoipxpcb(so);
1597	cb = ipxtospxpcb(ipxp);
1598
1599	s = splnet();
1600	socantsendmore(so);
1601	cb = spx_usrclosed(cb);
1602	if (cb != NULL)
1603		error = spx_output(cb, NULL);
1604	splx(s);
1605	return (error);
1606}
1607
1608static int
1609spx_sp_attach(so, proto, td)
1610	struct socket *so;
1611	int proto;
1612	struct thread *td;
1613{
1614	int error;
1615	struct ipxpcb *ipxp;
1616
1617	error = spx_attach(so, proto, td);
1618	if (error == 0) {
1619		ipxp = sotoipxpcb(so);
1620		((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1621					(SF_HI | SF_HO | SF_PI);
1622	}
1623	return (error);
1624}
1625
1626/*
1627 * Create template to be used to send spx packets on a connection.
1628 * Called after host entry created, fills
1629 * in a skeletal spx header (choosing connection id),
1630 * minimizing the amount of work necessary when the connection is used.
1631 */
1632static void
1633spx_template(cb)
1634	register struct spxpcb *cb;
1635{
1636	register struct ipxpcb *ipxp = cb->s_ipxpcb;
1637	register struct ipx *ipx = cb->s_ipx;
1638	register struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1639
1640	ipx->ipx_pt = IPXPROTO_SPX;
1641	ipx->ipx_sna = ipxp->ipxp_laddr;
1642	ipx->ipx_dna = ipxp->ipxp_faddr;
1643	cb->s_sid = htons(spx_iss);
1644	spx_iss += SPX_ISSINCR/2;
1645	cb->s_alo = 1;
1646	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1647	cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1648					of large packets */
1649	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1650	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1651		/* But allow for lots of little packets as well */
1652}
1653
1654/*
1655 * Close a SPIP control block:
1656 *	discard spx control block itself
1657 *	discard ipx protocol control block
1658 *	wake up any sleepers
1659 */
1660static struct spxpcb *
1661spx_close(cb)
1662	register struct spxpcb *cb;
1663{
1664	register struct spx_q *s;
1665	struct ipxpcb *ipxp = cb->s_ipxpcb;
1666	struct socket *so = ipxp->ipxp_socket;
1667	register struct mbuf *m;
1668
1669	s = cb->s_q.si_next;
1670	while (s != &(cb->s_q)) {
1671		s = s->si_next;
1672		m = dtom(s->si_prev);
1673		remque(s->si_prev);
1674		m_freem(m);
1675	}
1676	m_free(dtom(cb->s_ipx));
1677	FREE(cb, M_PCB);
1678	ipxp->ipxp_pcb = NULL;
1679	soisdisconnected(so);
1680	ipx_pcbdetach(ipxp);
1681	spxstat.spxs_closed++;
1682	return (NULL);
1683}
1684
/*
 * The user has closed the connection.
 *
 * Someday level 3 handshaking may be done here to close a connection or
 * to send a Xerox style error.  For now the connection is just closed and
 * the result of spx_close() is returned.
 */
static struct spxpcb *
spx_usrclosed(struct spxpcb *cb)
{
	struct spxpcb *remaining;

	remaining = spx_close(cb);
	return (remaining);
}
1696
/*
 * Disconnect a connection.  No protocol-level handshake is performed;
 * the control block is closed and the result of spx_close() is returned.
 */
static struct spxpcb *
spx_disconnect(struct spxpcb *cb)
{
	struct spxpcb *remaining;

	remaining = spx_close(cb);
	return (remaining);
}
1703
1704/*
1705 * Drop connection, reporting
1706 * the specified error.
1707 */
1708static struct spxpcb *
1709spx_drop(cb, errno)
1710	register struct spxpcb *cb;
1711	int errno;
1712{
1713	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1714
1715	/*
1716	 * someday, in the xerox world
1717	 * we will generate error protocol packets
1718	 * announcing that the socket has gone away.
1719	 */
1720	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1721		spxstat.spxs_drops++;
1722		cb->s_state = TCPS_CLOSED;
1723		/*tcp_output(cb);*/
1724	} else
1725		spxstat.spxs_conndrops++;
1726	so->so_error = errno;
1727	return (spx_close(cb));
1728}
1729
1730/*
1731 * Fast timeout routine for processing delayed acks
1732 */
1733void
1734spx_fasttimo()
1735{
1736	register struct ipxpcb *ipxp;
1737	register struct spxpcb *cb;
1738	int s = splnet();
1739
1740	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1741		if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1742		    (cb->s_flags & SF_DELACK)) {
1743			cb->s_flags &= ~SF_DELACK;
1744			cb->s_flags |= SF_ACKNOW;
1745			spxstat.spxs_delack++;
1746			spx_output(cb, NULL);
1747		}
1748	}
1749
1750	splx(s);
1751}
1752
1753/*
1754 * spx protocol timeout routine called every 500 ms.
1755 * Updates the timers in all active pcb's and
1756 * causes finite state machine actions if timers expire.
1757 */
1758void
1759spx_slowtimo()
1760{
1761	register struct ipxpcb *ip, *ip_temp;
1762	register struct spxpcb *cb;
1763	int s = splnet();
1764	register int i;
1765
1766	/*
1767	 * Search through tcb's and update active timers.  Note that timers
1768	 * may free the ipxpcb, so be sure to handle that case.
1769	 */
1770	LIST_FOREACH_SAFE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1771		cb = ipxtospxpcb(ip);
1772		if (cb == NULL)
1773			continue;
1774		for (i = 0; i < SPXT_NTIMERS; i++) {
1775			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1776				/*
1777				 * spx_timers() returns (NULL) if it free'd
1778				 * the pcb.
1779				 */
1780				cb = spx_timers(cb, i);
1781				if (cb == NULL)
1782					break;
1783			}
1784		}
1785		if (cb != NULL) {
1786			cb->s_idle++;
1787			if (cb->s_rtt)
1788				cb->s_rtt++;
1789		}
1790	}
1791	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1792	splx(s);
1793}
1794
1795/*
1796 * SPX timer processing.
1797 */
1798static struct spxpcb *
1799spx_timers(cb, timer)
1800	register struct spxpcb *cb;
1801	int timer;
1802{
1803	long rexmt;
1804	int win;
1805
1806	cb->s_force = 1 + timer;
1807	switch (timer) {
1808
1809	/*
1810	 * 2 MSL timeout in shutdown went off.  TCP deletes connection
1811	 * control block.
1812	 */
1813	case SPXT_2MSL:
1814		printf("spx: SPXT_2MSL went off for no reason\n");
1815		cb->s_timer[timer] = 0;
1816		break;
1817
1818	/*
1819	 * Retransmission timer went off.  Message has not
1820	 * been acked within retransmit interval.  Back off
1821	 * to a longer retransmit interval and retransmit one packet.
1822	 */
1823	case SPXT_REXMT:
1824		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1825			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1826			spxstat.spxs_timeoutdrop++;
1827			cb = spx_drop(cb, ETIMEDOUT);
1828			break;
1829		}
1830		spxstat.spxs_rexmttimeo++;
1831		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1832		rexmt *= spx_backoff[cb->s_rxtshift];
1833		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1834		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1835		/*
1836		 * If we have backed off fairly far, our srtt
1837		 * estimate is probably bogus.  Clobber it
1838		 * so we'll take the next rtt measurement as our srtt;
1839		 * move the current srtt into rttvar to keep the current
1840		 * retransmit times until then.
1841		 */
1842		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1843			cb->s_rttvar += (cb->s_srtt >> 2);
1844			cb->s_srtt = 0;
1845		}
1846		cb->s_snxt = cb->s_rack;
1847		/*
1848		 * If timing a packet, stop the timer.
1849		 */
1850		cb->s_rtt = 0;
1851		/*
1852		 * See very long discussion in tcp_timer.c about congestion
1853		 * window and sstrhesh
1854		 */
1855		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1856		if (win < 2)
1857			win = 2;
1858		cb->s_cwnd = CUNIT;
1859		cb->s_ssthresh = win * CUNIT;
1860		spx_output(cb, NULL);
1861		break;
1862
1863	/*
1864	 * Persistance timer into zero window.
1865	 * Force a probe to be sent.
1866	 */
1867	case SPXT_PERSIST:
1868		spxstat.spxs_persisttimeo++;
1869		spx_setpersist(cb);
1870		spx_output(cb, NULL);
1871		break;
1872
1873	/*
1874	 * Keep-alive timer went off; send something
1875	 * or drop connection if idle for too long.
1876	 */
1877	case SPXT_KEEP:
1878		spxstat.spxs_keeptimeo++;
1879		if (cb->s_state < TCPS_ESTABLISHED)
1880			goto dropit;
1881		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1882		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1883				goto dropit;
1884			spxstat.spxs_keepprobe++;
1885			spx_output(cb, NULL);
1886		} else
1887			cb->s_idle = 0;
1888		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1889		break;
1890	dropit:
1891		spxstat.spxs_keepdrops++;
1892		cb = spx_drop(cb, ETIMEDOUT);
1893		break;
1894	}
1895	return (cb);
1896}
1897