spx_usrreq.c revision 139579
1/*
2 * Copyright (c) 2004 Robert N. M. Watson
3 * Copyright (c) 1995, Mike Mitchell
4 * Copyright (c) 1984, 1985, 1986, 1987, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the University of
18 *	California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)spx_usrreq.h
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 139579 2005-01-02 14:03:47Z rwatson $");
40
41#include <sys/param.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/signalvar.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/sx.h>
52#include <sys/systm.h>
53
54#include <net/route.h>
55#include <netinet/tcp_fsm.h>
56
57#include <netipx/ipx.h>
58#include <netipx/ipx_pcb.h>
59#include <netipx/ipx_var.h>
60#include <netipx/spx.h>
61#include <netipx/spx_debug.h>
62#include <netipx/spx_timer.h>
63#include <netipx/spx_var.h>
64
65/*
66 * SPX protocol implementation.
67 */
static u_short 	spx_iss;		/* seeded to 1 by spx_init() */
static u_short	spx_newchecks[50];	/* ad-hoc debug counters bumped on SF_NEWCALL paths */
static int	spx_hardnosed;		/* nonzero: LISTEN rejects packets with nonzero did/seq */
static int	spx_use_delack = 0;	/* nonzero: SPX_SA requests set SF_DELACK, not SF_ACKNOW */
static int	traceallspxs = 0;	/* nonzero: call spx_trace() even without SO_DEBUG */
static struct	spx 	spx_savesi;	/* copy of the last traced input header */
static struct	spx_istat spx_istat;	/* input-side statistics */

/*
 * "spxstat" used to be a separate struct spxstat; it is now an alias for
 * the newstats member of spx_istat.
 */
#ifndef spxstat
#define spxstat spx_istat.newstats
#endif

/*
 * Timer backoff multipliers, indexed by cb->s_rxtshift (see
 * spx_setpersist()).
 */
static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
83
/* Internal protocol engine routines. */
static	struct spxpcb *spx_close(struct spxpcb *cb);
static	struct spxpcb *spx_disconnect(struct spxpcb *cb);
static	struct spxpcb *spx_drop(struct spxpcb *cb, int errno);
static	int spx_output(struct spxpcb *cb, struct mbuf *m0);
static	int spx_reass(struct spxpcb *cb, struct spx *si);
static	void spx_setpersist(struct spxpcb *cb);
static	void spx_template(struct spxpcb *cb);
static	struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static	struct spxpcb *spx_usrclosed(struct spxpcb *cb);

/* Socket-layer entry points, wired into the pr_usrreqs tables below. */
static	int spx_usr_abort(struct socket *so);
static	int spx_accept(struct socket *so, struct sockaddr **nam);
static	int spx_attach(struct socket *so, int proto, struct thread *td);
static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
static	int spx_connect(struct socket *so, struct sockaddr *nam,
			struct thread *td);
static	int spx_detach(struct socket *so);
static	int spx_usr_disconnect(struct socket *so);
static	int spx_listen(struct socket *so, struct thread *td);
static	int spx_rcvd(struct socket *so, int flags);
static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static	int spx_send(struct socket *so, int flags, struct mbuf *m,
		     struct sockaddr *addr, struct mbuf *control,
		     struct thread *td);
static	int spx_shutdown(struct socket *so);
static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
110
/*
 * User-request dispatch table for SPX sockets that attach through
 * spx_attach().  Control and address queries are delegated to the IPX
 * layer (ipx_control, ipx_peeraddr, ipx_sockaddr).
 */
struct	pr_usrreqs spx_usrreqs = {
	.pru_abort =		spx_usr_abort,
	.pru_accept =		spx_accept,
	.pru_attach =		spx_attach,
	.pru_bind =		spx_bind,
	.pru_connect =		spx_connect,
	.pru_control =		ipx_control,
	.pru_detach =		spx_detach,
	.pru_disconnect =	spx_usr_disconnect,
	.pru_listen =		spx_listen,
	.pru_peeraddr =		ipx_peeraddr,
	.pru_rcvd =		spx_rcvd,
	.pru_rcvoob =		spx_rcvoob,
	.pru_send =		spx_send,
	.pru_shutdown =		spx_shutdown,
	.pru_sockaddr =		ipx_sockaddr,
};
128
/*
 * User-request dispatch table identical to spx_usrreqs except that
 * attach goes through spx_sp_attach() instead of spx_attach().
 */
struct	pr_usrreqs spx_usrreq_sps = {
	.pru_abort =		spx_usr_abort,
	.pru_accept =		spx_accept,
	.pru_attach =		spx_sp_attach,
	.pru_bind =		spx_bind,
	.pru_connect =		spx_connect,
	.pru_control =		ipx_control,
	.pru_detach =		spx_detach,
	.pru_disconnect =	spx_usr_disconnect,
	.pru_listen =		spx_listen,
	.pru_peeraddr =		ipx_peeraddr,
	.pru_rcvd =		spx_rcvd,
	.pru_rcvoob =		spx_rcvoob,
	.pru_send =		spx_send,
	.pru_shutdown =		spx_shutdown,
	.pru_sockaddr =		ipx_sockaddr,
};
146
147void
148spx_init()
149{
150
151	spx_iss = 1; /* WRONG !! should fish it out of TODR */
152}
153
154void
155spx_input(m, ipxp)
156	register struct mbuf *m;
157	register struct ipxpcb *ipxp;
158{
159	register struct spxpcb *cb;
160	register struct spx *si = mtod(m, struct spx *);
161	register struct socket *so;
162	int dropsocket = 0;
163	short ostate = 0;
164
165	spxstat.spxs_rcvtotal++;
166	if (ipxp == NULL) {
167		panic("No ipxpcb in spx_input\n");
168		return;
169	}
170
171	cb = ipxtospxpcb(ipxp);
172	if (cb == NULL)
173		goto bad;
174
175	if (m->m_len < sizeof(*si)) {
176		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
177			spxstat.spxs_rcvshort++;
178			return;
179		}
180		si = mtod(m, struct spx *);
181	}
182	si->si_seq = ntohs(si->si_seq);
183	si->si_ack = ntohs(si->si_ack);
184	si->si_alo = ntohs(si->si_alo);
185
186	so = ipxp->ipxp_socket;
187
188	if (so->so_options & SO_DEBUG || traceallspxs) {
189		ostate = cb->s_state;
190		spx_savesi = *si;
191	}
192	if (so->so_options & SO_ACCEPTCONN) {
193		struct spxpcb *ocb = cb;
194
195		so = sonewconn(so, 0);
196		if (so == NULL) {
197			goto drop;
198		}
199		/*
200		 * This is ugly, but ....
201		 *
202		 * Mark socket as temporary until we're
203		 * committed to keeping it.  The code at
204		 * ``drop'' and ``dropwithreset'' check the
205		 * flag dropsocket to see if the temporary
206		 * socket created here should be discarded.
207		 * We mark the socket as discardable until
208		 * we're committed to it below in TCPS_LISTEN.
209		 */
210		dropsocket++;
211		ipxp = (struct ipxpcb *)so->so_pcb;
212		ipxp->ipxp_laddr = si->si_dna;
213		cb = ipxtospxpcb(ipxp);
214		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
215		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
216		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
217		cb->s_state = TCPS_LISTEN;
218	}
219
220	/*
221	 * Packet received on connection.
222	 * reset idle time and keep-alive timer;
223	 */
224	cb->s_idle = 0;
225	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
226
227	switch (cb->s_state) {
228
229	case TCPS_LISTEN:{
230		struct sockaddr_ipx *sipx, ssipx;
231		struct ipx_addr laddr;
232
233		/*
234		 * If somebody here was carying on a conversation
235		 * and went away, and his pen pal thinks he can
236		 * still talk, we get the misdirected packet.
237		 */
238		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
239			spx_istat.gonawy++;
240			goto dropwithreset;
241		}
242		sipx = &ssipx;
243		bzero(sipx, sizeof *sipx);
244		sipx->sipx_len = sizeof(*sipx);
245		sipx->sipx_family = AF_IPX;
246		sipx->sipx_addr = si->si_sna;
247		laddr = ipxp->ipxp_laddr;
248		if (ipx_nullhost(laddr))
249			ipxp->ipxp_laddr = si->si_dna;
250		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
251			ipxp->ipxp_laddr = laddr;
252			spx_istat.noconn++;
253			goto drop;
254		}
255		spx_template(cb);
256		dropsocket = 0;		/* committed to socket */
257		cb->s_did = si->si_sid;
258		cb->s_rack = si->si_ack;
259		cb->s_ralo = si->si_alo;
260#define THREEWAYSHAKE
261#ifdef THREEWAYSHAKE
262		cb->s_state = TCPS_SYN_RECEIVED;
263		cb->s_force = 1 + SPXT_KEEP;
264		spxstat.spxs_accepts++;
265		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
266		}
267		break;
268	/*
269	 * This state means that we have heard a response
270	 * to our acceptance of their connection
271	 * It is probably logically unnecessary in this
272	 * implementation.
273	 */
274	 case TCPS_SYN_RECEIVED: {
275		if (si->si_did != cb->s_sid) {
276			spx_istat.wrncon++;
277			goto drop;
278		}
279#endif
280		ipxp->ipxp_fport =  si->si_sport;
281		cb->s_timer[SPXT_REXMT] = 0;
282		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
283		soisconnected(so);
284		cb->s_state = TCPS_ESTABLISHED;
285		spxstat.spxs_accepts++;
286		}
287		break;
288
289	/*
290	 * This state means that we have gotten a response
291	 * to our attempt to establish a connection.
292	 * We fill in the data from the other side,
293	 * telling us which port to respond to, instead of the well-
294	 * known one we might have sent to in the first place.
295	 * We also require that this is a response to our
296	 * connection id.
297	 */
298	case TCPS_SYN_SENT:
299		if (si->si_did != cb->s_sid) {
300			spx_istat.notme++;
301			goto drop;
302		}
303		spxstat.spxs_connects++;
304		cb->s_did = si->si_sid;
305		cb->s_rack = si->si_ack;
306		cb->s_ralo = si->si_alo;
307		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
308		cb->s_timer[SPXT_REXMT] = 0;
309		cb->s_flags |= SF_ACKNOW;
310		soisconnected(so);
311		cb->s_state = TCPS_ESTABLISHED;
312		/* Use roundtrip time of connection request for initial rtt */
313		if (cb->s_rtt) {
314			cb->s_srtt = cb->s_rtt << 3;
315			cb->s_rttvar = cb->s_rtt << 1;
316			SPXT_RANGESET(cb->s_rxtcur,
317			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
318			    SPXTV_MIN, SPXTV_REXMTMAX);
319			    cb->s_rtt = 0;
320		}
321	}
322	if (so->so_options & SO_DEBUG || traceallspxs)
323		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
324
325	m->m_len -= sizeof(struct ipx);
326	m->m_pkthdr.len -= sizeof(struct ipx);
327	m->m_data += sizeof(struct ipx);
328
329	if (spx_reass(cb, si)) {
330		m_freem(m);
331	}
332	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
333		spx_output(cb, NULL);
334	cb->s_flags &= ~(SF_WIN|SF_RXT);
335	return;
336
337dropwithreset:
338	if (dropsocket) {
339		struct socket *head;
340		ACCEPT_LOCK();
341		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
342		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
343		head = so->so_head;
344		TAILQ_REMOVE(&head->so_incomp, so, so_list);
345		head->so_incqlen--;
346		so->so_qstate &= ~SQ_INCOMP;
347		so->so_head = NULL;
348		ACCEPT_UNLOCK();
349		soabort(so);
350	}
351	si->si_seq = ntohs(si->si_seq);
352	si->si_ack = ntohs(si->si_ack);
353	si->si_alo = ntohs(si->si_alo);
354	m_freem(dtom(si));
355	if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
356		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
357	return;
358
359drop:
360bad:
361	if (cb == 0 || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
362            traceallspxs)
363		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
364	m_freem(m);
365}
366
/*
 * Number of consecutive duplicate acks that makes spx_reass() retransmit
 * the packet the peer is asking for.
 */
static int spxrexmtthresh = 3;
368
/*
 * This is structurally similar to the tcp reassembly routine
 * but its function is somewhat different:  It merely queues
 * packets up, and suppresses duplicates.
 *
 *	cb	connection control block.
 *	si	header of the incoming packet (sequence fields already in
 *		host order), or SI(0) to only deliver packets that are
 *		already queued.
 *
 * Besides queueing, this routine digests the acknowledgement and window
 * information carried in the header: it updates the round-trip estimate,
 * the congestion window, the retransmit timer, and trims acknowledged
 * packets from the send buffer.
 *
 * Returns 1 when the caller still owns the packet's mbuf and should free
 * it; returns 0 when the packet has been queued on cb->s_q or has already
 * been freed here.
 */
static int
spx_reass(cb, si)
register struct spxpcb *cb;
register struct spx *si;
{
	register struct spx_q *q;
	register struct mbuf *m;
	register struct socket *so = cb->s_ipxpcb->ipxp_socket;
	char packetp = cb->s_flags & SF_HI;	/* deliver with headers */
	int incr;
	char wakeup = 0;

	if (si == SI(0))
		goto present;
	/*
	 * Update our news from them.
	 */
	if (si->si_cc & SPX_SA)
		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
	if (SSEQ_GT(si->si_alo, cb->s_ralo))
		cb->s_flags |= SF_WIN;
	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
		/* Nothing new is acknowledged by this packet. */
		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
			spxstat.spxs_rcvdupack++;
			/*
			 * If this is a completely duplicate ack
			 * and other conditions hold, we assume
			 * a packet has been dropped and retransmit
			 * it exactly as in tcp_input().
			 */
			if (si->si_ack != cb->s_rack ||
			    si->si_alo != cb->s_ralo)
				cb->s_dupacks = 0;
			else if (++cb->s_dupacks == spxrexmtthresh) {
				u_short onxt = cb->s_snxt;
				int cwnd = cb->s_cwnd;

				/*
				 * Temporarily rewind s_snxt and shrink the
				 * congestion window to one unit so that
				 * spx_output() resends just the requested
				 * packet, then restore/halve afterwards.
				 */
				cb->s_snxt = si->si_ack;
				cb->s_cwnd = CUNIT;
				cb->s_force = 1 + SPXT_REXMT;
				spx_output(cb, NULL);
				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
				cb->s_rtt = 0;
				if (cwnd >= 4 * CUNIT)
					cb->s_cwnd = cwnd / 2;
				if (SSEQ_GT(onxt, cb->s_snxt))
					cb->s_snxt = onxt;
				return (1);
			}
		} else
			cb->s_dupacks = 0;
		goto update_window;
	}
	cb->s_dupacks = 0;
	/*
	 * If our correspondent acknowledges data we haven't sent
	 * TCP would drop the packet after acking.  We'll be a little
	 * more permissive
	 */
	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
		spxstat.spxs_rcvacktoomuch++;
		si->si_ack = cb->s_smax + 1;
	}
	spxstat.spxs_rcvackpack++;
	/*
	 * If transmit timer is running and timed sequence
	 * number was acked, update smoothed round trip time.
	 * See discussion of algorithm in tcp_input.c
	 */
	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
		spxstat.spxs_rttupdated++;
		if (cb->s_srtt != 0) {
			register short delta;
			delta = cb->s_rtt - (cb->s_srtt >> 3);
			if ((cb->s_srtt += delta) <= 0)
				cb->s_srtt = 1;
			if (delta < 0)
				delta = -delta;
			delta -= (cb->s_rttvar >> 2);
			if ((cb->s_rttvar += delta) <= 0)
				cb->s_rttvar = 1;
		} else {
			/*
			 * No rtt measurement yet
			 */
			cb->s_srtt = cb->s_rtt << 3;
			cb->s_rttvar = cb->s_rtt << 1;
		}
		cb->s_rtt = 0;
		cb->s_rxtshift = 0;
		SPXT_RANGESET(cb->s_rxtcur,
			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
			SPXTV_MIN, SPXTV_REXMTMAX);
	}
	/*
	 * If all outstanding data is acked, stop retransmit
	 * timer and remember to restart (more output or persist).
	 * If there is more data to be acked, restart retransmit
	 * timer, using current (possibly backed-off) value;
	 */
	if (si->si_ack == cb->s_smax + 1) {
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_flags |= SF_RXT;
	} else if (cb->s_timer[SPXT_PERSIST] == 0)
		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
	/*
	 * When new data is acked, open the congestion window.
	 * If the window gives us less than ssthresh packets
	 * in flight, open exponentially (maxseg at a time).
	 * Otherwise open linearly (maxseg^2 / cwnd at a time).
	 */
	incr = CUNIT;
	if (cb->s_cwnd > cb->s_ssthresh)
		incr = max(incr * incr / cb->s_cwnd, 1);
	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
	/*
	 * Trim Acked data from output queue.
	 */
	while ((m = so->so_snd.sb_mb) != NULL) {
		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
			sbdroprecord(&so->so_snd);
		else
			break;
	}
	sowwakeup(so);
	cb->s_rack = si->si_ack;
update_window:
	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
		cb->s_snxt = cb->s_rack;
	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
		/* keep track of pure window updates */
		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
			spxstat.spxs_rcvwinupd++;
			spxstat.spxs_rcvdupack--;
		}
		cb->s_ralo = si->si_alo;
		cb->s_swl1 = si->si_seq;
		cb->s_swl2 = si->si_ack;
		cb->s_swnd = (1 + si->si_alo - si->si_ack);
		if (cb->s_swnd > cb->s_smxw)
			cb->s_smxw = cb->s_swnd;
		cb->s_flags |= SF_WIN;
	}
	/*
	 * If this packet number is higher than that which
	 * we have allocated refuse it, unless urgent
	 */
	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
		if (si->si_cc & SPX_SP) {
			spxstat.spxs_rcvwinprobe++;
			return (1);
		} else
			spxstat.spxs_rcvpackafterwin++;
		if (si->si_cc & SPX_OB) {
			/* Out-of-band data gets 60 packets of extra slack. */
			if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
				m_freem(dtom(si));
				return (0);
			} /* else queue this packet; */
		} else {
#ifdef BROKEN
			/*
			 * XXXRW: This is broken on at least one count:
			 * spx_close() will free the ipxp and related parts,
			 * which are then touched by spx_input() after the
			 * return from spx_reass().
			 */
			/*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
			if (so->so_state && SS_NOFDREF) {
				spx_close(cb);
			} else
				       would crash system*/
#endif
			spx_istat.notyet++;
			/* Freed here, so report "consumed" to the caller. */
			m_freem(dtom(si));
			return (0);
		}
	}
	/*
	 * If this is a system packet, we don't need to
	 * queue it up, and won't update acknowledge #
	 */
	if (si->si_cc & SPX_SP) {
		return (1);
	}
	/*
	 * We have already seen this packet, so drop.
	 */
	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
		spx_istat.bdreas++;
		spxstat.spxs_rcvduppack++;
		if (si->si_seq == cb->s_ack - 1)
			spx_istat.lstdup++;
		return (1);
	}
	/*
	 * Loop through all packets queued up to insert in
	 * appropriate sequence.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		if (si->si_seq == SI(q)->si_seq) {
			spxstat.spxs_rcvduppack++;
			return (1);
		}
		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
			spxstat.spxs_rcvoopack++;
			break;
		}
	}
	/* Link the packet in just before q (or at the tail if none found). */
	insque(si, q->si_prev);
	/*
	 * If this packet is urgent, inform process
	 */
	if (si->si_cc & SPX_OB) {
		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
		sohasoutofband(so);
		cb->s_oobflags |= SF_IOOB;
	}
present:
#define SPINC sizeof(struct spxhdr)
	/*
	 * Loop through all packets queued up to update acknowledge
	 * number, and present all acknowledged data to user;
	 * If in packet interface mode, show packet headers.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		  if (SI(q)->si_seq == cb->s_ack) {
			cb->s_ack++;
			m = dtom(q);
			if (SI(q)->si_cc & SPX_OB) {
				cb->s_oobflags &= ~SF_IOOB;
				SOCKBUF_LOCK(&so->so_rcv);
				if (so->so_rcv.sb_cc)
					so->so_oobmark = so->so_rcv.sb_cc;
				else
					so->so_rcv.sb_state |= SBS_RCVATMARK;
				SOCKBUF_UNLOCK(&so->so_rcv);
			}
			/*
			 * Step back before unlinking so the loop's
			 * q = q->si_next lands on the element that
			 * followed the one just removed.
			 */
			q = q->si_prev;
			remque(q->si_next);
			wakeup = 1;
			spxstat.spxs_rcvpack++;
#ifdef SF_NEWCALL
			if (cb->s_flags2 & SF_NEWCALL) {
				struct spxhdr *sp = mtod(m, struct spxhdr *);
				u_char dt = sp->spx_dt;
				spx_newchecks[4]++;
				if (dt != cb->s_rhdr.spx_dt) {
					/*
					 * Datastream type changed: queue a
					 * small control record carrying the
					 * new type ahead of the data.
					 */
					struct mbuf *mm =
					   m_getclr(M_DONTWAIT, MT_CONTROL);
					spx_newchecks[0]++;
					if (mm != NULL) {
						u_short *s =
							mtod(mm, u_short *);
						cb->s_rhdr.spx_dt = dt;
						mm->m_len = 5; /*XXX*/
						s[0] = 5;
						s[1] = 1;
						*(u_char *)(&s[2]) = dt;
						sbappend(&so->so_rcv, mm);
					}
				}
				if (sp->spx_cc & SPX_OB) {
					MCHTYPE(m, MT_OOBDATA);
					spx_newchecks[1]++;
					SOCKBUF_LOCK(&so->so_rcv);
					so->so_oobmark = 0;
					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
					SOCKBUF_UNLOCK(&so->so_rcv);
				}
				if (packetp == 0) {
					/* Hide the SPX header from the user. */
					m->m_data += SPINC;
					m->m_len -= SPINC;
					m->m_pkthdr.len -= SPINC;
				}
				if ((sp->spx_cc & SPX_EM) || packetp) {
					sbappendrecord(&so->so_rcv, m);
					spx_newchecks[9]++;
				} else
					sbappend(&so->so_rcv, m);
			} else
#endif
			if (packetp) {
				sbappendrecord(&so->so_rcv, m);
			} else {
				/* Remember the header, then strip it. */
				cb->s_rhdr = *mtod(m, struct spxhdr *);
				m->m_data += SPINC;
				m->m_len -= SPINC;
				m->m_pkthdr.len -= SPINC;
				sbappend(&so->so_rcv, m);
			}
		  } else
			break;
	}
	if (wakeup)
		sorwakeup(so);
	return (0);
}
674
675void
676spx_ctlinput(cmd, arg_as_sa, dummy)
677	int cmd;
678	struct sockaddr *arg_as_sa;	/* XXX should be swapped with dummy */
679	void *dummy;
680{
681	caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
682	struct ipx_addr *na;
683	struct sockaddr_ipx *sipx;
684
685	if (cmd < 0 || cmd >= PRC_NCMDS)
686		return;
687
688	switch (cmd) {
689
690	case PRC_ROUTEDEAD:
691		return;
692
693	case PRC_IFDOWN:
694	case PRC_HOSTDEAD:
695	case PRC_HOSTUNREACH:
696		sipx = (struct sockaddr_ipx *)arg;
697		if (sipx->sipx_family != AF_IPX)
698			return;
699		na = &sipx->sipx_addr;
700		break;
701
702	default:
703		break;
704	}
705}
706
707static int
708spx_output(cb, m0)
709	register struct spxpcb *cb;
710	struct mbuf *m0;
711{
712	struct socket *so = cb->s_ipxpcb->ipxp_socket;
713	register struct mbuf *m;
714	register struct spx *si = NULL;
715	register struct sockbuf *sb = &so->so_snd;
716	int len = 0, win, rcv_win;
717	short span, off, recordp = 0;
718	u_short alo;
719	int error = 0, sendalot;
720#ifdef notdef
721	int idle;
722#endif
723	struct mbuf *mprev;
724
725	if (m0 != NULL) {
726		int mtu = cb->s_mtu;
727		int datalen;
728		/*
729		 * Make sure that packet isn't too big.
730		 */
731		for (m = m0; m != NULL; m = m->m_next) {
732			mprev = m;
733			len += m->m_len;
734			if (m->m_flags & M_EOR)
735				recordp = 1;
736		}
737		datalen = (cb->s_flags & SF_HO) ?
738				len - sizeof(struct spxhdr) : len;
739		if (datalen > mtu) {
740			if (cb->s_flags & SF_PI) {
741				m_freem(m0);
742				return (EMSGSIZE);
743			} else {
744				int oldEM = cb->s_cc & SPX_EM;
745
746				cb->s_cc &= ~SPX_EM;
747				while (len > mtu) {
748					/*
749					 * Here we are only being called
750					 * from usrreq(), so it is OK to
751					 * block.
752					 */
753					m = m_copym(m0, 0, mtu, M_TRYWAIT);
754					if (cb->s_flags & SF_NEWCALL) {
755					    struct mbuf *mm = m;
756					    spx_newchecks[7]++;
757					    while (mm != NULL) {
758						mm->m_flags &= ~M_EOR;
759						mm = mm->m_next;
760					    }
761					}
762					error = spx_output(cb, m);
763					if (error) {
764						cb->s_cc |= oldEM;
765						m_freem(m0);
766						return (error);
767					}
768					m_adj(m0, mtu);
769					len -= mtu;
770				}
771				cb->s_cc |= oldEM;
772			}
773		}
774		/*
775		 * Force length even, by adding a "garbage byte" if
776		 * necessary.
777		 */
778		if (len & 1) {
779			m = mprev;
780			if (M_TRAILINGSPACE(m) >= 1)
781				m->m_len++;
782			else {
783				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
784
785				if (m1 == NULL) {
786					m_freem(m0);
787					return (ENOBUFS);
788				}
789				m1->m_len = 1;
790				*(mtod(m1, u_char *)) = 0;
791				m->m_next = m1;
792			}
793		}
794		m = m_gethdr(M_DONTWAIT, MT_HEADER);
795		if (m == NULL) {
796			m_freem(m0);
797			return (ENOBUFS);
798		}
799		/*
800		 * Fill in mbuf with extended SP header
801		 * and addresses and length put into network format.
802		 */
803		MH_ALIGN(m, sizeof(struct spx));
804		m->m_len = sizeof(struct spx);
805		m->m_next = m0;
806		si = mtod(m, struct spx *);
807		si->si_i = *cb->s_ipx;
808		si->si_s = cb->s_shdr;
809		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
810			register struct spxhdr *sh;
811			if (m0->m_len < sizeof(*sh)) {
812				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
813					m_free(m);
814					m_freem(m0);
815					return (EINVAL);
816				}
817				m->m_next = m0;
818			}
819			sh = mtod(m0, struct spxhdr *);
820			si->si_dt = sh->spx_dt;
821			si->si_cc |= sh->spx_cc & SPX_EM;
822			m0->m_len -= sizeof(*sh);
823			m0->m_data += sizeof(*sh);
824			len -= sizeof(*sh);
825		}
826		len += sizeof(*si);
827		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
828			si->si_cc |= SPX_EM;
829			spx_newchecks[8]++;
830		}
831		if (cb->s_oobflags & SF_SOOB) {
832			/*
833			 * Per jqj@cornell:
834			 * make sure OB packets convey exactly 1 byte.
835			 * If the packet is 1 byte or larger, we
836			 * have already guaranted there to be at least
837			 * one garbage byte for the checksum, and
838			 * extra bytes shouldn't hurt!
839			 */
840			if (len > sizeof(*si)) {
841				si->si_cc |= SPX_OB;
842				len = (1 + sizeof(*si));
843			}
844		}
845		si->si_len = htons((u_short)len);
846		m->m_pkthdr.len = ((len - 1) | 1) + 1;
847		/*
848		 * queue stuff up for output
849		 */
850		sbappendrecord(sb, m);
851		cb->s_seq++;
852	}
853#ifdef notdef
854	idle = (cb->s_smax == (cb->s_rack - 1));
855#endif
856again:
857	sendalot = 0;
858	off = cb->s_snxt - cb->s_rack;
859	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
860
861	/*
862	 * If in persist timeout with window of 0, send a probe.
863	 * Otherwise, if window is small but nonzero
864	 * and timer expired, send what we can and go into
865	 * transmit state.
866	 */
867	if (cb->s_force == 1 + SPXT_PERSIST) {
868		if (win != 0) {
869			cb->s_timer[SPXT_PERSIST] = 0;
870			cb->s_rxtshift = 0;
871		}
872	}
873	span = cb->s_seq - cb->s_rack;
874	len = min(span, win) - off;
875
876	if (len < 0) {
877		/*
878		 * Window shrank after we went into it.
879		 * If window shrank to 0, cancel pending
880		 * restransmission and pull s_snxt back
881		 * to (closed) window.  We will enter persist
882		 * state below.  If the widndow didn't close completely,
883		 * just wait for an ACK.
884		 */
885		len = 0;
886		if (win == 0) {
887			cb->s_timer[SPXT_REXMT] = 0;
888			cb->s_snxt = cb->s_rack;
889		}
890	}
891	if (len > 1)
892		sendalot = 1;
893	rcv_win = sbspace(&so->so_rcv);
894
895	/*
896	 * Send if we owe peer an ACK.
897	 */
898	if (cb->s_oobflags & SF_SOOB) {
899		/*
900		 * must transmit this out of band packet
901		 */
902		cb->s_oobflags &= ~ SF_SOOB;
903		sendalot = 1;
904		spxstat.spxs_sndurg++;
905		goto found;
906	}
907	if (cb->s_flags & SF_ACKNOW)
908		goto send;
909	if (cb->s_state < TCPS_ESTABLISHED)
910		goto send;
911	/*
912	 * Silly window can't happen in spx.
913	 * Code from tcp deleted.
914	 */
915	if (len)
916		goto send;
917	/*
918	 * Compare available window to amount of window
919	 * known to peer (as advertised window less
920	 * next expected input.)  If the difference is at least two
921	 * packets or at least 35% of the mximum possible window,
922	 * then want to send a window update to peer.
923	 */
924	if (rcv_win > 0) {
925		u_short delta =  1 + cb->s_alo - cb->s_ack;
926		int adv = rcv_win - (delta * cb->s_mtu);
927
928		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
929		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
930			spxstat.spxs_sndwinup++;
931			cb->s_flags |= SF_ACKNOW;
932			goto send;
933		}
934
935	}
936	/*
937	 * Many comments from tcp_output.c are appropriate here
938	 * including . . .
939	 * If send window is too small, there is data to transmit, and no
940	 * retransmit or persist is pending, then go to persist state.
941	 * If nothing happens soon, send when timer expires:
942	 * if window is nonzero, transmit what we can,
943	 * otherwise send a probe.
944	 */
945	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
946		cb->s_timer[SPXT_PERSIST] == 0) {
947			cb->s_rxtshift = 0;
948			spx_setpersist(cb);
949	}
950	/*
951	 * No reason to send a packet, just return.
952	 */
953	cb->s_outx = 1;
954	return (0);
955
956send:
957	/*
958	 * Find requested packet.
959	 */
960	si = 0;
961	if (len > 0) {
962		cb->s_want = cb->s_snxt;
963		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
964			si = mtod(m, struct spx *);
965			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
966				break;
967		}
968	found:
969		if (si != NULL) {
970			if (si->si_seq == cb->s_snxt)
971					cb->s_snxt++;
972				else
973					spxstat.spxs_sndvoid++, si = 0;
974		}
975	}
976	/*
977	 * update window
978	 */
979	if (rcv_win < 0)
980		rcv_win = 0;
981	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
982	if (SSEQ_LT(alo, cb->s_alo))
983		alo = cb->s_alo;
984
985	if (si != NULL) {
986		/*
987		 * must make a copy of this packet for
988		 * ipx_output to monkey with
989		 */
990		m = m_copy(dtom(si), 0, (int)M_COPYALL);
991		if (m == NULL) {
992			return (ENOBUFS);
993		}
994		si = mtod(m, struct spx *);
995		if (SSEQ_LT(si->si_seq, cb->s_smax))
996			spxstat.spxs_sndrexmitpack++;
997		else
998			spxstat.spxs_sndpack++;
999	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1000		/*
1001		 * Must send an acknowledgement or a probe
1002		 */
1003		if (cb->s_force)
1004			spxstat.spxs_sndprobe++;
1005		if (cb->s_flags & SF_ACKNOW)
1006			spxstat.spxs_sndacks++;
1007		m = m_gethdr(M_DONTWAIT, MT_HEADER);
1008		if (m == NULL)
1009			return (ENOBUFS);
1010		/*
1011		 * Fill in mbuf with extended SP header
1012		 * and addresses and length put into network format.
1013		 */
1014		MH_ALIGN(m, sizeof(struct spx));
1015		m->m_len = sizeof(*si);
1016		m->m_pkthdr.len = sizeof(*si);
1017		si = mtod(m, struct spx *);
1018		si->si_i = *cb->s_ipx;
1019		si->si_s = cb->s_shdr;
1020		si->si_seq = cb->s_smax + 1;
1021		si->si_len = htons(sizeof(*si));
1022		si->si_cc |= SPX_SP;
1023	} else {
1024		cb->s_outx = 3;
1025		if (so->so_options & SO_DEBUG || traceallspxs)
1026			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1027		return (0);
1028	}
1029	/*
1030	 * Stuff checksum and output datagram.
1031	 */
1032	if ((si->si_cc & SPX_SP) == 0) {
1033		if (cb->s_force != (1 + SPXT_PERSIST) ||
1034		    cb->s_timer[SPXT_PERSIST] == 0) {
1035			/*
1036			 * If this is a new packet and we are not currently
1037			 * timing anything, time this one.
1038			 */
1039			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1040				cb->s_smax = si->si_seq;
1041				if (cb->s_rtt == 0) {
1042					spxstat.spxs_segstimed++;
1043					cb->s_rtseq = si->si_seq;
1044					cb->s_rtt = 1;
1045				}
1046			}
1047			/*
1048			 * Set rexmt timer if not currently set,
1049			 * Initial value for retransmit timer is smoothed
1050			 * round-trip time + 2 * round-trip time variance.
1051			 * Initialize shift counter which is used for backoff
1052			 * of retransmit time.
1053			 */
1054			if (cb->s_timer[SPXT_REXMT] == 0 &&
1055			    cb->s_snxt != cb->s_rack) {
1056				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1057				if (cb->s_timer[SPXT_PERSIST]) {
1058					cb->s_timer[SPXT_PERSIST] = 0;
1059					cb->s_rxtshift = 0;
1060				}
1061			}
1062		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1063			cb->s_smax = si->si_seq;
1064		}
1065	} else if (cb->s_state < TCPS_ESTABLISHED) {
1066		if (cb->s_rtt == 0)
1067			cb->s_rtt = 1; /* Time initial handshake */
1068		if (cb->s_timer[SPXT_REXMT] == 0)
1069			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1070	}
1071	{
1072		/*
1073		 * Do not request acks when we ack their data packets or
1074		 * when we do a gratuitous window update.
1075		 */
1076		if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1077				si->si_cc |= SPX_SA;
1078		si->si_seq = htons(si->si_seq);
1079		si->si_alo = htons(alo);
1080		si->si_ack = htons(cb->s_ack);
1081
1082		if (ipxcksum) {
1083			si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1084		} else
1085			si->si_sum = 0xffff;
1086
1087		cb->s_outx = 4;
1088		if (so->so_options & SO_DEBUG || traceallspxs)
1089			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1090
1091		if (so->so_options & SO_DONTROUTE)
1092			error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1093		else
1094			error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1095	}
1096	if (error) {
1097		return (error);
1098	}
1099	spxstat.spxs_sndtotal++;
1100	/*
1101	 * Data sent (as far as we can tell).
1102	 * If this advertises a larger window than any other segment,
1103	 * then remember the size of the advertized window.
1104	 * Any pending ACK has now been sent.
1105	 */
1106	cb->s_force = 0;
1107	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1108	if (SSEQ_GT(alo, cb->s_alo))
1109		cb->s_alo = alo;
1110	if (sendalot)
1111		goto again;
1112	cb->s_outx = 5;
1113	return (0);
1114}
1115
1116static int spx_do_persist_panics = 0;
1117
1118static void
1119spx_setpersist(cb)
1120	register struct spxpcb *cb;
1121{
1122	register int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1123
1124	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1125		panic("spx_output REXMT");
1126	/*
1127	 * Start/restart persistance timer.
1128	 */
1129	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1130	    t*spx_backoff[cb->s_rxtshift],
1131	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1132	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1133		cb->s_rxtshift++;
1134}
1135
1136int
1137spx_ctloutput(so, sopt)
1138	struct socket *so;
1139	struct sockopt *sopt;
1140{
1141	struct ipxpcb *ipxp = sotoipxpcb(so);
1142	register struct spxpcb *cb;
1143	int mask, error;
1144	short soptval;
1145	u_short usoptval;
1146	int optval;
1147
1148	error = 0;
1149
1150	if (sopt->sopt_level != IPXPROTO_SPX) {
1151		/* This will have to be changed when we do more general
1152		   stacking of protocols */
1153		return (ipx_ctloutput(so, sopt));
1154	}
1155	if (ipxp == NULL)
1156		return (EINVAL);
1157	else
1158		cb = ipxtospxpcb(ipxp);
1159
1160	switch (sopt->sopt_dir) {
1161	case SOPT_GET:
1162		switch (sopt->sopt_name) {
1163		case SO_HEADERS_ON_INPUT:
1164			mask = SF_HI;
1165			goto get_flags;
1166
1167		case SO_HEADERS_ON_OUTPUT:
1168			mask = SF_HO;
1169		get_flags:
1170			soptval = cb->s_flags & mask;
1171			error = sooptcopyout(sopt, &soptval, sizeof soptval);
1172			break;
1173
1174		case SO_MTU:
1175			usoptval = cb->s_mtu;
1176			error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1177			break;
1178
1179		case SO_LAST_HEADER:
1180			error = sooptcopyout(sopt, &cb->s_rhdr,
1181					     sizeof cb->s_rhdr);
1182			break;
1183
1184		case SO_DEFAULT_HEADERS:
1185			error = sooptcopyout(sopt, &cb->s_shdr,
1186					     sizeof cb->s_shdr);
1187			break;
1188
1189		default:
1190			error = ENOPROTOOPT;
1191		}
1192		break;
1193
1194	case SOPT_SET:
1195		switch (sopt->sopt_name) {
1196			/* XXX why are these shorts on get and ints on set?
1197			   that doesn't make any sense... */
1198		case SO_HEADERS_ON_INPUT:
1199			mask = SF_HI;
1200			goto set_head;
1201
1202		case SO_HEADERS_ON_OUTPUT:
1203			mask = SF_HO;
1204		set_head:
1205			error = sooptcopyin(sopt, &optval, sizeof optval,
1206					    sizeof optval);
1207			if (error)
1208				break;
1209
1210			if (cb->s_flags & SF_PI) {
1211				if (optval)
1212					cb->s_flags |= mask;
1213				else
1214					cb->s_flags &= ~mask;
1215			} else error = EINVAL;
1216			break;
1217
1218		case SO_MTU:
1219			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1220					    sizeof usoptval);
1221			if (error)
1222				break;
1223			cb->s_mtu = usoptval;
1224			break;
1225
1226#ifdef SF_NEWCALL
1227		case SO_NEWCALL:
1228			error = sooptcopyin(sopt, &optval, sizeof optval,
1229					    sizeof optval);
1230			if (error)
1231				break;
1232			if (optval) {
1233				cb->s_flags2 |= SF_NEWCALL;
1234				spx_newchecks[5]++;
1235			} else {
1236				cb->s_flags2 &= ~SF_NEWCALL;
1237				spx_newchecks[6]++;
1238			}
1239			break;
1240#endif
1241
1242		case SO_DEFAULT_HEADERS:
1243			{
1244				struct spxhdr sp;
1245
1246				error = sooptcopyin(sopt, &sp, sizeof sp,
1247						    sizeof sp);
1248				if (error)
1249					break;
1250				cb->s_dt = sp.spx_dt;
1251				cb->s_cc = sp.spx_cc & SPX_EM;
1252			}
1253			break;
1254
1255		default:
1256			error = ENOPROTOOPT;
1257		}
1258		break;
1259	}
1260	return (error);
1261}
1262
1263static int
1264spx_usr_abort(so)
1265	struct socket *so;
1266{
1267	int s;
1268	struct ipxpcb *ipxp;
1269	struct spxpcb *cb;
1270
1271	ipxp = sotoipxpcb(so);
1272	cb = ipxtospxpcb(ipxp);
1273
1274	s = splnet();
1275	spx_drop(cb, ECONNABORTED);
1276	splx(s);
1277	return (0);
1278}
1279
1280/*
1281 * Accept a connection.  Essentially all the work is
1282 * done at higher levels; just return the address
1283 * of the peer, storing through addr.
1284 */
1285static int
1286spx_accept(so, nam)
1287	struct socket *so;
1288	struct sockaddr **nam;
1289{
1290	struct ipxpcb *ipxp;
1291	struct sockaddr_ipx *sipx, ssipx;
1292
1293	ipxp = sotoipxpcb(so);
1294	sipx = &ssipx;
1295	bzero(sipx, sizeof *sipx);
1296	sipx->sipx_len = sizeof *sipx;
1297	sipx->sipx_family = AF_IPX;
1298	sipx->sipx_addr = ipxp->ipxp_faddr;
1299	*nam = sodupsockaddr((struct sockaddr *)sipx, M_NOWAIT);
1300	return (0);
1301}
1302
1303static int
1304spx_attach(so, proto, td)
1305	struct socket *so;
1306	int proto;
1307	struct thread *td;
1308{
1309	int error;
1310	int s;
1311	struct ipxpcb *ipxp;
1312	struct spxpcb *cb;
1313	struct mbuf *mm;
1314	struct sockbuf *sb;
1315
1316	ipxp = sotoipxpcb(so);
1317	cb = ipxtospxpcb(ipxp);
1318
1319	if (ipxp != NULL)
1320		return (EISCONN);
1321	s = splnet();
1322	error = ipx_pcballoc(so, &ipxpcb_list, td);
1323	if (error)
1324		goto spx_attach_end;
1325	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1326		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1327		if (error)
1328			goto spx_attach_end;
1329	}
1330	ipxp = sotoipxpcb(so);
1331
1332	MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1333
1334	if (cb == NULL) {
1335		error = ENOBUFS;
1336		goto spx_attach_end;
1337	}
1338	sb = &so->so_snd;
1339
1340	mm = m_getclr(M_DONTWAIT, MT_HEADER);
1341	if (mm == NULL) {
1342		FREE(cb, M_PCB);
1343		error = ENOBUFS;
1344		goto spx_attach_end;
1345	}
1346	cb->s_ipx = mtod(mm, struct ipx *);
1347	cb->s_state = TCPS_LISTEN;
1348	cb->s_smax = -1;
1349	cb->s_swl1 = -1;
1350	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1351	cb->s_ipxpcb = ipxp;
1352	cb->s_mtu = 576 - sizeof(struct spx);
1353	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1354	cb->s_ssthresh = cb->s_cwnd;
1355	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1356	/* Above is recomputed when connecting to account
1357	   for changed buffering or mtu's */
1358	cb->s_rtt = SPXTV_SRTTBASE;
1359	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1360	SPXT_RANGESET(cb->s_rxtcur,
1361	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1362	    SPXTV_MIN, SPXTV_REXMTMAX);
1363	ipxp->ipxp_pcb = (caddr_t)cb;
1364spx_attach_end:
1365	splx(s);
1366	return (error);
1367}
1368
/*
 * Bind the socket's ipxpcb to the given local address by delegating to
 * ipx_pcbbind(); its result is returned unchanged.
 */
static int
spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (ipx_pcbbind(sotoipxpcb(so), nam, td));
}
1381
1382/*
1383 * Initiate connection to peer.
1384 * Enter SYN_SENT state, and mark socket as connecting.
1385 * Start keep-alive timer, setup prototype header,
1386 * Send initial system packet requesting connection.
1387 */
1388static int
1389spx_connect(so, nam, td)
1390	struct socket *so;
1391	struct sockaddr *nam;
1392	struct thread *td;
1393{
1394	int error;
1395	int s;
1396	struct ipxpcb *ipxp;
1397	struct spxpcb *cb;
1398
1399	ipxp = sotoipxpcb(so);
1400	cb = ipxtospxpcb(ipxp);
1401
1402	s = splnet();
1403	if (ipxp->ipxp_lport == 0) {
1404		error = ipx_pcbbind(ipxp, NULL, td);
1405		if (error)
1406			goto spx_connect_end;
1407	}
1408	error = ipx_pcbconnect(ipxp, nam, td);
1409	if (error)
1410		goto spx_connect_end;
1411	soisconnecting(so);
1412	spxstat.spxs_connattempt++;
1413	cb->s_state = TCPS_SYN_SENT;
1414	cb->s_did = 0;
1415	spx_template(cb);
1416	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1417	cb->s_force = 1 + SPXTV_KEEP;
1418	/*
1419	 * Other party is required to respond to
1420	 * the port I send from, but he is not
1421	 * required to answer from where I am sending to,
1422	 * so allow wildcarding.
1423	 * original port I am sending to is still saved in
1424	 * cb->s_dport.
1425	 */
1426	ipxp->ipxp_fport = 0;
1427	error = spx_output(cb, NULL);
1428spx_connect_end:
1429	splx(s);
1430	return (error);
1431}
1432
1433static int
1434spx_detach(so)
1435	struct socket *so;
1436{
1437	int s;
1438	struct ipxpcb *ipxp;
1439	struct spxpcb *cb;
1440
1441	ipxp = sotoipxpcb(so);
1442	cb = ipxtospxpcb(ipxp);
1443
1444	if (ipxp == NULL)
1445		return (ENOTCONN);
1446	s = splnet();
1447	if (cb->s_state > TCPS_LISTEN)
1448		spx_disconnect(cb);
1449	else
1450		spx_close(cb);
1451	splx(s);
1452	return (0);
1453}
1454
/*
 * Disconnect request from the socket layer.
 *
 * We may decide later to optionally implement connection-closing
 * handshaking at the SPX level; this is the hook where it would go.
 * For now the control block is simply handed to spx_disconnect() with
 * network interrupts blocked.  Always returns 0.
 */
static int
spx_usr_disconnect(struct socket *so)
{
	struct spxpcb *cb;
	int ipl;

	cb = ipxtospxpcb(sotoipxpcb(so));

	ipl = splnet();
	spx_disconnect(cb);
	splx(ipl);

	return (0);
}
1476
1477static int
1478spx_listen(so, td)
1479	struct socket *so;
1480	struct thread *td;
1481{
1482	int error;
1483	struct ipxpcb *ipxp;
1484	struct spxpcb *cb;
1485
1486	error = 0;
1487	ipxp = sotoipxpcb(so);
1488	cb = ipxtospxpcb(ipxp);
1489
1490	if (ipxp->ipxp_lport == 0)
1491		error = ipx_pcbbind(ipxp, NULL, td);
1492	if (error == 0)
1493		cb->s_state = TCPS_LISTEN;
1494	return (error);
1495}
1496
1497/*
1498 * After a receive, possibly send acknowledgment
1499 * updating allocation.
1500 */
1501static int
1502spx_rcvd(so, flags)
1503	struct socket *so;
1504	int flags;
1505{
1506	int s;
1507	struct ipxpcb *ipxp;
1508	struct spxpcb *cb;
1509
1510	ipxp = sotoipxpcb(so);
1511	cb = ipxtospxpcb(ipxp);
1512
1513	s = splnet();
1514	cb->s_flags |= SF_RVD;
1515	spx_output(cb, NULL);
1516	cb->s_flags &= ~SF_RVD;
1517	splx(s);
1518	return (0);
1519}
1520
1521static int
1522spx_rcvoob(so, m, flags)
1523	struct socket *so;
1524	struct mbuf *m;
1525	int flags;
1526{
1527	struct ipxpcb *ipxp;
1528	struct spxpcb *cb;
1529
1530	ipxp = sotoipxpcb(so);
1531	cb = ipxtospxpcb(ipxp);
1532
1533	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1534	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1535		m->m_len = 1;
1536		*mtod(m, caddr_t) = cb->s_iobc;
1537		return (0);
1538	}
1539	return (EINVAL);
1540}
1541
1542static int
1543spx_send(so, flags, m, addr, controlp, td)
1544	struct socket *so;
1545	int flags;
1546	struct mbuf *m;
1547	struct sockaddr *addr;
1548	struct mbuf *controlp;
1549	struct thread *td;
1550{
1551	int error;
1552	int s;
1553	struct ipxpcb *ipxp;
1554	struct spxpcb *cb;
1555
1556	error = 0;
1557	ipxp = sotoipxpcb(so);
1558	cb = ipxtospxpcb(ipxp);
1559
1560	s = splnet();
1561	if (flags & PRUS_OOB) {
1562		if (sbspace(&so->so_snd) < -512) {
1563			error = ENOBUFS;
1564			goto spx_send_end;
1565		}
1566		cb->s_oobflags |= SF_SOOB;
1567	}
1568	if (controlp != NULL) {
1569		u_short *p = mtod(controlp, u_short *);
1570		spx_newchecks[2]++;
1571		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1572			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1573			spx_newchecks[3]++;
1574		}
1575		m_freem(controlp);
1576	}
1577	controlp = NULL;
1578	error = spx_output(cb, m);
1579	m = NULL;
1580spx_send_end:
1581	if (controlp != NULL)
1582		m_freem(controlp);
1583	if (m != NULL)
1584		m_freem(m);
1585	splx(s);
1586	return (error);
1587}
1588
1589static int
1590spx_shutdown(so)
1591	struct socket *so;
1592{
1593	int error;
1594	int s;
1595	struct ipxpcb *ipxp;
1596	struct spxpcb *cb;
1597
1598	error = 0;
1599	ipxp = sotoipxpcb(so);
1600	cb = ipxtospxpcb(ipxp);
1601
1602	s = splnet();
1603	socantsendmore(so);
1604	cb = spx_usrclosed(cb);
1605	if (cb != NULL)
1606		error = spx_output(cb, NULL);
1607	splx(s);
1608	return (error);
1609}
1610
1611static int
1612spx_sp_attach(so, proto, td)
1613	struct socket *so;
1614	int proto;
1615	struct thread *td;
1616{
1617	int error;
1618	struct ipxpcb *ipxp;
1619
1620	error = spx_attach(so, proto, td);
1621	if (error == 0) {
1622		ipxp = sotoipxpcb(so);
1623		((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1624					(SF_HI | SF_HO | SF_PI);
1625	}
1626	return (error);
1627}
1628
1629/*
1630 * Create template to be used to send spx packets on a connection.
1631 * Called after host entry created, fills
1632 * in a skeletal spx header (choosing connection id),
1633 * minimizing the amount of work necessary when the connection is used.
1634 */
1635static void
1636spx_template(cb)
1637	register struct spxpcb *cb;
1638{
1639	register struct ipxpcb *ipxp = cb->s_ipxpcb;
1640	register struct ipx *ipx = cb->s_ipx;
1641	register struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1642
1643	ipx->ipx_pt = IPXPROTO_SPX;
1644	ipx->ipx_sna = ipxp->ipxp_laddr;
1645	ipx->ipx_dna = ipxp->ipxp_faddr;
1646	cb->s_sid = htons(spx_iss);
1647	spx_iss += SPX_ISSINCR/2;
1648	cb->s_alo = 1;
1649	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1650	cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1651					of large packets */
1652	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1653	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1654		/* But allow for lots of little packets as well */
1655}
1656
1657/*
1658 * Close a SPIP control block:
1659 *	discard spx control block itself
1660 *	discard ipx protocol control block
1661 *	wake up any sleepers
1662 */
1663static struct spxpcb *
1664spx_close(cb)
1665	register struct spxpcb *cb;
1666{
1667	register struct spx_q *s;
1668	struct ipxpcb *ipxp = cb->s_ipxpcb;
1669	struct socket *so = ipxp->ipxp_socket;
1670	register struct mbuf *m;
1671
1672	s = cb->s_q.si_next;
1673	while (s != &(cb->s_q)) {
1674		s = s->si_next;
1675		m = dtom(s->si_prev);
1676		remque(s->si_prev);
1677		m_freem(m);
1678	}
1679	m_free(dtom(cb->s_ipx));
1680	FREE(cb, M_PCB);
1681	ipxp->ipxp_pcb = 0;
1682	soisdisconnected(so);
1683	ipx_pcbdetach(ipxp);
1684	spxstat.spxs_closed++;
1685	return (NULL);
1686}
1687
/*
 * The user has closed the connection.
 *
 * Someday we may do level 3 handshaking to close a connection or send
 * a Xerox-style error.  For now the control block is simply closed and
 * the result of spx_close() is returned.
 */
static struct spxpcb *
spx_usrclosed(struct spxpcb *cb)
{
	struct spxpcb *remaining;

	remaining = spx_close(cb);
	return (remaining);
}
1699
/*
 * Disconnect a control block.  No closing handshake is performed: the
 * control block is closed and the result of spx_close() is returned.
 */
static struct spxpcb *
spx_disconnect(struct spxpcb *cb)
{
	struct spxpcb *remaining;

	remaining = spx_close(cb);
	return (remaining);
}
1706
1707/*
1708 * Drop connection, reporting
1709 * the specified error.
1710 */
1711static struct spxpcb *
1712spx_drop(cb, errno)
1713	register struct spxpcb *cb;
1714	int errno;
1715{
1716	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1717
1718	/*
1719	 * someday, in the xerox world
1720	 * we will generate error protocol packets
1721	 * announcing that the socket has gone away.
1722	 */
1723	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1724		spxstat.spxs_drops++;
1725		cb->s_state = TCPS_CLOSED;
1726		/*tcp_output(cb);*/
1727	} else
1728		spxstat.spxs_conndrops++;
1729	so->so_error = errno;
1730	return (spx_close(cb));
1731}
1732
1733/*
1734 * Fast timeout routine for processing delayed acks
1735 */
1736void
1737spx_fasttimo()
1738{
1739	register struct ipxpcb *ipxp;
1740	register struct spxpcb *cb;
1741	int s = splnet();
1742
1743	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1744		if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1745		    (cb->s_flags & SF_DELACK)) {
1746			cb->s_flags &= ~SF_DELACK;
1747			cb->s_flags |= SF_ACKNOW;
1748			spxstat.spxs_delack++;
1749			spx_output(cb, NULL);
1750		}
1751	}
1752
1753	splx(s);
1754}
1755
1756/*
1757 * spx protocol timeout routine called every 500 ms.
1758 * Updates the timers in all active pcb's and
1759 * causes finite state machine actions if timers expire.
1760 */
1761void
1762spx_slowtimo()
1763{
1764	register struct ipxpcb *ip, *ip_temp;
1765	register struct spxpcb *cb;
1766	int s = splnet();
1767	register int i;
1768
1769	/*
1770	 * Search through tcb's and update active timers.  Note that timers
1771	 * may free the ipxpcb, so be sure to handle that case.
1772	 */
1773	LIST_FOREACH_SAFE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1774		cb = ipxtospxpcb(ip);
1775		if (cb == NULL)
1776			continue;
1777		for (i = 0; i < SPXT_NTIMERS; i++) {
1778			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1779				/*
1780				 * spx_timers() returns (NULL) if it free'd
1781				 * the pcb.
1782				 */
1783				if (spx_timers(cb, i) == NULL)
1784					continue;
1785			}
1786		}
1787		cb->s_idle++;
1788		if (cb->s_rtt)
1789			cb->s_rtt++;
1790	}
1791	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1792	splx(s);
1793}
1794
1795/*
1796 * SPX timer processing.
1797 */
1798static struct spxpcb *
1799spx_timers(cb, timer)
1800	register struct spxpcb *cb;
1801	int timer;
1802{
1803	long rexmt;
1804	int win;
1805
1806	cb->s_force = 1 + timer;
1807	switch (timer) {
1808
1809	/*
1810	 * 2 MSL timeout in shutdown went off.  TCP deletes connection
1811	 * control block.
1812	 */
1813	case SPXT_2MSL:
1814		printf("spx: SPXT_2MSL went off for no reason\n");
1815		cb->s_timer[timer] = 0;
1816		break;
1817
1818	/*
1819	 * Retransmission timer went off.  Message has not
1820	 * been acked within retransmit interval.  Back off
1821	 * to a longer retransmit interval and retransmit one packet.
1822	 */
1823	case SPXT_REXMT:
1824		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1825			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1826			spxstat.spxs_timeoutdrop++;
1827			cb = spx_drop(cb, ETIMEDOUT);
1828			break;
1829		}
1830		spxstat.spxs_rexmttimeo++;
1831		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1832		rexmt *= spx_backoff[cb->s_rxtshift];
1833		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1834		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1835		/*
1836		 * If we have backed off fairly far, our srtt
1837		 * estimate is probably bogus.  Clobber it
1838		 * so we'll take the next rtt measurement as our srtt;
1839		 * move the current srtt into rttvar to keep the current
1840		 * retransmit times until then.
1841		 */
1842		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1843			cb->s_rttvar += (cb->s_srtt >> 2);
1844			cb->s_srtt = 0;
1845		}
1846		cb->s_snxt = cb->s_rack;
1847		/*
1848		 * If timing a packet, stop the timer.
1849		 */
1850		cb->s_rtt = 0;
1851		/*
1852		 * See very long discussion in tcp_timer.c about congestion
1853		 * window and sstrhesh
1854		 */
1855		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1856		if (win < 2)
1857			win = 2;
1858		cb->s_cwnd = CUNIT;
1859		cb->s_ssthresh = win * CUNIT;
1860		spx_output(cb, NULL);
1861		break;
1862
1863	/*
1864	 * Persistance timer into zero window.
1865	 * Force a probe to be sent.
1866	 */
1867	case SPXT_PERSIST:
1868		spxstat.spxs_persisttimeo++;
1869		spx_setpersist(cb);
1870		spx_output(cb, NULL);
1871		break;
1872
1873	/*
1874	 * Keep-alive timer went off; send something
1875	 * or drop connection if idle for too long.
1876	 */
1877	case SPXT_KEEP:
1878		spxstat.spxs_keeptimeo++;
1879		if (cb->s_state < TCPS_ESTABLISHED)
1880			goto dropit;
1881		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1882		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1883				goto dropit;
1884			spxstat.spxs_keepprobe++;
1885			spx_output(cb, NULL);
1886		} else
1887			cb->s_idle = 0;
1888		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1889		break;
1890	dropit:
1891		spxstat.spxs_keepdrops++;
1892		cb = spx_drop(cb, ETIMEDOUT);
1893		break;
1894	}
1895	return (cb);
1896}
1897