spx_usrreq.c revision 151888
1/*-
2 * Copyright (c) 2004-2005 Robert N. M. Watson
3 * Copyright (c) 1995, Mike Mitchell
4 * Copyright (c) 1984, 1985, 1986, 1987, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the University of
18 *	California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)spx_usrreq.h
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 151888 2005-10-30 19:44:40Z rwatson $");
40
41#include <sys/param.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/signalvar.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/sx.h>
52#include <sys/systm.h>
53
54#include <net/route.h>
55#include <netinet/tcp_fsm.h>
56
57#include <netipx/ipx.h>
58#include <netipx/ipx_pcb.h>
59#include <netipx/ipx_var.h>
60#include <netipx/spx.h>
61#include <netipx/spx_debug.h>
62#include <netipx/spx_timer.h>
63#include <netipx/spx_var.h>
64
65/*
66 * SPX protocol implementation.
67 */
/* Set to 1 by spx_init(); presumably the initial send sequence - TODO confirm. */
static u_short 	spx_iss;
/*
 * Ad-hoc event counters bumped on the SF_NEWCALL code paths (slots 0, 1, 4,
 * 7, 8 and 9 are the ones touched by spx_reass() and spx_output()).
 */
static u_short	spx_newchecks[50];
/*
 * When non-zero, a packet arriving in the LISTEN state with a non-zero
 * destination connection id or sequence number is rejected by spx_input().
 */
static int	spx_hardnosed;
/* When non-zero, spx_reass() flags SF_DELACK instead of SF_ACKNOW for SPX_SA. */
static int	spx_use_delack = 0;
/* When non-zero, trace every packet via spx_trace(), not just SO_DEBUG sockets. */
static int	traceallspxs = 0;
/* Copy of the received header taken by spx_input() for later spx_trace() calls. */
static struct	spx 	spx_savesi;
/* Input-side statistics; the newstats member is reached through "spxstat". */
static struct	spx_istat spx_istat;

/* Following was struct spxstat spxstat; */
#ifndef spxstat
#define spxstat spx_istat.newstats
#endif

/*
 * Multiplier table indexed by cb->s_rxtshift; spx_setpersist() scales the
 * persist timeout with it.  The 13 entries imply SPX_MAXRXTSHIFT == 12
 * (NOTE(review): confirm against spx_timer.h).
 */
static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
83
/* Forward declarations of file-local protocol helpers. */
static	void spx_close(struct spxpcb *cb);
static	void spx_disconnect(struct spxpcb *cb);
static	void spx_drop(struct spxpcb *cb, int errno);
static	int spx_output(struct spxpcb *cb, struct mbuf *m0);
static	int spx_reass(struct spxpcb *cb, struct spx *si);
static	void spx_setpersist(struct spxpcb *cb);
static	void spx_template(struct spxpcb *cb);
static	struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static	void spx_usrclosed(struct spxpcb *cb);

/* Forward declarations of the handlers installed in the pr_usrreqs tables. */
static	int spx_usr_abort(struct socket *so);
static	int spx_accept(struct socket *so, struct sockaddr **nam);
static	int spx_attach(struct socket *so, int proto, struct thread *td);
static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
static	int spx_connect(struct socket *so, struct sockaddr *nam,
			struct thread *td);
static	int spx_detach(struct socket *so);
static	int spx_usr_disconnect(struct socket *so);
static	int spx_listen(struct socket *so, int backlog, struct thread *td);
static	int spx_rcvd(struct socket *so, int flags);
static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static	int spx_send(struct socket *so, int flags, struct mbuf *m,
		     struct sockaddr *addr, struct mbuf *control,
		     struct thread *td);
static	int spx_shutdown(struct socket *so);
static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
110
/*
 * Socket-layer request dispatch table for SPX sockets that attach through
 * spx_attach().  Address queries and ioctls are delegated to the generic
 * IPX handlers (ipx_control, ipx_peeraddr, ipx_sockaddr); everything else
 * is implemented in this file.
 */
struct	pr_usrreqs spx_usrreqs = {
	.pru_abort =		spx_usr_abort,
	.pru_accept =		spx_accept,
	.pru_attach =		spx_attach,
	.pru_bind =		spx_bind,
	.pru_connect =		spx_connect,
	.pru_control =		ipx_control,
	.pru_detach =		spx_detach,
	.pru_disconnect =	spx_usr_disconnect,
	.pru_listen =		spx_listen,
	.pru_peeraddr =		ipx_peeraddr,
	.pru_rcvd =		spx_rcvd,
	.pru_rcvoob =		spx_rcvoob,
	.pru_send =		spx_send,
	.pru_shutdown =		spx_shutdown,
	.pru_sockaddr =		ipx_sockaddr,
};
128
/*
 * Variant of the SPX request table whose only difference is the attach
 * handler: sockets using this table attach through spx_sp_attach().
 * NOTE(review): presumably the sequenced-packet flavour of SPX - confirm
 * against the protosw table that references it.
 */
struct	pr_usrreqs spx_usrreq_sps = {
	.pru_abort =		spx_usr_abort,
	.pru_accept =		spx_accept,
	.pru_attach =		spx_sp_attach,
	.pru_bind =		spx_bind,
	.pru_connect =		spx_connect,
	.pru_control =		ipx_control,
	.pru_detach =		spx_detach,
	.pru_disconnect =	spx_usr_disconnect,
	.pru_listen =		spx_listen,
	.pru_peeraddr =		ipx_peeraddr,
	.pru_rcvd =		spx_rcvd,
	.pru_rcvoob =		spx_rcvoob,
	.pru_send =		spx_send,
	.pru_shutdown =		spx_shutdown,
	.pru_sockaddr =		ipx_sockaddr,
};
146
147void
148spx_init()
149{
150
151	spx_iss = 1; /* WRONG !! should fish it out of TODR */
152}
153
154void
155spx_input(m, ipxp)
156	register struct mbuf *m;
157	register struct ipxpcb *ipxp;
158{
159	register struct spxpcb *cb;
160	register struct spx *si = mtod(m, struct spx *);
161	register struct socket *so;
162	int dropsocket = 0;
163	short ostate = 0;
164
165	spxstat.spxs_rcvtotal++;
166	KASSERT(ipxp != NULL, ("spx_input: NULL ipxpcb"));
167
168	/*
169	 * spx_input() assumes that the caller will hold both the pcb list
170	 * lock and also the ipxp lock.  spx_input() will release both before
171	 * returning, and may in fact trade in the ipxp lock for another pcb
172	 * lock following sonewconn().
173	 */
174	IPX_LIST_LOCK_ASSERT();
175	IPX_LOCK_ASSERT(ipxp);
176
177	cb = ipxtospxpcb(ipxp);
178	if (cb == NULL)
179		goto bad;
180
181	if (m->m_len < sizeof(*si)) {
182		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
183			IPX_UNLOCK(ipxp);
184			IPX_LIST_UNLOCK();
185			spxstat.spxs_rcvshort++;
186			return;
187		}
188		si = mtod(m, struct spx *);
189	}
190	si->si_seq = ntohs(si->si_seq);
191	si->si_ack = ntohs(si->si_ack);
192	si->si_alo = ntohs(si->si_alo);
193
194	so = ipxp->ipxp_socket;
195
196	if (so->so_options & SO_DEBUG || traceallspxs) {
197		ostate = cb->s_state;
198		spx_savesi = *si;
199	}
200	if (so->so_options & SO_ACCEPTCONN) {
201		struct spxpcb *ocb = cb;
202
203		so = sonewconn(so, 0);
204		if (so == NULL) {
205			goto drop;
206		}
207		/*
208		 * This is ugly, but ....
209		 *
210		 * Mark socket as temporary until we're
211		 * committed to keeping it.  The code at
212		 * ``drop'' and ``dropwithreset'' check the
213		 * flag dropsocket to see if the temporary
214		 * socket created here should be discarded.
215		 * We mark the socket as discardable until
216		 * we're committed to it below in TCPS_LISTEN.
217		 */
218		dropsocket++;
219		IPX_UNLOCK(ipxp);
220		ipxp = (struct ipxpcb *)so->so_pcb;
221		IPX_LOCK(ipxp);
222		ipxp->ipxp_laddr = si->si_dna;
223		cb = ipxtospxpcb(ipxp);
224		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
225		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
226		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
227		cb->s_state = TCPS_LISTEN;
228	}
229
230	/*
231	 * Packet received on connection.
232	 * reset idle time and keep-alive timer;
233	 */
234	cb->s_idle = 0;
235	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
236
237	switch (cb->s_state) {
238
239	case TCPS_LISTEN:{
240		struct sockaddr_ipx *sipx, ssipx;
241		struct ipx_addr laddr;
242
243		/*
244		 * If somebody here was carying on a conversation
245		 * and went away, and his pen pal thinks he can
246		 * still talk, we get the misdirected packet.
247		 */
248		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
249			spx_istat.gonawy++;
250			goto dropwithreset;
251		}
252		sipx = &ssipx;
253		bzero(sipx, sizeof *sipx);
254		sipx->sipx_len = sizeof(*sipx);
255		sipx->sipx_family = AF_IPX;
256		sipx->sipx_addr = si->si_sna;
257		laddr = ipxp->ipxp_laddr;
258		if (ipx_nullhost(laddr))
259			ipxp->ipxp_laddr = si->si_dna;
260		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
261			ipxp->ipxp_laddr = laddr;
262			spx_istat.noconn++;
263			goto drop;
264		}
265		spx_template(cb);
266		dropsocket = 0;		/* committed to socket */
267		cb->s_did = si->si_sid;
268		cb->s_rack = si->si_ack;
269		cb->s_ralo = si->si_alo;
270#define THREEWAYSHAKE
271#ifdef THREEWAYSHAKE
272		cb->s_state = TCPS_SYN_RECEIVED;
273		cb->s_force = 1 + SPXT_KEEP;
274		spxstat.spxs_accepts++;
275		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
276		}
277		break;
278	/*
279	 * This state means that we have heard a response
280	 * to our acceptance of their connection
281	 * It is probably logically unnecessary in this
282	 * implementation.
283	 */
284	 case TCPS_SYN_RECEIVED: {
285		if (si->si_did != cb->s_sid) {
286			spx_istat.wrncon++;
287			goto drop;
288		}
289#endif
290		ipxp->ipxp_fport =  si->si_sport;
291		cb->s_timer[SPXT_REXMT] = 0;
292		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
293		soisconnected(so);
294		cb->s_state = TCPS_ESTABLISHED;
295		spxstat.spxs_accepts++;
296		}
297		break;
298
299	/*
300	 * This state means that we have gotten a response
301	 * to our attempt to establish a connection.
302	 * We fill in the data from the other side,
303	 * telling us which port to respond to, instead of the well-
304	 * known one we might have sent to in the first place.
305	 * We also require that this is a response to our
306	 * connection id.
307	 */
308	case TCPS_SYN_SENT:
309		if (si->si_did != cb->s_sid) {
310			spx_istat.notme++;
311			goto drop;
312		}
313		spxstat.spxs_connects++;
314		cb->s_did = si->si_sid;
315		cb->s_rack = si->si_ack;
316		cb->s_ralo = si->si_alo;
317		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
318		cb->s_timer[SPXT_REXMT] = 0;
319		cb->s_flags |= SF_ACKNOW;
320		soisconnected(so);
321		cb->s_state = TCPS_ESTABLISHED;
322		/* Use roundtrip time of connection request for initial rtt */
323		if (cb->s_rtt) {
324			cb->s_srtt = cb->s_rtt << 3;
325			cb->s_rttvar = cb->s_rtt << 1;
326			SPXT_RANGESET(cb->s_rxtcur,
327			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
328			    SPXTV_MIN, SPXTV_REXMTMAX);
329			    cb->s_rtt = 0;
330		}
331	}
332	if (so->so_options & SO_DEBUG || traceallspxs)
333		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
334
335	m->m_len -= sizeof(struct ipx);
336	m->m_pkthdr.len -= sizeof(struct ipx);
337	m->m_data += sizeof(struct ipx);
338
339	if (spx_reass(cb, si)) {
340		m_freem(m);
341	}
342	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
343		spx_output(cb, NULL);
344	cb->s_flags &= ~(SF_WIN|SF_RXT);
345	IPX_UNLOCK(ipxp);
346	IPX_LIST_UNLOCK();
347	return;
348
349dropwithreset:
350	IPX_UNLOCK(ipxp);
351	if (dropsocket) {
352		struct socket *head;
353		ACCEPT_LOCK();
354		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
355		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
356		head = so->so_head;
357		TAILQ_REMOVE(&head->so_incomp, so, so_list);
358		head->so_incqlen--;
359		so->so_qstate &= ~SQ_INCOMP;
360		so->so_head = NULL;
361		ACCEPT_UNLOCK();
362		soabort(so);
363		cb = NULL;
364	}
365	IPX_LIST_UNLOCK();
366	si->si_seq = ntohs(si->si_seq);
367	si->si_ack = ntohs(si->si_ack);
368	si->si_alo = ntohs(si->si_alo);
369	m_freem(dtom(si));
370	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
371	    traceallspxs)
372		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
373	return;
374
375drop:
376bad:
377	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
378            traceallspxs)
379		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
380	IPX_UNLOCK(ipxp);
381	IPX_LIST_UNLOCK();
382	m_freem(m);
383}
384
/*
 * Number of consecutive, fully duplicate acknowledgements after which
 * spx_reass() retransmits the packet the peer is asking for.
 */
static int spxrexmtthresh = 3;
386
/*
 * This is structurally similar to the tcp reassembly routine
 * but its function is somewhat different:  It merely queues
 * packets up, and suppresses duplicates.
 *
 * In order, it: digests the acknowledgement and allocation fields of the
 * incoming header (duplicate-ack retransmit, round-trip estimation,
 * congestion window, trimming acked records from so_snd), updates the
 * send window, inserts the packet into the per-connection queue cb->s_q
 * in sequence order, and finally moves every in-sequence queued packet
 * to the socket receive buffer.
 *
 *	cb	connection control block; its pcb lock must be held.
 *	si	header of the received packet (already in host order), or
 *		SI(0) to skip straight to delivering queued packets.
 *
 * Returns non-zero when the packet was NOT kept and the caller must free
 * its mbuf (spx_input() does so); returns 0 when the packet was queued or
 * has already been freed here.
 */
static int
spx_reass(cb, si)
register struct spxpcb *cb;
register struct spx *si;
{
	register struct spx_q *q;
	register struct mbuf *m;
	register struct socket *so = cb->s_ipxpcb->ipxp_socket;
	/* Non-zero when SF_HI is set: deliver packets with their headers. */
	char packetp = cb->s_flags & SF_HI;
	int incr;
	char wakeup = 0;

	IPX_LOCK_ASSERT(cb->s_ipxpcb);

	/* No new packet: just try to deliver what is already queued. */
	if (si == SI(0))
		goto present;
	/*
	 * Update our news from them.
	 */
	if (si->si_cc & SPX_SA)
		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
	if (SSEQ_GT(si->si_alo, cb->s_ralo))
		cb->s_flags |= SF_WIN;
	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
		/* The ack does not advance: possibly a duplicate. */
		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
			spxstat.spxs_rcvdupack++;
			/*
			 * If this is a completely duplicate ack
			 * and other conditions hold, we assume
			 * a packet has been dropped and retransmit
			 * it exactly as in tcp_input().
			 */
			if (si->si_ack != cb->s_rack ||
			    si->si_alo != cb->s_ralo)
				cb->s_dupacks = 0;
			else if (++cb->s_dupacks == spxrexmtthresh) {
				u_short onxt = cb->s_snxt;
				int cwnd = cb->s_cwnd;

				/*
				 * Rewind to the requested packet, shrink
				 * the congestion window to one unit for the
				 * resend, then restore/halve afterwards.
				 */
				cb->s_snxt = si->si_ack;
				cb->s_cwnd = CUNIT;
				cb->s_force = 1 + SPXT_REXMT;
				spx_output(cb, NULL);
				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
				cb->s_rtt = 0;
				if (cwnd >= 4 * CUNIT)
					cb->s_cwnd = cwnd / 2;
				if (SSEQ_GT(onxt, cb->s_snxt))
					cb->s_snxt = onxt;
				return (1);
			}
		} else
			cb->s_dupacks = 0;
		goto update_window;
	}
	cb->s_dupacks = 0;
	/*
	 * If our correspondent acknowledges data we haven't sent
	 * TCP would drop the packet after acking.  We'll be a little
	 * more permissive
	 */
	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
		spxstat.spxs_rcvacktoomuch++;
		si->si_ack = cb->s_smax + 1;
	}
	spxstat.spxs_rcvackpack++;
	/*
	 * If transmit timer is running and timed sequence
	 * number was acked, update smoothed round trip time.
	 * See discussion of algorithm in tcp_input.c
	 */
	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
		spxstat.spxs_rttupdated++;
		if (cb->s_srtt != 0) {
			register short delta;
			delta = cb->s_rtt - (cb->s_srtt >> 3);
			if ((cb->s_srtt += delta) <= 0)
				cb->s_srtt = 1;
			if (delta < 0)
				delta = -delta;
			delta -= (cb->s_rttvar >> 2);
			if ((cb->s_rttvar += delta) <= 0)
				cb->s_rttvar = 1;
		} else {
			/*
			 * No rtt measurement yet
			 */
			cb->s_srtt = cb->s_rtt << 3;
			cb->s_rttvar = cb->s_rtt << 1;
		}
		cb->s_rtt = 0;
		cb->s_rxtshift = 0;
		SPXT_RANGESET(cb->s_rxtcur,
			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
			SPXTV_MIN, SPXTV_REXMTMAX);
	}
	/*
	 * If all outstanding data is acked, stop retransmit
	 * timer and remember to restart (more output or persist).
	 * If there is more data to be acked, restart retransmit
	 * timer, using current (possibly backed-off) value;
	 */
	if (si->si_ack == cb->s_smax + 1) {
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_flags |= SF_RXT;
	} else if (cb->s_timer[SPXT_PERSIST] == 0)
		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
	/*
	 * When new data is acked, open the congestion window.
	 * If the window gives us less than ssthresh packets
	 * in flight, open exponentially (maxseg at a time).
	 * Otherwise open linearly (maxseg^2 / cwnd at a time).
	 */
	incr = CUNIT;
	if (cb->s_cwnd > cb->s_ssthresh)
		incr = max(incr * incr / cb->s_cwnd, 1);
	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
	/*
	 * Trim Acked data from output queue.
	 */
	SOCKBUF_LOCK(&so->so_snd);
	while ((m = so->so_snd.sb_mb) != NULL) {
		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
			sbdroprecord_locked(&so->so_snd);
		else
			break;
	}
	/* Called with the so_snd lock held; the lock is not released here. */
	sowwakeup_locked(so);
	cb->s_rack = si->si_ack;
update_window:
	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
		cb->s_snxt = cb->s_rack;
	/*
	 * NOTE(review): the grouping of this condition differs from the
	 * analogous test in tcp_input(); the final clause is not guarded by
	 * s_swl1 == si_seq.  Confirm that this is intended.
	 */
	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
		/* keep track of pure window updates */
		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
			spxstat.spxs_rcvwinupd++;
			spxstat.spxs_rcvdupack--;
		}
		cb->s_ralo = si->si_alo;
		cb->s_swl1 = si->si_seq;
		cb->s_swl2 = si->si_ack;
		cb->s_swnd = (1 + si->si_alo - si->si_ack);
		if (cb->s_swnd > cb->s_smxw)
			cb->s_smxw = cb->s_swnd;
		cb->s_flags |= SF_WIN;
	}
	/*
	 * If this packet number is higher than that which
	 * we have allocated refuse it, unless urgent
	 */
	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
		if (si->si_cc & SPX_SP) {
			spxstat.spxs_rcvwinprobe++;
			return (1);
		} else
			spxstat.spxs_rcvpackafterwin++;
		if (si->si_cc & SPX_OB) {
			/* Out-of-band data is tolerated up to 60 past s_alo. */
			if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
				/* Freed here, so tell the caller not to. */
				m_freem(dtom(si));
				return (0);
			} /* else queue this packet; */
		} else {
#ifdef BROKEN
			/*
			 * XXXRW: This is broken on at least one count:
			 * spx_close() will free the ipxp and related parts,
			 * which are then touched by spx_input() after the
			 * return from spx_reass().
			 */
			/*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
			if (so->so_state && SS_NOFDREF) {
				spx_close(cb);
			} else
				       would crash system*/
#endif
			spx_istat.notyet++;
			/* Freed here, so tell the caller not to. */
			m_freem(dtom(si));
			return (0);
		}
	}
	/*
	 * If this is a system packet, we don't need to
	 * queue it up, and won't update acknowledge #
	 */
	if (si->si_cc & SPX_SP) {
		return (1);
	}
	/*
	 * We have already seen this packet, so drop.
	 */
	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
		spx_istat.bdreas++;
		spxstat.spxs_rcvduppack++;
		if (si->si_seq == cb->s_ack - 1)
			spx_istat.lstdup++;
		return (1);
	}
	/*
	 * Loop through all packets queued up to insert in
	 * appropriate sequence.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		if (si->si_seq == SI(q)->si_seq) {
			spxstat.spxs_rcvduppack++;
			return (1);
		}
		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
			spxstat.spxs_rcvoopack++;
			break;
		}
	}
	/* Link the packet header itself into the queue, before q. */
	insque(si, q->si_prev);
	/*
	 * If this packet is urgent, inform process
	 */
	if (si->si_cc & SPX_OB) {
		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
		sohasoutofband(so);
		cb->s_oobflags |= SF_IOOB;
	}
present:
#define SPINC sizeof(struct spxhdr)
	SOCKBUF_LOCK(&so->so_rcv);
	/*
	 * Loop through all packets queued up to update acknowledge
	 * number, and present all acknowledged data to user;
	 * If in packet interface mode, show packet headers.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		  if (SI(q)->si_seq == cb->s_ack) {
			cb->s_ack++;
			m = dtom(q);
			if (SI(q)->si_cc & SPX_OB) {
				cb->s_oobflags &= ~SF_IOOB;
				if (so->so_rcv.sb_cc)
					so->so_oobmark = so->so_rcv.sb_cc;
				else
					so->so_rcv.sb_state |= SBS_RCVATMARK;
			}
			/*
			 * Step back before unlinking so that the loop's
			 * q = q->si_next lands on the entry that followed
			 * the one being removed.
			 */
			q = q->si_prev;
			remque(q->si_next);
			wakeup = 1;
			spxstat.spxs_rcvpack++;
#ifdef SF_NEWCALL
			if (cb->s_flags2 & SF_NEWCALL) {
				struct spxhdr *sp = mtod(m, struct spxhdr *);
				u_char dt = sp->spx_dt;
				spx_newchecks[4]++;
				/*
				 * On a datastream-type change, queue a small
				 * MT_CONTROL mbuf carrying the new type.
				 */
				if (dt != cb->s_rhdr.spx_dt) {
					struct mbuf *mm =
					   m_getclr(M_DONTWAIT, MT_CONTROL);
					spx_newchecks[0]++;
					if (mm != NULL) {
						u_short *s =
							mtod(mm, u_short *);
						cb->s_rhdr.spx_dt = dt;
						mm->m_len = 5; /*XXX*/
						s[0] = 5;
						s[1] = 1;
						*(u_char *)(&s[2]) = dt;
						sbappend_locked(&so->so_rcv, mm);
					}
				}
				if (sp->spx_cc & SPX_OB) {
					MCHTYPE(m, MT_OOBDATA);
					spx_newchecks[1]++;
					so->so_oobmark = 0;
					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
				}
				/* Strip the SPX header unless headers are wanted. */
				if (packetp == 0) {
					m->m_data += SPINC;
					m->m_len -= SPINC;
					m->m_pkthdr.len -= SPINC;
				}
				if ((sp->spx_cc & SPX_EM) || packetp) {
					sbappendrecord_locked(&so->so_rcv, m);
					spx_newchecks[9]++;
				} else
					sbappend_locked(&so->so_rcv, m);
			} else
#endif
			if (packetp) {
				sbappendrecord_locked(&so->so_rcv, m);
			} else {
				/* Remember the last header, then strip it. */
				cb->s_rhdr = *mtod(m, struct spxhdr *);
				m->m_data += SPINC;
				m->m_len -= SPINC;
				m->m_pkthdr.len -= SPINC;
				sbappend_locked(&so->so_rcv, m);
			}
		  } else
			break;
	}
	/*
	 * Entered with the so_rcv lock held on both branches; the else
	 * branch releases it explicitly.
	 */
	if (wakeup)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (0);
}
694
695void
696spx_ctlinput(cmd, arg_as_sa, dummy)
697	int cmd;
698	struct sockaddr *arg_as_sa;	/* XXX should be swapped with dummy */
699	void *dummy;
700{
701	caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
702	struct ipx_addr *na;
703	struct sockaddr_ipx *sipx;
704
705	if (cmd < 0 || cmd >= PRC_NCMDS)
706		return;
707
708	switch (cmd) {
709
710	case PRC_ROUTEDEAD:
711		return;
712
713	case PRC_IFDOWN:
714	case PRC_HOSTDEAD:
715	case PRC_HOSTUNREACH:
716		sipx = (struct sockaddr_ipx *)arg;
717		if (sipx->sipx_family != AF_IPX)
718			return;
719		na = &sipx->sipx_addr;
720		break;
721
722	default:
723		break;
724	}
725}
726
727static int
728spx_output(cb, m0)
729	register struct spxpcb *cb;
730	struct mbuf *m0;
731{
732	struct socket *so = cb->s_ipxpcb->ipxp_socket;
733	register struct mbuf *m;
734	register struct spx *si = NULL;
735	register struct sockbuf *sb = &so->so_snd;
736	int len = 0, win, rcv_win;
737	short span, off, recordp = 0;
738	u_short alo;
739	int error = 0, sendalot;
740#ifdef notdef
741	int idle;
742#endif
743	struct mbuf *mprev;
744
745	IPX_LOCK_ASSERT(cb->s_ipxpcb);
746
747	if (m0 != NULL) {
748		int mtu = cb->s_mtu;
749		int datalen;
750		/*
751		 * Make sure that packet isn't too big.
752		 */
753		for (m = m0; m != NULL; m = m->m_next) {
754			mprev = m;
755			len += m->m_len;
756			if (m->m_flags & M_EOR)
757				recordp = 1;
758		}
759		datalen = (cb->s_flags & SF_HO) ?
760				len - sizeof(struct spxhdr) : len;
761		if (datalen > mtu) {
762			if (cb->s_flags & SF_PI) {
763				m_freem(m0);
764				return (EMSGSIZE);
765			} else {
766				int oldEM = cb->s_cc & SPX_EM;
767
768				cb->s_cc &= ~SPX_EM;
769				while (len > mtu) {
770					/*
771					 * Here we are only being called
772					 * from usrreq(), so it is OK to
773					 * block.
774					 */
775					m = m_copym(m0, 0, mtu, M_TRYWAIT);
776					if (cb->s_flags & SF_NEWCALL) {
777					    struct mbuf *mm = m;
778					    spx_newchecks[7]++;
779					    while (mm != NULL) {
780						mm->m_flags &= ~M_EOR;
781						mm = mm->m_next;
782					    }
783					}
784					error = spx_output(cb, m);
785					if (error) {
786						cb->s_cc |= oldEM;
787						m_freem(m0);
788						return (error);
789					}
790					m_adj(m0, mtu);
791					len -= mtu;
792				}
793				cb->s_cc |= oldEM;
794			}
795		}
796		/*
797		 * Force length even, by adding a "garbage byte" if
798		 * necessary.
799		 */
800		if (len & 1) {
801			m = mprev;
802			if (M_TRAILINGSPACE(m) >= 1)
803				m->m_len++;
804			else {
805				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
806
807				if (m1 == NULL) {
808					m_freem(m0);
809					return (ENOBUFS);
810				}
811				m1->m_len = 1;
812				*(mtod(m1, u_char *)) = 0;
813				m->m_next = m1;
814			}
815		}
816		m = m_gethdr(M_DONTWAIT, MT_HEADER);
817		if (m == NULL) {
818			m_freem(m0);
819			return (ENOBUFS);
820		}
821		/*
822		 * Fill in mbuf with extended SP header
823		 * and addresses and length put into network format.
824		 */
825		MH_ALIGN(m, sizeof(struct spx));
826		m->m_len = sizeof(struct spx);
827		m->m_next = m0;
828		si = mtod(m, struct spx *);
829		si->si_i = *cb->s_ipx;
830		si->si_s = cb->s_shdr;
831		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
832			register struct spxhdr *sh;
833			if (m0->m_len < sizeof(*sh)) {
834				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
835					m_free(m);
836					m_freem(m0);
837					return (EINVAL);
838				}
839				m->m_next = m0;
840			}
841			sh = mtod(m0, struct spxhdr *);
842			si->si_dt = sh->spx_dt;
843			si->si_cc |= sh->spx_cc & SPX_EM;
844			m0->m_len -= sizeof(*sh);
845			m0->m_data += sizeof(*sh);
846			len -= sizeof(*sh);
847		}
848		len += sizeof(*si);
849		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
850			si->si_cc |= SPX_EM;
851			spx_newchecks[8]++;
852		}
853		if (cb->s_oobflags & SF_SOOB) {
854			/*
855			 * Per jqj@cornell:
856			 * make sure OB packets convey exactly 1 byte.
857			 * If the packet is 1 byte or larger, we
858			 * have already guaranted there to be at least
859			 * one garbage byte for the checksum, and
860			 * extra bytes shouldn't hurt!
861			 */
862			if (len > sizeof(*si)) {
863				si->si_cc |= SPX_OB;
864				len = (1 + sizeof(*si));
865			}
866		}
867		si->si_len = htons((u_short)len);
868		m->m_pkthdr.len = ((len - 1) | 1) + 1;
869		/*
870		 * queue stuff up for output
871		 */
872		sbappendrecord(sb, m);
873		cb->s_seq++;
874	}
875#ifdef notdef
876	idle = (cb->s_smax == (cb->s_rack - 1));
877#endif
878again:
879	sendalot = 0;
880	off = cb->s_snxt - cb->s_rack;
881	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
882
883	/*
884	 * If in persist timeout with window of 0, send a probe.
885	 * Otherwise, if window is small but nonzero
886	 * and timer expired, send what we can and go into
887	 * transmit state.
888	 */
889	if (cb->s_force == 1 + SPXT_PERSIST) {
890		if (win != 0) {
891			cb->s_timer[SPXT_PERSIST] = 0;
892			cb->s_rxtshift = 0;
893		}
894	}
895	span = cb->s_seq - cb->s_rack;
896	len = min(span, win) - off;
897
898	if (len < 0) {
899		/*
900		 * Window shrank after we went into it.
901		 * If window shrank to 0, cancel pending
902		 * restransmission and pull s_snxt back
903		 * to (closed) window.  We will enter persist
904		 * state below.  If the widndow didn't close completely,
905		 * just wait for an ACK.
906		 */
907		len = 0;
908		if (win == 0) {
909			cb->s_timer[SPXT_REXMT] = 0;
910			cb->s_snxt = cb->s_rack;
911		}
912	}
913	if (len > 1)
914		sendalot = 1;
915	rcv_win = sbspace(&so->so_rcv);
916
917	/*
918	 * Send if we owe peer an ACK.
919	 */
920	if (cb->s_oobflags & SF_SOOB) {
921		/*
922		 * must transmit this out of band packet
923		 */
924		cb->s_oobflags &= ~ SF_SOOB;
925		sendalot = 1;
926		spxstat.spxs_sndurg++;
927		goto found;
928	}
929	if (cb->s_flags & SF_ACKNOW)
930		goto send;
931	if (cb->s_state < TCPS_ESTABLISHED)
932		goto send;
933	/*
934	 * Silly window can't happen in spx.
935	 * Code from tcp deleted.
936	 */
937	if (len)
938		goto send;
939	/*
940	 * Compare available window to amount of window
941	 * known to peer (as advertised window less
942	 * next expected input.)  If the difference is at least two
943	 * packets or at least 35% of the mximum possible window,
944	 * then want to send a window update to peer.
945	 */
946	if (rcv_win > 0) {
947		u_short delta =  1 + cb->s_alo - cb->s_ack;
948		int adv = rcv_win - (delta * cb->s_mtu);
949
950		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
951		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
952			spxstat.spxs_sndwinup++;
953			cb->s_flags |= SF_ACKNOW;
954			goto send;
955		}
956
957	}
958	/*
959	 * Many comments from tcp_output.c are appropriate here
960	 * including . . .
961	 * If send window is too small, there is data to transmit, and no
962	 * retransmit or persist is pending, then go to persist state.
963	 * If nothing happens soon, send when timer expires:
964	 * if window is nonzero, transmit what we can,
965	 * otherwise send a probe.
966	 */
967	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
968		cb->s_timer[SPXT_PERSIST] == 0) {
969			cb->s_rxtshift = 0;
970			spx_setpersist(cb);
971	}
972	/*
973	 * No reason to send a packet, just return.
974	 */
975	cb->s_outx = 1;
976	return (0);
977
978send:
979	/*
980	 * Find requested packet.
981	 */
982	si = 0;
983	if (len > 0) {
984		cb->s_want = cb->s_snxt;
985		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
986			si = mtod(m, struct spx *);
987			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
988				break;
989		}
990	found:
991		if (si != NULL) {
992			if (si->si_seq == cb->s_snxt)
993					cb->s_snxt++;
994				else
995					spxstat.spxs_sndvoid++, si = 0;
996		}
997	}
998	/*
999	 * update window
1000	 */
1001	if (rcv_win < 0)
1002		rcv_win = 0;
1003	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
1004	if (SSEQ_LT(alo, cb->s_alo))
1005		alo = cb->s_alo;
1006
1007	if (si != NULL) {
1008		/*
1009		 * must make a copy of this packet for
1010		 * ipx_output to monkey with
1011		 */
1012		m = m_copy(dtom(si), 0, (int)M_COPYALL);
1013		if (m == NULL) {
1014			return (ENOBUFS);
1015		}
1016		si = mtod(m, struct spx *);
1017		if (SSEQ_LT(si->si_seq, cb->s_smax))
1018			spxstat.spxs_sndrexmitpack++;
1019		else
1020			spxstat.spxs_sndpack++;
1021	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1022		/*
1023		 * Must send an acknowledgement or a probe
1024		 */
1025		if (cb->s_force)
1026			spxstat.spxs_sndprobe++;
1027		if (cb->s_flags & SF_ACKNOW)
1028			spxstat.spxs_sndacks++;
1029		m = m_gethdr(M_DONTWAIT, MT_HEADER);
1030		if (m == NULL)
1031			return (ENOBUFS);
1032		/*
1033		 * Fill in mbuf with extended SP header
1034		 * and addresses and length put into network format.
1035		 */
1036		MH_ALIGN(m, sizeof(struct spx));
1037		m->m_len = sizeof(*si);
1038		m->m_pkthdr.len = sizeof(*si);
1039		si = mtod(m, struct spx *);
1040		si->si_i = *cb->s_ipx;
1041		si->si_s = cb->s_shdr;
1042		si->si_seq = cb->s_smax + 1;
1043		si->si_len = htons(sizeof(*si));
1044		si->si_cc |= SPX_SP;
1045	} else {
1046		cb->s_outx = 3;
1047		if (so->so_options & SO_DEBUG || traceallspxs)
1048			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1049		return (0);
1050	}
1051	/*
1052	 * Stuff checksum and output datagram.
1053	 */
1054	if ((si->si_cc & SPX_SP) == 0) {
1055		if (cb->s_force != (1 + SPXT_PERSIST) ||
1056		    cb->s_timer[SPXT_PERSIST] == 0) {
1057			/*
1058			 * If this is a new packet and we are not currently
1059			 * timing anything, time this one.
1060			 */
1061			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1062				cb->s_smax = si->si_seq;
1063				if (cb->s_rtt == 0) {
1064					spxstat.spxs_segstimed++;
1065					cb->s_rtseq = si->si_seq;
1066					cb->s_rtt = 1;
1067				}
1068			}
1069			/*
1070			 * Set rexmt timer if not currently set,
1071			 * Initial value for retransmit timer is smoothed
1072			 * round-trip time + 2 * round-trip time variance.
1073			 * Initialize shift counter which is used for backoff
1074			 * of retransmit time.
1075			 */
1076			if (cb->s_timer[SPXT_REXMT] == 0 &&
1077			    cb->s_snxt != cb->s_rack) {
1078				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1079				if (cb->s_timer[SPXT_PERSIST]) {
1080					cb->s_timer[SPXT_PERSIST] = 0;
1081					cb->s_rxtshift = 0;
1082				}
1083			}
1084		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1085			cb->s_smax = si->si_seq;
1086		}
1087	} else if (cb->s_state < TCPS_ESTABLISHED) {
1088		if (cb->s_rtt == 0)
1089			cb->s_rtt = 1; /* Time initial handshake */
1090		if (cb->s_timer[SPXT_REXMT] == 0)
1091			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1092	}
1093	{
1094		/*
1095		 * Do not request acks when we ack their data packets or
1096		 * when we do a gratuitous window update.
1097		 */
1098		if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1099				si->si_cc |= SPX_SA;
1100		si->si_seq = htons(si->si_seq);
1101		si->si_alo = htons(alo);
1102		si->si_ack = htons(cb->s_ack);
1103
1104		if (ipxcksum) {
1105			si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1106		} else
1107			si->si_sum = 0xffff;
1108
1109		cb->s_outx = 4;
1110		if (so->so_options & SO_DEBUG || traceallspxs)
1111			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1112
1113		if (so->so_options & SO_DONTROUTE)
1114			error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1115		else
1116			error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1117	}
1118	if (error) {
1119		return (error);
1120	}
1121	spxstat.spxs_sndtotal++;
1122	/*
1123	 * Data sent (as far as we can tell).
1124	 * If this advertises a larger window than any other segment,
1125	 * then remember the size of the advertized window.
1126	 * Any pending ACK has now been sent.
1127	 */
1128	cb->s_force = 0;
1129	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1130	if (SSEQ_GT(alo, cb->s_alo))
1131		cb->s_alo = alo;
1132	if (sendalot)
1133		goto again;
1134	cb->s_outx = 5;
1135	return (0);
1136}
1137
1138static int spx_do_persist_panics = 0;
1139
1140static void
1141spx_setpersist(cb)
1142	register struct spxpcb *cb;
1143{
1144	register int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1145
1146	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1147
1148	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1149		panic("spx_output REXMT");
1150	/*
1151	 * Start/restart persistance timer.
1152	 */
1153	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1154	    t*spx_backoff[cb->s_rxtshift],
1155	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1156	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1157		cb->s_rxtshift++;
1158}
1159
1160int
1161spx_ctloutput(so, sopt)
1162	struct socket *so;
1163	struct sockopt *sopt;
1164{
1165	struct ipxpcb *ipxp = sotoipxpcb(so);
1166	register struct spxpcb *cb;
1167	int mask, error;
1168	short soptval;
1169	u_short usoptval;
1170	int optval;
1171
1172	error = 0;
1173
1174	if (sopt->sopt_level != IPXPROTO_SPX) {
1175		/* This will have to be changed when we do more general
1176		   stacking of protocols */
1177		return (ipx_ctloutput(so, sopt));
1178	}
1179	if (ipxp == NULL)
1180		return (EINVAL);
1181	else
1182		cb = ipxtospxpcb(ipxp);
1183
1184	switch (sopt->sopt_dir) {
1185	case SOPT_GET:
1186		switch (sopt->sopt_name) {
1187		case SO_HEADERS_ON_INPUT:
1188			mask = SF_HI;
1189			goto get_flags;
1190
1191		case SO_HEADERS_ON_OUTPUT:
1192			mask = SF_HO;
1193		get_flags:
1194			/* Unlocked read. */
1195			soptval = cb->s_flags & mask;
1196			error = sooptcopyout(sopt, &soptval, sizeof soptval);
1197			break;
1198
1199		case SO_MTU:
1200			/* Unlocked read. */
1201			usoptval = cb->s_mtu;
1202			error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1203			break;
1204
1205		case SO_LAST_HEADER:
1206			/* Unlocked read. */
1207			error = sooptcopyout(sopt, &cb->s_rhdr,
1208					     sizeof cb->s_rhdr);
1209			break;
1210
1211		case SO_DEFAULT_HEADERS:
1212			/* Unlocked read. */
1213			error = sooptcopyout(sopt, &cb->s_shdr,
1214					     sizeof cb->s_shdr);
1215			break;
1216
1217		default:
1218			error = ENOPROTOOPT;
1219		}
1220		break;
1221
1222	case SOPT_SET:
1223		switch (sopt->sopt_name) {
1224			/* XXX why are these shorts on get and ints on set?
1225			   that doesn't make any sense... */
1226		case SO_HEADERS_ON_INPUT:
1227			mask = SF_HI;
1228			goto set_head;
1229
1230		case SO_HEADERS_ON_OUTPUT:
1231			mask = SF_HO;
1232		set_head:
1233			error = sooptcopyin(sopt, &optval, sizeof optval,
1234					    sizeof optval);
1235			if (error)
1236				break;
1237
1238			IPX_LOCK(ipxp);
1239			if (cb->s_flags & SF_PI) {
1240				if (optval)
1241					cb->s_flags |= mask;
1242				else
1243					cb->s_flags &= ~mask;
1244			} else error = EINVAL;
1245			IPX_UNLOCK(ipxp);
1246			break;
1247
1248		case SO_MTU:
1249			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1250					    sizeof usoptval);
1251			if (error)
1252				break;
1253			/* Unlocked write. */
1254			cb->s_mtu = usoptval;
1255			break;
1256
1257#ifdef SF_NEWCALL
1258		case SO_NEWCALL:
1259			error = sooptcopyin(sopt, &optval, sizeof optval,
1260					    sizeof optval);
1261			if (error)
1262				break;
1263			IPX_LOCK(ipxp);
1264			if (optval) {
1265				cb->s_flags2 |= SF_NEWCALL;
1266				spx_newchecks[5]++;
1267			} else {
1268				cb->s_flags2 &= ~SF_NEWCALL;
1269				spx_newchecks[6]++;
1270			}
1271			IPX_UNLOCK(ipxp);
1272			break;
1273#endif
1274
1275		case SO_DEFAULT_HEADERS:
1276			{
1277				struct spxhdr sp;
1278
1279				error = sooptcopyin(sopt, &sp, sizeof sp,
1280						    sizeof sp);
1281				if (error)
1282					break;
1283				IPX_LOCK(ipxp);
1284				cb->s_dt = sp.spx_dt;
1285				cb->s_cc = sp.spx_cc & SPX_EM;
1286				IPX_UNLOCK(ipxp);
1287			}
1288			break;
1289
1290		default:
1291			error = ENOPROTOOPT;
1292		}
1293		break;
1294	}
1295	return (error);
1296}
1297
1298static int
1299spx_usr_abort(so)
1300	struct socket *so;
1301{
1302	struct ipxpcb *ipxp;
1303	struct spxpcb *cb;
1304
1305	ipxp = sotoipxpcb(so);
1306	cb = ipxtospxpcb(ipxp);
1307
1308	IPX_LIST_LOCK();
1309	IPX_LOCK(ipxp);
1310	spx_drop(cb, ECONNABORTED);
1311	IPX_LIST_UNLOCK();
1312	return (0);
1313}
1314
1315/*
1316 * Accept a connection.  Essentially all the work is
1317 * done at higher levels; just return the address
1318 * of the peer, storing through addr.
1319 */
1320static int
1321spx_accept(so, nam)
1322	struct socket *so;
1323	struct sockaddr **nam;
1324{
1325	struct ipxpcb *ipxp;
1326	struct sockaddr_ipx *sipx, ssipx;
1327
1328	ipxp = sotoipxpcb(so);
1329	sipx = &ssipx;
1330	bzero(sipx, sizeof *sipx);
1331	sipx->sipx_len = sizeof *sipx;
1332	sipx->sipx_family = AF_IPX;
1333	IPX_LOCK(ipxp);
1334	sipx->sipx_addr = ipxp->ipxp_faddr;
1335	IPX_UNLOCK(ipxp);
1336	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1337	return (0);
1338}
1339
1340static int
1341spx_attach(so, proto, td)
1342	struct socket *so;
1343	int proto;
1344	struct thread *td;
1345{
1346	struct ipxpcb *ipxp;
1347	struct spxpcb *cb;
1348	struct mbuf *mm;
1349	struct sockbuf *sb;
1350	int error;
1351
1352	ipxp = sotoipxpcb(so);
1353	cb = ipxtospxpcb(ipxp);
1354
1355	if (ipxp != NULL)
1356		return (EISCONN);
1357	IPX_LIST_LOCK();
1358	error = ipx_pcballoc(so, &ipxpcb_list, td);
1359	if (error)
1360		goto spx_attach_end;
1361	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1362		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1363		if (error)
1364			goto spx_attach_end;
1365	}
1366	ipxp = sotoipxpcb(so);
1367
1368	MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1369
1370	if (cb == NULL) {
1371		error = ENOBUFS;
1372		goto spx_attach_end;
1373	}
1374	sb = &so->so_snd;
1375
1376	mm = m_getclr(M_DONTWAIT, MT_HEADER);
1377	if (mm == NULL) {
1378		FREE(cb, M_PCB);
1379		error = ENOBUFS;
1380		goto spx_attach_end;
1381	}
1382	cb->s_ipx = mtod(mm, struct ipx *);
1383	cb->s_state = TCPS_LISTEN;
1384	cb->s_smax = -1;
1385	cb->s_swl1 = -1;
1386	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1387	cb->s_ipxpcb = ipxp;
1388	cb->s_mtu = 576 - sizeof(struct spx);
1389	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1390	cb->s_ssthresh = cb->s_cwnd;
1391	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1392	/* Above is recomputed when connecting to account
1393	   for changed buffering or mtu's */
1394	cb->s_rtt = SPXTV_SRTTBASE;
1395	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1396	SPXT_RANGESET(cb->s_rxtcur,
1397	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1398	    SPXTV_MIN, SPXTV_REXMTMAX);
1399	ipxp->ipxp_pcb = (caddr_t)cb;
1400spx_attach_end:
1401	IPX_LIST_UNLOCK();
1402	return (error);
1403}
1404
/*
 * Bind a local address to the socket's ipxpcb.  This is a locked wrapper
 * around ipx_pcbbind(); its result is returned unchanged.
 */
static int
spx_bind(so, nam, td)
	struct socket *so;
	struct sockaddr *nam;
	struct thread *td;
{
	struct ipxpcb *pcb = sotoipxpcb(so);
	int rv;

	IPX_LIST_LOCK();
	IPX_LOCK(pcb);
	rv = ipx_pcbbind(pcb, nam, td);
	IPX_UNLOCK(pcb);
	IPX_LIST_UNLOCK();

	return (rv);
}
1423
1424/*
1425 * Initiate connection to peer.
1426 * Enter SYN_SENT state, and mark socket as connecting.
1427 * Start keep-alive timer, setup prototype header,
1428 * Send initial system packet requesting connection.
1429 */
1430static int
1431spx_connect(so, nam, td)
1432	struct socket *so;
1433	struct sockaddr *nam;
1434	struct thread *td;
1435{
1436	struct ipxpcb *ipxp;
1437	struct spxpcb *cb;
1438	int error;
1439
1440	ipxp = sotoipxpcb(so);
1441	cb = ipxtospxpcb(ipxp);
1442
1443	IPX_LIST_LOCK();
1444	IPX_LOCK(ipxp);
1445	if (ipxp->ipxp_lport == 0) {
1446		error = ipx_pcbbind(ipxp, NULL, td);
1447		if (error)
1448			goto spx_connect_end;
1449	}
1450	error = ipx_pcbconnect(ipxp, nam, td);
1451	if (error)
1452		goto spx_connect_end;
1453	soisconnecting(so);
1454	spxstat.spxs_connattempt++;
1455	cb->s_state = TCPS_SYN_SENT;
1456	cb->s_did = 0;
1457	spx_template(cb);
1458	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1459	cb->s_force = 1 + SPXTV_KEEP;
1460	/*
1461	 * Other party is required to respond to
1462	 * the port I send from, but he is not
1463	 * required to answer from where I am sending to,
1464	 * so allow wildcarding.
1465	 * original port I am sending to is still saved in
1466	 * cb->s_dport.
1467	 */
1468	ipxp->ipxp_fport = 0;
1469	error = spx_output(cb, NULL);
1470spx_connect_end:
1471	IPX_UNLOCK(ipxp);
1472	IPX_LIST_UNLOCK();
1473	return (error);
1474}
1475
1476static int
1477spx_detach(so)
1478	struct socket *so;
1479{
1480	struct ipxpcb *ipxp;
1481	struct spxpcb *cb;
1482
1483	ipxp = sotoipxpcb(so);
1484	cb = ipxtospxpcb(ipxp);
1485
1486	if (ipxp == NULL)
1487		return (ENOTCONN);
1488	IPX_LIST_LOCK();
1489	IPX_LOCK(ipxp);
1490	if (cb->s_state > TCPS_LISTEN)
1491		spx_disconnect(cb);
1492	else
1493		spx_close(cb);
1494	IPX_LIST_UNLOCK();
1495	return (0);
1496}
1497
/*
 * User-requested disconnect.  This is the hook where an optional
 * SPX-level closing handshake could be added later; at present it just
 * calls spx_disconnect() with the list and pcb locks held.  The pcb lock
 * is not released here because the control block is invalid once
 * spx_disconnect() returns.  Always returns 0.
 */
static int
spx_usr_disconnect(so)
	struct socket *so;
{
	struct ipxpcb *pcb = sotoipxpcb(so);
	struct spxpcb *conn = ipxtospxpcb(pcb);

	IPX_LIST_LOCK();
	IPX_LOCK(pcb);
	spx_disconnect(conn);
	IPX_LIST_UNLOCK();

	return (0);
}
1519
1520static int
1521spx_listen(so, backlog, td)
1522	struct socket *so;
1523	int backlog;
1524	struct thread *td;
1525{
1526	int error;
1527	struct ipxpcb *ipxp;
1528	struct spxpcb *cb;
1529
1530	error = 0;
1531	ipxp = sotoipxpcb(so);
1532	cb = ipxtospxpcb(ipxp);
1533
1534	IPX_LIST_LOCK();
1535	IPX_LOCK(ipxp);
1536	SOCK_LOCK(so);
1537	error = solisten_proto_check(so);
1538	if (error == 0 && ipxp->ipxp_lport == 0)
1539		error = ipx_pcbbind(ipxp, NULL, td);
1540	if (error == 0) {
1541		cb->s_state = TCPS_LISTEN;
1542		solisten_proto(so, backlog);
1543	}
1544	SOCK_UNLOCK(so);
1545	IPX_UNLOCK(ipxp);
1546	IPX_LIST_UNLOCK();
1547	return (error);
1548}
1549
1550/*
1551 * After a receive, possibly send acknowledgment
1552 * updating allocation.
1553 */
1554static int
1555spx_rcvd(so, flags)
1556	struct socket *so;
1557	int flags;
1558{
1559	struct ipxpcb *ipxp;
1560	struct spxpcb *cb;
1561
1562	ipxp = sotoipxpcb(so);
1563	cb = ipxtospxpcb(ipxp);
1564
1565	IPX_LOCK(ipxp);
1566	cb->s_flags |= SF_RVD;
1567	spx_output(cb, NULL);
1568	cb->s_flags &= ~SF_RVD;
1569	IPX_UNLOCK(ipxp);
1570	return (0);
1571}
1572
1573static int
1574spx_rcvoob(so, m, flags)
1575	struct socket *so;
1576	struct mbuf *m;
1577	int flags;
1578{
1579	struct ipxpcb *ipxp;
1580	struct spxpcb *cb;
1581
1582	ipxp = sotoipxpcb(so);
1583	cb = ipxtospxpcb(ipxp);
1584
1585	SOCKBUF_LOCK(&so->so_rcv);
1586	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1587	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1588		SOCKBUF_UNLOCK(&so->so_rcv);
1589		m->m_len = 1;
1590		/* Unlocked read. */
1591		*mtod(m, caddr_t) = cb->s_iobc;
1592		return (0);
1593	}
1594	SOCKBUF_UNLOCK(&so->so_rcv);
1595	return (EINVAL);
1596}
1597
1598static int
1599spx_send(so, flags, m, addr, controlp, td)
1600	struct socket *so;
1601	int flags;
1602	struct mbuf *m;
1603	struct sockaddr *addr;
1604	struct mbuf *controlp;
1605	struct thread *td;
1606{
1607	int error;
1608	struct ipxpcb *ipxp;
1609	struct spxpcb *cb;
1610
1611	error = 0;
1612	ipxp = sotoipxpcb(so);
1613	cb = ipxtospxpcb(ipxp);
1614
1615	IPX_LOCK(ipxp);
1616	if (flags & PRUS_OOB) {
1617		if (sbspace(&so->so_snd) < -512) {
1618			error = ENOBUFS;
1619			goto spx_send_end;
1620		}
1621		cb->s_oobflags |= SF_SOOB;
1622	}
1623	if (controlp != NULL) {
1624		u_short *p = mtod(controlp, u_short *);
1625		spx_newchecks[2]++;
1626		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1627			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1628			spx_newchecks[3]++;
1629		}
1630		m_freem(controlp);
1631	}
1632	controlp = NULL;
1633	error = spx_output(cb, m);
1634	m = NULL;
1635spx_send_end:
1636	IPX_UNLOCK(ipxp);
1637	if (controlp != NULL)
1638		m_freem(controlp);
1639	if (m != NULL)
1640		m_freem(m);
1641	return (error);
1642}
1643
/*
 * Shut down the sending side of a socket: mark it as unable to send more
 * and run spx_usrclosed() on the control block.  The pcb lock is not
 * released afterwards because the control block is invalid once
 * spx_usrclosed() returns.  Always returns 0.
 */
static int
spx_shutdown(so)
	struct socket *so;
{
	struct ipxpcb *pcb = sotoipxpcb(so);
	struct spxpcb *conn = ipxtospxpcb(pcb);

	socantsendmore(so);

	IPX_LIST_LOCK();
	IPX_LOCK(pcb);
	spx_usrclosed(conn);
	IPX_LIST_UNLOCK();

	return (0);
}
1661
1662static int
1663spx_sp_attach(so, proto, td)
1664	struct socket *so;
1665	int proto;
1666	struct thread *td;
1667{
1668	int error;
1669	struct ipxpcb *ipxp;
1670
1671	error = spx_attach(so, proto, td);
1672	if (error == 0) {
1673		ipxp = sotoipxpcb(so);
1674		((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1675					(SF_HI | SF_HO | SF_PI);
1676	}
1677	return (error);
1678}
1679
1680/*
1681 * Create template to be used to send spx packets on a connection.
1682 * Called after host entry created, fills
1683 * in a skeletal spx header (choosing connection id),
1684 * minimizing the amount of work necessary when the connection is used.
1685 */
1686static void
1687spx_template(cb)
1688	register struct spxpcb *cb;
1689{
1690	register struct ipxpcb *ipxp = cb->s_ipxpcb;
1691	register struct ipx *ipx = cb->s_ipx;
1692	register struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1693
1694	IPX_LOCK_ASSERT(ipxp);
1695
1696	ipx->ipx_pt = IPXPROTO_SPX;
1697	ipx->ipx_sna = ipxp->ipxp_laddr;
1698	ipx->ipx_dna = ipxp->ipxp_faddr;
1699	cb->s_sid = htons(spx_iss);
1700	spx_iss += SPX_ISSINCR/2;
1701	cb->s_alo = 1;
1702	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1703	cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1704					of large packets */
1705	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1706	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1707		/* But allow for lots of little packets as well */
1708}
1709
1710/*
1711 * Close a SPIP control block:
1712 *	discard spx control block itself
1713 *	discard ipx protocol control block
1714 *	wake up any sleepers
1715 * cb will always be invalid after this call.
1716 */
1717void
1718spx_close(cb)
1719	register struct spxpcb *cb;
1720{
1721	register struct spx_q *s;
1722	struct ipxpcb *ipxp = cb->s_ipxpcb;
1723	struct socket *so = ipxp->ipxp_socket;
1724	register struct mbuf *m;
1725
1726	IPX_LIST_LOCK_ASSERT();
1727	IPX_LOCK_ASSERT(ipxp);
1728
1729	s = cb->s_q.si_next;
1730	while (s != &(cb->s_q)) {
1731		s = s->si_next;
1732		m = dtom(s->si_prev);
1733		remque(s->si_prev);
1734		m_freem(m);
1735	}
1736	m_free(dtom(cb->s_ipx));
1737	FREE(cb, M_PCB);
1738	ipxp->ipxp_pcb = NULL;
1739	soisdisconnected(so);
1740	ipx_pcbdetach(ipxp);
1741	spxstat.spxs_closed++;
1742}
1743
1744/*
1745 *	Someday we may do level 3 handshaking
1746 *	to close a connection or send a xerox style error.
1747 *	For now, just close.
1748 * cb will always be invalid after this call.
1749 */
1750static void
1751spx_usrclosed(cb)
1752	register struct spxpcb *cb;
1753{
1754
1755	IPX_LIST_LOCK_ASSERT();
1756	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1757
1758	spx_close(cb);
1759}
1760
1761/*
1762 * cb will always be invalid after this call.
1763 */
1764static void
1765spx_disconnect(cb)
1766	register struct spxpcb *cb;
1767{
1768
1769	IPX_LIST_LOCK_ASSERT();
1770	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1771
1772	spx_close(cb);
1773}
1774
1775/*
1776 * Drop connection, reporting
1777 * the specified error.
1778 * cb will always be invalid after this call.
1779 */
1780static void
1781spx_drop(cb, errno)
1782	register struct spxpcb *cb;
1783	int errno;
1784{
1785	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1786
1787	IPX_LIST_LOCK_ASSERT();
1788	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1789
1790	/*
1791	 * someday, in the xerox world
1792	 * we will generate error protocol packets
1793	 * announcing that the socket has gone away.
1794	 */
1795	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1796		spxstat.spxs_drops++;
1797		cb->s_state = TCPS_CLOSED;
1798		/*tcp_output(cb);*/
1799	} else
1800		spxstat.spxs_conndrops++;
1801	so->so_error = errno;
1802	spx_close(cb);
1803}
1804
1805/*
1806 * Fast timeout routine for processing delayed acks
1807 */
1808void
1809spx_fasttimo()
1810{
1811	struct ipxpcb *ipxp;
1812	struct spxpcb *cb;
1813
1814	IPX_LIST_LOCK();
1815	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1816		IPX_LOCK(ipxp);
1817		if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1818		    (cb->s_flags & SF_DELACK)) {
1819			cb->s_flags &= ~SF_DELACK;
1820			cb->s_flags |= SF_ACKNOW;
1821			spxstat.spxs_delack++;
1822			spx_output(cb, NULL);
1823		}
1824		IPX_UNLOCK(ipxp);
1825	}
1826	IPX_LIST_UNLOCK();
1827}
1828
1829/*
1830 * spx protocol timeout routine called every 500 ms.
1831 * Updates the timers in all active pcb's and
1832 * causes finite state machine actions if timers expire.
1833 */
1834void
1835spx_slowtimo()
1836{
1837	struct ipxpcb *ip, *ip_temp;
1838	struct spxpcb *cb;
1839	int i;
1840
1841	/*
1842	 * Search through tcb's and update active timers.  Note that timers
1843	 * may free the ipxpcb, so be sure to handle that case.
1844	 *
1845	 * spx_timers() may remove an ipxpcb entry, so we have to be ready to
1846	 * continue despite that.  The logic here is a bit obfuscated.
1847	 */
1848	IPX_LIST_LOCK();
1849	LIST_FOREACH_SAFE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1850		cb = ipxtospxpcb(ip);
1851		if (cb == NULL)
1852			continue;
1853		IPX_LOCK(cb->s_ipxpcb);
1854		for (i = 0; i < SPXT_NTIMERS; i++) {
1855			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1856				/*
1857				 * spx_timers() returns (NULL) if it free'd
1858				 * the pcb.
1859				 */
1860				cb = spx_timers(cb, i);
1861				if (cb == NULL)
1862					break;
1863			}
1864		}
1865		if (cb != NULL) {
1866			cb->s_idle++;
1867			if (cb->s_rtt)
1868				cb->s_rtt++;
1869			IPX_UNLOCK(cb->s_ipxpcb);
1870		}
1871	}
1872	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1873	IPX_LIST_UNLOCK();
1874}
1875
1876/*
1877 * SPX timer processing.
1878 */
1879static struct spxpcb *
1880spx_timers(cb, timer)
1881	register struct spxpcb *cb;
1882	int timer;
1883{
1884	long rexmt;
1885	int win;
1886
1887	IPX_LIST_LOCK_ASSERT();
1888	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1889
1890	cb->s_force = 1 + timer;
1891	switch (timer) {
1892
1893	/*
1894	 * 2 MSL timeout in shutdown went off.  TCP deletes connection
1895	 * control block.
1896	 */
1897	case SPXT_2MSL:
1898		printf("spx: SPXT_2MSL went off for no reason\n");
1899		cb->s_timer[timer] = 0;
1900		break;
1901
1902	/*
1903	 * Retransmission timer went off.  Message has not
1904	 * been acked within retransmit interval.  Back off
1905	 * to a longer retransmit interval and retransmit one packet.
1906	 */
1907	case SPXT_REXMT:
1908		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1909			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1910			spxstat.spxs_timeoutdrop++;
1911			spx_drop(cb, ETIMEDOUT);
1912			cb = NULL;
1913			break;
1914		}
1915		spxstat.spxs_rexmttimeo++;
1916		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1917		rexmt *= spx_backoff[cb->s_rxtshift];
1918		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1919		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1920		/*
1921		 * If we have backed off fairly far, our srtt
1922		 * estimate is probably bogus.  Clobber it
1923		 * so we'll take the next rtt measurement as our srtt;
1924		 * move the current srtt into rttvar to keep the current
1925		 * retransmit times until then.
1926		 */
1927		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1928			cb->s_rttvar += (cb->s_srtt >> 2);
1929			cb->s_srtt = 0;
1930		}
1931		cb->s_snxt = cb->s_rack;
1932		/*
1933		 * If timing a packet, stop the timer.
1934		 */
1935		cb->s_rtt = 0;
1936		/*
1937		 * See very long discussion in tcp_timer.c about congestion
1938		 * window and sstrhesh
1939		 */
1940		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1941		if (win < 2)
1942			win = 2;
1943		cb->s_cwnd = CUNIT;
1944		cb->s_ssthresh = win * CUNIT;
1945		spx_output(cb, NULL);
1946		break;
1947
1948	/*
1949	 * Persistance timer into zero window.
1950	 * Force a probe to be sent.
1951	 */
1952	case SPXT_PERSIST:
1953		spxstat.spxs_persisttimeo++;
1954		spx_setpersist(cb);
1955		spx_output(cb, NULL);
1956		break;
1957
1958	/*
1959	 * Keep-alive timer went off; send something
1960	 * or drop connection if idle for too long.
1961	 */
1962	case SPXT_KEEP:
1963		spxstat.spxs_keeptimeo++;
1964		if (cb->s_state < TCPS_ESTABLISHED)
1965			goto dropit;
1966		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1967		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1968				goto dropit;
1969			spxstat.spxs_keepprobe++;
1970			spx_output(cb, NULL);
1971		} else
1972			cb->s_idle = 0;
1973		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1974		break;
1975	dropit:
1976		spxstat.spxs_keepdrops++;
1977		spx_drop(cb, ETIMEDOUT);
1978		cb = NULL;
1979		break;
1980	}
1981	return (cb);
1982}
1983