tcp_input.c revision 6348
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)tcp_input.c	8.5 (Berkeley) 4/10/94
34 * $Id: tcp_input.c,v 1.11 1995/02/09 23:13:23 wollman Exp $
35 */
36
37#ifndef TUBA_INCLUDE
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/protosw.h>
43#include <sys/socket.h>
44#include <sys/socketvar.h>
45#include <sys/errno.h>
46
47#include <net/if.h>
48#include <net/route.h>
49
50#include <netinet/in.h>
51#include <netinet/in_systm.h>
52#include <netinet/ip.h>
53#include <netinet/in_pcb.h>
54#include <netinet/ip_var.h>
55#include <netinet/tcp.h>
56#include <netinet/tcp_fsm.h>
57#include <netinet/tcp_seq.h>
58#include <netinet/tcp_timer.h>
59#include <netinet/tcp_var.h>
60#include <netinet/tcpip.h>
61#ifdef TCPDEBUG
62#include <netinet/tcp_debug.h>
63struct	tcpiphdr tcp_saveti;
64#endif
65
66int	tcprexmtthresh = 3;	/* NOTE(review): not referenced in the visible code; presumably the duplicate-ACK count that triggers fast retransmit -- confirm */
67struct	inpcb *tcp_last_inpcb = &tcb;	/* one-entry cache: tcp_input() tries this PCB before falling back to in_pcblookup() */
68tcp_seq	tcp_iss;	/* initial send sequence number handed to new passive connections; advanced by TCP_ISSINCR/2 per accept */
69tcp_cc	tcp_ccgen;	/* fed to CC_INC() to produce tp->cc_send (TTCP builds) */
70struct	inpcb tcb;	/* root PCB passed to in_pcblookup() when locating a segment's connection */
71struct	tcpstat tcpstat;	/* TCP statistics counters (tcps_* fields) */
72u_long	tcp_now;	/* current time value used to age ts_recent and to compute timestamp-based RTT */
73
74#endif /* TUBA_INCLUDE */
75
/*
 * TCP_REASS(tp, ti, m, so, flags)
 *
 * Deliver an incoming data segment to the socket, or hand it to
 * tcp_reass() for insertion into the reassembly queue.
 *
 *	tp	TCP control block of the connection.
 *	ti	header of the segment; ti_seq, ti_len and ti_flags are read.
 *	m	mbuf chain holding the segment.
 *	so	socket whose receive buffer takes in-order data.
 *	flags	lvalue that receives TH_FIN if the data presented to the
 *		user now ends with a FIN, otherwise 0.
 *
 * The common case is handled inline: the segment is the next one
 * expected (ti_seq == rcv_nxt), the reassembly queue is empty
 * (seg_next points back at tp) and the connection is ESTABLISHED.
 * Then rcv_nxt is advanced, the receive statistics are bumped, the
 * mbuf is appended to so_rcv, the reader is woken and TF_DELACK is set.
 * This avoids linking the segment into and out of the queue.
 *
 * Every other segment goes through tcp_reass() and TF_ACKNOW is set,
 * so out-of-order arrivals are acked immediately (which lets the
 * sender's fast retransmit work).
 *
 * Every parameter is parenthesized at each use so that arbitrary
 * expressions may be passed.  Arguments are evaluated more than once;
 * do not pass expressions with side effects.  The expansion is a bare
 * brace block, so use it as a statement of its own rather than as the
 * unbraced body of an if that has an else.
 */
#define	TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		sbappend(&(so)->so_rcv, (m)); \
		sorwakeup(so); \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
102#ifndef TUBA_INCLUDE
103
104int
105tcp_reass(tp, ti, m)
106	register struct tcpcb *tp;
107	register struct tcpiphdr *ti;
108	struct mbuf *m;
109{
110	register struct tcpiphdr *q;
111	struct socket *so = tp->t_inpcb->inp_socket;
112	int flags;
113
114	/*
115	 * Call with ti==0 after become established to
116	 * force pre-ESTABLISHED data up to user socket.
117	 */
118	if (ti == 0)
119		goto present;
120
121	/*
122	 * Find a segment which begins after this one does.
123	 */
124	for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
125	    q = (struct tcpiphdr *)q->ti_next)
126		if (SEQ_GT(q->ti_seq, ti->ti_seq))
127			break;
128
129	/*
130	 * If there is a preceding segment, it may provide some of
131	 * our data already.  If so, drop the data from the incoming
132	 * segment.  If it provides all of our data, drop us.
133	 */
134	if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
135		register int i;
136		q = (struct tcpiphdr *)q->ti_prev;
137		/* conversion to int (in i) handles seq wraparound */
138		i = q->ti_seq + q->ti_len - ti->ti_seq;
139		if (i > 0) {
140			if (i >= ti->ti_len) {
141				tcpstat.tcps_rcvduppack++;
142				tcpstat.tcps_rcvdupbyte += ti->ti_len;
143				m_freem(m);
144#ifdef TTCP
145				/*
146				 * Try to present any queued data
147				 * at the left window edge to the user.
148				 * This is needed after the 3-WHS
149				 * completes.
150				 */
151				goto present;	/* ??? */
152#else
153				return (0);
154#endif
155			}
156			m_adj(m, i);
157			ti->ti_len -= i;
158			ti->ti_seq += i;
159		}
160		q = (struct tcpiphdr *)(q->ti_next);
161	}
162	tcpstat.tcps_rcvoopack++;
163	tcpstat.tcps_rcvoobyte += ti->ti_len;
164	REASS_MBUF(ti) = m;		/* XXX */
165
166	/*
167	 * While we overlap succeeding segments trim them or,
168	 * if they are completely covered, dequeue them.
169	 */
170	while (q != (struct tcpiphdr *)tp) {
171		register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
172		if (i <= 0)
173			break;
174		if (i < q->ti_len) {
175			q->ti_seq += i;
176			q->ti_len -= i;
177			m_adj(REASS_MBUF(q), i);
178			break;
179		}
180		q = (struct tcpiphdr *)q->ti_next;
181		m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
182		remque(q->ti_prev);
183		m_freem(m);
184	}
185
186	/*
187	 * Stick new segment in its place.
188	 */
189	insque(ti, q->ti_prev);
190
191present:
192	/*
193	 * Present data to user, advancing rcv_nxt through
194	 * completed sequence space.
195	 */
196	if (!TCPS_HAVEESTABLISHED(tp->t_state))
197		return (0);
198	ti = tp->seg_next;
199	if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
200		return (0);
201	if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
202		return (0);
203	do {
204		tp->rcv_nxt += ti->ti_len;
205		flags = ti->ti_flags & TH_FIN;
206		remque(ti);
207		m = REASS_MBUF(ti);
208		ti = (struct tcpiphdr *)ti->ti_next;
209		if (so->so_state & SS_CANTRCVMORE)
210			m_freem(m);
211		else
212			sbappend(&so->so_rcv, m);
213	} while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
214	sorwakeup(so);
215	return (flags);
216}
217
218/*
219 * TCP input routine, follows pages 65-76 of the
220 * protocol specification dated September, 1981 very closely.
221 */
222void
223tcp_input(m, iphlen)
224	register struct mbuf *m;
225	int iphlen;
226{
227	register struct tcpiphdr *ti;
228	register struct inpcb *inp;
229	caddr_t optp = NULL;
230	int optlen = 0;
231	int len, tlen, off;
232	register struct tcpcb *tp = 0;
233	register int tiflags;
234	struct socket *so = 0;
235	int todrop, acked, ourfinisacked, needoutput = 0;
236	struct in_addr laddr;
237	int dropsocket = 0;
238	int iss = 0;
239#ifdef TTCP
240	u_long tiwin;
241	struct tcpopt to;		/* options in this segment */
242	struct rmxp_tao *taop;		/* pointer to our TAO cache entry */
243	struct rmxp_tao	tao_noncached;	/* in case there's no cached entry */
244#else
245	u_long tiwin, ts_val, ts_ecr;
246	int ts_present = 0;
247#endif
248#ifdef TCPDEBUG
249	short ostate = 0;
250#endif
251
252#ifdef TTCP
253	bzero((char *)&to, sizeof(to));
254#endif
255
256	tcpstat.tcps_rcvtotal++;
257	/*
258	 * Get IP and TCP header together in first mbuf.
259	 * Note: IP leaves IP header in first mbuf.
260	 */
261	ti = mtod(m, struct tcpiphdr *);
262	if (iphlen > sizeof (struct ip))
263		ip_stripoptions(m, (struct mbuf *)0);
264	if (m->m_len < sizeof (struct tcpiphdr)) {
265		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
266			tcpstat.tcps_rcvshort++;
267			return;
268		}
269		ti = mtod(m, struct tcpiphdr *);
270	}
271
272	/*
273	 * Checksum extended TCP header and data.
274	 */
275	tlen = ((struct ip *)ti)->ip_len;
276	len = sizeof (struct ip) + tlen;
277	ti->ti_next = ti->ti_prev = 0;
278	ti->ti_x1 = 0;
279	ti->ti_len = (u_short)tlen;
280	HTONS(ti->ti_len);
281	ti->ti_sum = in_cksum(m, len);
282	if (ti->ti_sum) {
283		tcpstat.tcps_rcvbadsum++;
284		goto drop;
285	}
286#endif /* TUBA_INCLUDE */
287
288	/*
289	 * Check that TCP offset makes sense,
290	 * pull out TCP options and adjust length.		XXX
291	 */
292	off = ti->ti_off << 2;
293	if (off < sizeof (struct tcphdr) || off > tlen) {
294		tcpstat.tcps_rcvbadoff++;
295		goto drop;
296	}
297	tlen -= off;
298	ti->ti_len = tlen;
299	if (off > sizeof (struct tcphdr)) {
300		if (m->m_len < sizeof(struct ip) + off) {
301			if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
302				tcpstat.tcps_rcvshort++;
303				return;
304			}
305			ti = mtod(m, struct tcpiphdr *);
306		}
307		optlen = off - sizeof (struct tcphdr);
308		optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
309		/*
310		 * Do quick retrieval of timestamp options ("options
311		 * prediction?").  If timestamp is the only option and it's
312		 * formatted as recommended in RFC 1323 appendix A, we
313		 * quickly get the values now and not bother calling
314		 * tcp_dooptions(), etc.
315		 */
316		if ((optlen == TCPOLEN_TSTAMP_APPA ||
317		     (optlen > TCPOLEN_TSTAMP_APPA &&
318			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
319		     *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
320		     (ti->ti_flags & TH_SYN) == 0) {
321#ifdef TTCP
322			to.to_flag |= TOF_TS;
323			to.to_tsval = ntohl(*(u_long *)(optp + 4));
324			to.to_tsecr = ntohl(*(u_long *)(optp + 8));
325#else
326			ts_present = 1;
327			ts_val = ntohl(*(u_long *)(optp + 4));
328			ts_ecr = ntohl(*(u_long *)(optp + 8));
329#endif
330			optp = NULL;	/* we've parsed the options */
331		}
332	}
333	tiflags = ti->ti_flags;
334
335	/*
336	 * Convert TCP protocol specific fields to host format.
337	 */
338	NTOHL(ti->ti_seq);
339	NTOHL(ti->ti_ack);
340	NTOHS(ti->ti_win);
341	NTOHS(ti->ti_urp);
342
343	/*
344	 * Locate pcb for segment.
345	 */
346findpcb:
347	inp = tcp_last_inpcb;
348	if (inp->inp_lport != ti->ti_dport ||
349	    inp->inp_fport != ti->ti_sport ||
350	    inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
351	    inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
352		inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
353		    ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
354		if (inp)
355			tcp_last_inpcb = inp;
356		++tcpstat.tcps_pcbcachemiss;
357	}
358
359	/*
360	 * If the state is CLOSED (i.e., TCB does not exist) then
361	 * all data in the incoming segment is discarded.
362	 * If the TCB exists but is in CLOSED state, it is embryonic,
363	 * but should either do a listen or a connect soon.
364	 */
365	if (inp == 0)
366		goto dropwithreset;
367	tp = intotcpcb(inp);
368	if (tp == 0)
369		goto dropwithreset;
370	if (tp->t_state == TCPS_CLOSED)
371		goto drop;
372
373	/* Unscale the window into a 32-bit value. */
374	if ((tiflags & TH_SYN) == 0)
375		tiwin = ti->ti_win << tp->snd_scale;
376	else
377		tiwin = ti->ti_win;
378
379	so = inp->inp_socket;
380	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
381#ifdef TCPDEBUG
382		if (so->so_options & SO_DEBUG) {
383			ostate = tp->t_state;
384			tcp_saveti = *ti;
385		}
386#endif
387		if (so->so_options & SO_ACCEPTCONN) {
388#ifdef TTCP
389			register struct tcpcb *tp0 = tp;
390#endif
391			so = sonewconn(so, 0);
392			if (so == 0)
393				goto drop;
394			/*
395			 * This is ugly, but ....
396			 *
397			 * Mark socket as temporary until we're
398			 * committed to keeping it.  The code at
399			 * ``drop'' and ``dropwithreset'' check the
400			 * flag dropsocket to see if the temporary
401			 * socket created here should be discarded.
402			 * We mark the socket as discardable until
403			 * we're committed to it below in TCPS_LISTEN.
404			 */
405			dropsocket++;
406			inp = (struct inpcb *)so->so_pcb;
407			inp->inp_laddr = ti->ti_dst;
408			inp->inp_lport = ti->ti_dport;
409#if BSD>=43
410			inp->inp_options = ip_srcroute();
411#endif
412			tp = intotcpcb(inp);
413			tp->t_state = TCPS_LISTEN;
414#ifdef TTCP
415			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
416#endif
417
418			/* Compute proper scaling value from buffer space */
419			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
420			   TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
421				tp->request_r_scale++;
422		}
423	}
424
425	/*
426	 * Segment received on connection.
427	 * Reset idle time and keep-alive timer.
428	 */
429	tp->t_idle = 0;
430	tp->t_timer[TCPT_KEEP] = tcp_keepidle;
431
432	/*
433	 * Process options if not in LISTEN state,
434	 * else do it below (after getting remote address).
435	 */
436	if (optp && tp->t_state != TCPS_LISTEN)
437		tcp_dooptions(tp, optp, optlen, ti,
438#ifdef TTCP
439			&to);
440#else
441			&ts_present, &ts_val, &ts_ecr);
442#endif
443
444	/*
445	 * Header prediction: check for the two common cases
446	 * of a uni-directional data xfer.  If the packet has
447	 * no control flags, is in-sequence, the window didn't
448	 * change and we're not retransmitting, it's a
449	 * candidate.  If the length is zero and the ack moved
450	 * forward, we're the sender side of the xfer.  Just
451	 * free the data acked & wake any higher level process
452	 * that was blocked waiting for space.  If the length
453	 * is non-zero and the ack didn't move, we're the
454	 * receiver side.  If we're getting packets in-order
455	 * (the reassembly queue is empty), add the data to
456	 * the socket buffer and note that we need a delayed ack.
457#ifdef TTCP
458	 * Make sure that the hidden state-flags are also off.
459	 * Since we check for TCPS_ESTABLISHED above, it can only
460	 * be TF_NEEDSYN.
461#endif
462	 */
463	if (tp->t_state == TCPS_ESTABLISHED &&
464	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
465#ifdef TTCP
466	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
467	    ((to.to_flag & TOF_TS) == 0 ||
468	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
469	    /*
470	     * Using the CC option is compulsory if once started:
471	     *   the segment is OK if no T/TCP was negotiated or
472	     *   if the segment has a CC option equal to CCrecv
473	     */
474	    ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
475	     (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) &&
476#else
477	    (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
478#endif
479	    ti->ti_seq == tp->rcv_nxt &&
480	    tiwin && tiwin == tp->snd_wnd &&
481	    tp->snd_nxt == tp->snd_max) {
482
483		/*
484		 * If last ACK falls within this segment's sequence numbers,
485		 * record the timestamp.
486		 * NOTE that the test is modified according to the latest
487		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
488		 */
489#ifdef TTCP
490		if ((to.to_flag & TOF_TS) != 0 &&
491		   SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
492			tp->ts_recent_age = tcp_now;
493			tp->ts_recent = to.to_tsval;
494#else
495		if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
496			tp->ts_recent_age = tcp_now;
497			tp->ts_recent = ts_val;
498#endif
499		}
500
501		if (ti->ti_len == 0) {
502			if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
503			    SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
504			    tp->snd_cwnd >= tp->snd_wnd) {
505				/*
506				 * this is a pure ack for outstanding data.
507				 */
508				++tcpstat.tcps_predack;
509#ifdef TTCP
510				if ((to.to_flag & TOF_TS) != 0)
511					tcp_xmit_timer(tp,
512					    tcp_now - to.to_tsecr + 1);
513#else
514				if (ts_present)
515					tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
516#endif
517				else if (tp->t_rtt &&
518					    SEQ_GT(ti->ti_ack, tp->t_rtseq))
519					tcp_xmit_timer(tp, tp->t_rtt);
520				acked = ti->ti_ack - tp->snd_una;
521				tcpstat.tcps_rcvackpack++;
522				tcpstat.tcps_rcvackbyte += acked;
523				sbdrop(&so->so_snd, acked);
524				tp->snd_una = ti->ti_ack;
525				m_freem(m);
526
527				/*
528				 * If all outstanding data are acked, stop
529				 * retransmit timer, otherwise restart timer
530				 * using current (possibly backed-off) value.
531				 * If process is waiting for space,
532				 * wakeup/selwakeup/signal.  If data
533				 * are ready to send, let tcp_output
534				 * decide between more output or persist.
535				 */
536				if (tp->snd_una == tp->snd_max)
537					tp->t_timer[TCPT_REXMT] = 0;
538				else if (tp->t_timer[TCPT_PERSIST] == 0)
539					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
540
541				if (so->so_snd.sb_flags & SB_NOTIFY)
542					sowwakeup(so);
543				if (so->so_snd.sb_cc)
544					(void) tcp_output(tp);
545				return;
546			}
547		} else if (ti->ti_ack == tp->snd_una &&
548		    tp->seg_next == (struct tcpiphdr *)tp &&
549		    ti->ti_len <= sbspace(&so->so_rcv)) {
550			/*
551			 * this is a pure, in-sequence data packet
552			 * with nothing on the reassembly queue and
553			 * we have enough buffer space to take it.
554			 */
555			++tcpstat.tcps_preddat;
556			tp->rcv_nxt += ti->ti_len;
557			tcpstat.tcps_rcvpack++;
558			tcpstat.tcps_rcvbyte += ti->ti_len;
559			/*
560			 * Drop TCP, IP headers and TCP options then add data
561			 * to socket buffer.
562			 */
563			m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
564			m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
565			sbappend(&so->so_rcv, m);
566			sorwakeup(so);
567			tp->t_flags |= TF_DELACK;
568			return;
569		}
570	}
571
572	/*
573	 * Drop TCP, IP headers and TCP options.
574	 */
575	m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
576	m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
577
578	/*
579	 * Calculate amount of space in receive window,
580	 * and then do TCP input processing.
581	 * Receive window is amount of space in rcv queue,
582	 * but not less than advertised window.
583	 */
584	{ int win;
585
586	win = sbspace(&so->so_rcv);
587	if (win < 0)
588		win = 0;
589	tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
590	}
591
592	switch (tp->t_state) {
593
594	/*
595	 * If the state is LISTEN then ignore segment if it contains an RST.
596	 * If the segment contains an ACK then it is bad and send a RST.
597	 * If it does not contain a SYN then it is not interesting; drop it.
598	 * Don't bother responding if the destination was a broadcast.
599	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
600	 * tp->iss, and send a segment:
601	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
602	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
603	 * Fill in remote peer address fields if not previously specified.
604	 * Enter SYN_RECEIVED state, and process any other fields of this
605	 * segment in this state.
606	 */
607	case TCPS_LISTEN: {
608		struct mbuf *am;
609		register struct sockaddr_in *sin;
610
611		if (tiflags & TH_RST)
612			goto drop;
613		if (tiflags & TH_ACK)
614			goto dropwithreset;
615		if ((tiflags & TH_SYN) == 0)
616			goto drop;
617		/*
618		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
619		 * in_broadcast() should never return true on a received
620		 * packet with M_BCAST not set.
621		 */
622		if (m->m_flags & (M_BCAST|M_MCAST) ||
623		    IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
624			goto drop;
625		am = m_get(M_DONTWAIT, MT_SONAME);	/* XXX */
626		if (am == NULL)
627			goto drop;
628		am->m_len = sizeof (struct sockaddr_in);
629		sin = mtod(am, struct sockaddr_in *);
630		sin->sin_family = AF_INET;
631		sin->sin_len = sizeof(*sin);
632		sin->sin_addr = ti->ti_src;
633		sin->sin_port = ti->ti_sport;
634		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
635		laddr = inp->inp_laddr;
636		if (inp->inp_laddr.s_addr == INADDR_ANY)
637			inp->inp_laddr = ti->ti_dst;
638		if (in_pcbconnect(inp, am)) {
639			inp->inp_laddr = laddr;
640			(void) m_free(am);
641			goto drop;
642		}
643		(void) m_free(am);
644		tp->t_template = tcp_template(tp);
645		if (tp->t_template == 0) {
646			tp = tcp_drop(tp, ENOBUFS);
647			dropsocket = 0;		/* socket is already gone */
648			goto drop;
649		}
650#ifdef TTCP
651		if ((taop = tcp_gettaocache(inp)) == NULL) {
652			taop = &tao_noncached;
653			bzero(taop, sizeof(*taop));
654		}
655#endif /* TTCP */
656		if (optp)
657			tcp_dooptions(tp, optp, optlen, ti,
658#ifdef TTCP
659				&to);
660#else
661				&ts_present, &ts_val, &ts_ecr);
662#endif
663		if (iss)
664			tp->iss = iss;
665		else
666			tp->iss = tcp_iss;
667		tcp_iss += TCP_ISSINCR/2;
668		tp->irs = ti->ti_seq;
669		tcp_sendseqinit(tp);
670		tcp_rcvseqinit(tp);
671#ifdef TTCP
672		/*
673		 * Initialization of the tcpcb for transaction;
674		 *   set SND.WND = SEG.WND,
675		 *   initialize CCsend and CCrecv.
676		 */
677		tp->snd_wnd = tiwin;	/* initial send-window */
678		tp->cc_send = CC_INC(tcp_ccgen);
679		tp->cc_recv = to.to_cc;
680		/*
681		 * Perform TAO test on incoming CC (SEG.CC) option, if any.
682		 * - compare SEG.CC against cached CC from the same host,
683		 *	if any.
684	 * - if SEG.CC > cached value, SYN must be new and is accepted
685		 *	immediately: save new CC in the cache, mark the socket
686		 *	connected, enter ESTABLISHED state, turn on flag to
687		 *	send a SYN in the next segment.
688		 *	A virtual advertised window is set in rcv_adv to
689		 *	initialize SWS prevention.  Then enter normal segment
690		 *	processing: drop SYN, process data and FIN.
691		 * - otherwise do a normal 3-way handshake.
692		 */
693		if ((to.to_flag & TOF_CC) != 0) {
694		    if (taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
695			taop->tao_cc = to.to_cc;
696			tp->t_state = TCPS_ESTABLISHED;
697
698			/*
699			 * If there is a FIN, or if there is data and the
700			 * connection is local, then delay SYN,ACK(SYN) in
701			 * the hope of piggy-backing it on a response
702			 * segment.  Otherwise must send ACK now in case
703			 * the other side is slow starting.
704			 */
705			if ((tiflags & TH_FIN) || (ti->ti_len != 0 &&
706			    in_localaddr(inp->inp_faddr)))
707				tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
708			else
709				tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
710			tp->rcv_adv += tp->rcv_wnd;
711			tcpstat.tcps_connects++;
712			soisconnected(so);
713			tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
714			dropsocket = 0;		/* committed to socket */
715			tcpstat.tcps_accepts++;
716			goto trimthenstep6;
717		    }
718		/* else do standard 3-way handshake */
719		} else {
720		    /*
721		     * No CC option, but maybe CC.NEW:
722		     *   invalidate cached value.
723		     */
724		     taop->tao_cc = 0;
725		}
726		/*
727		 * TAO test failed or there was no CC option,
728		 *    do a standard 3-way handshake.
729		 */
730#endif /* TTCP */
731		tp->t_flags |= TF_ACKNOW;
732		tp->t_state = TCPS_SYN_RECEIVED;
733		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
734		dropsocket = 0;		/* committed to socket */
735		tcpstat.tcps_accepts++;
736		goto trimthenstep6;
737		}
738
739	/*
740	 * If the state is SYN_SENT:
741	 *	if seg contains an ACK, but not for our SYN, drop the input.
742	 *	if seg contains a RST, then drop the connection.
743	 *	if seg does not contain SYN, then drop it.
744	 * Otherwise this is an acceptable SYN segment
745	 *	initialize tp->rcv_nxt and tp->irs
746	 *	if seg contains ack then advance tp->snd_una
747	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
748	 *	arrange for segment to be acked (eventually)
749	 *	continue processing rest of data/controls, beginning with URG
750	 */
751	case TCPS_SYN_SENT:
752#ifdef TTCP
753		if ((taop = tcp_gettaocache(inp)) == NULL) {
754			taop = &tao_noncached;
755			bzero(taop, sizeof(*taop));
756		}
757
758		if ((tiflags & TH_ACK) &&
759		    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
760		     SEQ_GT(ti->ti_ack, tp->snd_max))) {
761			/*
762			 * If we have a cached CCsent for the remote host,
763			 * hence we haven't just crashed and restarted,
764			 * do not send a RST.  This may be a retransmission
765			 * from the other side after our earlier ACK was lost.
766			 * Our new SYN, when it arrives, will serve as the
767			 * needed ACK.
768			 */
769			if (taop->tao_ccsent != 0)
770				goto drop;
771			else
772				goto dropwithreset;
773		}
774#else
775		if ((tiflags & TH_ACK) &&
776		    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
777		     SEQ_GT(ti->ti_ack, tp->snd_max)))
778			goto dropwithreset;
779#endif
780		if (tiflags & TH_RST) {
781			if (tiflags & TH_ACK)
782				tp = tcp_drop(tp, ECONNREFUSED);
783			goto drop;
784		}
785		if ((tiflags & TH_SYN) == 0)
786			goto drop;
787#ifdef TTCP
788		tp->snd_wnd = ti->ti_win;	/* initial send window */
789		tp->cc_recv = to.to_cc;		/* foreign CC */
790#else
791		if (tiflags & TH_ACK) {
792			tp->snd_una = ti->ti_ack;
793			if (SEQ_LT(tp->snd_nxt, tp->snd_una))
794				tp->snd_nxt = tp->snd_una;
795		}
796		tp->t_timer[TCPT_REXMT] = 0;
797#endif
798
799		tp->irs = ti->ti_seq;
800		tcp_rcvseqinit(tp);
801#ifndef TTCP
802		tp->t_flags |= TF_ACKNOW;
803		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
804#else
805		if (tiflags & TH_ACK && SEQ_GT(ti->ti_ack, tp->iss)) {
806#endif
807			tcpstat.tcps_connects++;
808			soisconnected(so);
809			/* Do window scaling on this connection? */
810			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
811				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
812				tp->snd_scale = tp->requested_s_scale;
813				tp->rcv_scale = tp->request_r_scale;
814			}
815#ifdef TTCP
816			/*
817			 * Our SYN was acked.  If segment contains CC.ECHO
818			 * option, check it to make sure this segment really
819			 * matches our SYN.  If not, just drop it as old
820			 * duplicate, but send an RST if we're still playing
821			 * by the old rules.
822			 */
823			if ((to.to_flag & TOF_CCECHO) &&
824			    tp->cc_send != to.to_ccecho) {
825				if (taop->tao_ccsent != 0)
826					goto drop;
827				else
828					goto dropwithreset;
829			}
830			/* Segment is acceptable, update cache if undefined. */
831			if (taop->tao_ccsent == 0)
832				taop->tao_ccsent = to.to_ccecho;
833
834			tp->rcv_adv += tp->rcv_wnd;
835			tp->snd_una++;		/* SYN is acked */
836			/*
837			 * If there's data, delay ACK; if there's also a FIN
838			 * ACKNOW will be turned on later.
839			 */
840			if (ti->ti_len != 0)
841				tp->t_flags |= TF_DELACK;
842			else
843				tp->t_flags |= TF_ACKNOW;
844			/*
845			 * Received <SYN,ACK> in SYN_SENT[*] state.
846			 * Transitions:
847			 *	SYN_SENT  --> ESTABLISHED
848			 *	SYN_SENT* --> FIN_WAIT_1
849			 */
850			if (tp->t_flags & TF_NEEDFIN) {
851				tp->t_state = TCPS_FIN_WAIT_1;
852				tp->t_flags &= ~TF_NEEDFIN;
853				tiflags &= ~TH_SYN;
854			} else
855				tp->t_state = TCPS_ESTABLISHED;
856#else
857			tp->t_state = TCPS_ESTABLISHED;
858			(void) tcp_reass(tp, (struct tcpiphdr *)0,
859				(struct mbuf *)0);
860			/*
861			 * if we didn't have to retransmit the SYN,
862			 * use its rtt as our initial srtt & rtt var.
863			 */
864			if (tp->t_rtt)
865				tcp_xmit_timer(tp, tp->t_rtt);
866#endif
867
868#ifdef TTCP
869		} else {
870		/*
871		 *  Received initial SYN in SYN-SENT[*] state => simul-
872		 *  taneous open.  If segment contains CC option and there is
873		 *  a cached CC, apply TAO test; if it succeeds, connection is
874		 *  half-synchronized.  Otherwise, do 3-way handshake:
875		 *        SYN-SENT -> SYN-RECEIVED
876		 *        SYN-SENT* -> SYN-RECEIVED*
877		 *  If there was no CC option, clear cached CC value.
878		 */
879			tp->t_flags |= TF_ACKNOW;
880			tp->t_timer[TCPT_REXMT] = 0;
881			if (to.to_flag & TOF_CC) {
882				if (taop->tao_cc != 0 &&
883				    CC_GT(to.to_cc, taop->tao_cc)) {
884					/*
885					 * update cache and make transition:
886					 *        SYN-SENT -> ESTABLISHED*
887					 *        SYN-SENT* -> FIN-WAIT-1*
888					 */
889					taop->tao_cc = to.to_cc;
890					if (tp->t_flags & TF_NEEDFIN) {
891						tp->t_state = TCPS_FIN_WAIT_1;
892						tp->t_flags &= ~TF_NEEDFIN;
893					} else
894						tp->t_state = TCPS_ESTABLISHED;
895					tp->t_flags |= TF_NEEDSYN;
896				} else
897					tp->t_state = TCPS_SYN_RECEIVED;
898			} else {
899				/* CC.NEW or no option => invalidate cache */
900				taop->tao_cc = 0;
901				tp->t_state = TCPS_SYN_RECEIVED;
902			}
903		}
904#else
905		} else
906			tp->t_state = TCPS_SYN_RECEIVED;
907#endif
908
909trimthenstep6:
910		/*
911		 * Advance ti->ti_seq to correspond to first data byte.
912		 * If data, trim to stay within window,
913		 * dropping FIN if necessary.
914		 */
915		ti->ti_seq++;
916		if (ti->ti_len > tp->rcv_wnd) {
917			todrop = ti->ti_len - tp->rcv_wnd;
918			m_adj(m, -todrop);
919			ti->ti_len = tp->rcv_wnd;
920			tiflags &= ~TH_FIN;
921			tcpstat.tcps_rcvpackafterwin++;
922			tcpstat.tcps_rcvbyteafterwin += todrop;
923		}
924		tp->snd_wl1 = ti->ti_seq - 1;
925		tp->rcv_up = ti->ti_seq;
926#ifdef TTCP
927		/*
928		 *  Client side of transaction: already sent SYN and data.
929		 *  If the remote host used T/TCP to validate the SYN,
930		 *  our data will be ACK'd; if so, enter normal data segment
931		 *  processing in the middle of step 5, ack processing.
932		 *  Otherwise, goto step 6.
933		 */
934 		if (tiflags & TH_ACK)
935			goto process_ACK;
936#endif
937		goto step6;
938#ifdef TTCP
939	/*
940	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
941	 *	if segment contains a SYN and CC [not CC.NEW] option:
942	 *              if state == TIME_WAIT and connection duration > MSL,
943	 *                  drop packet and send RST;
944	 *
945	 *		if SEG.CC > CCrecv then is new SYN, and can implicitly
946	 *		    ack the FIN (and data) in retransmission queue.
947	 *                  Complete close and delete TCPCB.  Then reprocess
948	 *                  segment, hoping to find new TCPCB in LISTEN state;
949	 *
950	 *		else must be old SYN; drop it.
951	 *      else do normal processing.
952	 */
953	case TCPS_LAST_ACK:
954	case TCPS_CLOSING:
955	case TCPS_TIME_WAIT:
956		if ((tiflags & TH_SYN) &&
957		    (to.to_flag & TOF_CC) && tp->cc_recv != 0) {
958			if (tp->t_state == TCPS_TIME_WAIT &&
959					tp->t_duration > TCPTV_MSL)
960				goto dropwithreset;
961			if (CC_GT(to.to_cc, tp->cc_recv)) {
962				tp = tcp_close(tp);
963				goto findpcb;
964			}
965			else
966				goto drop;
967		}
968 		break;  /* continue normal processing */
969#endif
970	}
971
972	/*
973	 * States other than LISTEN or SYN_SENT.
974	 * First check timestamp, if present.
975#ifdef TTCP
976	 * Then check the connection count, if present.
977#endif
978	 * Then check that at least some bytes of segment are within
979	 * receive window.  If segment begins before rcv_nxt,
980	 * drop leading data (and SYN); if nothing left, just ack.
981	 *
982	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
983	 * and it's less than ts_recent, drop it.
984	 */
985#ifdef TTCP
986	if ((to.to_flag & TOF_TS) != 0 && (tiflags & TH_RST) == 0 &&
987	    tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) {
988#else
989	if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
990	    TSTMP_LT(ts_val, tp->ts_recent)) {
991#endif
992
993		/* Check to see if ts_recent is over 24 days old.  */
994		if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
995			/*
996			 * Invalidate ts_recent.  If this segment updates
997			 * ts_recent, the age will be reset later and ts_recent
998			 * will get a valid value.  If it does not, setting
999			 * ts_recent to zero will at least satisfy the
1000			 * requirement that zero be placed in the timestamp
1001			 * echo reply when ts_recent isn't valid.  The
1002			 * age isn't reset until we get a valid ts_recent
1003			 * because we don't want out-of-order segments to be
1004			 * dropped when ts_recent is old.
1005			 */
1006			tp->ts_recent = 0;
1007		} else {
1008			tcpstat.tcps_rcvduppack++;
1009			tcpstat.tcps_rcvdupbyte += ti->ti_len;
1010			tcpstat.tcps_pawsdrop++;
1011			goto dropafterack;
1012		}
1013	}
1014
1015#ifdef TTCP
1016	/*
1017	 * T/TCP mechanism
1018	 *   If T/TCP was negotiated and the segment doesn't have CC,
1019	 *   or if it's CC is wrong then drop the segment.
1020	 *   RST segments do not have to comply with this.
1021	 */
1022	if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
1023	    ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc) &&
1024	    (tiflags & TH_RST) == 0)
1025 		goto dropafterack;
1026#endif
1027
1028	todrop = tp->rcv_nxt - ti->ti_seq;
1029	if (todrop > 0) {
1030		if (tiflags & TH_SYN) {
1031			tiflags &= ~TH_SYN;
1032			ti->ti_seq++;
1033			if (ti->ti_urp > 1)
1034				ti->ti_urp--;
1035			else
1036				tiflags &= ~TH_URG;
1037			todrop--;
1038		}
1039		if (todrop >= ti->ti_len) {
1040			tcpstat.tcps_rcvduppack++;
1041			tcpstat.tcps_rcvdupbyte += ti->ti_len;
1042			/*
1043			 * If segment is just one to the left of the window,
1044			 * check two special cases:
1045			 * 1. Don't toss RST in response to 4.2-style keepalive.
1046			 * 2. If the only thing to drop is a FIN, we can drop
1047			 *    it, but check the ACK or we will get into FIN
1048			 *    wars if our FINs crossed (both CLOSING).
1049			 * In either case, send ACK to resynchronize,
1050			 * but keep on processing for RST or ACK.
1051			 */
1052			if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
1053#ifdef TCP_COMPAT_42
1054			  || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
1055#endif
1056			   ) {
1057				todrop = ti->ti_len;
1058				tiflags &= ~TH_FIN;
1059				tp->t_flags |= TF_ACKNOW;
1060			} else {
1061				/*
1062				 * Handle the case when a bound socket connects
1063				 * to itself. Allow packets with a SYN and
1064				 * an ACK to continue with the processing.
1065				 */
1066				if (todrop != 0 || (tiflags & TH_ACK) == 0)
1067					goto dropafterack;
1068			}
1069		} else {
1070			tcpstat.tcps_rcvpartduppack++;
1071			tcpstat.tcps_rcvpartdupbyte += todrop;
1072		}
1073		m_adj(m, todrop);
1074		ti->ti_seq += todrop;
1075		ti->ti_len -= todrop;
1076		if (ti->ti_urp > todrop)
1077			ti->ti_urp -= todrop;
1078		else {
1079			tiflags &= ~TH_URG;
1080			ti->ti_urp = 0;
1081		}
1082	}
1083
1084	/*
1085	 * If new data are received on a connection after the
1086	 * user processes are gone, then RST the other end.
1087	 */
1088	if ((so->so_state & SS_NOFDREF) &&
1089	    tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
1090		tp = tcp_close(tp);
1091		tcpstat.tcps_rcvafterclose++;
1092		goto dropwithreset;
1093	}
1094
1095	/*
1096	 * If segment ends after window, drop trailing data
1097	 * (and PUSH and FIN); if nothing left, just ACK.
1098	 */
1099	todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
1100	if (todrop > 0) {
1101		tcpstat.tcps_rcvpackafterwin++;
1102		if (todrop >= ti->ti_len) {
1103			tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
1104			/*
1105			 * If a new connection request is received
1106			 * while in TIME_WAIT, drop the old connection
1107			 * and start over if the sequence numbers
1108			 * are above the previous ones.
1109			 */
1110			if (tiflags & TH_SYN &&
1111			    tp->t_state == TCPS_TIME_WAIT &&
1112			    SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
1113				iss = tp->rcv_nxt + TCP_ISSINCR;
1114				tp = tcp_close(tp);
1115				goto findpcb;
1116			}
1117			/*
1118			 * If window is closed can only take segments at
1119			 * window edge, and have to drop data and PUSH from
1120			 * incoming segments.  Continue processing, but
1121			 * remember to ack.  Otherwise, drop segment
1122			 * and ack.
1123			 */
1124			if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
1125				tp->t_flags |= TF_ACKNOW;
1126				tcpstat.tcps_rcvwinprobe++;
1127			} else
1128				goto dropafterack;
1129		} else
1130			tcpstat.tcps_rcvbyteafterwin += todrop;
1131		m_adj(m, -todrop);
1132		ti->ti_len -= todrop;
1133		tiflags &= ~(TH_PUSH|TH_FIN);
1134	}
1135
1136	/*
1137	 * If last ACK falls within this segment's sequence numbers,
1138	 * record its timestamp.
1139	 * NOTE that the test is modified according to the latest
1140	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
1141	 */
1142#ifdef TTCP
1143	if ((to.to_flag & TOF_TS) != 0 &&
1144	    SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
1145		tp->ts_recent_age = tcp_now;
1146		tp->ts_recent = to.to_tsval;
1147#else
1148	if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
1149		tp->ts_recent_age = tcp_now;
1150		tp->ts_recent = ts_val;
1151#endif
1152	}
1153
1154	/*
1155	 * If the RST bit is set examine the state:
1156	 *    SYN_RECEIVED STATE:
1157	 *	If passive open, return to LISTEN state.
1158	 *	If active open, inform user that connection was refused.
1159	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
1160	 *	Inform user that connection was reset, and close tcb.
1161	 *    CLOSING, LAST_ACK, TIME_WAIT STATES
1162	 *	Close the tcb.
1163	 */
1164	if (tiflags&TH_RST) switch (tp->t_state) {
1165
1166	case TCPS_SYN_RECEIVED:
1167		so->so_error = ECONNREFUSED;
1168		goto close;
1169
1170	case TCPS_ESTABLISHED:
1171	case TCPS_FIN_WAIT_1:
1172	case TCPS_FIN_WAIT_2:
1173	case TCPS_CLOSE_WAIT:
1174		so->so_error = ECONNRESET;
1175	close:
1176		tp->t_state = TCPS_CLOSED;
1177		tcpstat.tcps_drops++;
1178		tp = tcp_close(tp);
1179		goto drop;
1180
1181	case TCPS_CLOSING:
1182	case TCPS_LAST_ACK:
1183	case TCPS_TIME_WAIT:
1184		tp = tcp_close(tp);
1185		goto drop;
1186	}
1187
1188	/*
1189	 * If a SYN is in the window, then this is an
1190	 * error and we send an RST and drop the connection.
1191	 */
1192	if (tiflags & TH_SYN) {
1193		tp = tcp_drop(tp, ECONNRESET);
1194		goto dropwithreset;
1195	}
1196
1197#ifdef TTCP
1198	/*
1199	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
1200	 * flag is on (half-synchronized state), then queue data for
1201	 * later processing; else drop segment and return.
1202	 */
1203	if ((tiflags & TH_ACK) == 0) {
1204		if (tp->t_state == TCPS_SYN_RECEIVED ||
1205		    (tp->t_flags & TF_NEEDSYN))
1206			goto step6;
1207		else
1208			goto drop;
1209	}
1210#else
1211	/*
1212	 * If the ACK bit is off we drop the segment and return.
1213	 */
1214	if ((tiflags & TH_ACK) == 0)
1215		goto drop;
1216#endif
1217
1218	/*
1219	 * Ack processing.
1220	 */
1221	switch (tp->t_state) {
1222
1223	/*
1224	 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
1225	 * ESTABLISHED state and continue processing, otherwise
1226	 * send an RST.
1227	 */
1228	case TCPS_SYN_RECEIVED:
1229		if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
1230		    SEQ_GT(ti->ti_ack, tp->snd_max))
1231			goto dropwithreset;
1232
1233		tcpstat.tcps_connects++;
1234		soisconnected(so);
1235		/* Do window scaling? */
1236		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1237			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
1238			tp->snd_scale = tp->requested_s_scale;
1239			tp->rcv_scale = tp->request_r_scale;
1240		}
1241#ifdef TTCP
1242		/*
1243		 * Upon successful completion of 3-way handshake,
1244		 * update cache.CC if it was undefined, pass any queued
1245		 * data to the user, and advance state appropriately.
1246		 */
1247		if ((taop = tcp_gettaocache(inp)) != NULL &&
1248		    taop->tao_cc == 0)
1249			taop->tao_cc = tp->cc_recv;
1250
1251		/*
1252		 * Make transitions:
1253		 *      SYN-RECEIVED  -> ESTABLISHED
1254		 *      SYN-RECEIVED* -> FIN-WAIT-1
1255		 */
1256		if (tp->t_flags & TF_NEEDFIN) {
1257			tp->t_state = TCPS_FIN_WAIT_1;
1258			tp->t_flags &= ~TF_NEEDFIN;
1259		} else
1260			tp->t_state = TCPS_ESTABLISHED;
1261		/*
1262		 * If segment contains data or ACK, will call tcp_reass()
1263		 * later; if not, do so now to pass queued data to user.
1264		 */
1265		if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0)
1266			(void) tcp_reass(tp, (struct tcpiphdr *)0,
1267			    (struct mbuf *)0);
1268#else /* TTCP */
1269		tp->t_state = TCPS_ESTABLISHED;
1270		(void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
1271#endif /* TTCP */
1272		tp->snd_wl1 = ti->ti_seq - 1;
1273		/* fall into ... */
1274
1275	/*
1276	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1277	 * ACKs.  If the ack is in the range
1278	 *	tp->snd_una < ti->ti_ack <= tp->snd_max
1279	 * then advance tp->snd_una to ti->ti_ack and drop
1280	 * data from the retransmission queue.  If this ACK reflects
1281	 * more up to date window information we update our window information.
1282	 */
1283	case TCPS_ESTABLISHED:
1284	case TCPS_FIN_WAIT_1:
1285	case TCPS_FIN_WAIT_2:
1286	case TCPS_CLOSE_WAIT:
1287	case TCPS_CLOSING:
1288	case TCPS_LAST_ACK:
1289	case TCPS_TIME_WAIT:
1290
1291		if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
1292			if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
1293				tcpstat.tcps_rcvdupack++;
1294				/*
1295				 * If we have outstanding data (other than
1296				 * a window probe), this is a completely
1297				 * duplicate ack (ie, window info didn't
1298				 * change), the ack is the biggest we've
1299				 * seen and we've seen exactly our rexmt
1300				 * threshhold of them, assume a packet
1301				 * has been dropped and retransmit it.
1302				 * Kludge snd_nxt & the congestion
1303				 * window so we send only this one
1304				 * packet.
1305				 *
1306				 * We know we're losing at the current
1307				 * window size so do congestion avoidance
1308				 * (set ssthresh to half the current window
1309				 * and pull our congestion window back to
1310				 * the new ssthresh).
1311				 *
1312				 * Dup acks mean that packets have left the
1313				 * network (they're now cached at the receiver)
1314				 * so bump cwnd by the amount in the receiver
1315				 * to keep a constant cwnd packets in the
1316				 * network.
1317				 */
1318				if (tp->t_timer[TCPT_REXMT] == 0 ||
1319				    ti->ti_ack != tp->snd_una)
1320					tp->t_dupacks = 0;
1321				else if (++tp->t_dupacks == tcprexmtthresh) {
1322					tcp_seq onxt = tp->snd_nxt;
1323					u_int win =
1324					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
1325						tp->t_maxseg;
1326
1327					if (win < 2)
1328						win = 2;
1329					tp->snd_ssthresh = win * tp->t_maxseg;
1330					tp->t_timer[TCPT_REXMT] = 0;
1331					tp->t_rtt = 0;
1332					tp->snd_nxt = ti->ti_ack;
1333					tp->snd_cwnd = tp->t_maxseg;
1334					(void) tcp_output(tp);
1335					tp->snd_cwnd = tp->snd_ssthresh +
1336					       tp->t_maxseg * tp->t_dupacks;
1337					if (SEQ_GT(onxt, tp->snd_nxt))
1338						tp->snd_nxt = onxt;
1339					goto drop;
1340				} else if (tp->t_dupacks > tcprexmtthresh) {
1341					tp->snd_cwnd += tp->t_maxseg;
1342					(void) tcp_output(tp);
1343					goto drop;
1344				}
1345			} else
1346				tp->t_dupacks = 0;
1347			break;
1348		}
1349		/*
1350		 * If the congestion window was inflated to account
1351		 * for the other side's cached packets, retract it.
1352		 */
1353		if (tp->t_dupacks > tcprexmtthresh &&
1354		    tp->snd_cwnd > tp->snd_ssthresh)
1355			tp->snd_cwnd = tp->snd_ssthresh;
1356		tp->t_dupacks = 0;
1357		if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1358			tcpstat.tcps_rcvacktoomuch++;
1359			goto dropafterack;
1360		}
1361#ifdef TTCP
1362		/*
1363		 *  If we reach this point, ACK is not a duplicate,
1364		 *     i.e., it ACKs something we sent.
1365		 */
1366		if (tp->t_flags & TF_NEEDSYN) {
1367			/*
1368			 *   T/TCP: Connection was half-synchronized, and our
1369			 *   SYN has been ACK'd (so connection is now fully
1370			 *   synchronized).  Go to non-starred state and
1371			 *   increment snd_una for ACK of SYN.
1372			 */
1373			tp->t_flags &= ~TF_NEEDSYN;
1374			tp->snd_una++;
1375		}
1376
1377process_ACK:
1378#endif
1379		acked = ti->ti_ack - tp->snd_una;
1380		tcpstat.tcps_rcvackpack++;
1381		tcpstat.tcps_rcvackbyte += acked;
1382
1383		/*
1384		 * If we have a timestamp reply, update smoothed
1385		 * round trip time.  If no timestamp is present but
1386		 * transmit timer is running and timed sequence
1387		 * number was acked, update smoothed round trip time.
1388		 * Since we now have an rtt measurement, cancel the
1389		 * timer backoff (cf., Phil Karn's retransmit alg.).
1390		 * Recompute the initial retransmit timer.
1391		 */
1392#ifdef TTCP
1393		if (to.to_flag & TOF_TS)
1394			tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
1395#else
1396		if (ts_present)
1397			tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1398#endif
1399		else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
1400			tcp_xmit_timer(tp,tp->t_rtt);
1401
1402		/*
1403		 * If all outstanding data is acked, stop retransmit
1404		 * timer and remember to restart (more output or persist).
1405		 * If there is more data to be acked, restart retransmit
1406		 * timer, using current (possibly backed-off) value.
1407		 */
1408		if (ti->ti_ack == tp->snd_max) {
1409			tp->t_timer[TCPT_REXMT] = 0;
1410			needoutput = 1;
1411		} else if (tp->t_timer[TCPT_PERSIST] == 0)
1412			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1413
1414#ifdef TTCP
1415		/*
1416		 * If no data (only SYN) was ACK'd,
1417		 *    skip rest of ACK processing.
1418		 */
1419		if (acked == 0)
1420			goto step6;
1421#endif
1422
1423		/*
1424		 * When new data is acked, open the congestion window.
1425		 * If the window gives us less than ssthresh packets
1426		 * in flight, open exponentially (maxseg per packet).
1427		 * Otherwise open linearly: maxseg per window
1428		 * (maxseg^2 / cwnd per packet).
1429		 */
1430		{
1431		register u_int cw = tp->snd_cwnd;
1432		register u_int incr = tp->t_maxseg;
1433
1434		if (cw > tp->snd_ssthresh)
1435			incr = incr * incr / cw;
1436		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1437		}
1438		if (acked > so->so_snd.sb_cc) {
1439			tp->snd_wnd -= so->so_snd.sb_cc;
1440			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
1441			ourfinisacked = 1;
1442		} else {
1443			sbdrop(&so->so_snd, acked);
1444			tp->snd_wnd -= acked;
1445			ourfinisacked = 0;
1446		}
1447		if (so->so_snd.sb_flags & SB_NOTIFY)
1448			sowwakeup(so);
1449		tp->snd_una = ti->ti_ack;
1450		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1451			tp->snd_nxt = tp->snd_una;
1452
1453		switch (tp->t_state) {
1454
1455		/*
1456		 * In FIN_WAIT_1 STATE in addition to the processing
1457		 * for the ESTABLISHED state if our FIN is now acknowledged
1458		 * then enter FIN_WAIT_2.
1459		 */
1460		case TCPS_FIN_WAIT_1:
1461			if (ourfinisacked) {
1462				/*
1463				 * If we can't receive any more
1464				 * data, then closing user can proceed.
1465				 * Starting the timer is contrary to the
1466				 * specification, but if we don't get a FIN
1467				 * we'll hang forever.
1468				 */
1469				if (so->so_state & SS_CANTRCVMORE) {
1470					soisdisconnected(so);
1471					tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1472				}
1473				tp->t_state = TCPS_FIN_WAIT_2;
1474			}
1475			break;
1476
1477	 	/*
1478		 * In CLOSING STATE in addition to the processing for
1479		 * the ESTABLISHED state if the ACK acknowledges our FIN
1480		 * then enter the TIME-WAIT state, otherwise ignore
1481		 * the segment.
1482		 */
1483		case TCPS_CLOSING:
1484			if (ourfinisacked) {
1485				tp->t_state = TCPS_TIME_WAIT;
1486				tcp_canceltimers(tp);
1487#ifdef TTCP
1488				/* Shorten TIME_WAIT [RFC-1644, p.28] */
1489				if (tp->cc_recv != 0 &&
1490				    tp->t_duration < TCPTV_MSL)
1491					tp->t_timer[TCPT_2MSL] =
1492					    tp->t_rxtcur * TCPTV_TWTRUNC;
1493				else
1494#endif
1495					tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1496				soisdisconnected(so);
1497			}
1498			break;
1499
1500		/*
1501		 * In LAST_ACK, we may still be waiting for data to drain
1502		 * and/or to be acked, as well as for the ack of our FIN.
1503		 * If our FIN is now acknowledged, delete the TCB,
1504		 * enter the closed state and return.
1505		 */
1506		case TCPS_LAST_ACK:
1507			if (ourfinisacked) {
1508				tp = tcp_close(tp);
1509				goto drop;
1510			}
1511			break;
1512
1513		/*
1514		 * In TIME_WAIT state the only thing that should arrive
1515		 * is a retransmission of the remote FIN.  Acknowledge
1516		 * it and restart the finack timer.
1517		 */
1518		case TCPS_TIME_WAIT:
1519			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1520			goto dropafterack;
1521		}
1522	}
1523
1524step6:
1525	/*
1526	 * Update window information.
1527	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
1528	 */
1529	if ((tiflags & TH_ACK) &&
1530	    (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1531	    (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1532	     (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1533		/* keep track of pure window updates */
1534		if (ti->ti_len == 0 &&
1535		    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
1536			tcpstat.tcps_rcvwinupd++;
1537		tp->snd_wnd = tiwin;
1538		tp->snd_wl1 = ti->ti_seq;
1539		tp->snd_wl2 = ti->ti_ack;
1540		if (tp->snd_wnd > tp->max_sndwnd)
1541			tp->max_sndwnd = tp->snd_wnd;
1542		needoutput = 1;
1543	}
1544
1545	/*
1546	 * Process segments with URG.
1547	 */
1548	if ((tiflags & TH_URG) && ti->ti_urp &&
1549	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1550		/*
1551		 * This is a kludge, but if we receive and accept
1552		 * random urgent pointers, we'll crash in
1553		 * soreceive.  It's hard to imagine someone
1554		 * actually wanting to send this much urgent data.
1555		 */
1556		if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
1557			ti->ti_urp = 0;			/* XXX */
1558			tiflags &= ~TH_URG;		/* XXX */
1559			goto dodata;			/* XXX */
1560		}
1561		/*
1562		 * If this segment advances the known urgent pointer,
1563		 * then mark the data stream.  This should not happen
1564		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1565		 * a FIN has been received from the remote side.
1566		 * In these states we ignore the URG.
1567		 *
1568		 * According to RFC961 (Assigned Protocols),
1569		 * the urgent pointer points to the last octet
1570		 * of urgent data.  We continue, however,
1571		 * to consider it to indicate the first octet
1572		 * of data past the urgent section as the original
1573		 * spec states (in one of two places).
1574		 */
1575		if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
1576			tp->rcv_up = ti->ti_seq + ti->ti_urp;
1577			so->so_oobmark = so->so_rcv.sb_cc +
1578			    (tp->rcv_up - tp->rcv_nxt) - 1;
1579			if (so->so_oobmark == 0)
1580				so->so_state |= SS_RCVATMARK;
1581			sohasoutofband(so);
1582			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1583		}
1584		/*
1585		 * Remove out of band data so doesn't get presented to user.
1586		 * This can happen independent of advancing the URG pointer,
1587		 * but if two URG's are pending at once, some out-of-band
1588		 * data may creep in... ick.
1589		 */
1590		if (ti->ti_urp <= (u_long)ti->ti_len
1591#ifdef SO_OOBINLINE
1592		     && (so->so_options & SO_OOBINLINE) == 0
1593#endif
1594		     )
1595			tcp_pulloutofband(so, ti, m);
1596	} else
1597		/*
1598		 * If no out of band data is expected,
1599		 * pull receive urgent pointer along
1600		 * with the receive window.
1601		 */
1602		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1603			tp->rcv_up = tp->rcv_nxt;
1604dodata:							/* XXX */
1605
1606	/*
1607	 * Process the segment text, merging it into the TCP sequencing queue,
1608	 * and arranging for acknowledgment of receipt if necessary.
1609	 * This process logically involves adjusting tp->rcv_wnd as data
1610	 * is presented to the user (this happens in tcp_usrreq.c,
1611	 * case PRU_RCVD).  If a FIN has already been received on this
1612	 * connection then we just ignore the text.
1613	 */
1614	if ((ti->ti_len || (tiflags&TH_FIN)) &&
1615	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1616		TCP_REASS(tp, ti, m, so, tiflags);
1617		/*
1618		 * Note the amount of data that peer has sent into
1619		 * our window, in order to estimate the sender's
1620		 * buffer size.
1621		 */
1622		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
1623	} else {
1624		m_freem(m);
1625		tiflags &= ~TH_FIN;
1626	}
1627
1628	/*
1629	 * If FIN is received ACK the FIN and let the user know
1630	 * that the connection is closing.
1631	 */
1632	if (tiflags & TH_FIN) {
1633		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1634			socantrcvmore(so);
1635#ifdef TTCP
1636			/*
1637			 *  If connection is half-synchronized
1638			 *  (ie SEND_SYN flag on) then delay ACK,
1639			 *  so it may be piggybacked when SYN is sent.
1640			 *  Otherwise, since we received a FIN then no
1641			 *  more input can be expected, send ACK now.
1642			 */
1643			if (tp->t_flags & TF_NEEDSYN)
1644				tp->t_flags |= TF_DELACK;
1645			else
1646#endif /* TTCP */
1647				tp->t_flags |= TF_ACKNOW;
1648			tp->rcv_nxt++;
1649		}
1650		switch (tp->t_state) {
1651
1652	 	/*
1653		 * In SYN_RECEIVED and ESTABLISHED STATES
1654		 * enter the CLOSE_WAIT state.
1655		 */
1656		case TCPS_SYN_RECEIVED:
1657		case TCPS_ESTABLISHED:
1658			tp->t_state = TCPS_CLOSE_WAIT;
1659			break;
1660
1661	 	/*
1662		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
1663		 * enter the CLOSING state.
1664		 */
1665		case TCPS_FIN_WAIT_1:
1666			tp->t_state = TCPS_CLOSING;
1667			break;
1668
1669	 	/*
1670		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
1671		 * starting the time-wait timer, turning off the other
1672		 * standard timers.
1673		 */
1674		case TCPS_FIN_WAIT_2:
1675			tp->t_state = TCPS_TIME_WAIT;
1676			tcp_canceltimers(tp);
1677#ifdef TTCP
1678			/* Shorten TIME_WAIT [RFC-1644, p.28] */
1679			if (tp->cc_recv != 0 &&
1680			    tp->t_duration < TCPTV_MSL) {
1681				tp->t_timer[TCPT_2MSL] =
1682				    tp->t_rxtcur * TCPTV_TWTRUNC;
1683				/* For transaction client, force ACK now. */
1684				tp->t_flags |= TF_ACKNOW;
1685			}
1686			else
1687#endif
1688				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1689			soisdisconnected(so);
1690			break;
1691
1692		/*
1693		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
1694		 */
1695		case TCPS_TIME_WAIT:
1696			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1697			break;
1698		}
1699	}
1700#ifdef TCPDEBUG
1701	if (so->so_options & SO_DEBUG)
1702		tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
1703#endif
1704
1705	/*
1706	 * Return any desired output.
1707	 */
1708	if (needoutput || (tp->t_flags & TF_ACKNOW))
1709		(void) tcp_output(tp);
1710	return;
1711
1712dropafterack:
1713	/*
1714	 * Generate an ACK dropping incoming segment if it occupies
1715	 * sequence space, where the ACK reflects our state.
1716	 */
1717	if (tiflags & TH_RST)
1718		goto drop;
1719#ifdef TTCP
1720#ifdef TCPDEBUG
1721	if (so->so_options & SO_DEBUG)
1722		tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
1723#endif
1724#endif
1725	m_freem(m);
1726	tp->t_flags |= TF_ACKNOW;
1727	(void) tcp_output(tp);
1728	return;
1729
1730dropwithreset:
1731	/*
1732	 * Generate a RST, dropping incoming segment.
1733	 * Make ACK acceptable to originator of segment.
1734	 * Don't bother to respond if destination was broadcast/multicast.
1735	 */
1736	if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
1737	    IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
1738		goto drop;
1739#ifdef TTCP
1740#ifdef TCPDEBUG
1741	if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
1742		tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
1743#endif
1744#endif
1745	if (tiflags & TH_ACK)
1746		tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
1747	else {
1748		if (tiflags & TH_SYN)
1749			ti->ti_len++;
1750		tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1751		    TH_RST|TH_ACK);
1752	}
1753	/* destroy temporarily created socket */
1754	if (dropsocket)
1755		(void) soabort(so);
1756	return;
1757
1758drop:
1759	/*
1760	 * Drop space held by incoming segment and return.
1761	 */
1762#ifdef TCPDEBUG
1763#ifdef TTCP
1764	if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
1765		tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
1766#else
1767	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
1768		tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
1769#endif
1770#endif
1771	m_freem(m);
1772	/* destroy temporarily created socket */
1773	if (dropsocket)
1774		(void) soabort(so);
1775	return;
1776#ifndef TUBA_INCLUDE
1777}
1778
1779void
1780#ifdef TTCP
1781tcp_dooptions(tp, cp, cnt, ti, to)
1782#else
1783tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
1784#endif
1785	struct tcpcb *tp;
1786	u_char *cp;
1787	int cnt;
1788	struct tcpiphdr *ti;
1789#ifdef TTCP
1790	struct tcpopt *to;
1791#else
1792	int *ts_present;
1793	u_long *ts_val, *ts_ecr;
1794#endif
1795{
1796	u_short mss = 0;
1797	int opt, optlen;
1798
1799	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1800		opt = cp[0];
1801		if (opt == TCPOPT_EOL)
1802			break;
1803		if (opt == TCPOPT_NOP)
1804			optlen = 1;
1805		else {
1806			optlen = cp[1];
1807			if (optlen <= 0)
1808				break;
1809		}
1810		switch (opt) {
1811
1812		default:
1813			continue;
1814
1815		case TCPOPT_MAXSEG:
1816			if (optlen != TCPOLEN_MAXSEG)
1817				continue;
1818			if (!(ti->ti_flags & TH_SYN))
1819				continue;
1820			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
1821			NTOHS(mss);
1822			break;
1823
1824		case TCPOPT_WINDOW:
1825			if (optlen != TCPOLEN_WINDOW)
1826				continue;
1827			if (!(ti->ti_flags & TH_SYN))
1828				continue;
1829			tp->t_flags |= TF_RCVD_SCALE;
1830			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
1831			break;
1832
1833		case TCPOPT_TIMESTAMP:
1834			if (optlen != TCPOLEN_TIMESTAMP)
1835				continue;
1836#ifdef TTCP
1837			to->to_flag |= TOF_TS;
1838			bcopy((char *)cp + 2,
1839			    (char *)&to->to_tsval, sizeof(to->to_tsval));
1840			NTOHL(to->to_tsval);
1841			bcopy((char *)cp + 6,
1842			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
1843			NTOHL(to->to_tsecr);
1844#else
1845			*ts_present = 1;
1846			bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
1847			NTOHL(*ts_val);
1848			bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
1849			NTOHL(*ts_ecr);
1850#endif
1851
1852			/*
1853			 * A timestamp received in a SYN makes
1854			 * it ok to send timestamp requests and replies.
1855			 */
1856			if (ti->ti_flags & TH_SYN) {
1857				tp->t_flags |= TF_RCVD_TSTMP;
1858#ifdef TTCP
1859				tp->ts_recent = to->to_tsval;
1860#else
1861				tp->ts_recent = *ts_val;
1862#endif
1863				tp->ts_recent_age = tcp_now;
1864			}
1865			break;
1866#ifdef TTCP
1867		case TCPOPT_CC:
1868			if (optlen != TCPOLEN_CC)
1869				continue;
1870			to->to_flag |= TCPOPT_CC;
1871			bcopy((char *)cp + 2,
1872			    (char *)&to->to_cc, sizeof(to->to_cc));
1873			NTOHL(to->to_cc);
1874			/*
1875			 * A CC or CC.new option received in a SYN makes
1876			 * it ok to send CC in subsequent segments.
1877			 */
1878			if (ti->ti_flags & TH_SYN)
1879				tp->t_flags |= TF_RCVD_CC;
1880			break;
1881		case TCPOPT_CCNEW:
1882			if (optlen != TCPOLEN_CC)
1883				continue;
1884			if (!(ti->ti_flags & TH_SYN))
1885				continue;
1886			to->to_flag |= TOF_CCNEW;
1887			bcopy((char *)cp + 2,
1888			    (char *)&to->to_cc, sizeof(to->to_cc));
1889			NTOHL(to->to_cc);
1890			/*
1891			 * A CC or CC.new option received in a SYN makes
1892			 * it ok to send CC in subsequent segments.
1893			 */
1894			tp->t_flags |= TF_RCVD_CC;
1895			break;
1896		case TCPOPT_CCECHO:
1897			if (optlen != TCPOLEN_CC)
1898				continue;
1899			if (!(ti->ti_flags & TH_SYN))
1900				continue;
1901			to->to_flag |= TOF_CCECHO;
1902			bcopy((char *)cp + 2,
1903			    (char *)&to->to_ccecho, sizeof(to->to_ccecho));
1904			NTOHL(to->to_ccecho);
1905			break;
1906#endif /* TTCP*/
1907		}
1908	}
1909	if (ti->ti_flags & TH_SYN)
1910		tcp_mss(tp, mss);	/* sets t_maxseg */
1911}
1912
1913/*
1914 * Pull out of band byte out of a segment so
1915 * it doesn't appear in the user's data queue.
1916 * It is still reflected in the segment length for
1917 * sequencing purposes.
1918 */
1919void
1920tcp_pulloutofband(so, ti, m)
1921	struct socket *so;
1922	struct tcpiphdr *ti;
1923	register struct mbuf *m;
1924{
1925	int cnt = ti->ti_urp - 1;
1926
1927	while (cnt >= 0) {
1928		if (m->m_len > cnt) {
1929			char *cp = mtod(m, caddr_t) + cnt;
1930			struct tcpcb *tp = sototcpcb(so);
1931
1932			tp->t_iobc = *cp;
1933			tp->t_oobflags |= TCPOOB_HAVEDATA;
1934			bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
1935			m->m_len--;
1936			return;
1937		}
1938		cnt -= m->m_len;
1939		m = m->m_next;
1940		if (m == 0)
1941			break;
1942	}
1943	panic("tcp_pulloutofband");
1944}
1945
1946/*
1947 * Collect new round-trip time estimate
1948 * and update averages and current timeout.
1949 */
1950void
1951tcp_xmit_timer(tp, rtt)
1952	register struct tcpcb *tp;
1953	short rtt;
1954{
1955	register short delta;
1956
1957	tcpstat.tcps_rttupdated++;
1958	if (tp->t_srtt != 0) {
1959		/*
1960		 * srtt is stored as fixed point with 3 bits after the
1961		 * binary point (i.e., scaled by 8).  The following magic
1962		 * is equivalent to the smoothing algorithm in rfc793 with
1963		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1964		 * point).  Adjust rtt to origin 0.
1965		 */
1966		delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1967		if ((tp->t_srtt += delta) <= 0)
1968			tp->t_srtt = 1;
1969		/*
1970		 * We accumulate a smoothed rtt variance (actually, a
1971		 * smoothed mean difference), then set the retransmit
1972		 * timer to smoothed rtt + 4 times the smoothed variance.
1973		 * rttvar is stored as fixed point with 2 bits after the
1974		 * binary point (scaled by 4).  The following is
1975		 * equivalent to rfc793 smoothing with an alpha of .75
1976		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
1977		 * rfc793's wired-in beta.
1978		 */
1979		if (delta < 0)
1980			delta = -delta;
1981		delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1982		if ((tp->t_rttvar += delta) <= 0)
1983			tp->t_rttvar = 1;
1984	} else {
1985		/*
1986		 * No rtt measurement yet - use the unsmoothed rtt.
1987		 * Set the variance to half the rtt (so our first
1988		 * retransmit happens at 3*rtt).
1989		 */
1990		tp->t_srtt = rtt << TCP_RTT_SHIFT;
1991		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1992	}
1993	tp->t_rtt = 0;
1994	tp->t_rxtshift = 0;
1995
1996	/*
1997	 * the retransmit should happen at rtt + 4 * rttvar.
1998	 * Because of the way we do the smoothing, srtt and rttvar
1999	 * will each average +1/2 tick of bias.  When we compute
2000	 * the retransmit timer, we want 1/2 tick of rounding and
2001	 * 1 extra tick because of +-1/2 tick uncertainty in the
2002	 * firing of the timer.  The bias will give us exactly the
2003	 * 1.5 tick we need.  But, because the bias is
2004	 * statistical, we have to test that we don't drop below
2005	 * the minimum feasible timer (which is 2 ticks).
2006	 */
2007	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
2008	    tp->t_rttmin, TCPTV_REXMTMAX);
2009
2010	/*
2011	 * We received an ack for a packet that wasn't retransmitted;
2012	 * it is probably safe to discard any error indications we've
2013	 * received recently.  This isn't quite right, but close enough
2014	 * for now (a route might have failed after we sent a segment,
2015	 * and the return path might not be symmetrical).
2016	 */
2017	tp->t_softerror = 0;
2018}
2019
2020/*
2021 * Determine a reasonable value for maxseg size.
2022 * If the route is known, check route for mtu.
2023 * If none, use an mss that can be handled on the outgoing
2024 * interface without forcing IP to fragment; if bigger than
2025 * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
2026 * to utilize large mbufs.  If no route is found, route has no mtu,
2027 * or the destination isn't local, use a default, hopefully conservative
2028 * size (usually 512 or the default IP max size, but no more than the mtu
2029 * of the interface), as we can't discover anything about intervening
2030 * gateways or networks.  We also initialize the congestion/slow start
2031 * window to be a single segment if the destination isn't local.
2032 * While looking at the routing entry, we also initialize other path-dependent
2033 * parameters from pre-set or cached values in the routing entry.
2034 *
2035 * Also take into account the space needed for options that we
2036 * send regularly.  Make maxseg shorter by that amount to assure
2037 * that we can send maxseg amount of data even when the options
2038 * are present.  Store the upper limit of the length of options plus
2039 * data in maxopd.
2040 *
2041 * NOTE that this routine is only called when we process an incoming
2042 * segment, for outgoing segments only tcp_mssopt is called.
2043 *
2044#ifdef TTCP
2045 * In case of T/TCP, we call this routine during implicit connection
2046 * setup as well (offer = -1), to initialize maxseg from the cached
2047 * MSS of our peer.
2048#endif
2049 */
2050void
2051tcp_mss(tp, offer)
2052	struct tcpcb *tp;
2053	int offer;
2054{
2055	register struct rtentry *rt;
2056	struct ifnet *ifp;
2057	register int rtt, mss;
2058	u_long bufsize;
2059	struct inpcb *inp;
2060	struct socket *so;
2061#ifdef TTCP
2062	struct rmxp_tao *taop;
2063	int origoffer = offer;
2064	extern int tcp_do_rfc1644;
2065#endif
2066	extern int tcp_mssdflt;
2067	extern int tcp_do_rfc1323;
2068
2069	inp = tp->t_inpcb;
2070	if ((rt = tcp_rtlookup(inp)) == NULL) {
2071		tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
2072		return;
2073	}
2074	ifp = rt->rt_ifp;
2075	so = inp->inp_socket;
2076
2077#ifdef TTCP
2078	taop = rmx_taop(rt->rt_rmx);
2079	/*
2080	 * Offer == -1 means that we didn't receive SYN yet,
2081	 * use cached value in that case;
2082	 */
2083	if (offer == -1)
2084		offer = taop->tao_mssopt;
2085#endif /* TTCP */
2086	/*
2087	 * Offer == 0 means that there was no MSS on the SYN segment,
2088	 * in this case we use tcp_mssdflt.
2089	 */
2090	if (offer == 0)
2091		offer = tcp_mssdflt;
2092	else
2093		/*
2094		 * Sanity check: make sure that maxopd will be large
2095		 * enough to allow some data on segments even is the
2096		 * all the option space is used (40bytes).  Otherwise
2097		 * funny things may happen in tcp_output.
2098		 */
2099		offer = max(offer, 64);
2100#ifdef TTCP
2101	taop->tao_mssopt = offer;
2102#endif /* TTCP */
2103
2104#ifdef RTV_MTU	/* if route characteristics exist ... */
2105	/*
2106	 * While we're here, check if there's an initial rtt
2107	 * or rttvar.  Convert from the route-table units
2108	 * to scaled multiples of the slow timeout timer.
2109	 */
2110	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
2111		/*
2112		 * XXX the lock bit for RTT indicates that the value
2113		 * is also a minimum value; this is subject to time.
2114		 */
2115		if (rt->rt_rmx.rmx_locks & RTV_RTT)
2116			tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
2117		tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
2118		if (rt->rt_rmx.rmx_rttvar)
2119			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
2120			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
2121		else
2122			/* default variation is +- 1 rtt */
2123			tp->t_rttvar =
2124			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
2125		TCPT_RANGESET(tp->t_rxtcur,
2126		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
2127		    tp->t_rttmin, TCPTV_REXMTMAX);
2128	}
2129	/*
2130	 * if there's an mtu associated with the route, use it
2131	 */
2132	if (rt->rt_rmx.rmx_mtu)
2133		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
2134	else
2135#endif /* RTV_MTU */
2136	{
2137		mss = ifp->if_mtu - sizeof(struct tcpiphdr);
2138		if (!in_localaddr(inp->inp_faddr))
2139			mss = min(mss, tcp_mssdflt);
2140	}
2141	mss = min(mss, offer);
2142	/*
2143	 * maxopd stores the maximum length of data AND options
2144	 * in a segment; maxseg is the amount of data in a normal
2145	 * segment.  We need to store this value (maxopd) apart
2146	 * from maxseg, because now every segment carries options
2147	 * and thus we normally have somewhat less data in segments.
2148	 */
2149	tp->t_maxopd = mss;
2150
2151#ifdef TTCP
2152	/*
2153	 * In case of T/TCP, origoffer==-1 indicates, that no segments
2154	 * were received yet.  In this case we just guess, otherwise
2155	 * we do the same as before T/TCP.
2156	 */
2157 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
2158	    (origoffer == -1 ||
2159	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
2160		mss -= TCPOLEN_TSTAMP_APPA;
2161 	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
2162	    (origoffer == -1 ||
2163	     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
2164		mss -= TCPOLEN_CC_APPA;
2165#else /* TTCP */
2166	/*
2167	 * Adjust mss to leave space for the usual options.  We're
2168	 * called from the end of tcp_dooptions so we can use the
2169	 * REQ/RCVD flags to see if options will be used.
2170	 */
2171 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
2172	    (TF_REQ_TSTMP|TF_RCVD_TSTMP))
2173		mss -= TCPOLEN_TSTAMP_APPA;
2174#endif /* TTCP */
2175
2176#if	(MCLBYTES & (MCLBYTES - 1)) == 0
2177		if (mss > MCLBYTES)
2178			mss &= ~(MCLBYTES-1);
2179#else
2180		if (mss > MCLBYTES)
2181			mss = mss / MCLBYTES * MCLBYTES;
2182#endif
2183	/*
2184	 * If there's a pipesize, change the socket buffer
2185	 * to that size.  Make the socket buffers an integral
2186	 * number of mss units; if the mss is larger than
2187	 * the socket buffer, decrease the mss.
2188	 */
2189#ifdef RTV_SPIPE
2190	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
2191#endif
2192		bufsize = so->so_snd.sb_hiwat;
2193	if (bufsize < mss)
2194		mss = bufsize;
2195	else {
2196		bufsize = roundup(bufsize, mss);
2197		if (bufsize > sb_max)
2198			bufsize = sb_max;
2199		(void)sbreserve(&so->so_snd, bufsize);
2200	}
2201	tp->t_maxseg = mss;
2202
2203#ifdef RTV_RPIPE
2204	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
2205#endif
2206		bufsize = so->so_rcv.sb_hiwat;
2207	if (bufsize > mss) {
2208		bufsize = roundup(bufsize, mss);
2209		if (bufsize > sb_max)
2210			bufsize = sb_max;
2211		(void)sbreserve(&so->so_rcv, bufsize);
2212	}
2213#ifdef TTCP
2214	/*
2215	 * Don't force slow-start on local network.
2216	 */
2217	if (!in_localaddr(inp->inp_faddr))
2218#endif /* TTCP */
2219		tp->snd_cwnd = mss;
2220
2221#ifdef RTV_SSTHRESH
2222	if (rt->rt_rmx.rmx_ssthresh) {
2223		/*
2224		 * There's some sort of gateway or interface
2225		 * buffer limit on the path.  Use this to set
2226		 * the slow start threshhold, but set the
2227		 * threshold to no less than 2*mss.
2228		 */
2229		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
2230	}
2231#endif
2232}
2233
2234/*
2235 * Determine the MSS option to send on an outgoing SYN.
2236 */
2237int
2238tcp_mssopt(tp)
2239	struct tcpcb *tp;
2240{
2241	struct rtentry *rt;
2242	extern int tcp_mssdflt;
2243
2244	rt = tcp_rtlookup(tp->t_inpcb);
2245	if (rt == NULL)
2246		return tcp_mssdflt;
2247
2248	/*
2249	 * if there's an mtu associated with the route, use it
2250	 */
2251	if (rt->rt_rmx.rmx_mtu)
2252		return rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
2253
2254	return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr);
2255}
2256#endif /* TUBA_INCLUDE */
2257