tcp_timewait.c revision 12172
11541Srgrimes/*
211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 3. All advertising materials mentioning features or use of this software
141541Srgrimes *    must display the following acknowledgement:
151541Srgrimes *	This product includes software developed by the University of
161541Srgrimes *	California, Berkeley and its contributors.
171541Srgrimes * 4. Neither the name of the University nor the names of its contributors
181541Srgrimes *    may be used to endorse or promote products derived from this software
191541Srgrimes *    without specific prior written permission.
201541Srgrimes *
211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311541Srgrimes * SUCH DAMAGE.
321541Srgrimes *
3311150Swollman *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
3412172Sphk *	$Id: tcp_subr.c,v 1.20 1995/10/16 18:21:20 wollman Exp $
351541Srgrimes */
361541Srgrimes
371541Srgrimes#include <sys/param.h>
381541Srgrimes#include <sys/proc.h>
391541Srgrimes#include <sys/systm.h>
4012172Sphk#include <sys/kernel.h>
4112172Sphk#include <sys/sysctl.h>
421541Srgrimes#include <sys/malloc.h>
431541Srgrimes#include <sys/mbuf.h>
441541Srgrimes#include <sys/socket.h>
451541Srgrimes#include <sys/socketvar.h>
461541Srgrimes#include <sys/protosw.h>
471541Srgrimes#include <sys/errno.h>
487684Sdg#include <sys/queue.h>
491541Srgrimes
501541Srgrimes#include <net/route.h>
511541Srgrimes#include <net/if.h>
521541Srgrimes
531541Srgrimes#include <netinet/in.h>
541541Srgrimes#include <netinet/in_systm.h>
551541Srgrimes#include <netinet/ip.h>
561541Srgrimes#include <netinet/in_pcb.h>
577090Sbde#include <netinet/in_var.h>
581541Srgrimes#include <netinet/ip_var.h>
591541Srgrimes#include <netinet/ip_icmp.h>
601541Srgrimes#include <netinet/tcp.h>
611541Srgrimes#include <netinet/tcp_fsm.h>
621541Srgrimes#include <netinet/tcp_seq.h>
631541Srgrimes#include <netinet/tcp_timer.h>
641541Srgrimes#include <netinet/tcp_var.h>
651541Srgrimes#include <netinet/tcpip.h>
666283Swollman#ifdef TCPDEBUG
676283Swollman#include <netinet/tcp_debug.h>
686283Swollman#endif
691541Srgrimes
701541Srgrimes/* patchable/settable parameters for tcp */
711541Srgrimesint 	tcp_mssdflt = TCP_MSS;
7212172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
7312172Sphk	CTLFLAG_RW, &tcp_mssdflt , 0, "");
741541Srgrimesint 	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
7512172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
7612172Sphk	CTLFLAG_RW, &tcp_rttdflt , 0, "");
771541Srgrimesint	tcp_do_rfc1323 = 1;
7812172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
7912172Sphk	CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
806283Swollmanint	tcp_do_rfc1644 = 1;
8112172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644,
8212172Sphk	CTLFLAG_RW, &tcp_do_rfc1644 , 0, "");
836283Swollmanstatic	void tcp_cleartaocache(void);
841541Srgrimes
/*
 * Target size of the TCP PCB hash table.  Will be rounded down to a
 * prime number.  A definition supplied before this point (e.g. on the
 * compiler command line) takes precedence over the default below.
 */
#if !defined(TCBHASHSIZE)
#define TCBHASHSIZE	128
#endif
921541Srgrimes
931541Srgrimes/*
941541Srgrimes * Tcp initialization
951541Srgrimes */
961541Srgrimesvoid
971541Srgrimestcp_init()
981541Srgrimes{
991541Srgrimes
10011150Swollman	tcp_iss = random();	/* wrong, but better than a constant */
1016283Swollman	tcp_ccgen = 1;
1026283Swollman	tcp_cleartaocache();
1037684Sdg	LIST_INIT(&tcb);
1047684Sdg	tcbinfo.listhead = &tcb;
1057684Sdg	tcbinfo.hashbase = phashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashsize);
1061541Srgrimes	if (max_protohdr < sizeof(struct tcpiphdr))
1071541Srgrimes		max_protohdr = sizeof(struct tcpiphdr);
1081541Srgrimes	if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
1091541Srgrimes		panic("tcp_init");
1101541Srgrimes}
1111541Srgrimes
1121541Srgrimes/*
1131541Srgrimes * Create template to be used to send tcp packets on a connection.
1141541Srgrimes * Call after host entry created, allocates an mbuf and fills
1151541Srgrimes * in a skeletal tcp/ip header, minimizing the amount of work
1161541Srgrimes * necessary when the connection is used.
1171541Srgrimes */
1181541Srgrimesstruct tcpiphdr *
1191541Srgrimestcp_template(tp)
1201541Srgrimes	struct tcpcb *tp;
1211541Srgrimes{
1221541Srgrimes	register struct inpcb *inp = tp->t_inpcb;
1231541Srgrimes	register struct mbuf *m;
1241541Srgrimes	register struct tcpiphdr *n;
1251541Srgrimes
1261541Srgrimes	if ((n = tp->t_template) == 0) {
1271541Srgrimes		m = m_get(M_DONTWAIT, MT_HEADER);
1281541Srgrimes		if (m == NULL)
1291541Srgrimes			return (0);
1301541Srgrimes		m->m_len = sizeof (struct tcpiphdr);
1311541Srgrimes		n = mtod(m, struct tcpiphdr *);
1321541Srgrimes	}
1331541Srgrimes	n->ti_next = n->ti_prev = 0;
1341541Srgrimes	n->ti_x1 = 0;
1351541Srgrimes	n->ti_pr = IPPROTO_TCP;
1361541Srgrimes	n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
1371541Srgrimes	n->ti_src = inp->inp_laddr;
1381541Srgrimes	n->ti_dst = inp->inp_faddr;
1391541Srgrimes	n->ti_sport = inp->inp_lport;
1401541Srgrimes	n->ti_dport = inp->inp_fport;
1411541Srgrimes	n->ti_seq = 0;
1421541Srgrimes	n->ti_ack = 0;
1431541Srgrimes	n->ti_x2 = 0;
1441541Srgrimes	n->ti_off = 5;
1451541Srgrimes	n->ti_flags = 0;
1461541Srgrimes	n->ti_win = 0;
1471541Srgrimes	n->ti_sum = 0;
1481541Srgrimes	n->ti_urp = 0;
1491541Srgrimes	return (n);
1501541Srgrimes}
1511541Srgrimes
1521541Srgrimes/*
1531541Srgrimes * Send a single message to the TCP at address specified by
1541541Srgrimes * the given TCP/IP header.  If m == 0, then we make a copy
1551541Srgrimes * of the tcpiphdr at ti and send directly to the addressed host.
1561541Srgrimes * This is used to force keep alive messages out using the TCP
1571541Srgrimes * template for a connection tp->t_template.  If flags are given
1581541Srgrimes * then we send a message back to the TCP which originated the
1591541Srgrimes * segment ti, and discard the mbuf containing it and any other
1601541Srgrimes * attached mbufs.
1611541Srgrimes *
1621541Srgrimes * In any case the ack and sequence number of the transmitted
1631541Srgrimes * segment are as specified by the parameters.
1641541Srgrimes */
1651541Srgrimesvoid
1661541Srgrimestcp_respond(tp, ti, m, ack, seq, flags)
1671541Srgrimes	struct tcpcb *tp;
1681541Srgrimes	register struct tcpiphdr *ti;
1691541Srgrimes	register struct mbuf *m;
1701541Srgrimes	tcp_seq ack, seq;
1711541Srgrimes	int flags;
1721541Srgrimes{
1731541Srgrimes	register int tlen;
1741541Srgrimes	int win = 0;
1751541Srgrimes	struct route *ro = 0;
1761541Srgrimes
1771541Srgrimes	if (tp) {
1781541Srgrimes		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
1791541Srgrimes		ro = &tp->t_inpcb->inp_route;
1801541Srgrimes	}
1811541Srgrimes	if (m == 0) {
1821541Srgrimes		m = m_gethdr(M_DONTWAIT, MT_HEADER);
1831541Srgrimes		if (m == NULL)
1841541Srgrimes			return;
1851541Srgrimes#ifdef TCP_COMPAT_42
1861541Srgrimes		tlen = 1;
1871541Srgrimes#else
1881541Srgrimes		tlen = 0;
1891541Srgrimes#endif
1901541Srgrimes		m->m_data += max_linkhdr;
1911541Srgrimes		*mtod(m, struct tcpiphdr *) = *ti;
1921541Srgrimes		ti = mtod(m, struct tcpiphdr *);
1931541Srgrimes		flags = TH_ACK;
1941541Srgrimes	} else {
1951541Srgrimes		m_freem(m->m_next);
1961541Srgrimes		m->m_next = 0;
1971541Srgrimes		m->m_data = (caddr_t)ti;
1981541Srgrimes		m->m_len = sizeof (struct tcpiphdr);
1991541Srgrimes		tlen = 0;
2001541Srgrimes#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
2011541Srgrimes		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
2021541Srgrimes		xchg(ti->ti_dport, ti->ti_sport, u_short);
2031541Srgrimes#undef xchg
2041541Srgrimes	}
2051541Srgrimes	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
2061541Srgrimes	tlen += sizeof (struct tcpiphdr);
2071541Srgrimes	m->m_len = tlen;
2081541Srgrimes	m->m_pkthdr.len = tlen;
2091541Srgrimes	m->m_pkthdr.rcvif = (struct ifnet *) 0;
2101541Srgrimes	ti->ti_next = ti->ti_prev = 0;
2111541Srgrimes	ti->ti_x1 = 0;
2121541Srgrimes	ti->ti_seq = htonl(seq);
2131541Srgrimes	ti->ti_ack = htonl(ack);
2141541Srgrimes	ti->ti_x2 = 0;
2151541Srgrimes	ti->ti_off = sizeof (struct tcphdr) >> 2;
2161541Srgrimes	ti->ti_flags = flags;
2171541Srgrimes	if (tp)
2181541Srgrimes		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
2191541Srgrimes	else
2201541Srgrimes		ti->ti_win = htons((u_short)win);
2211541Srgrimes	ti->ti_urp = 0;
2221541Srgrimes	ti->ti_sum = 0;
2231541Srgrimes	ti->ti_sum = in_cksum(m, tlen);
2241541Srgrimes	((struct ip *)ti)->ip_len = tlen;
2251541Srgrimes	((struct ip *)ti)->ip_ttl = ip_defttl;
2266283Swollman#ifdef TCPDEBUG
2276283Swollman	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
2286283Swollman		tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
2296283Swollman#endif
2301541Srgrimes	(void) ip_output(m, NULL, ro, 0, NULL);
2311541Srgrimes}
2321541Srgrimes
2331541Srgrimes/*
2341541Srgrimes * Create a new TCP control block, making an
2351541Srgrimes * empty reassembly queue and hooking it to the argument
2361541Srgrimes * protocol control block.
2371541Srgrimes */
2381541Srgrimesstruct tcpcb *
2391541Srgrimestcp_newtcpcb(inp)
2401541Srgrimes	struct inpcb *inp;
2411541Srgrimes{
2421541Srgrimes	register struct tcpcb *tp;
2431541Srgrimes
2441541Srgrimes	tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
2451541Srgrimes	if (tp == NULL)
2461541Srgrimes		return ((struct tcpcb *)0);
2471541Srgrimes	bzero((char *) tp, sizeof(struct tcpcb));
2481541Srgrimes	tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
2496283Swollman	tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
2501541Srgrimes
2516283Swollman	if (tcp_do_rfc1323)
2526283Swollman		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
2536283Swollman	if (tcp_do_rfc1644)
2546283Swollman		tp->t_flags |= TF_REQ_CC;
2551541Srgrimes	tp->t_inpcb = inp;
2561541Srgrimes	/*
2571541Srgrimes	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
2581541Srgrimes	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
2591541Srgrimes	 * reasonable initial retransmit time.
2601541Srgrimes	 */
2611541Srgrimes	tp->t_srtt = TCPTV_SRTTBASE;
2621541Srgrimes	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2;
2631541Srgrimes	tp->t_rttmin = TCPTV_MIN;
2648876Srgrimes	TCPT_RANGESET(tp->t_rxtcur,
2651541Srgrimes	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
2661541Srgrimes	    TCPTV_MIN, TCPTV_REXMTMAX);
2671541Srgrimes	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
2681541Srgrimes	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
2691541Srgrimes	inp->inp_ip.ip_ttl = ip_defttl;
2701541Srgrimes	inp->inp_ppcb = (caddr_t)tp;
2711541Srgrimes	return (tp);
2721541Srgrimes}
2731541Srgrimes
2741541Srgrimes/*
2751541Srgrimes * Drop a TCP connection, reporting
2761541Srgrimes * the specified error.  If connection is synchronized,
2771541Srgrimes * then send a RST to peer.
2781541Srgrimes */
2791541Srgrimesstruct tcpcb *
2801541Srgrimestcp_drop(tp, errno)
2811541Srgrimes	register struct tcpcb *tp;
2821541Srgrimes	int errno;
2831541Srgrimes{
2841541Srgrimes	struct socket *so = tp->t_inpcb->inp_socket;
2851541Srgrimes
2861541Srgrimes	if (TCPS_HAVERCVDSYN(tp->t_state)) {
2871541Srgrimes		tp->t_state = TCPS_CLOSED;
2881541Srgrimes		(void) tcp_output(tp);
2891541Srgrimes		tcpstat.tcps_drops++;
2901541Srgrimes	} else
2911541Srgrimes		tcpstat.tcps_conndrops++;
2921541Srgrimes	if (errno == ETIMEDOUT && tp->t_softerror)
2931541Srgrimes		errno = tp->t_softerror;
2941541Srgrimes	so->so_error = errno;
2951541Srgrimes	return (tcp_close(tp));
2961541Srgrimes}
2971541Srgrimes
2981541Srgrimes/*
2991541Srgrimes * Close a TCP control block:
3001541Srgrimes *	discard all space held by the tcp
3011541Srgrimes *	discard internet protocol block
3021541Srgrimes *	wake up any sleepers
3031541Srgrimes */
3041541Srgrimesstruct tcpcb *
3051541Srgrimestcp_close(tp)
3061541Srgrimes	register struct tcpcb *tp;
3071541Srgrimes{
3081541Srgrimes	register struct tcpiphdr *t;
3091541Srgrimes	struct inpcb *inp = tp->t_inpcb;
3101541Srgrimes	struct socket *so = inp->inp_socket;
3111541Srgrimes	register struct mbuf *m;
3121541Srgrimes#ifdef RTV_RTT
3131541Srgrimes	register struct rtentry *rt;
3141541Srgrimes
3151541Srgrimes	/*
3169373Swollman	 * If we got enough samples through the srtt filter,
3179373Swollman	 * save the rtt and rttvar in the routing entry.
3189373Swollman	 * 'Enough' is arbitrarily defined as the 16 samples.
3199373Swollman	 * 16 samples is enough for the srtt filter to converge
3209373Swollman	 * to within 5% of the correct value; fewer samples and
3219373Swollman	 * we could save a very bogus rtt.
3221541Srgrimes	 *
3231541Srgrimes	 * Don't update the default route's characteristics and don't
3241541Srgrimes	 * update anything that the user "locked".
3251541Srgrimes	 */
3269373Swollman	if (tp->t_rttupdated >= 16 &&
3271541Srgrimes	    (rt = inp->inp_route.ro_rt) &&
3281541Srgrimes	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
3291549Srgrimes		register u_long i = 0;
3301541Srgrimes
3311541Srgrimes		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
3321541Srgrimes			i = tp->t_srtt *
3331541Srgrimes			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
3341541Srgrimes			if (rt->rt_rmx.rmx_rtt && i)
3351541Srgrimes				/*
3361541Srgrimes				 * filter this update to half the old & half
3371541Srgrimes				 * the new values, converting scale.
3381541Srgrimes				 * See route.h and tcp_var.h for a
3391541Srgrimes				 * description of the scaling constants.
3401541Srgrimes				 */
3411541Srgrimes				rt->rt_rmx.rmx_rtt =
3421541Srgrimes				    (rt->rt_rmx.rmx_rtt + i) / 2;
3431541Srgrimes			else
3441541Srgrimes				rt->rt_rmx.rmx_rtt = i;
3459263Swollman			tcpstat.tcps_cachedrtt++;
3461541Srgrimes		}
3471541Srgrimes		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
3481541Srgrimes			i = tp->t_rttvar *
3491541Srgrimes			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
3501541Srgrimes			if (rt->rt_rmx.rmx_rttvar && i)
3511541Srgrimes				rt->rt_rmx.rmx_rttvar =
3521541Srgrimes				    (rt->rt_rmx.rmx_rttvar + i) / 2;
3531541Srgrimes			else
3541541Srgrimes				rt->rt_rmx.rmx_rttvar = i;
3559263Swollman			tcpstat.tcps_cachedrttvar++;
3561541Srgrimes		}
3571541Srgrimes		/*
3581541Srgrimes		 * update the pipelimit (ssthresh) if it has been updated
3591541Srgrimes		 * already or if a pipesize was specified & the threshhold
3601541Srgrimes		 * got below half the pipesize.  I.e., wait for bad news
3611541Srgrimes		 * before we start updating, then update on both good
3621541Srgrimes		 * and bad news.
3631541Srgrimes		 */
3643444Sphk		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
3653444Sphk		    ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
3661541Srgrimes		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
3671541Srgrimes			/*
3681541Srgrimes			 * convert the limit from user data bytes to
3691541Srgrimes			 * packets then to packet data bytes.
3701541Srgrimes			 */
3711541Srgrimes			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
3721541Srgrimes			if (i < 2)
3731541Srgrimes				i = 2;
3741541Srgrimes			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
3751541Srgrimes			if (rt->rt_rmx.rmx_ssthresh)
3761541Srgrimes				rt->rt_rmx.rmx_ssthresh =
3771541Srgrimes				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
3781541Srgrimes			else
3791541Srgrimes				rt->rt_rmx.rmx_ssthresh = i;
3809263Swollman			tcpstat.tcps_cachedssthresh++;
3811541Srgrimes		}
3821541Srgrimes	}
3831541Srgrimes#endif /* RTV_RTT */
3841541Srgrimes	/* free the reassembly queue, if any */
3851541Srgrimes	t = tp->seg_next;
3861541Srgrimes	while (t != (struct tcpiphdr *)tp) {
3871541Srgrimes		t = (struct tcpiphdr *)t->ti_next;
3881541Srgrimes		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
3891541Srgrimes		remque(t->ti_prev);
3901541Srgrimes		m_freem(m);
3911541Srgrimes	}
3921541Srgrimes	if (tp->t_template)
3931541Srgrimes		(void) m_free(dtom(tp->t_template));
3941541Srgrimes	free(tp, M_PCB);
3951541Srgrimes	inp->inp_ppcb = 0;
3961541Srgrimes	soisdisconnected(so);
3971541Srgrimes	in_pcbdetach(inp);
3981541Srgrimes	tcpstat.tcps_closed++;
3991541Srgrimes	return ((struct tcpcb *)0);
4001541Srgrimes}
4011541Srgrimes
/*
 * Drain hook for TCP.  Currently a no-op.
 */
void
tcp_drain()
{
	/* Nothing to do. */
}
4071541Srgrimes
4081541Srgrimes/*
4091541Srgrimes * Notify a tcp user of an asynchronous error;
4101541Srgrimes * store error as soft error, but wake up user
4111541Srgrimes * (for now, won't do anything until can select for soft error).
4121541Srgrimes */
4131541Srgrimesvoid
4141541Srgrimestcp_notify(inp, error)
4151541Srgrimes	struct inpcb *inp;
4161541Srgrimes	int error;
4171541Srgrimes{
4181541Srgrimes	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
4191541Srgrimes	register struct socket *so = inp->inp_socket;
4201541Srgrimes
4211541Srgrimes	/*
4221541Srgrimes	 * Ignore some errors if we are hooked up.
4231541Srgrimes	 * If connection hasn't completed, has retransmitted several times,
4241541Srgrimes	 * and receives a second error, give up now.  This is better
4251541Srgrimes	 * than waiting a long time to establish a connection that
4261541Srgrimes	 * can never complete.
4271541Srgrimes	 */
4281541Srgrimes	if (tp->t_state == TCPS_ESTABLISHED &&
4291541Srgrimes	     (error == EHOSTUNREACH || error == ENETUNREACH ||
4301541Srgrimes	      error == EHOSTDOWN)) {
4311541Srgrimes		return;
4321541Srgrimes	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
4331541Srgrimes	    tp->t_softerror)
4341541Srgrimes		so->so_error = error;
4358876Srgrimes	else
4361541Srgrimes		tp->t_softerror = error;
4371541Srgrimes	wakeup((caddr_t) &so->so_timeo);
4381541Srgrimes	sorwakeup(so);
4391541Srgrimes	sowwakeup(so);
4401541Srgrimes}
4411541Srgrimes
4421541Srgrimesvoid
4431541Srgrimestcp_ctlinput(cmd, sa, ip)
4441541Srgrimes	int cmd;
4451541Srgrimes	struct sockaddr *sa;
4461541Srgrimes	register struct ip *ip;
4471541Srgrimes{
4481541Srgrimes	register struct tcphdr *th;
4491541Srgrimes	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
4501541Srgrimes
4511541Srgrimes	if (cmd == PRC_QUENCH)
4521541Srgrimes		notify = tcp_quench;
45310881Swollman#ifdef MTUDISC
45410881Swollman	else if (cmd == PRC_MSGSIZE)
45510881Swollman		notify = tcp_mtudisc;
45610881Swollman#endif /* MTUDISC */
4571541Srgrimes	else if (!PRC_IS_REDIRECT(cmd) &&
4581541Srgrimes		 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
4591541Srgrimes		return;
4601541Srgrimes	if (ip) {
4611541Srgrimes		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
4621541Srgrimes		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
4631541Srgrimes			cmd, notify);
4641541Srgrimes	} else
4651541Srgrimes		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
4661541Srgrimes}
4671541Srgrimes
4681541Srgrimes/*
4691541Srgrimes * When a source quench is received, close congestion window
4701541Srgrimes * to one segment.  We will gradually open it again as we proceed.
4711541Srgrimes */
4721541Srgrimesvoid
4731541Srgrimestcp_quench(inp, errno)
4741541Srgrimes	struct inpcb *inp;
4751541Srgrimes	int errno;
4761541Srgrimes{
4771541Srgrimes	struct tcpcb *tp = intotcpcb(inp);
4781541Srgrimes
4791541Srgrimes	if (tp)
4801541Srgrimes		tp->snd_cwnd = tp->t_maxseg;
4811541Srgrimes}
4826283Swollman
#ifdef MTUDISC
/*
 * When `need fragmentation' ICMP is received, update our idea of the MSS
 * based on the new value in the route.  Also nudge TCP to send something,
 * since we know the packet we just sent was dropped.
 * This duplicates some code in the tcp_mss() function in tcp_input.c.
 *
 * inp   - protocol control block of the affected connection; nothing is
 *         done if it has no tcpcb attached.
 * errno - unused.
 */
void
tcp_mtudisc(inp, errno)
	struct inpcb *inp;
	int errno;
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt;
	struct rmxp_tao *taop;
	struct socket *so = inp->inp_socket;
	int offered;
	int mss;

	if (tp) {
		rt = tcp_rtlookup(inp);
		if (!rt || !rt->rt_rmx.rmx_mtu) {
			/* No route or no MTU recorded: use the default. */
			tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
			return;
		}
		taop = rmx_taop(rt->rt_rmx);
		offered = taop->tao_mssopt;
		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
		/*
		 * Only clamp to the cached MSS option when one is present.
		 * A zero tao_mssopt presumably means nothing has been
		 * recorded for this peer; taking min() with it would force
		 * the segment size to zero.
		 */
		if (offered)
			mss = min(mss, offered);
		/* Never raise the segment size from here. */
		if (tp->t_maxopd <= mss)
			return;
		tp->t_maxopd = mss;

		/* Leave room for the options carried on every segment. */
		if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
		    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
			mss -= TCPOLEN_TSTAMP_APPA;
		if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
		    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
			mss -= TCPOLEN_CC_APPA;
		/* Above one cluster, round down to a multiple of MCLBYTES. */
#if	(MCLBYTES & (MCLBYTES - 1)) == 0
		if (mss > MCLBYTES)
			mss &= ~(MCLBYTES-1);
#else
		if (mss > MCLBYTES)
			mss = mss / MCLBYTES * MCLBYTES;
#endif
		if (so->so_snd.sb_hiwat < mss)
			mss = so->so_snd.sb_hiwat;

		tp->t_maxseg = mss;

		/* Rewind to the first unacknowledged byte and resend. */
		tcpstat.tcps_mturesent++;
		tp->t_rtt = 0;
		tp->snd_nxt = tp->snd_una;
		(void) tcp_output(tp);
	}
}
#endif /* MTUDISC */
54110881Swollman
54210881Swollman/*
5436283Swollman * Look-up the routing entry to the peer of this inpcb.  If no route
5446283Swollman * is found and it cannot be allocated the return NULL.  This routine
5456283Swollman * is called by TCP routines that access the rmx structure and by tcp_mss
5466283Swollman * to get the interface MTU.
5476283Swollman */
5486283Swollmanstruct rtentry *
5496283Swollmantcp_rtlookup(inp)
5506283Swollman	struct inpcb *inp;
5516283Swollman{
5526283Swollman	struct route *ro;
5536283Swollman	struct rtentry *rt;
5546283Swollman
5556283Swollman	ro = &inp->inp_route;
5566283Swollman	rt = ro->ro_rt;
5576283Swollman	if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
5586283Swollman		/* No route yet, so try to acquire one */
5596283Swollman		if (inp->inp_faddr.s_addr != INADDR_ANY) {
5606283Swollman			ro->ro_dst.sa_family = AF_INET;
5616283Swollman			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
5626283Swollman			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
5636283Swollman				inp->inp_faddr;
5646283Swollman			rtalloc(ro);
5656283Swollman			rt = ro->ro_rt;
5666283Swollman		}
5676283Swollman	}
5686283Swollman	return rt;
5696283Swollman}
5706283Swollman
5716283Swollman/*
5726283Swollman * Return a pointer to the cached information about the remote host.
5736283Swollman * The cached information is stored in the protocol specific part of
5746283Swollman * the route metrics.
5756283Swollman */
5766283Swollmanstruct rmxp_tao *
5776283Swollmantcp_gettaocache(inp)
5786283Swollman	struct inpcb *inp;
5796283Swollman{
5806283Swollman	struct rtentry *rt = tcp_rtlookup(inp);
5816283Swollman
5826283Swollman	/* Make sure this is a host route and is up. */
5836283Swollman	if (rt == NULL ||
5846283Swollman	    (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
5856283Swollman		return NULL;
5866283Swollman
5876283Swollman	return rmx_taop(rt->rt_rmx);
5886283Swollman}
5896283Swollman
/*
 * Clear all the TAO cache entries, called from tcp_init.
 *
 * XXX
 * This routine is just an empty one, because we assume that the
 * routing tables are initialized at the same time as TCP, so there is
 * nothing left over in the cache.
 */
static void
tcp_cleartaocache(void)
{
	/* Intentionally empty. */
}
601