tcp_usrreq.c revision 121628
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 121628 2003-10-28 05:47:40Z sam $
 */
361558Srgrimes
3737907Scharnier#include "opt_ipsec.h"
3885048Sru#include "opt_inet6.h"
3937907Scharnier#include "opt_tcpdebug.h"
4013171Swollman
4150476Speter#include <sys/param.h>
421558Srgrimes#include <sys/systm.h>
431558Srgrimes#include <sys/malloc.h>
441558Srgrimes#include <sys/kernel.h>
451558Srgrimes#include <sys/sysctl.h>
461558Srgrimes#include <sys/mbuf.h>
471558Srgrimes#ifdef INET6
481558Srgrimes#include <sys/domain.h>
4951639Sbillf#endif /* INET6 */
501558Srgrimes#include <sys/socket.h>
511558Srgrimes#include <sys/socketvar.h>
521558Srgrimes#include <sys/protosw.h>
531558Srgrimes#include <sys/proc.h>
541558Srgrimes#include <sys/jail.h>
5578140Sru
5617046Sjulian#include <net/if.h>
571558Srgrimes#include <net/route.h>
581558Srgrimes
591558Srgrimes#include <netinet/in.h>
6020287Swollman#include <netinet/in_systm.h>
6120287Swollman#ifdef INET6
621558Srgrimes#include <netinet/ip6.h>
6369793Sobrien#endif
641558Srgrimes#include <netinet/in_pcb.h>
651558Srgrimes#ifdef INET6
661558Srgrimes#include <netinet6/in6_pcb.h>
6713171Swollman#endif
6820287Swollman#include <netinet/in_var.h>
6978064Sume#include <netinet/ip_var.h>
701558Srgrimes#ifdef INET6
711558Srgrimes#include <netinet6/ip6_var.h>
721558Srgrimes#endif
731558Srgrimes#include <netinet/tcp.h>
741558Srgrimes#include <netinet/tcp_fsm.h>
751558Srgrimes#include <netinet/tcp_seq.h>
761558Srgrimes#include <netinet/tcp_timer.h>
771558Srgrimes#include <netinet/tcp_var.h>
781558Srgrimes#include <netinet/tcpip.h>
791558Srgrimes#ifdef TCPDEBUG
801558Srgrimes#include <netinet/tcp_debug.h>
811558Srgrimes#endif
821558Srgrimes
8354263Sshin#ifdef IPSEC
8454263Sshin#include <netinet6/ipsec.h>
8554263Sshin#endif /*IPSEC*/
8617046Sjulian
/*
 * TCP protocol interface to the socket abstraction.
 *
 * The pru_*() handlers below are the entry points the socket layer uses
 * to drive TCP.
 */

/* Table of TCP state names, defined outside this file.  XXX ??? */
extern	char *tcpstates[];
911558Srgrimes
/* Forward declarations of the file-local helpers used by the handlers. */
static int	tcp_attach(struct socket *so, struct thread *td);
static int	tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#ifdef INET6
static int	tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET6 */
static struct tcpcb *
		tcp_disconnect(struct tcpcb *tp);
static struct tcpcb *
		tcp_usrclosed(struct tcpcb *tp);
10378064Sume
/*
 * User-request tracing helpers.  They rely on the enclosing function
 * having locals named `tp' (struct tcpcb *) and `so' (struct socket *).
 *
 *   TCPDEBUG0      declares `ostate', the state remembered for the trace.
 *   TCPDEBUG1()    records tp->t_state in `ostate' (0 when tp is NULL).
 *   TCPDEBUG2(req) calls tcp_trace() for request `req' when tp is non-NULL
 *                  and SO_DEBUG is set on the socket.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it always behaves as a
 * single statement; as a bare `if' it would capture an `else' that
 * follows the macro invocation.
 *
 * Without TCPDEBUG all three expand to nothing.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do {						\
		if (tp && (so->so_options & SO_DEBUG))			\
			tcp_trace(TA_USER, ostate, tp, 0, 0, req);	\
	} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
11413171Swollman
11518286Sbde/*
1161558Srgrimes * TCP attaches to socket via pru_attach(), reserving space,
11713171Swollman * and an internet control block.
1181558Srgrimes */
1191558Srgrimesstatic int
12013171Swollmantcp_usr_attach(struct socket *so, int proto, struct thread *td)
1211558Srgrimes{
12237907Scharnier	int s = splnet();
12313171Swollman	int error;
1241558Srgrimes	struct inpcb *inp;
1251558Srgrimes	struct tcpcb *tp = 0;
1261558Srgrimes	TCPDEBUG0;
1271558Srgrimes
1281558Srgrimes	INP_INFO_WLOCK(&tcbinfo);
1291558Srgrimes	TCPDEBUG1();
1301558Srgrimes	inp = sotoinpcb(so);
1311558Srgrimes	if (inp) {
1321558Srgrimes		error = EISCONN;
1331558Srgrimes		goto out;
1341558Srgrimes	}
1351558Srgrimes
1361558Srgrimes	error = tcp_attach(so, td);
13737907Scharnier	if (error)
1381558Srgrimes		goto out;
1391558Srgrimes
1401558Srgrimes	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1411558Srgrimes		so->so_linger = TCP_LINGERTIME;
1421558Srgrimes
1431558Srgrimes	inp = sotoinpcb(so);
1441558Srgrimes	tp = intotcpcb(inp);
1451558Srgrimesout:
1461558Srgrimes	TCPDEBUG2(PRU_ATTACH);
1471558Srgrimes	INP_INFO_WUNLOCK(&tcbinfo);
1481558Srgrimes	splx(s);
1491558Srgrimes	return error;
1501558Srgrimes}
1511558Srgrimes
1521558Srgrimes/*
1531558Srgrimes * pru_detach() detaches the TCP protocol from the socket.
1541558Srgrimes * If the protocol state is non-embryonic, then can't
1551558Srgrimes * do this directly: have to initiate a pru_disconnect(),
1561558Srgrimes * which may finish later; embryonic TCB's can just
1571558Srgrimes * be discarded here.
1581558Srgrimes */
1591558Srgrimesstatic int
1601558Srgrimestcp_usr_detach(struct socket *so)
1611558Srgrimes{
162109811Skbyanc	int s = splnet();
1631558Srgrimes	int error = 0;
16469793Sobrien	struct inpcb *inp;
1651558Srgrimes	struct tcpcb *tp;
1661558Srgrimes	TCPDEBUG0;
1671558Srgrimes
16813171Swollman	INP_INFO_WLOCK(&tcbinfo);
1691558Srgrimes	inp = sotoinpcb(so);
1701558Srgrimes	if (inp == 0) {
1711558Srgrimes		INP_INFO_WUNLOCK(&tcbinfo);
1721558Srgrimes		splx(s);
1731558Srgrimes		return EINVAL;	/* XXX */
1741558Srgrimes	}
1751558Srgrimes	INP_LOCK(inp);
1761558Srgrimes	tp = intotcpcb(inp);
1771558Srgrimes	TCPDEBUG1();
1781558Srgrimes	tp = tcp_disconnect(tp);
1791558Srgrimes
1801558Srgrimes	TCPDEBUG2(PRU_DETACH);
1811558Srgrimes	if (tp)
1821558Srgrimes		INP_UNLOCK(inp);
1831558Srgrimes	INP_INFO_WUNLOCK(&tcbinfo);
1841558Srgrimes	splx(s);
1851558Srgrimes	return error;
1861558Srgrimes}
1871558Srgrimes
/*
 * Values for the per-function `inirw' local, which tells COMMON_START()
 * and COMMON_END() how to lock tcbinfo.
 */
#define	INI_NOLOCK	0	/* do not lock tcbinfo */
#define	INI_READ	1	/* read-lock tcbinfo */
#define	INI_WRITE	2	/* write-lock tcbinfo */
1911558Srgrimes
/*
 * Common prologue for the pru_*() handlers.
 *
 * Expects these locals in the enclosing function: `so', `s' (saved spl
 * level), `inp', `tp' and `inirw' (one of the INI_* values).
 *
 * Locks tcbinfo as requested by `inirw', looks up the socket's inpcb and
 * returns EINVAL from the enclosing function (after undoing the lock and
 * the spl level) when there is none.  Otherwise the inpcb is locked, a
 * read lock on tcbinfo is dropped again right away, and `tp' is set.
 * A write lock on tcbinfo stays held; COMMON_END() releases it.
 */
#define	COMMON_START()						\
	TCPDEBUG0;						\
	do {							\
		if (inirw == INI_READ)				\
			INP_INFO_RLOCK(&tcbinfo);		\
		else if (inirw == INI_WRITE)			\
			INP_INFO_WLOCK(&tcbinfo);		\
		inp = sotoinpcb(so);				\
		if (inp == NULL) {				\
			if (inirw == INI_READ)			\
				INP_INFO_RUNLOCK(&tcbinfo);	\
			else if (inirw == INI_WRITE)		\
				INP_INFO_WUNLOCK(&tcbinfo);	\
			splx(s);				\
			return (EINVAL);			\
		}						\
		INP_LOCK(inp);					\
		if (inirw == INI_READ)				\
			INP_INFO_RUNLOCK(&tcbinfo);		\
		tp = intotcpcb(inp);				\
		TCPDEBUG1();					\
	} while (0)
2141558Srgrimes
/*
 * Common epilogue for the pru_*() handlers; counterpart of COMMON_START().
 *
 * Defines the `out' label, traces request `req', unlocks the inpcb if
 * `tp' is still non-NULL, releases the tcbinfo write lock when one was
 * taken, restores the spl level and returns `error' from the enclosing
 * function.
 *
 * The `goto out' after the return can never execute.
 * NOTE(review): it is presumably there only so that the `out' label is
 * referenced in handlers that never jump to it -- confirm before removing.
 */
#define	COMMON_END(req)						\
out:	TCPDEBUG2(req);						\
	do {							\
		if (tp != NULL)					\
			INP_UNLOCK(inp);			\
		if (inirw == INI_WRITE)				\
			INP_INFO_WUNLOCK(&tcbinfo);		\
		splx(s);					\
		return (error);					\
		goto out;					\
	} while (0)
22617046Sjulian
2271558Srgrimes/*
2281558Srgrimes * Give the socket an address.
2291558Srgrimes */
2301558Srgrimesstatic int
2311558Srgrimestcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
2321558Srgrimes{
2331558Srgrimes	int s = splnet();
2341558Srgrimes	int error = 0;
235128782Sambrisko	struct inpcb *inp;
2361558Srgrimes	struct tcpcb *tp;
2371558Srgrimes	struct sockaddr_in *sinp;
2381558Srgrimes	const int inirw = INI_WRITE;
2391558Srgrimes
2401558Srgrimes	COMMON_START();
2411558Srgrimes
2421558Srgrimes	/*
24313171Swollman	 * Must check for multicast addresses and disallow binding
2441558Srgrimes	 * to them.
24537907Scharnier	 */
246128782Sambrisko	sinp = (struct sockaddr_in *)nam;
247128782Sambrisko	if (sinp->sin_family == AF_INET &&
248128782Sambrisko	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
249128782Sambrisko		error = EAFNOSUPPORT;
250128782Sambrisko		goto out;
251128782Sambrisko	}
252128782Sambrisko	error = in_pcbbind(inp, nam, td);
25313171Swollman	if (error)
254128782Sambrisko		goto out;
2551558Srgrimes	COMMON_END(PRU_BIND);
2561558Srgrimes}
2571558Srgrimes
#ifdef INET6
/*
 * pru_bind() handler for IPv6 sockets.
 *
 * Returns EINVAL if the socket has no inpcb or if `nam' is not exactly
 * the size of a struct sockaddr_in6, and EAFNOSUPPORT for an AF_INET6
 * multicast address.  Unless the socket is marked IN6P_IPV6_V6ONLY, an
 * unspecified address leaves the pcb usable for both IPv4 and IPv6, and
 * a v4-mapped address is converted and bound through in_pcbbind() as an
 * IPv4 pcb.  Everything else is bound through in6_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;
	const int inirw = INI_WRITE;

	COMMON_START();

	/*
	 * The address is about to be inspected as a sockaddr_in6; reject
	 * anything of a different length before reading sin6_addr or
	 * touching inp_vflag.
	 */
	if (nam->sa_len != sizeof(*sin6p)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
	sin6p = (struct sockaddr_in6 *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	/* Start out as an IPv6-only pcb. */
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
			inp->inp_vflag |= INP_IPV4;
		else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
			struct sockaddr_in sin;

			/* v4-mapped: bind as a plain IPv4 pcb instead. */
			in6_sin6_2_sin(&sin, sin6p);
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			error = in_pcbbind(inp, (struct sockaddr *)&sin, td);
			goto out;
		}
	}
	error = in6_pcbbind(inp, nam, td);
	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
3021558Srgrimes
3031558Srgrimes/*
3041558Srgrimes * Prepare to accept connections.
30592806Sobrien */
30619209Sfennerstatic int
3071558Srgrimestcp_usr_listen(struct socket *so, struct thread *td)
3081558Srgrimes{
30981976Sbrian	int s = splnet();
3101558Srgrimes	int error = 0;
3111558Srgrimes	struct inpcb *inp;
3121558Srgrimes	struct tcpcb *tp;
3131558Srgrimes	const int inirw = INI_WRITE;
31485048Sru
31519209Sfenner	COMMON_START();
3161558Srgrimes	if (inp->inp_lport == 0)
31719209Sfenner		error = in_pcbbind(inp, (struct sockaddr *)0, td);
3181558Srgrimes	if (error == 0)
3191558Srgrimes		tp->t_state = TCPS_LISTEN;
3201558Srgrimes	COMMON_END(PRU_LISTEN);
3211558Srgrimes}
3221558Srgrimes
#ifdef INET6
/*
 * pru_listen() handler for IPv6 sockets.
 *
 * Same as tcp_usr_listen(), except that when no local port is assigned
 * the INP_IPV4 flag is first cleared and then set again unless the
 * socket is IN6P_IPV6_V6ONLY, and the bind goes through in6_pcbbind().
 */
static int
tcp6_usr_listen(struct socket *so, struct thread *td)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	const int inirw = INI_WRITE;

	COMMON_START();
	if (inp->inp_lport == 0) {
		/* Accept IPv4 as well unless restricted to IPv6. */
		inp->inp_vflag &= ~INP_IPV4;
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
			inp->inp_vflag |= INP_IPV4;
		}
		error = in6_pcbbind(inp, (struct sockaddr *)NULL, td);
	}
	if (error == 0) {
		tp->t_state = TCPS_LISTEN;
	}
	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
34577873Sru
34677873Sru/*
3471558Srgrimes * Initiate connection to peer.
3481558Srgrimes * Create a template for use in transmissions on this connection.
3491558Srgrimes * Enter SYN_SENT state, and mark socket as connecting.
35054263Sshin * Start keep-alive timer, and seed output sequence space.
35154263Sshin * Send initial segment on connection.
35278064Sume */
35378064Sumestatic int
35478064Sumetcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
35554263Sshin{
35678064Sume	int s = splnet();
35778064Sume	int error = 0;
35878064Sume	struct inpcb *inp;
35978064Sume	struct tcpcb *tp;
36078064Sume	struct sockaddr_in *sinp;
36178064Sume	const int inirw = INI_WRITE;
36278064Sume
36378064Sume	COMMON_START();
36478064Sume
36578064Sume	/*
36678064Sume	 * Must disallow TCP ``connections'' to multicast addresses.
36778064Sume	 */
36878064Sume	sinp = (struct sockaddr_in *)nam;
36978064Sume	if (sinp->sin_family == AF_INET
37078064Sume	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
37178064Sume		error = EAFNOSUPPORT;
37278064Sume		goto out;
37378064Sume	}
37478064Sume
37578064Sume	if (td && jailed(td->td_ucred))
37678064Sume		prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr);
37778064Sume
37878064Sume	if ((error = tcp_connect(tp, nam, td)) != 0)
37978064Sume		goto out;
38078064Sume	error = tcp_output(tp);
38117046Sjulian	COMMON_END(PRU_CONNECT);
38217046Sjulian}
38317046Sjulian
#ifdef INET6
/*
 * pru_connect() handler for IPv6 sockets.
 *
 * Returns EINVAL if the socket has no inpcb, if `nam' is not exactly the
 * size of a struct sockaddr_in6, or if a v4-mapped destination is given
 * on an IN6P_IPV6_V6ONLY socket; EAFNOSUPPORT for an AF_INET6 multicast
 * destination.  A v4-mapped destination is converted and connected as
 * IPv4 through tcp_connect(); anything else goes through tcp6_connect().
 * On a successful connect the result of tcp_output() is returned.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;
	const int inirw = INI_WRITE;

	COMMON_START();

	/*
	 * The address is about to be inspected as a sockaddr_in6; reject
	 * anything of a different length before reading sin6_addr.
	 */
	if (nam->sa_len != sizeof(*sin6p)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	sin6p = (struct sockaddr_in6 *)nam;
	if (sin6p->sin6_family == AF_INET6
	    && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		/* v4-mapped destinations are not allowed on v6-only sockets. */
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}

		/* Connect as a plain IPv4 pcb. */
		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
			goto out;
		error = tcp_output(tp);
		goto out;
	}

	/* Native IPv6 destination. */
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	inp->inp_inc.inc_isipv6 = 1;
	if ((error = tcp6_connect(tp, nam, td)) != 0)
		goto out;
	error = tcp_output(tp);
	COMMON_END(PRU_CONNECT);
}
#endif /* INET6 */
4321558Srgrimes
4331558Srgrimes/*
4341558Srgrimes * Initiate disconnect from peer.
4351558Srgrimes * If connection never passed embryonic stage, just drop;
4361558Srgrimes * else if don't need to let data drain, then can just drop anyways,
4371558Srgrimes * else have to begin TCP shutdown process: mark socket disconnecting,
4381558Srgrimes * drain unread data, state switch to reflect user close, and
4391558Srgrimes * send segment (e.g. FIN) to peer.  Socket will be really disconnected
4401558Srgrimes * when peer sends FIN and acks ours.
4411558Srgrimes *
4421558Srgrimes * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
4431558Srgrimes */
4441558Srgrimesstatic int
4451558Srgrimestcp_usr_disconnect(struct socket *so)
4461558Srgrimes{
4471558Srgrimes	int s = splnet();
4481558Srgrimes	int error = 0;
4491558Srgrimes	struct inpcb *inp;
4501558Srgrimes	struct tcpcb *tp;
4511558Srgrimes	const int inirw = INI_WRITE;
4521558Srgrimes
4531558Srgrimes	COMMON_START();
4541558Srgrimes	tp = tcp_disconnect(tp);
4551558Srgrimes	COMMON_END(PRU_DISCONNECT);
45677873Sru}
4571558Srgrimes
45819209Sfenner/*
4591558Srgrimes * Accept a connection.  Essentially all the work is
4601558Srgrimes * done at higher levels; just return the address
4611558Srgrimes * of the peer, storing through addr.
4621558Srgrimes */
4631558Srgrimesstatic int
4641558Srgrimestcp_usr_accept(struct socket *so, struct sockaddr **nam)
4651558Srgrimes{
4661558Srgrimes	int s;
4671558Srgrimes	int error = 0;
4681558Srgrimes	struct inpcb *inp = NULL;
4691558Srgrimes	struct tcpcb *tp = NULL;
4701558Srgrimes	struct in_addr addr;
47177873Sru	in_port_t port = 0;
4721558Srgrimes	TCPDEBUG0;
4731558Srgrimes
4741558Srgrimes	if (so->so_state & SS_ISDISCONNECTED) {
47554263Sshin		error = ECONNABORTED;
47654263Sshin		goto out;
47778064Sume	}
47878064Sume
47978064Sume	s = splnet();
48054263Sshin	INP_INFO_RLOCK(&tcbinfo);
48178064Sume	inp = sotoinpcb(so);
48278064Sume	if (!inp) {
48378064Sume		INP_INFO_RUNLOCK(&tcbinfo);
48478064Sume		splx(s);
48578064Sume		return (EINVAL);
48678064Sume	}
48778064Sume	INP_LOCK(inp);
48878064Sume	INP_INFO_RUNLOCK(&tcbinfo);
48978064Sume	tp = intotcpcb(inp);
49078064Sume	TCPDEBUG1();
49178064Sume
49278064Sume	/*
49378064Sume	 * We inline in_setpeeraddr and COMMON_END here, so that we can
49478064Sume	 * copy the data of interest and defer the malloc until after we
49578064Sume	 * release the lock.
49678064Sume	 */
49778064Sume	port = inp->inp_fport;
49878064Sume	addr = inp->inp_faddr;
49978064Sume
50078064Sumeout:	TCPDEBUG2(PRU_ACCEPT);
50178064Sume	if (tp)
50278064Sume		INP_UNLOCK(inp);
50378064Sume	splx(s);
50478064Sume	if (error == 0)
50578064Sume		*nam = in_sockaddr(port, &addr);
50617046Sjulian	return error;
50717046Sjulian}
50817046Sjulian
#ifdef INET6
/*
 * pru_accept() handler for IPv6 sockets: return the peer address through
 * `nam'.
 *
 * Returns ECONNABORTED if the socket is already marked
 * SS_ISDISCONNECTED, EINVAL if it has no inpcb, and 0 otherwise.  On
 * success *nam comes from in6_v4mapsin6_sockaddr() when the pcb has
 * INP_IPV4 set, and from in6_sockaddr() otherwise.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	/*
	 * Raise the spl level up front, as every other handler here does,
	 * so that the splx() at `out' always restores a value that was
	 * really saved -- including on the early ECONNABORTED exit.
	 */
	int s = splnet();
	struct inpcb *inp = NULL;
	int error = 0;
	struct tcpcb *tp = NULL;
	struct in_addr addr;
	struct in6_addr addr6;
	in_port_t port = 0;
	int v4 = 0;
	TCPDEBUG0;

	if (so->so_state & SS_ISDISCONNECTED) {
		error = ECONNABORTED;
		goto out;
	}

	INP_INFO_RLOCK(&tcbinfo);
	inp = sotoinpcb(so);
	if (inp == 0) {
		INP_INFO_RUNLOCK(&tcbinfo);
		splx(s);
		return (EINVAL);
	}
	INP_LOCK(inp);
	INP_INFO_RUNLOCK(&tcbinfo);
	tp = intotcpcb(inp);
	TCPDEBUG1();
	/*
	 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
	 * copy the data of interest and defer the malloc until after we
	 * release the lock.
	 */
	if (inp->inp_vflag & INP_IPV4) {
		v4 = 1;
		port = inp->inp_fport;
		addr = inp->inp_faddr;
	} else {
		port = inp->inp_fport;
		addr6 = inp->in6p_faddr;
	}

out:	TCPDEBUG2(PRU_ACCEPT);
	if (tp)
		INP_UNLOCK(inp);
	splx(s);
	/* `addr' / `addr6' are only valid, and only used, on success. */
	if (error == 0) {
		if (v4)
			*nam = in6_v4mapsin6_sockaddr(port, &addr);
		else
			*nam = in6_sockaddr(port, &addr6);
	}
	return error;
}
#endif /* INET6 */
5671558Srgrimes
5681558Srgrimes/*
5691558Srgrimes * This is the wrapper function for in_setsockaddr. We just pass down
57013171Swollman * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
5711558Srgrimes * here because in_setsockaddr will call malloc and can block.
5721558Srgrimes */
5731558Srgrimesstatic int
574146079Sjmalletttcp_sockaddr(struct socket *so, struct sockaddr **nam)
5751558Srgrimes{
5761558Srgrimes	return (in_setsockaddr(so, nam, &tcbinfo));
5771558Srgrimes}
5781558Srgrimes
5791558Srgrimes/*
5801558Srgrimes * This is the wrapper function for in_setpeeraddr. We just pass down
5811558Srgrimes * the pcbinfo for in_setpeeraddr to lock.
5821558Srgrimes */
5831558Srgrimesstatic int
5841558Srgrimestcp_peeraddr(struct socket *so, struct sockaddr **nam)
5851558Srgrimes{
58654263Sshin	return (in_setpeeraddr(so, nam, &tcbinfo));
58754263Sshin}
58854263Sshin
58954263Sshin/*
59054263Sshin * Mark the connection as being incapable of further output.
59154263Sshin */
59217046Sjulianstatic int
59317046Sjuliantcp_usr_shutdown(struct socket *so)
59417046Sjulian{
59517046Sjulian	int s = splnet();
5961558Srgrimes	int error = 0;
5971558Srgrimes	struct inpcb *inp;
5981558Srgrimes	struct tcpcb *tp;
5991558Srgrimes	const int inirw = INI_WRITE;
6001558Srgrimes
6011558Srgrimes	COMMON_START();
6021558Srgrimes	socantsendmore(so);
6032787Spst	tp = tcp_usrclosed(tp);
6041558Srgrimes	if (tp)
6051558Srgrimes		error = tcp_output(tp);
6061558Srgrimes	COMMON_END(PRU_SHUTDOWN);
60717591Sjulian}
60817591Sjulian
60917591Sjulian/*
6101558Srgrimes * After a receive, possibly send window update to peer.
6111558Srgrimes */
6121558Srgrimesstatic int
6131558Srgrimestcp_usr_rcvd(struct socket *so, int flags)
6141558Srgrimes{
6151558Srgrimes	int s = splnet();
6161558Srgrimes	int error = 0;
6171558Srgrimes	struct inpcb *inp;
6181558Srgrimes	struct tcpcb *tp;
6191558Srgrimes	const int inirw = INI_READ;
6201558Srgrimes
6211558Srgrimes	COMMON_START();
6221558Srgrimes	tcp_output(tp);
6231558Srgrimes	COMMON_END(PRU_RCVD);
6241558Srgrimes}
6251558Srgrimes
6261558Srgrimes/*
6271558Srgrimes * Do a send by putting data in output queue and updating urgent
6281558Srgrimes * marker if URG set.  Possibly send more data.  Unlike the other
6291558Srgrimes * pru_*() routines, the mbuf chains are our responsibility.  We
6301558Srgrimes * must either enqueue them or free them.  The other pru_* routines
63178140Sru * generally are caller-frees.
63278140Sru */
63378140Srustatic int
6341558Srgrimestcp_usr_send(struct socket *so, int flags, struct mbuf *m,
6351558Srgrimes	     struct sockaddr *nam, struct mbuf *control, struct thread *td)
6361558Srgrimes{
6371558Srgrimes	int s = splnet();
6381558Srgrimes	int error = 0;
6391558Srgrimes	struct inpcb *inp;
6401558Srgrimes	struct tcpcb *tp;
6411558Srgrimes	const int inirw = INI_WRITE;
6421558Srgrimes#ifdef INET6
6431558Srgrimes	int isipv6;
64447668Sru#endif
64547668Sru	TCPDEBUG0;
6461558Srgrimes
6471558Srgrimes	/*
6481558Srgrimes	 * Need write lock here because this function might call
64947668Sru	 * tcp_connect or tcp_usrclosed.
65047668Sru	 * We really want to have to this function upgrade from read lock
6511558Srgrimes	 * to write lock.  XXX
6521558Srgrimes	 */
6531558Srgrimes	INP_INFO_WLOCK(&tcbinfo);
65447668Sru	inp = sotoinpcb(so);
65547668Sru	if (inp == NULL) {
6561558Srgrimes		/*
6571558Srgrimes		 * OOPS! we lost a race, the TCP session got reset after
6581558Srgrimes		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
65947668Sru		 * network interrupt in the non-splnet() section of sosend().
66047668Sru		 */
6611558Srgrimes		if (m)
6621558Srgrimes			m_freem(m);
6631558Srgrimes		if (control)
66447668Sru			m_freem(control);
66547668Sru		error = ECONNRESET;	/* XXX EPIPE? */
6661558Srgrimes		tp = NULL;
6671558Srgrimes		TCPDEBUG1();
6681558Srgrimes		goto out;
6691558Srgrimes	}
67047668Sru	INP_LOCK(inp);
67147668Sru#ifdef INET6
6721558Srgrimes	isipv6 = nam && nam->sa_family == AF_INET6;
6731558Srgrimes#endif /* INET6 */
6741558Srgrimes	tp = intotcpcb(inp);
6751558Srgrimes	TCPDEBUG1();
6761558Srgrimes	if (control) {
67754263Sshin		/* TCP doesn't do control messages (rights, creds, etc) */
67854263Sshin		if (control->m_len) {
67954263Sshin			m_freem(control);
68054263Sshin			if (m)
68154263Sshin				m_freem(m);
68254263Sshin			error = EINVAL;
68354263Sshin			goto out;
68454263Sshin		}
68554263Sshin		m_freem(control);	/* empty control, just free it */
68654263Sshin	}
68754263Sshin	if (!(flags & PRUS_OOB)) {
6881558Srgrimes		sbappendstream(&so->so_snd, m);
6891558Srgrimes		if (nam && tp->t_state < TCPS_SYN_SENT) {
6901558Srgrimes			/*
6911558Srgrimes			 * Do implied connect if not yet connected,
6921558Srgrimes			 * initialize window to default value, and
6931558Srgrimes			 * initialize maxseg/maxopd using peer's cached
6941558Srgrimes			 * MSS.
6951558Srgrimes			 */
69647668Sru#ifdef INET6
69747668Sru			if (isipv6)
6981558Srgrimes				error = tcp6_connect(tp, nam, td);
6991558Srgrimes			else
7001558Srgrimes#endif /* INET6 */
7011558Srgrimes			error = tcp_connect(tp, nam, td);
7021558Srgrimes			if (error)
7031558Srgrimes				goto out;
7041558Srgrimes			tp->snd_wnd = TTCP_CLIENT_SND_WND;
7051558Srgrimes			tcp_mss(tp, -1);
7061558Srgrimes		}
7071558Srgrimes
7081558Srgrimes		if (flags & PRUS_EOF) {
7091558Srgrimes			/*
7101558Srgrimes			 * Close the send side of the connection after
7111558Srgrimes			 * the data is sent.
71279588Sru			 */
7131558Srgrimes			socantsendmore(so);
7141558Srgrimes			tp = tcp_usrclosed(tp);
7151558Srgrimes		}
71654263Sshin		if (tp != NULL) {
7171558Srgrimes			if (flags & PRUS_MORETOCOME)
71854263Sshin				tp->t_flags |= TF_MORETOCOME;
71954263Sshin			error = tcp_output(tp);
72054263Sshin			if (flags & PRUS_MORETOCOME)
72154263Sshin				tp->t_flags &= ~TF_MORETOCOME;
72254263Sshin		}
72354263Sshin	} else {
72454263Sshin		if (sbspace(&so->so_snd) < -512) {
7251558Srgrimes			m_freem(m);
7261558Srgrimes			error = ENOBUFS;
7271558Srgrimes			goto out;
7281558Srgrimes		}
7291558Srgrimes		/*
7301558Srgrimes		 * According to RFC961 (Assigned Protocols),
7311558Srgrimes		 * the urgent pointer points to the last octet
73278140Sru		 * of urgent data.  We continue, however,
73378140Sru		 * to consider it to indicate the first octet
73478140Sru		 * of data past the urgent section.
73578140Sru		 * Otherwise, snd_up should be one lower.
7361558Srgrimes		 */
7371558Srgrimes		sbappendstream(&so->so_snd, m);
7381558Srgrimes		if (nam && tp->t_state < TCPS_SYN_SENT) {
7391558Srgrimes			/*
7401558Srgrimes			 * Do implied connect if not yet connected,
7411558Srgrimes			 * initialize window to default value, and
7421558Srgrimes			 * initialize maxseg/maxopd using peer's cached
7431558Srgrimes			 * MSS.
74485048Sru			 */
74531958Simp#ifdef INET6
7461558Srgrimes			if (isipv6)
7471558Srgrimes				error = tcp6_connect(tp, nam, td);
7481558Srgrimes			else
7491558Srgrimes#endif /* INET6 */
7501558Srgrimes			error = tcp_connect(tp, nam, td);
75197278Sru			if (error)
75297278Sru				goto out;
75397278Sru			tp->snd_wnd = TTCP_CLIENT_SND_WND;
75497278Sru			tcp_mss(tp, -1);
75597278Sru		}
75697278Sru		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
75797278Sru		tp->t_force = 1;
75897278Sru		error = tcp_output(tp);
7591558Srgrimes		tp->t_force = 0;
76097278Sru	}
76197278Sru	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
76297278Sru		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
76397278Sru}
76497278Sru
76597278Sru/*
76697278Sru * Abort the TCP.
76797278Sru */
76897278Srustatic int
76997278Srutcp_usr_abort(struct socket *so)
77097278Sru{
77197278Sru	int s = splnet();
77297278Sru	int error = 0;
77397278Sru	struct inpcb *inp;
77497278Sru	struct tcpcb *tp;
77597278Sru	const int inirw = INI_WRITE;
77697278Sru
77797278Sru	COMMON_START();
77897278Sru	tp = tcp_drop(tp, ECONNABORTED);
77997278Sru	COMMON_END(PRU_ABORT);
78097278Sru}
78197278Sru
7821558Srgrimes/*
78397278Sru * Receive out-of-band data.
7841558Srgrimes */
7851558Srgrimesstatic int
7861558Srgrimestcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
78724558Sphk{
78824558Sphk	int s = splnet();
78992806Sobrien	int error = 0;
7901558Srgrimes	struct inpcb *inp;
7911558Srgrimes	struct tcpcb *tp;
79292806Sobrien	const int inirw = INI_READ;
7931558Srgrimes
7941558Srgrimes	COMMON_START();
79577904Sru	if ((so->so_oobmark == 0 &&
79666448Sru	     (so->so_state & SS_RCVATMARK) == 0) ||
79766448Sru	    so->so_options & SO_OOBINLINE ||
7981558Srgrimes	    tp->t_oobflags & TCPOOB_HADDATA) {
7991558Srgrimes		error = EINVAL;
8001558Srgrimes		goto out;
8011558Srgrimes	}
8021558Srgrimes	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
8031558Srgrimes		error = EWOULDBLOCK;
8041558Srgrimes		goto out;
8051558Srgrimes	}
8061558Srgrimes	m->m_len = 1;
8071558Srgrimes	*mtod(m, caddr_t) = tp->t_iobc;
8081558Srgrimes	if ((flags & MSG_PEEK) == 0)
8091558Srgrimes		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
8101558Srgrimes	COMMON_END(PRU_RCVOOB);
8111558Srgrimes}
8121558Srgrimes
8131558Srgrimes/* xxx - should be const */
8141558Srgrimesstruct pr_usrreqs tcp_usrreqs = {
8151558Srgrimes	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
8161558Srgrimes	tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
81777904Sru	tcp_usr_disconnect, tcp_usr_listen, tcp_peeraddr, tcp_usr_rcvd,
81877904Sru	tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
8191558Srgrimes	tcp_sockaddr, sosend, soreceive, sopoll
8201558Srgrimes};
8211558Srgrimes
#ifdef INET6
/*
 * User-request switch for TCP over IPv6.  Handlers with no IPv6-specific
 * variant are shared with tcp_usrreqs.  The initializers are positional,
 * so their order must match the member order of struct pr_usrreqs.
 */
struct pr_usrreqs tcp6_usrreqs = {
	tcp_usr_abort,
	tcp6_usr_accept,
	tcp_usr_attach,
	tcp6_usr_bind,
	tcp6_usr_connect,
	pru_connect2_notsupp,
	in6_control,
	tcp_usr_detach,
	tcp_usr_disconnect,
	tcp6_usr_listen,
	in6_mapped_peeraddr,
	tcp_usr_rcvd,
	tcp_usr_rcvoob,
	tcp_usr_send,
	pru_sense_null,
	tcp_usr_shutdown,
	in6_mapped_sockaddr,
	sosend,
	soreceive,
	sopoll
};
#endif /* INET6 */
8311558Srgrimes
83296997Sume/*
83396997Sume * Common subroutine to open a TCP connection to remote host specified
83497062Sume * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
83597073Sume * port number if needed.  Call in_pcbconnect_setup to do the routing and
83696997Sume * to choose a local host address (interface).  If there is an existing
83797073Sume * incarnation of the same connection in TIME-WAIT state and if the remote
83896997Sume * host was sending CC options and if the connection duration was < MSL, then
83996997Sume * truncate the previous TIME-WAIT state and proceed.
84096997Sume * Initialize connection parameters and enter SYN-SENT state.
84197073Sume */
84297073Sumestatic int
84397073Sumetcp_connect(tp, nam, td)
84497073Sume	register struct tcpcb *tp;
84597073Sume	struct sockaddr *nam;
84697073Sume	struct thread *td;
84797073Sume{
84897073Sume	struct inpcb *inp = tp->t_inpcb, *oinp;
84997073Sume	struct socket *so = inp->inp_socket;
85097073Sume	struct tcptw *otw;
85197073Sume	struct rmxp_tao *taop;
85296997Sume	struct rmxp_tao tao_noncached;
85396997Sume	struct in_addr laddr;
85498053Sume	u_short lport;
85598053Sume	int error;
85698053Sume
85798053Sume	if (inp->inp_lport == 0) {
85897062Sume		error = in_pcbbind(inp, (struct sockaddr *)0, td);
85996997Sume		if (error)
86096997Sume			return error;
86196997Sume	}
86296997Sume
8631558Srgrimes	/*
8641558Srgrimes	 * Cannot simply call in_pcbconnect, because there might be an
8651558Srgrimes	 * earlier incarnation of this same connection still in
8661558Srgrimes	 * TIME_WAIT state, creating an ADDRINUSE error.
8671558Srgrimes	 */
8681558Srgrimes	laddr = inp->inp_laddr;
8691558Srgrimes	lport = inp->inp_lport;
8701558Srgrimes	error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
8711558Srgrimes	    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td);
87292806Sobrien	if (error && oinp == NULL)
8731558Srgrimes		return error;
8741558Srgrimes	if (oinp) {
8751558Srgrimes		if (oinp != inp &&
87666449Sru		    (oinp->inp_vflag & INP_TIMEWAIT) &&
87754263Sshin		    (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl &&
8781558Srgrimes		    otw->cc_recv != 0) {
8791558Srgrimes			inp->inp_faddr = oinp->inp_faddr;
8801558Srgrimes			inp->inp_fport = oinp->inp_fport;
8811558Srgrimes			(void) tcp_twclose(otw, 0);
8821558Srgrimes		} else
88354263Sshin			return EADDRINUSE;
8841558Srgrimes	}
8851558Srgrimes	inp->inp_laddr = laddr;
8861558Srgrimes	in_pcbrehash(inp);
8871558Srgrimes
8881558Srgrimes	/* Compute window scaling to request.  */
8891558Srgrimes	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
8901558Srgrimes	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
89117486Sjulian		tp->request_r_scale++;
89278064Sume
89378064Sume	soisconnecting(so);
89417486Sjulian	tcpstat.tcps_connattempt++;
89578064Sume	tp->t_state = TCPS_SYN_SENT;
89678064Sume	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
89717486Sjulian	tp->iss = tcp_new_isn(tp);
89878064Sume	tp->t_bw_rtseq = tp->iss;
89978064Sume	tcp_sendseqinit(tp);
90078064Sume
90117486Sjulian	/*
90278139Sru	 * Generate a CC value for this connection and
90378064Sume	 * check whether CC or CCnew should be used.
90478064Sume	 */
90578064Sume	if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) {
90617486Sjulian		taop = &tao_noncached;
90717486Sjulian		bzero(taop, sizeof(*taop));
90817486Sjulian	}
90978064Sume
91078064Sume	tp->cc_send = CC_INC(tcp_ccgen);
91178064Sume	if (taop->tao_ccsent != 0 &&
91278064Sume	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
91378064Sume		taop->tao_ccsent = tp->cc_send;
91478064Sume	} else {
91578064Sume		taop->tao_ccsent = 0;
91678064Sume		tp->t_flags |= TF_SENDCCNEW;
91778064Sume	}
91878064Sume
91917486Sjulian	return 0;
92017486Sjulian}
9211558Srgrimes
#ifdef INET6
/*
 * IPv6 counterpart of tcp_connect(): open a TCP connection to the
 * remote host named by the sockaddr_in6 behind nam.
 *
 * If the pcb has no local port yet, in6_pcbbind() is called to assign
 * one.  in6_pcbladdr() supplies a local address, and the pcb hash is
 * searched for a pcb already using the same connection.  Such an earlier
 * incarnation may be displaced only when it is a different pcb in
 * TIME-WAIT state, has existed for less than tcp_msl ticks and has a
 * non-zero cc_recv; it is then closed with tcp_twclose().  Any other
 * pre-existing pcb fails the call with EADDRINUSE.
 *
 * On success the connection parameters are initialized, the tcpcb enters
 * SYN-SENT state and 0 is returned; otherwise an errno value is returned.
 */
static int
tcp6_connect(tp, nam, td)
	register struct tcpcb *tp;
	struct sockaddr *nam;
	struct thread *td;
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcptw *otw;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct in6_addr *addr6;
	struct in6_addr *lookup_laddr;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error;

	/* Make sure we own a local port before looking for conflicts. */
	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, td);
		if (error != 0)
			return (error);
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	error = in6_pcbladdr(inp, nam, &addr6);
	if (error != 0)
		return (error);

	/* Look up with the bound local address, or the chosen one if unbound. */
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		lookup_laddr = addr6;
	else
		lookup_laddr = &inp->in6p_laddr;
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
	    &sin6->sin6_addr, sin6->sin6_port,
	    lookup_laddr, inp->inp_lport, 0, NULL);
	if (oinp != NULL) {
		/* Only a foreign TIME-WAIT pcb is a candidate for recycling. */
		if (oinp == inp || (oinp->inp_vflag & INP_TIMEWAIT) == 0)
			return (EADDRINUSE);
		otw = intotw(oinp);
		if ((ticks - otw->t_starttime) >= tcp_msl ||
		    otw->cc_recv == 0)
			return (EADDRINUSE);

		/* Take over the old incarnation's peer and retire it. */
		inp->inp_faddr = oinp->inp_faddr;
		inp->inp_fport = oinp->inp_fport;
		(void) tcp_twclose(otw, 0);
	}

	/* Commit the addressing information to the pcb and rehash it. */
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		inp->in6p_laddr = *addr6;
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
		inp->in6p_flowinfo = sin6->sin6_flowinfo;
	in_pcbrehash(inp);

	/*
	 * Compute window scaling to request: grow the shift until the
	 * scaled maximum window covers the receive buffer high-water mark
	 * or the shift limit is reached.
	 */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) {
		tp->request_r_scale++;
	}

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
	tp->iss = tcp_new_isn(tp);
	tp->t_bw_rtseq = tp->iss;
	tcp_sendseqinit(tp);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 * Without a TAO cache entry, work on a zeroed local stand-in so
	 * that taop can be used unconditionally below.
	 */
	taop = tcp_gettaocache(&tp->t_inpcb->inp_inc);
	if (taop == NULL) {
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent == 0 ||
	    !(CC_GEQ(tp->cc_send, taop->tao_ccsent))) {
		/* No usable previous CC: forget it and ask for CCnew. */
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	} else {
		taop->tao_ccsent = tp->cc_send;
	}

	return (0);
}
#endif /* INET6 */
10111558Srgrimes
10121558Srgrimes/*
10131558Srgrimes * The new sockopt interface makes it possible for us to block in the
10141558Srgrimes * copyin/out step (if we take a page fault).  Taking a page fault at
10151558Srgrimes * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now
10161558Srgrimes * use TSM, there probably isn't any need for this function to run at
10171558Srgrimes * splnet() any more.  This needs more examination.)
10181558Srgrimes */
10191558Srgrimesint
10201558Srgrimestcp_ctloutput(so, sopt)
10211558Srgrimes	struct socket *so;
10221558Srgrimes	struct sockopt *sopt;
10231558Srgrimes{
102424558Sphk	int	error, opt, optval, s;
102524558Sphk	struct	inpcb *inp;
102624558Sphk	struct	tcpcb *tp;
102724558Sphk
102877904Sru	error = 0;
102924558Sphk	s = splnet();		/* XXX */
103077904Sru	INP_INFO_RLOCK(&tcbinfo);
103124558Sphk	inp = sotoinpcb(so);
103224558Sphk	if (inp == NULL) {
103366449Sru		INP_INFO_RUNLOCK(&tcbinfo);
103424558Sphk		splx(s);
103566449Sru		return (ECONNRESET);
103679588Sru	}
103779588Sru	INP_LOCK(inp);
1038130569Sbms	INP_INFO_RUNLOCK(&tcbinfo);
103966449Sru	if (sopt->sopt_level != IPPROTO_TCP) {
10401558Srgrimes#ifdef INET6
10411558Srgrimes		if (INP_CHECK_SOCKAF(so, AF_INET6))
10421558Srgrimes			error = ip6_ctloutput(so, sopt);
10431558Srgrimes		else
10441558Srgrimes#endif /* INET6 */
10451558Srgrimes		error = ip_ctloutput(so, sopt);
104666449Sru		INP_UNLOCK(inp);
104766449Sru		splx(s);
104866449Sru		return (error);
10491558Srgrimes	}
105066449Sru	tp = intotcpcb(inp);
10511558Srgrimes
10521558Srgrimes	switch (sopt->sopt_dir) {
10531558Srgrimes	case SOPT_SET:
10541558Srgrimes		switch (sopt->sopt_name) {
10551558Srgrimes		case TCP_NODELAY:
10561558Srgrimes		case TCP_NOOPT:
105785048Sru			error = sooptcopyin(sopt, &optval, sizeof optval,
105832008Simp					    sizeof optval);
10591558Srgrimes			if (error)
10601558Srgrimes				break;
106113515Smpp
10621558Srgrimes			switch (sopt->sopt_name) {
10631558Srgrimes			case TCP_NODELAY:
106454263Sshin				opt = TF_NODELAY;
106554263Sshin				break;
106654263Sshin			case TCP_NOOPT:
106754263Sshin				opt = TF_NOOPT;
106854263Sshin				break;
106954263Sshin			default:
107054263Sshin				opt = 0; /* dead code to fool gcc */
10711558Srgrimes				break;
107254263Sshin			}
107354263Sshin
107454263Sshin			if (optval)
107554263Sshin				tp->t_flags |= opt;
107654263Sshin			else
107754263Sshin				tp->t_flags &= ~opt;
107854263Sshin			break;
107954263Sshin
108054263Sshin		case TCP_NOPUSH:
108154263Sshin			error = sooptcopyin(sopt, &optval, sizeof optval,
108254263Sshin					    sizeof optval);
108354263Sshin			if (error)
108454263Sshin				break;
108554263Sshin
108654263Sshin			if (optval)
108754263Sshin				tp->t_flags |= TF_NOPUSH;
108854263Sshin			else {
108954263Sshin				tp->t_flags &= ~TF_NOPUSH;
109054263Sshin				error = tcp_output(tp);
109154263Sshin			}
109254263Sshin			break;
109354263Sshin
109454263Sshin		case TCP_MAXSEG:
109554263Sshin			error = sooptcopyin(sopt, &optval, sizeof optval,
109654263Sshin					    sizeof optval);
109754263Sshin			if (error)
109854263Sshin				break;
109954263Sshin
110054263Sshin			if (optval > 0 && optval <= tp->t_maxseg)
110154263Sshin				tp->t_maxseg = optval;
110254263Sshin			else
110354263Sshin				error = EINVAL;
110454263Sshin			break;
110554263Sshin
110654263Sshin		default:
110754263Sshin			error = ENOPROTOOPT;
110854263Sshin			break;
110954263Sshin		}
11101558Srgrimes		break;
11111558Srgrimes
11121558Srgrimes	case SOPT_GET:
11131558Srgrimes		switch (sopt->sopt_name) {
11141558Srgrimes		case TCP_NODELAY:
1115128782Sambrisko			optval = tp->t_flags & TF_NODELAY;
111692806Sobrien			break;
11171558Srgrimes		case TCP_MAXSEG:
1118128782Sambrisko			optval = tp->t_maxseg;
11191558Srgrimes			break;
11201558Srgrimes		case TCP_NOOPT:
11211558Srgrimes			optval = tp->t_flags & TF_NOOPT;
11221558Srgrimes			break;
11231558Srgrimes		case TCP_NOPUSH:
11241558Srgrimes			optval = tp->t_flags & TF_NOPUSH;
11251558Srgrimes			break;
112613171Swollman		default:
11271558Srgrimes			error = ENOPROTOOPT;
112837907Scharnier			break;
1129128782Sambrisko		}
1130128782Sambrisko		if (error == 0)
1131128782Sambrisko			error = sooptcopyout(sopt, &optval, sizeof optval);
1132128782Sambrisko		break;
1133128782Sambrisko	}
1134128782Sambrisko	INP_UNLOCK(inp);
1135128782Sambrisko	splx(s);
113613171Swollman	return (error);
1137128782Sambrisko}
11381558Srgrimes
11391558Srgrimes/*
11401558Srgrimes * tcp_sendspace and tcp_recvspace are the default send and receive window
11411558Srgrimes * sizes, respectively.  These are obsolescent (this information should
11421558Srgrimes * be set by the route).
11431558Srgrimes */
11441558Srgrimesu_long	tcp_sendspace = 1024*32;
11451558SrgrimesSYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
11461558Srgrimes    &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
11471558Srgrimesu_long	tcp_recvspace = 1024*64;
11481558SrgrimesSYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
11491558Srgrimes    &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
11501558Srgrimes
11511558Srgrimes/*
11521558Srgrimes * Attach TCP protocol to socket, allocating
11531558Srgrimes * internet protocol control block, tcp control block,
11541558Srgrimes * bufer space, and entering LISTEN state if to accept connections.
11551558Srgrimes */
11561558Srgrimesstatic int
115754263Sshintcp_attach(so, td)
11581558Srgrimes	struct socket *so;
115954263Sshin	struct thread *td;
116071061Sphk{
11611558Srgrimes	register struct tcpcb *tp;
11621558Srgrimes	struct inpcb *inp;
11631558Srgrimes	int error;
11641558Srgrimes#ifdef INET6
11651558Srgrimes	int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
11661558Srgrimes#endif
11671558Srgrimes
11681558Srgrimes	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
11691558Srgrimes		error = soreserve(so, tcp_sendspace, tcp_recvspace);
11701558Srgrimes		if (error)
11711558Srgrimes			return (error);
11721558Srgrimes	}
11731558Srgrimes	error = in_pcballoc(so, &tcbinfo, td);
11741558Srgrimes	if (error)
11751558Srgrimes		return (error);
117692806Sobrien	inp = sotoinpcb(so);
117792806Sobrien#ifdef INET6
11781558Srgrimes	if (isipv6) {
11791558Srgrimes		inp->inp_vflag |= INP_IPV6;
11801558Srgrimes		inp->in6p_hops = -1;	/* use kernel default */
1181128186Sluigi	}
11821558Srgrimes	else
11831558Srgrimes#endif
11841558Srgrimes	inp->inp_vflag |= INP_IPV4;
11851558Srgrimes	tp = tcp_newtcpcb(inp);
118685048Sru	if (tp == 0) {
11871558Srgrimes		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
11881558Srgrimes
11891558Srgrimes		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
11901558Srgrimes#ifdef INET6
11911558Srgrimes		if (isipv6)
11921558Srgrimes			in6_pcbdetach(inp);
11931558Srgrimes		else
119413171Swollman#endif
119513171Swollman		in_pcbdetach(inp);
11961558Srgrimes		so->so_state |= nofd;
11971558Srgrimes		return (ENOBUFS);
11981558Srgrimes	}
11991558Srgrimes	tp->t_state = TCPS_CLOSED;
12001558Srgrimes	return (0);
12011558Srgrimes}
12021558Srgrimes
12031558Srgrimes/*
12041558Srgrimes * Initiate (or continue) disconnect.
12051558Srgrimes * If embryonic state, just send reset (once).
12061558Srgrimes * If in ``let data drain'' option and linger null, just drop.
12071558Srgrimes * Otherwise (hard), mark socket disconnecting and drop
12081558Srgrimes * current input data; switch states based on user close, and
12091558Srgrimes * send segment to peer (with FIN).
12101558Srgrimes */
12111558Srgrimesstatic struct tcpcb *
12121558Srgrimestcp_disconnect(tp)
12131558Srgrimes	register struct tcpcb *tp;
12141558Srgrimes{
12151558Srgrimes	struct socket *so = tp->t_inpcb->inp_socket;
12161558Srgrimes
12171558Srgrimes	if (tp->t_state < TCPS_ESTABLISHED)
12181558Srgrimes		tp = tcp_close(tp);
12191558Srgrimes	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
12201558Srgrimes		tp = tcp_drop(tp, 0);
12211558Srgrimes	else {
12221558Srgrimes		soisdisconnecting(so);
1223129034Scsjp		sbflush(&so->so_rcv);
1224129034Scsjp		tp = tcp_usrclosed(tp);
122537907Scharnier		if (tp)
12261558Srgrimes			(void) tcp_output(tp);
12271558Srgrimes	}
12281558Srgrimes	return (tp);
12291558Srgrimes}
12301558Srgrimes
12311558Srgrimes/*
12321558Srgrimes * User issued close, and wish to trail through shutdown states:
123313171Swollman * if never received SYN, just forget it.  If got a SYN from peer,
12341558Srgrimes * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
12351558Srgrimes * If already got a FIN from peer, then almost done; go to LAST_ACK
12361558Srgrimes * state.  In all other cases, have already sent FIN to peer (e.g.
12371558Srgrimes * after PRU_SHUTDOWN), and just have to play tedious game waiting
12381558Srgrimes * for peer to send FIN or not respond to keep-alives, etc.
12391558Srgrimes * We can let the user exit from the close as soon as the FIN is acked.
12401558Srgrimes */
12411558Srgrimesstatic struct tcpcb *
12421558Srgrimestcp_usrclosed(tp)
12431558Srgrimes	register struct tcpcb *tp;
12441558Srgrimes{
124592806Sobrien
12461558Srgrimes	switch (tp->t_state) {
12471558Srgrimes
12481558Srgrimes	case TCPS_CLOSED:
12491558Srgrimes	case TCPS_LISTEN:
12501558Srgrimes		tp->t_state = TCPS_CLOSED;
12511558Srgrimes		tp = tcp_close(tp);
12521558Srgrimes		break;
12531558Srgrimes
12541558Srgrimes	case TCPS_SYN_SENT:
12551558Srgrimes	case TCPS_SYN_RECEIVED:
125654263Sshin		tp->t_flags |= TF_NEEDFIN;
125754263Sshin		break;
125854263Sshin
125917046Sjulian	case TCPS_ESTABLISHED:
12601558Srgrimes		tp->t_state = TCPS_FIN_WAIT_1;
12611558Srgrimes		break;
12621558Srgrimes
12631558Srgrimes	case TCPS_CLOSE_WAIT:
12641558Srgrimes		tp->t_state = TCPS_LAST_ACK;
12651558Srgrimes		break;
12661558Srgrimes	}
12671558Srgrimes	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
12681558Srgrimes		soisdisconnected(tp->t_inpcb->inp_socket);
12691558Srgrimes		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
12701558Srgrimes		if (tp->t_state == TCPS_FIN_WAIT_2)
12711558Srgrimes			callout_reset(tp->tt_2msl, tcp_maxidle,
12721558Srgrimes				      tcp_timer_2msl, tp);
12731558Srgrimes	}
12741558Srgrimes	return (tp);
12751558Srgrimes}
12761558Srgrimes
12771558Srgrimes