tcp_usrreq.c revision 75620
1185377Ssam/*
2187831Ssam * Copyright (c) 1982, 1986, 1988, 1993
3185377Ssam *	The Regents of the University of California.  All rights reserved.
4185377Ssam *
5185377Ssam * Redistribution and use in source and binary forms, with or without
6185377Ssam * modification, are permitted provided that the following conditions
7185377Ssam * are met:
8185377Ssam * 1. Redistributions of source code must retain the above copyright
9185377Ssam *    notice, this list of conditions and the following disclaimer.
10185377Ssam * 2. Redistributions in binary form must reproduce the above copyright
11185377Ssam *    notice, this list of conditions and the following disclaimer in the
12185377Ssam *    documentation and/or other materials provided with the distribution.
13185377Ssam * 3. All advertising materials mentioning features or use of this software
14185377Ssam *    must display the following acknowledgement:
15185377Ssam *	This product includes software developed by the University of
16185377Ssam *	California, Berkeley and its contributors.
17187831Ssam * 4. Neither the name of the University nor the names of its contributors
18185377Ssam *    may be used to endorse or promote products derived from this software
19185377Ssam *    without specific prior written permission.
20185377Ssam *
21185377Ssam * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22185377Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23185377Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24185377Ssam * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25185377Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26185377Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27185377Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28185377Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29185377Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30185377Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31185377Ssam * SUCH DAMAGE.
32185377Ssam *
33185377Ssam *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
34220442Sadrian * $FreeBSD: head/sys/netinet/tcp_usrreq.c 75619 2001-04-17 18:08:01Z kris $
35220442Sadrian */
36220442Sadrian
37220442Sadrian#include "opt_ipsec.h"
38240623Sadrian#include "opt_inet6.h"
39220442Sadrian#include "opt_tcpdebug.h"
40220442Sadrian
41220442Sadrian#include <sys/param.h>
42185377Ssam#include <sys/systm.h>
43185377Ssam#include <sys/kernel.h>
44185377Ssam#include <sys/sysctl.h>
45185377Ssam#include <sys/mbuf.h>
46185377Ssam#ifdef INET6
47185377Ssam#include <sys/domain.h>
48185377Ssam#endif /* INET6 */
49185377Ssam#include <sys/socket.h>
50185377Ssam#include <sys/socketvar.h>
51185377Ssam#include <sys/protosw.h>
52185377Ssam#include <sys/proc.h>
53185377Ssam#include <sys/jail.h>
54185377Ssam
55185377Ssam#include <net/if.h>
56185377Ssam#include <net/route.h>
57185377Ssam
58185377Ssam#include <netinet/in.h>
59185377Ssam#include <netinet/in_systm.h>
60185377Ssam#ifdef INET6
61185377Ssam#include <netinet/ip6.h>
62185377Ssam#endif
63185377Ssam#include <netinet/in_pcb.h>
64185377Ssam#ifdef INET6
65185377Ssam#include <netinet6/in6_pcb.h>
66185377Ssam#endif
67185377Ssam#include <netinet/in_var.h>
68185377Ssam#include <netinet/ip_var.h>
69185377Ssam#ifdef INET6
70185377Ssam#include <netinet6/ip6_var.h>
71185377Ssam#endif
72185377Ssam#include <netinet/tcp.h>
73185377Ssam#include <netinet/tcp_fsm.h>
74187831Ssam#include <netinet/tcp_seq.h>
75187831Ssam#include <netinet/tcp_timer.h>
76237874Sadrian#include <netinet/tcp_var.h>
77185377Ssam#include <netinet/tcpip.h>
78185377Ssam#ifdef TCPDEBUG
79185377Ssam#include <netinet/tcp_debug.h>
80185377Ssam#endif
81185377Ssam
82185377Ssam#ifdef IPSEC
83185377Ssam#include <netinet6/ipsec.h>
84185377Ssam#endif /*IPSEC*/
85185377Ssam
86185377Ssam/*
87185377Ssam * TCP protocol interface to socket abstraction.
88185377Ssam */
89185377Ssamextern	char *tcpstates[];	/* XXX ??? */
90185377Ssam
91185377Ssamstatic int	tcp_attach __P((struct socket *, struct proc *));
92185377Ssamstatic int	tcp_connect __P((struct tcpcb *, struct sockaddr *,
93185377Ssam				 struct proc *));
94185377Ssam#ifdef INET6
95185377Ssamstatic int	tcp6_connect __P((struct tcpcb *, struct sockaddr *,
96185377Ssam				 struct proc *));
97185377Ssam#endif /* INET6 */
98185377Ssamstatic struct tcpcb *
99185377Ssam		tcp_disconnect __P((struct tcpcb *));
100185377Ssamstatic struct tcpcb *
101185377Ssam		tcp_usrclosed __P((struct tcpcb *));
102185377Ssam
/*
 * Tracing hooks for the pru_*() handlers.
 *
 *   TCPDEBUG0      declares the local that remembers the previous state.
 *   TCPDEBUG1()    snapshots tp->t_state (0 when tp is NULL).
 *   TCPDEBUG2(req) calls tcp_trace() for request `req' when tp is set
 *                  and the socket has SO_DEBUG enabled.
 *
 * All three expand to nothing unless TCPDEBUG is defined.  They rely on
 * locals named `tp' and `so' being in scope at the point of use.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it behaves as a single
 * statement; a bare `if' here would silently capture a following `else'
 * at the call site.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do { \
				if (tp && (so->so_options & SO_DEBUG)) \
					tcp_trace(TA_USER, ostate, tp, 0, 0, req); \
			} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
113237953Sadrian
114221603Sadrian/*
115221603Sadrian * TCP attaches to socket via pru_attach(), reserving space,
116221603Sadrian * and an internet control block.
117221603Sadrian */
118221603Sadrianstatic int
119221603Sadriantcp_usr_attach(struct socket *so, int proto, struct proc *p)
120221581Sadrian{
121221603Sadrian	int s = splnet();
122237953Sadrian	int error;
123237953Sadrian	struct inpcb *inp = sotoinpcb(so);
124237953Sadrian	struct tcpcb *tp = 0;
125221603Sadrian	TCPDEBUG0;
126221603Sadrian
127222584Sadrian	TCPDEBUG1();
128222584Sadrian	if (inp) {
129222584Sadrian		error = EISCONN;
130221603Sadrian		goto out;
131221603Sadrian	}
132221603Sadrian
133221603Sadrian	error = tcp_attach(so, p);
134221603Sadrian	if (error)
135221603Sadrian		goto out;
136237953Sadrian
137237953Sadrian	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
138237953Sadrian		so->so_linger = TCP_LINGERTIME;
139221603Sadrian	tp = sototcpcb(so);
140247366Sadrianout:
141247366Sadrian	TCPDEBUG2(PRU_ATTACH);
142247366Sadrian	splx(s);
143221603Sadrian	return error;
144237953Sadrian}
145221603Sadrian
146237953Sadrian/*
147237953Sadrian * pru_detach() detaches the TCP protocol from the socket.
148237953Sadrian * If the protocol state is non-embryonic, then can't
149237955Sadrian * do this directly: have to initiate a pru_disconnect(),
150237953Sadrian * which may finish later; embryonic TCB's can just
151237953Sadrian * be discarded here.
152237953Sadrian */
153237953Sadrianstatic int
154238280Sadriantcp_usr_detach(struct socket *so)
155238280Sadrian{
156238280Sadrian	int s = splnet();
157238280Sadrian	int error = 0;
158238280Sadrian	struct inpcb *inp = sotoinpcb(so);
159238280Sadrian	struct tcpcb *tp;
160238280Sadrian	TCPDEBUG0;
161237953Sadrian
162244854Sadrian	if (inp == 0) {
163237953Sadrian		splx(s);
164237953Sadrian		return EINVAL;	/* XXX */
165237953Sadrian	}
166221603Sadrian	tp = intotcpcb(inp);
167221603Sadrian	TCPDEBUG1();
168239631Sadrian	tp = tcp_disconnect(tp);
169239631Sadrian
170221603Sadrian	TCPDEBUG2(PRU_DETACH);
171239631Sadrian	splx(s);
172251360Sadrian	return error;
173239631Sadrian}
174239631Sadrian
/*
 * Shared prologue for the pru_*() handlers.  The enclosing function must
 * have locals named `s' (value returned by splnet()), `inp' and `tp'.
 * Declares the trace state, returns EINVAL (after splx()) when the socket
 * has no inpcb, and otherwise loads `tp' from `inp' and snapshots the
 * state for tracing.  Because it begins with a declaration it has to be
 * placed directly after the function's own declarations.
 */
#define	COMMON_START() \
	TCPDEBUG0; \
	do { \
		if (inp == 0) { \
			splx(s); \
			return EINVAL; \
		} \
		tp = intotcpcb(inp); \
		TCPDEBUG1(); \
	} while (0)

/*
 * Shared epilogue: provides the `out' label, emits the trace record for
 * `req', restores the interrupt level and returns `error'.  The trailing
 * `goto out' can never execute; presumably it only keeps the label
 * referenced in handlers that never jump to it.
 */
#define	COMMON_END(req) \
	out: TCPDEBUG2(req); splx(s); return error; goto out
186239632Sadrian
187221603Sadrian
188221581Sadrian/*
189221581Sadrian * Give the socket an address.
190221603Sadrian */
191221603Sadrianstatic int
192221603Sadriantcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
193221603Sadrian{
194221603Sadrian	int s = splnet();
195221603Sadrian	int error = 0;
196221603Sadrian	struct inpcb *inp = sotoinpcb(so);
197225444Sadrian	struct tcpcb *tp;
198226488Sadrian	struct sockaddr_in *sinp;
199227410Sadrian
200243743Sadrian	COMMON_START();
201251400Sadrian
202185377Ssam	/*
203185377Ssam	 * Must check for multicast addresses and disallow binding
204185377Ssam	 * to them.
205185377Ssam	 */
206185377Ssam	sinp = (struct sockaddr_in *)nam;
207185377Ssam	if (sinp->sin_family == AF_INET &&
208185377Ssam	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
209185377Ssam		error = EAFNOSUPPORT;
210185377Ssam		goto out;
211185377Ssam	}
212185377Ssam	error = in_pcbbind(inp, nam, p);
213185377Ssam	if (error)
214185377Ssam		goto out;
215185377Ssam	COMMON_END(PRU_BIND);
216185377Ssam
217185377Ssam}
218185377Ssam
#ifdef INET6
/*
 * pru_bind() handler for IPv6 sockets.
 *
 * AF_INET6 multicast addresses are rejected with EAFNOSUPPORT.  The pcb
 * is first marked IPv6-only.  Unless IN6P_BINDV6ONLY is set, binding to
 * the unspecified address additionally enables IPv4, and binding to a
 * v4-mapped address converts the pcb to IPv4-only and performs the bind
 * through in_pcbbind() with the embedded IPv4 address.  All other cases
 * go through in6_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	COMMON_START();

	sin6p = (struct sockaddr_in6 *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	/* Default: IPv6 only. */
	inp->inp_vflag = (inp->inp_vflag & ~INP_IPV4) | INP_IPV6;

	if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
			/* Wildcard bind: accept IPv4 as well. */
			inp->inp_vflag |= INP_IPV4;
		} else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
			struct sockaddr_in sin;

			/* v4-mapped: bind as a plain IPv4 pcb. */
			in6_sin6_2_sin(&sin, sin6p);
			inp->inp_vflag = (inp->inp_vflag | INP_IPV4) &
			    ~INP_IPV6;
			error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
			goto out;
		}
	}
	error = in6_pcbbind(inp, nam, p);

	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
263185377Ssam
264185377Ssam/*
265185377Ssam * Prepare to accept connections.
266185377Ssam */
267185377Ssamstatic int
268185377Ssamtcp_usr_listen(struct socket *so, struct proc *p)
269185377Ssam{
270185377Ssam	int s = splnet();
271185377Ssam	int error = 0;
272185377Ssam	struct inpcb *inp = sotoinpcb(so);
273185377Ssam	struct tcpcb *tp;
274185377Ssam
275185377Ssam	COMMON_START();
276185377Ssam	if (inp->inp_lport == 0)
277185377Ssam		error = in_pcbbind(inp, (struct sockaddr *)0, p);
278185377Ssam	if (error == 0)
279185377Ssam		tp->t_state = TCPS_LISTEN;
280185377Ssam	COMMON_END(PRU_LISTEN);
281185377Ssam}
282185377Ssam
#ifdef INET6
/*
 * pru_listen() handler for IPv6 sockets.
 *
 * When no local port is bound yet, INP_IPV4 is cleared for
 * IN6P_BINDV6ONLY sockets and set for all others, and a port is then
 * obtained through in6_pcbbind() with a null address.  The tcpcb enters
 * TCPS_LISTEN unless that bind failed.
 */
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();

	if (inp->inp_lport == 0) {
		if (inp->inp_flags & IN6P_BINDV6ONLY)
			inp->inp_vflag &= ~INP_IPV4;
		else
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
	}
	if (!error)
		tp->t_state = TCPS_LISTEN;

	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
304185377Ssam
305185377Ssam/*
306185377Ssam * Initiate connection to peer.
307185377Ssam * Create a template for use in transmissions on this connection.
308185377Ssam * Enter SYN_SENT state, and mark socket as connecting.
309185377Ssam * Start keep-alive timer, and seed output sequence space.
310185377Ssam * Send initial segment on connection.
311185377Ssam */
312185377Ssamstatic int
313185377Ssamtcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
314185377Ssam{
315185377Ssam	int s = splnet();
316185377Ssam	int error = 0;
317185377Ssam	struct inpcb *inp = sotoinpcb(so);
318185377Ssam	struct tcpcb *tp;
319185377Ssam	struct sockaddr_in *sinp;
320185377Ssam
321185377Ssam	COMMON_START();
322185377Ssam
323185377Ssam	/*
324185377Ssam	 * Must disallow TCP ``connections'' to multicast addresses.
325185377Ssam	 */
326185377Ssam	sinp = (struct sockaddr_in *)nam;
327185377Ssam	if (sinp->sin_family == AF_INET
328185377Ssam	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
329185377Ssam		error = EAFNOSUPPORT;
330185377Ssam		goto out;
331185377Ssam	}
332185377Ssam
333185377Ssam	if (p && jailed(p->p_ucred))
334185377Ssam		prison_remote_ip(p->p_ucred, 0, &sinp->sin_addr.s_addr);
335185377Ssam
336185377Ssam	if ((error = tcp_connect(tp, nam, p)) != 0)
337185377Ssam		goto out;
338185377Ssam	error = tcp_output(tp);
339185377Ssam	COMMON_END(PRU_CONNECT);
340185377Ssam}
341185377Ssam
#ifdef INET6
/*
 * pru_connect() handler for IPv6 sockets.
 *
 * AF_INET6 multicast destinations are refused with EAFNOSUPPORT.  A
 * v4-mapped destination on a socket without IN6P_BINDV6ONLY turns the
 * pcb into an IPv4 one and connects through tcp_connect() using the
 * embedded IPv4 address; every other destination marks the pcb as IPv6
 * and goes through tcp6_connect().  When the connect step succeeds,
 * tcp_output() sends the initial segment.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	COMMON_START();

	/* TCP ``connections'' to multicast addresses make no sense. */
	sin6p = (struct sockaddr_in6 *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0 &&
	    IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		/* IPv4 peer reached through an AF_INET6 socket. */
		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag = (inp->inp_vflag | INP_IPV4) & ~INP_IPV6;
		error = tcp_connect(tp, (struct sockaddr *)&sin, p);
	} else {
		inp->inp_vflag = (inp->inp_vflag & ~INP_IPV4) | INP_IPV6;
		error = tcp6_connect(tp, nam, p);
	}
	if (error == 0)
		error = tcp_output(tp);

	COMMON_END(PRU_CONNECT);
}
#endif /* INET6 */
384185377Ssam
385185377Ssam/*
386185377Ssam * Initiate disconnect from peer.
387185377Ssam * If connection never passed embryonic stage, just drop;
388185377Ssam * else if don't need to let data drain, then can just drop anyways,
389185377Ssam * else have to begin TCP shutdown process: mark socket disconnecting,
390185377Ssam * drain unread data, state switch to reflect user close, and
391185377Ssam * send segment (e.g. FIN) to peer.  Socket will be really disconnected
392185377Ssam * when peer sends FIN and acks ours.
393185377Ssam *
394185377Ssam * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
395220022Sadrian */
396220022Sadrianstatic int
397220022Sadriantcp_usr_disconnect(struct socket *so)
398220022Sadrian{
399185377Ssam	int s = splnet();
400185377Ssam	int error = 0;
401185377Ssam	struct inpcb *inp = sotoinpcb(so);
402185377Ssam	struct tcpcb *tp;
403185377Ssam
404185377Ssam	COMMON_START();
405185377Ssam	tp = tcp_disconnect(tp);
406220025Sadrian	COMMON_END(PRU_DISCONNECT);
407185377Ssam}
408220022Sadrian
409220025Sadrian/*
410220022Sadrian * Accept a connection.  Essentially all the work is
411220022Sadrian * done at higher levels; just return the address
412220022Sadrian * of the peer, storing through addr.
413220022Sadrian */
414220022Sadrianstatic int
415220022Sadriantcp_usr_accept(struct socket *so, struct sockaddr **nam)
416220022Sadrian{
417220022Sadrian	int s = splnet();
418220022Sadrian	int error = 0;
419220022Sadrian	struct inpcb *inp = sotoinpcb(so);
420185377Ssam	struct tcpcb *tp = NULL;
421185377Ssam	TCPDEBUG0;
422185377Ssam
423185377Ssam	if (so->so_state & SS_ISDISCONNECTED) {
424185377Ssam		error = ECONNABORTED;
425185377Ssam		goto out;
426185377Ssam	}
427185377Ssam	if (inp == 0) {
428185377Ssam		splx(s);
429185377Ssam		return (EINVAL);
430242407Sadrian	}
431242407Sadrian	tp = intotcpcb(inp);
432242407Sadrian	TCPDEBUG1();
433242407Sadrian	in_setpeeraddr(so, nam);
434242407Sadrian	COMMON_END(PRU_ACCEPT);
435242407Sadrian}
436242407Sadrian
#ifdef INET6
/*
 * pru_accept() handler for IPv6 sockets.  Same contract as
 * tcp_usr_accept(), except that the peer address is produced by
 * in6_mapped_peeraddr().
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) == 0) {
		if (inp == NULL) {
			splx(s);
			return (EINVAL);
		}
		tp = intotcpcb(inp);
		TCPDEBUG1();
		in6_mapped_peeraddr(so, nam);
	} else
		error = ECONNABORTED;

	COMMON_END(PRU_ACCEPT);
}
#endif /* INET6 */
461185377Ssam/*
462185377Ssam * Mark the connection as being incapable of further output.
463185377Ssam */
464185377Ssamstatic int
465185377Ssamtcp_usr_shutdown(struct socket *so)
466208711Srpaulo{
467239605Sadrian	int s = splnet();
468239605Sadrian	int error = 0;
469185377Ssam	struct inpcb *inp = sotoinpcb(so);
470185377Ssam	struct tcpcb *tp;
471185377Ssam
472185377Ssam	COMMON_START();
473185377Ssam	socantsendmore(so);
474239605Sadrian	tp = tcp_usrclosed(tp);
475185377Ssam	if (tp)
476192401Ssam		error = tcp_output(tp);
477185377Ssam	COMMON_END(PRU_SHUTDOWN);
478185377Ssam}
479185377Ssam
480185377Ssam/*
481185377Ssam * After a receive, possibly send window update to peer.
482185377Ssam */
483192400Ssamstatic int
484239605Sadriantcp_usr_rcvd(struct socket *so, int flags)
485242407Sadrian{
486185377Ssam	int s = splnet();
487185377Ssam	int error = 0;
488185377Ssam	struct inpcb *inp = sotoinpcb(so);
489185377Ssam	struct tcpcb *tp;
490185377Ssam
491185377Ssam	COMMON_START();
492185377Ssam	tcp_output(tp);
493192400Ssam	COMMON_END(PRU_RCVD);
494192400Ssam}
495185377Ssam
496185377Ssam/*
497185377Ssam * Do a send by putting data in output queue and updating urgent
498185377Ssam * marker if URG set.  Possibly send more data.  Unlike the other
499185377Ssam * pru_*() routines, the mbuf chains are our responsibility.  We
500185377Ssam * must either enqueue them or free them.  The other pru_* routines
501192396Ssam * generally are caller-frees.
502185377Ssam */
503185377Ssamstatic int
504185377Ssamtcp_usr_send(struct socket *so, int flags, struct mbuf *m,
505185377Ssam	     struct sockaddr *nam, struct mbuf *control, struct proc *p)
506185377Ssam{
507185377Ssam	int s = splnet();
508239605Sadrian	int error = 0;
509192397Ssam	struct inpcb *inp = sotoinpcb(so);
510185377Ssam	struct tcpcb *tp;
511185377Ssam#ifdef INET6
512185377Ssam	int isipv6;
513237622Sadrian#endif
514237622Sadrian	TCPDEBUG0;
515237622Sadrian
516185377Ssam	if (inp == NULL) {
517237622Sadrian		/*
518237622Sadrian		 * OOPS! we lost a race, the TCP session got reset after
519237622Sadrian		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
520237622Sadrian		 * network interrupt in the non-splnet() section of sosend().
521237622Sadrian		 */
522237622Sadrian		if (m)
523237622Sadrian			m_freem(m);
524237622Sadrian		if (control)
525237622Sadrian			m_freem(control);
526237622Sadrian		error = ECONNRESET;	/* XXX EPIPE? */
527237622Sadrian		tp = NULL;
528237622Sadrian		TCPDEBUG1();
529237622Sadrian		goto out;
530237622Sadrian	}
531237622Sadrian#ifdef INET6
532237622Sadrian	isipv6 = nam && nam->sa_family == AF_INET6;
533237622Sadrian#endif /* INET6 */
534237622Sadrian	tp = intotcpcb(inp);
535237622Sadrian	TCPDEBUG1();
536237622Sadrian	if (control) {
537242407Sadrian		/* TCP doesn't do control messages (rights, creds, etc) */
538242407Sadrian		if (control->m_len) {
539242407Sadrian			m_freem(control);
540242407Sadrian			if (m)
541242407Sadrian				m_freem(m);
542242407Sadrian			error = EINVAL;
543242407Sadrian			goto out;
544242407Sadrian		}
545242407Sadrian		m_freem(control);	/* empty control, just free it */
546242407Sadrian	}
547242407Sadrian	if(!(flags & PRUS_OOB)) {
548242407Sadrian		sbappend(&so->so_snd, m);
549242407Sadrian		if (nam && tp->t_state < TCPS_SYN_SENT) {
550242407Sadrian			/*
551242407Sadrian			 * Do implied connect if not yet connected,
552242407Sadrian			 * initialize window to default value, and
553242407Sadrian			 * initialize maxseg/maxopd using peer's cached
554242407Sadrian			 * MSS.
555242407Sadrian			 */
556242407Sadrian#ifdef INET6
557242407Sadrian			if (isipv6)
558242407Sadrian				error = tcp6_connect(tp, nam, p);
559242407Sadrian			else
560242407Sadrian#endif /* INET6 */
561242407Sadrian			error = tcp_connect(tp, nam, p);
562242407Sadrian			if (error)
563242407Sadrian				goto out;
564237622Sadrian			tp->snd_wnd = TTCP_CLIENT_SND_WND;
565237611Sadrian			tcp_mss(tp, -1);
566237611Sadrian		}
567237611Sadrian
568237611Sadrian		if (flags & PRUS_EOF) {
569237611Sadrian			/*
570237611Sadrian			 * Close the send side of the connection after
571249131Sadrian			 * the data is sent.
572249131Sadrian			 */
573249131Sadrian			socantsendmore(so);
574249131Sadrian			tp = tcp_usrclosed(tp);
575249131Sadrian		}
576249131Sadrian		if (tp != NULL) {
577249131Sadrian			if (flags & PRUS_MORETOCOME)
578249131Sadrian				tp->t_flags |= TF_MORETOCOME;
579249131Sadrian			error = tcp_output(tp);
580249131Sadrian			if (flags & PRUS_MORETOCOME)
581249131Sadrian				tp->t_flags &= ~TF_MORETOCOME;
582249131Sadrian		}
583249131Sadrian	} else {
584249131Sadrian		if (sbspace(&so->so_snd) < -512) {
585249131Sadrian			m_freem(m);
586249131Sadrian			error = ENOBUFS;
587188974Ssam			goto out;
588188974Ssam		}
589188974Ssam		/*
590188974Ssam		 * According to RFC961 (Assigned Protocols),
591188974Ssam		 * the urgent pointer points to the last octet
592188974Ssam		 * of urgent data.  We continue, however,
593188974Ssam		 * to consider it to indicate the first octet
594188974Ssam		 * of data past the urgent section.
595242509Sadrian		 * Otherwise, snd_up should be one lower.
596242509Sadrian		 */
597242509Sadrian		sbappend(&so->so_snd, m);
598242509Sadrian		if (nam && tp->t_state < TCPS_SYN_SENT) {
599242509Sadrian			/*
600242509Sadrian			 * Do implied connect if not yet connected,
601242509Sadrian			 * initialize window to default value, and
602242509Sadrian			 * initialize maxseg/maxopd using peer's cached
603242509Sadrian			 * MSS.
604188974Ssam			 */
605185377Ssam#ifdef INET6
606185377Ssam			if (isipv6)
607185377Ssam				error = tcp6_connect(tp, nam, p);
608185377Ssam			else
609185377Ssam#endif /* INET6 */
610187831Ssam			error = tcp_connect(tp, nam, p);
611187831Ssam			if (error)
612185377Ssam				goto out;
613185377Ssam			tp->snd_wnd = TTCP_CLIENT_SND_WND;
614185377Ssam			tcp_mss(tp, -1);
615185377Ssam		}
616185377Ssam		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
617185377Ssam		tp->t_force = 1;
618185377Ssam		error = tcp_output(tp);
619185377Ssam		tp->t_force = 0;
620185377Ssam	}
621185377Ssam	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
622185377Ssam		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
623185377Ssam}
624185377Ssam
625185377Ssam/*
626242407Sadrian * Abort the TCP.
627242407Sadrian */
628242407Sadrianstatic int
629242407Sadriantcp_usr_abort(struct socket *so)
630242407Sadrian{
631242407Sadrian	int s = splnet();
632242407Sadrian	int error = 0;
633242407Sadrian	struct inpcb *inp = sotoinpcb(so);
634242407Sadrian	struct tcpcb *tp;
635242407Sadrian
636242407Sadrian	COMMON_START();
637185377Ssam	tp = tcp_drop(tp, ECONNABORTED);
638185377Ssam	COMMON_END(PRU_ABORT);
639185377Ssam}
640185377Ssam
641185377Ssam/*
642185377Ssam * Receive out-of-band data.
643185377Ssam */
644185377Ssamstatic int
645185377Ssamtcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
646185377Ssam{
647185377Ssam	int s = splnet();
648185377Ssam	int error = 0;
649185380Ssam	struct inpcb *inp = sotoinpcb(so);
650185380Ssam	struct tcpcb *tp;
651185380Ssam
652185380Ssam	COMMON_START();
653185377Ssam	if ((so->so_oobmark == 0 &&
654185377Ssam	     (so->so_state & SS_RCVATMARK) == 0) ||
655185377Ssam	    so->so_options & SO_OOBINLINE ||
656185377Ssam	    tp->t_oobflags & TCPOOB_HADDATA) {
657185377Ssam		error = EINVAL;
658185377Ssam		goto out;
659185377Ssam	}
660185377Ssam	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
661185377Ssam		error = EWOULDBLOCK;
662185377Ssam		goto out;
663185377Ssam	}
664239289Sadrian	m->m_len = 1;
665185377Ssam	*mtod(m, caddr_t) = tp->t_iobc;
666188770Ssam	if ((flags & MSG_PEEK) == 0)
667188770Ssam		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
668185377Ssam	COMMON_END(PRU_RCVOOB);
669185377Ssam}
670185377Ssam
671185377Ssam/* xxx - should be const */
672185377Ssamstruct pr_usrreqs tcp_usrreqs = {
673185377Ssam	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
674185377Ssam	tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
675185377Ssam	tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
676185377Ssam	tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
677185377Ssam	in_setsockaddr, sosend, soreceive, sopoll
678239289Sadrian};
679185377Ssam
#ifdef INET6
/*
 * User-request switch for TCP over IPv6.  It reuses the IPv4 handlers
 * except for accept, bind, connect, control, listen and the two
 * address-reporting entries.  The initializer is positional, so the
 * entries must stay in the member order of struct pr_usrreqs.
 */
struct pr_usrreqs tcp6_usrreqs = {
	tcp_usr_abort,
	tcp6_usr_accept,
	tcp_usr_attach,
	tcp6_usr_bind,
	tcp6_usr_connect,
	pru_connect2_notsupp,
	in6_control,
	tcp_usr_detach,
	tcp_usr_disconnect,
	tcp6_usr_listen,
	in6_mapped_peeraddr,
	tcp_usr_rcvd,
	tcp_usr_rcvoob,
	tcp_usr_send,
	pru_sense_null,
	tcp_usr_shutdown,
	in6_mapped_sockaddr,
	sosend,
	soreceive,
	sopoll
};
#endif /* INET6 */
689185377Ssam
690185377Ssam/*
691185377Ssam * Common subroutine to open a TCP connection to remote host specified
692185377Ssam * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
693185377Ssam * port number if needed.  Call in_pcbladdr to do the routing and to choose
694185377Ssam * a local host address (interface).  If there is an existing incarnation
695185377Ssam * of the same connection in TIME-WAIT state and if the remote host was
696238840Sadrian * sending CC options and if the connection duration was < MSL, then
697238840Sadrian * truncate the previous TIME-WAIT state and proceed.
698185377Ssam * Initialize connection parameters and enter SYN-SENT state.
699185377Ssam */
700185377Ssamstatic int
701185377Ssamtcp_connect(tp, nam, p)
702185377Ssam	register struct tcpcb *tp;
703185377Ssam	struct sockaddr *nam;
704238841Sadrian	struct proc *p;
705242407Sadrian{
706185377Ssam	struct inpcb *inp = tp->t_inpcb, *oinp;
707185377Ssam	struct socket *so = inp->inp_socket;
708185377Ssam	struct tcpcb *otp;
709185377Ssam	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
710185377Ssam	struct sockaddr_in *ifaddr;
711185377Ssam	struct rmxp_tao *taop;
712185377Ssam	struct rmxp_tao tao_noncached;
713185377Ssam	int error;
714185377Ssam
715185377Ssam	if (inp->inp_lport == 0) {
716185377Ssam		error = in_pcbbind(inp, (struct sockaddr *)0, p);
717185377Ssam		if (error)
718185377Ssam			return error;
719185377Ssam	}
720185377Ssam
721185377Ssam	/*
722185377Ssam	 * Cannot simply call in_pcbconnect, because there might be an
723185377Ssam	 * earlier incarnation of this same connection still in
724185377Ssam	 * TIME_WAIT state, creating an ADDRINUSE error.
725185377Ssam	 */
726185377Ssam	error = in_pcbladdr(inp, nam, &ifaddr);
727185377Ssam	if (error)
728185377Ssam		return error;
729242407Sadrian	oinp = in_pcblookup_hash(inp->inp_pcbinfo,
730242407Sadrian	    sin->sin_addr, sin->sin_port,
731242407Sadrian	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
732242407Sadrian						: ifaddr->sin_addr,
733242407Sadrian	    inp->inp_lport,  0, NULL);
734185377Ssam	if (oinp) {
735185377Ssam		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
736185377Ssam		otp->t_state == TCPS_TIME_WAIT &&
737185377Ssam		    (ticks - otp->t_starttime) < tcp_msl &&
738185377Ssam		    (otp->t_flags & TF_RCVD_CC))
739185377Ssam			otp = tcp_close(otp);
740185377Ssam		else
741185377Ssam			return EADDRINUSE;
742185377Ssam	}
743185377Ssam	if (inp->inp_laddr.s_addr == INADDR_ANY)
744185377Ssam		inp->inp_laddr = ifaddr->sin_addr;
745185377Ssam	inp->inp_faddr = sin->sin_addr;
746185377Ssam	inp->inp_fport = sin->sin_port;
747185377Ssam	in_pcbrehash(inp);
748185377Ssam
749185377Ssam	tp->t_template = tcp_template(tp);
750185377Ssam	if (tp->t_template == 0) {
751185377Ssam		in_pcbdisconnect(inp);
752185377Ssam		return ENOBUFS;
753185377Ssam	}
754185377Ssam
755237874Sadrian	/* Compute window scaling to request.  */
756185377Ssam	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
757185377Ssam	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
758185377Ssam		tp->request_r_scale++;
759185377Ssam
760185377Ssam	soisconnecting(so);
761185377Ssam	tcpstat.tcps_connattempt++;
762242407Sadrian	tp->t_state = TCPS_SYN_SENT;
763242407Sadrian	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
764242407Sadrian#ifdef TCP_COMPAT_42
765242407Sadrian	tp->iss = tcp_iss;
766242407Sadrian	tcp_iss += TCP_ISSINCR/2;
767185377Ssam#else  /* TCP_COMPAT_42 */
768242407Sadrian	tp->iss = tcp_rndiss_next();
769242407Sadrian#endif /* !TCP_COMPAT_42 */
770242407Sadrian	tcp_sendseqinit(tp);
771242407Sadrian
772242407Sadrian	/*
773242407Sadrian	 * Generate a CC value for this connection and
774242407Sadrian	 * check whether CC or CCnew should be used.
775185377Ssam	 */
776185377Ssam	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
777185377Ssam		taop = &tao_noncached;
778185377Ssam		bzero(taop, sizeof(*taop));
779185377Ssam	}
780185377Ssam
781185377Ssam	tp->cc_send = CC_INC(tcp_ccgen);
782185377Ssam	if (taop->tao_ccsent != 0 &&
783185377Ssam	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
784185377Ssam		taop->tao_ccsent = tp->cc_send;
785185377Ssam	} else {
786185377Ssam		taop->tao_ccsent = 0;
787185377Ssam		tp->t_flags |= TF_SENDCCNEW;
788185377Ssam	}
789185377Ssam
790185377Ssam	return 0;
791185377Ssam}
792185377Ssam
#ifdef INET6
/*
 * Set up an outgoing IPv6 TCP connection to the peer described by nam.
 *
 * Binds a local port through in6_pcbbind() if the pcb has none, obtains a
 * local address from in6_pcbladdr(), records the foreign address, port and
 * (when non-zero) flow info in the pcb, builds the header template,
 * computes the window scale to request, and moves the tcpcb to
 * TCPS_SYN_SENT with a new initial send sequence number and CC value.
 *
 * Returns 0 on success; the error from in6_pcbbind() or in6_pcbladdr();
 * EADDRINUSE if a pcb with the same address/port pairs already exists and
 * cannot be recycled; or ENOBUFS if tcp_template() fails.
 */
static int
tcp6_connect(tp, nam, p)
	register struct tcpcb *tp;
	struct sockaddr *nam;
	struct proc *p;
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct in6_addr *addr6;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error;

	/* No local port yet: let in6_pcbbind() choose one. */
	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			return (error);
	}

	/*
	 * Cannot simply call in6_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	error = in6_pcbladdr(inp, nam, &addr6);
	if (error)
		return (error);
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
				  &sin6->sin6_addr, sin6->sin6_port,
				  IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
				  ? addr6
				  : &inp->in6p_laddr,
				  inp->inp_lport,  0, NULL);
	if (oinp) {
		/*
		 * Only a different pcb that is in TIME_WAIT, is younger
		 * than tcp_msl ticks and has TF_RCVD_CC set is closed to
		 * make room; any other match is an address conflict.
		 */
		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    (ticks - otp->t_starttime) < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC))
			otp = tcp_close(otp);
		else
			return (EADDRINUSE);
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		inp->in6p_laddr = *addr6;
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/* The masked flow info is an integer: compare with 0, not NULL. */
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
		inp->in6p_flowinfo = sin6->sin6_flowinfo;
	in_pcbrehash(inp);

	tp->t_template = tcp_template(tp);
	if (tp->t_template == NULL) {
		in6_pcbdisconnect(inp);
		return (ENOBUFS);
	}

	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
		tp->request_r_scale++;

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
#ifdef TCP_COMPAT_42
	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
#else
	tp->iss = tcp_rndiss_next();
#endif /* TCP_COMPAT_42 */
	tcp_sendseqinit(tp);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 * Without a cache entry, work on a zeroed local one.
	 */
	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent != 0 &&
	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
		taop->tao_ccsent = tp->cc_send;
	} else {
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	}

	return (0);
}
#endif /* INET6 */
889222277Sadrian
890222277Sadrian/*
891222277Sadrian * The new sockopt interface makes it possible for us to block in the
892222277Sadrian * copyin/out step (if we take a page fault).  Taking a page fault at
893222277Sadrian * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now
894222277Sadrian * use TSM, there probably isn't any need for this function to run at
895222277Sadrian * splnet() any more.  This needs more examination.)
896222277Sadrian */
897222277Sadrianint
898222277Sadriantcp_ctloutput(so, sopt)
899222277Sadrian	struct socket *so;
900222277Sadrian	struct sockopt *sopt;
901222277Sadrian{
902222277Sadrian	int	error, opt, optval, s;
903222277Sadrian	struct	inpcb *inp;
904222277Sadrian	struct	tcpcb *tp;
905237874Sadrian
906222277Sadrian	error = 0;
907222277Sadrian	s = splnet();		/* XXX */
908242407Sadrian	inp = sotoinpcb(so);
909242407Sadrian	if (inp == NULL) {
910222277Sadrian		splx(s);
911222277Sadrian		return (ECONNRESET);
912222277Sadrian	}
913222277Sadrian	if (sopt->sopt_level != IPPROTO_TCP) {
914222277Sadrian#ifdef INET6
915222277Sadrian		if (INP_CHECK_SOCKAF(so, AF_INET6))
916222277Sadrian			error = ip6_ctloutput(so, sopt);
917222277Sadrian		else
918222277Sadrian#endif /* INET6 */
919222277Sadrian		error = ip_ctloutput(so, sopt);
920222277Sadrian		splx(s);
921222277Sadrian		return (error);
922222277Sadrian	}
923222277Sadrian	tp = intotcpcb(inp);
924222277Sadrian
925222584Sadrian	switch (sopt->sopt_dir) {
926222584Sadrian	case SOPT_SET:
927222584Sadrian		switch (sopt->sopt_name) {
928222584Sadrian		case TCP_NODELAY:
929222584Sadrian		case TCP_NOOPT:
930222584Sadrian			error = sooptcopyin(sopt, &optval, sizeof optval,
931222584Sadrian					    sizeof optval);
932222584Sadrian			if (error)
933222584Sadrian				break;
934222584Sadrian
935222584Sadrian			switch (sopt->sopt_name) {
936224244Sadrian			case TCP_NODELAY:
937224244Sadrian				opt = TF_NODELAY;
938222584Sadrian				break;
939222584Sadrian			case TCP_NOOPT:
940222584Sadrian				opt = TF_NOOPT;
941222584Sadrian				break;
942224244Sadrian			default:
943222584Sadrian				opt = 0; /* dead code to fool gcc */
944222584Sadrian				break;
945222584Sadrian			}
946224244Sadrian
947224244Sadrian			if (optval)
948231708Sadrian				tp->t_flags |= opt;
949231708Sadrian			else
950222584Sadrian				tp->t_flags &= ~opt;
951222584Sadrian			break;
952222584Sadrian
953222584Sadrian		case TCP_NOPUSH:
954244854Sadrian			error = sooptcopyin(sopt, &optval, sizeof optval,
955244854Sadrian					    sizeof optval);
956244854Sadrian			if (error)
957244854Sadrian				break;
958244854Sadrian
959244854Sadrian			if (optval)
960244854Sadrian				tp->t_flags |= TF_NOPUSH;
961244854Sadrian			else {
962244854Sadrian				tp->t_flags &= ~TF_NOPUSH;
963244854Sadrian				error = tcp_output(tp);
964245281Sadrian			}
965245281Sadrian			break;
966244854Sadrian
967244854Sadrian		case TCP_MAXSEG:
968244854Sadrian			error = sooptcopyin(sopt, &optval, sizeof optval,
969244854Sadrian					    sizeof optval);
970224716Sadrian			if (error)
971224716Sadrian				break;
972224716Sadrian
973224716Sadrian			if (optval > 0 && optval <= tp->t_maxseg)
974224716Sadrian				tp->t_maxseg = optval;
975224716Sadrian			else
976224716Sadrian				error = EINVAL;
977224716Sadrian			break;
978224716Sadrian
979222584Sadrian		default:
980242407Sadrian			error = ENOPROTOOPT;
981222277Sadrian			break;
982239606Sadrian		}
983239606Sadrian		break;
984239606Sadrian
985239606Sadrian	case SOPT_GET:
986239606Sadrian		switch (sopt->sopt_name) {
987239606Sadrian		case TCP_NODELAY:
988239606Sadrian			optval = tp->t_flags & TF_NODELAY;
989239606Sadrian			break;
990239890Sadrian		case TCP_MAXSEG:
991239890Sadrian			optval = tp->t_maxseg;
992239890Sadrian			break;
993239890Sadrian		case TCP_NOOPT:
994239890Sadrian			optval = tp->t_flags & TF_NOOPT;
995239890Sadrian			break;
996239890Sadrian		case TCP_NOPUSH:
997239890Sadrian			optval = tp->t_flags & TF_NOPUSH;
998239890Sadrian			break;
999239890Sadrian		default:
1000239890Sadrian			error = ENOPROTOOPT;
1001239890Sadrian			break;
1002239890Sadrian		}
1003239890Sadrian		if (error == 0)
1004239890Sadrian			error = sooptcopyout(sopt, &optval, sizeof optval);
1005239890Sadrian		break;
1006239890Sadrian	}
1007239890Sadrian	splx(s);
1008239890Sadrian	return (error);
1009239890Sadrian}
1010239890Sadrian
1011239890Sadrian/*
1012239890Sadrian * tcp_sendspace and tcp_recvspace are the default send and receive window
1013239606Sadrian * sizes, respectively.  These are obsolescent (this information should
1014222644Sadrian * be set by the route).
1015222644Sadrian */
1016222644Sadrianu_long	tcp_sendspace = 1024*16;
1017222644SadrianSYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
1018222644Sadrian    &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
1019222644Sadrianu_long	tcp_recvspace = 1024*16;
1020222644SadrianSYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1021222644Sadrian    &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
1022222644Sadrian
1023222815Sadrian/*
1024224539Sadrian * Attach TCP protocol to socket, allocating
1025224539Sadrian * internet protocol control block, tcp control block,
1026224539Sadrian * bufer space, and entering LISTEN state if to accept connections.
1027222815Sadrian */
1028224633Sadrianstatic int
1029222815Sadriantcp_attach(so, p)
1030222815Sadrian	struct socket *so;
1031222815Sadrian	struct proc *p;
1032222815Sadrian{
1033222815Sadrian	register struct tcpcb *tp;
1034222815Sadrian	struct inpcb *inp;
1035224633Sadrian	int error;
1036222815Sadrian#ifdef INET6
1037237611Sadrian	int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != NULL;
1038242407Sadrian#endif
1039242407Sadrian
1040242407Sadrian	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1041242407Sadrian		error = soreserve(so, tcp_sendspace, tcp_recvspace);
1042242407Sadrian		if (error)
1043242407Sadrian			return (error);
1044242407Sadrian	}
1045242407Sadrian	error = in_pcballoc(so, &tcbinfo, p);
1046242407Sadrian	if (error)
1047242407Sadrian		return (error);
1048242407Sadrian	inp = sotoinpcb(so);
1049242407Sadrian#ifdef IPSEC
1050242407Sadrian	error = ipsec_init_policy(so, &inp->inp_sp);
1051242407Sadrian	if (error) {
1052237611Sadrian#ifdef INET6
1053237611Sadrian		if (isipv6)
1054237611Sadrian			in6_pcbdetach(inp);
1055237611Sadrian		else
1056237611Sadrian#endif
1057237611Sadrian		in_pcbdetach(inp);
1058237611Sadrian		return (error);
1059237611Sadrian	}
1060237611Sadrian#endif /*IPSEC*/
1061237611Sadrian#ifdef INET6
1062237611Sadrian	if (isipv6) {
1063237611Sadrian		inp->inp_vflag |= INP_IPV6;
1064237611Sadrian		inp->in6p_hops = -1;	/* use kernel default */
1065237611Sadrian	}
1066237611Sadrian	else
1067237611Sadrian#endif
1068237611Sadrian	inp->inp_vflag |= INP_IPV4;
1069237611Sadrian	tp = tcp_newtcpcb(inp);
1070237611Sadrian	if (tp == 0) {
1071237611Sadrian		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
1072237611Sadrian
1073237611Sadrian		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
1074237611Sadrian#ifdef INET6
1075237611Sadrian		if (isipv6)
1076237611Sadrian			in6_pcbdetach(inp);
1077237611Sadrian		else
1078237611Sadrian#endif
1079237611Sadrian		in_pcbdetach(inp);
1080237611Sadrian		so->so_state |= nofd;
1081237611Sadrian		return (ENOBUFS);
1082237611Sadrian	}
1083237611Sadrian	tp->t_state = TCPS_CLOSED;
1084237611Sadrian	return (0);
1085237611Sadrian}
1086237611Sadrian
1087237611Sadrian/*
1088237611Sadrian * Initiate (or continue) disconnect.
1089237611Sadrian * If embryonic state, just send reset (once).
1090237611Sadrian * If in ``let data drain'' option and linger null, just drop.
1091237611Sadrian * Otherwise (hard), mark socket disconnecting and drop
1092237611Sadrian * current input data; switch states based on user close, and
1093237611Sadrian * send segment to peer (with FIN).
1094249131Sadrian */
1095249131Sadrianstatic struct tcpcb *
1096237611Sadriantcp_disconnect(tp)
1097237611Sadrian	register struct tcpcb *tp;
1098237611Sadrian{
1099237611Sadrian	struct socket *so = tp->t_inpcb->inp_socket;
1100237611Sadrian
1101237611Sadrian	if (tp->t_state < TCPS_ESTABLISHED)
1102237611Sadrian		tp = tcp_close(tp);
1103237611Sadrian	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1104237611Sadrian		tp = tcp_drop(tp, 0);
1105237611Sadrian	else {
1106237611Sadrian		soisdisconnecting(so);
1107237611Sadrian		sbflush(&so->so_rcv);
1108237611Sadrian		tp = tcp_usrclosed(tp);
1109237611Sadrian		if (tp)
1110237611Sadrian			(void) tcp_output(tp);
1111237611Sadrian	}
1112237611Sadrian	return (tp);
1113237611Sadrian}
1114237611Sadrian
1115237611Sadrian/*
1116237611Sadrian * User issued close, and wish to trail through shutdown states:
1117237611Sadrian * if never received SYN, just forget it.  If got a SYN from peer,
1118237611Sadrian * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1119237611Sadrian * If already got a FIN from peer, then almost done; go to LAST_ACK
1120237611Sadrian * state.  In all other cases, have already sent FIN to peer (e.g.
1121237611Sadrian * after PRU_SHUTDOWN), and just have to play tedious game waiting
1122237611Sadrian * for peer to send FIN or not respond to keep-alives, etc.
1123237611Sadrian * We can let the user exit from the close as soon as the FIN is acked.
1124237611Sadrian */
1125237611Sadrianstatic struct tcpcb *
1126237611Sadriantcp_usrclosed(tp)
1127237611Sadrian	register struct tcpcb *tp;
1128237611Sadrian{
1129237611Sadrian
1130237611Sadrian	switch (tp->t_state) {
1131237611Sadrian
1132237611Sadrian	case TCPS_CLOSED:
1133237611Sadrian	case TCPS_LISTEN:
1134237611Sadrian		tp->t_state = TCPS_CLOSED;
1135237611Sadrian		tp = tcp_close(tp);
1136237611Sadrian		break;
1137237611Sadrian
1138237611Sadrian	case TCPS_SYN_SENT:
1139237611Sadrian	case TCPS_SYN_RECEIVED:
1140237611Sadrian		tp->t_flags |= TF_NEEDFIN;
1141237611Sadrian		break;
1142237611Sadrian
1143237611Sadrian	case TCPS_ESTABLISHED:
1144237611Sadrian		tp->t_state = TCPS_FIN_WAIT_1;
1145237611Sadrian		break;
1146237611Sadrian
1147237611Sadrian	case TCPS_CLOSE_WAIT:
1148237611Sadrian		tp->t_state = TCPS_LAST_ACK;
1149237611Sadrian		break;
1150237611Sadrian	}
1151237611Sadrian	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1152237611Sadrian		soisdisconnected(tp->t_inpcb->inp_socket);
1153237611Sadrian		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
1154237611Sadrian		if (tp->t_state == TCPS_FIN_WAIT_2)
1155237611Sadrian			callout_reset(tp->tt_2msl, tcp_maxidle,
1156237611Sadrian				      tcp_timer_2msl, tp);
1157237611Sadrian	}
1158237611Sadrian	return (tp);
1159237611Sadrian}
1160237611Sadrian
1161237611Sadrian