tcp_usrreq.c revision 71937
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
34 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 71937 2001-02-02 18:48:25Z jlemon $
35 */
36
37#include "opt_ipsec.h"
38#include "opt_inet6.h"
39#include "opt_tcpdebug.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/mbuf.h>
46#ifdef INET6
47#include <sys/domain.h>
48#endif /* INET6 */
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/protosw.h>
52
53#include <net/if.h>
54#include <net/route.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#ifdef INET6
59#include <netinet/ip6.h>
60#endif
61#include <netinet/in_pcb.h>
62#ifdef INET6
63#include <netinet6/in6_pcb.h>
64#endif
65#include <netinet/in_var.h>
66#include <netinet/ip_var.h>
67#ifdef INET6
68#include <netinet6/ip6_var.h>
69#endif
70#include <netinet/tcp.h>
71#include <netinet/tcp_fsm.h>
72#include <netinet/tcp_seq.h>
73#include <netinet/tcp_timer.h>
74#include <netinet/tcp_var.h>
75#include <netinet/tcpip.h>
76#ifdef TCPDEBUG
77#include <netinet/tcp_debug.h>
78#endif
79
80#ifdef IPSEC
81#include <netinet6/ipsec.h>
82#endif /*IPSEC*/
83
84/*
85 * TCP protocol interface to socket abstraction.
86 */
87extern	char *tcpstates[];	/* XXX ??? */
88
89static int	tcp_attach __P((struct socket *, struct proc *));
90static int	tcp_connect __P((struct tcpcb *, struct sockaddr *,
91				 struct proc *));
92#ifdef INET6
93static int	tcp6_connect __P((struct tcpcb *, struct sockaddr *,
94				 struct proc *));
95#endif /* INET6 */
96static struct tcpcb *
97		tcp_disconnect __P((struct tcpcb *));
98static struct tcpcb *
99		tcp_usrclosed __P((struct tcpcb *));
100
/*
 * Tracing hooks used by the user-request handlers in this file.
 *
 *   TCPDEBUG0      declares the local `ostate'.
 *   TCPDEBUG1()    stores tp->t_state (or 0 when tp is null) in `ostate'.
 *   TCPDEBUG2(req) calls tcp_trace() for `req' when tp is non-null and
 *                  the socket has SO_DEBUG set.
 *
 * The expansions refer to the caller's locals `tp' and `so' by name.
 * When TCPDEBUG is not defined all three expand to nothing.
 */
#ifndef TCPDEBUG
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#else
#define	TCPDEBUG0	int ostate
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	if (tp && (so->so_options & SO_DEBUG)) \
				tcp_trace(TA_USER, ostate, tp, 0, 0, req)
#endif
111
112/*
113 * TCP attaches to socket via pru_attach(), reserving space,
114 * and an internet control block.
115 */
116static int
117tcp_usr_attach(struct socket *so, int proto, struct proc *p)
118{
119	int s = splnet();
120	int error;
121	struct inpcb *inp = sotoinpcb(so);
122	struct tcpcb *tp = 0;
123	TCPDEBUG0;
124
125	TCPDEBUG1();
126	if (inp) {
127		error = EISCONN;
128		goto out;
129	}
130
131	error = tcp_attach(so, p);
132	if (error)
133		goto out;
134
135	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
136		so->so_linger = TCP_LINGERTIME;
137	tp = sototcpcb(so);
138out:
139	TCPDEBUG2(PRU_ATTACH);
140	splx(s);
141	return error;
142}
143
144/*
145 * pru_detach() detaches the TCP protocol from the socket.
146 * If the protocol state is non-embryonic, then can't
147 * do this directly: have to initiate a pru_disconnect(),
148 * which may finish later; embryonic TCB's can just
149 * be discarded here.
150 */
151static int
152tcp_usr_detach(struct socket *so)
153{
154	int s = splnet();
155	int error = 0;
156	struct inpcb *inp = sotoinpcb(so);
157	struct tcpcb *tp;
158	TCPDEBUG0;
159
160	if (inp == 0) {
161		splx(s);
162		return EINVAL;	/* XXX */
163	}
164	tp = intotcpcb(inp);
165	TCPDEBUG1();
166	tp = tcp_disconnect(tp);
167
168	TCPDEBUG2(PRU_DETACH);
169	splx(s);
170	return error;
171}
172
/*
 * Shared prologue and epilogue for the pru_*() handlers below.
 *
 * COMMON_START() must follow the handler's own declarations, because it
 * begins with the TCPDEBUG0 declaration.  It expects locals named `s'
 * (the saved splnet() level), `inp' and `tp': when `inp' is null it
 * restores the level and returns EINVAL, otherwise it loads `tp' from
 * `inp' and records the pre-request state for tracing.
 *
 * COMMON_END(req) supplies the `out' label, emits the trace record,
 * restores the level and returns the local `error'.  The `goto out'
 * after the return can never execute; presumably it is there so the
 * label is referenced even in handlers that never jump to it.
 */
#define	COMMON_START()	TCPDEBUG0; \
			do { \
				if (inp == NULL) { \
					splx(s); \
					return EINVAL; \
				} \
				tp = intotcpcb(inp); \
				TCPDEBUG1(); \
			} while (0)

#define	COMMON_END(req)	out: \
			TCPDEBUG2(req); \
			splx(s); \
			return error; \
			goto out
184
185
186/*
187 * Give the socket an address.
188 */
189static int
190tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
191{
192	int s = splnet();
193	int error = 0;
194	struct inpcb *inp = sotoinpcb(so);
195	struct tcpcb *tp;
196	struct sockaddr_in *sinp;
197
198	COMMON_START();
199
200	/*
201	 * Must check for multicast addresses and disallow binding
202	 * to them.
203	 */
204	sinp = (struct sockaddr_in *)nam;
205	if (sinp->sin_family == AF_INET &&
206	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
207		error = EAFNOSUPPORT;
208		goto out;
209	}
210	error = in_pcbbind(inp, nam, p);
211	if (error)
212		goto out;
213	COMMON_END(PRU_BIND);
214
215}
216
#ifdef INET6
/*
 * pru_bind() handler for IPv6 sockets.
 *
 * An AF_INET6 multicast address is rejected with EAFNOSUPPORT.  The pcb
 * is first marked IPv6-only.  Unless IN6P_BINDV6ONLY is set, two cases
 * are then treated specially:
 *   - the unspecified address additionally enables IPv4 on the pcb;
 *   - a v4-mapped address is converted to a sockaddr_in, the pcb is
 *     switched to IPv4 only, and the bind is done by in_pcbbind().
 * Everything else is bound by in6_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)nam;
	int dualstack;

	COMMON_START();

	/* TCP cannot be bound to a multicast address. */
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;

	dualstack = (inp->inp_flags & IN6P_BINDV6ONLY) == 0;
	if (dualstack && IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
		inp->inp_vflag |= INP_IPV4;
	} else if (dualstack && IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
		goto out;
	}
	error = in6_pcbbind(inp, nam, p);
	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
261
262/*
263 * Prepare to accept connections.
264 */
265static int
266tcp_usr_listen(struct socket *so, struct proc *p)
267{
268	int s = splnet();
269	int error = 0;
270	struct inpcb *inp = sotoinpcb(so);
271	struct tcpcb *tp;
272
273	COMMON_START();
274	if (inp->inp_lport == 0)
275		error = in_pcbbind(inp, (struct sockaddr *)0, p);
276	if (error == 0)
277		tp->t_state = TCPS_LISTEN;
278	COMMON_END(PRU_LISTEN);
279}
280
#ifdef INET6
/*
 * pru_listen() handler for IPv6 sockets.
 *
 * A socket without a local port is bound through in6_pcbbind() with a
 * null address.  Before that bind the pcb's INP_IPV4 flag is set when
 * IN6P_BINDV6ONLY is clear and cleared when it is set.  On success (or
 * when a port was already assigned) the TCB moves to TCPS_LISTEN.
 */
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	if (inp->inp_lport == 0) {
		if (inp->inp_flags & IN6P_BINDV6ONLY)
			inp->inp_vflag &= ~INP_IPV4;
		else
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
	}
	if (!error)
		tp->t_state = TCPS_LISTEN;
	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
302
303/*
304 * Initiate connection to peer.
305 * Create a template for use in transmissions on this connection.
306 * Enter SYN_SENT state, and mark socket as connecting.
307 * Start keep-alive timer, and seed output sequence space.
308 * Send initial segment on connection.
309 */
310static int
311tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
312{
313	int s = splnet();
314	int error = 0;
315	struct inpcb *inp = sotoinpcb(so);
316	struct tcpcb *tp;
317	struct sockaddr_in *sinp;
318
319	COMMON_START();
320
321	/*
322	 * Must disallow TCP ``connections'' to multicast addresses.
323	 */
324	sinp = (struct sockaddr_in *)nam;
325	if (sinp->sin_family == AF_INET
326	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
327		error = EAFNOSUPPORT;
328		goto out;
329	}
330
331	prison_remote_ip(p, 0, &sinp->sin_addr.s_addr);
332
333	if ((error = tcp_connect(tp, nam, p)) != 0)
334		goto out;
335	error = tcp_output(tp);
336	COMMON_END(PRU_CONNECT);
337}
338
#ifdef INET6
/*
 * pru_connect() handler for IPv6 sockets.
 *
 * An AF_INET6 multicast destination is refused with EAFNOSUPPORT.  A
 * v4-mapped destination on a socket without IN6P_BINDV6ONLY is
 * converted to a sockaddr_in, the pcb is switched to IPv4 only, and the
 * connection is set up by tcp_connect().  Any other destination marks
 * the pcb IPv6 only and goes through tcp6_connect().  In both cases a
 * successful setup is followed by tcp_output().
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)nam;
	struct sockaddr_in sin;

	COMMON_START();

	/* TCP ``connections'' to multicast addresses are not allowed. */
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0 &&
	    IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		error = tcp_connect(tp, (struct sockaddr *)&sin, p);
	} else {
		inp->inp_vflag &= ~INP_IPV4;
		inp->inp_vflag |= INP_IPV6;
		error = tcp6_connect(tp, nam, p);
	}
	if (error == 0)
		error = tcp_output(tp);
	COMMON_END(PRU_CONNECT);
}
#endif /* INET6 */
381
382/*
383 * Initiate disconnect from peer.
384 * If connection never passed embryonic stage, just drop;
385 * else if don't need to let data drain, then can just drop anyways,
386 * else have to begin TCP shutdown process: mark socket disconnecting,
387 * drain unread data, state switch to reflect user close, and
388 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
389 * when peer sends FIN and acks ours.
390 *
391 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
392 */
393static int
394tcp_usr_disconnect(struct socket *so)
395{
396	int s = splnet();
397	int error = 0;
398	struct inpcb *inp = sotoinpcb(so);
399	struct tcpcb *tp;
400
401	COMMON_START();
402	tp = tcp_disconnect(tp);
403	COMMON_END(PRU_DISCONNECT);
404}
405
406/*
407 * Accept a connection.  Essentially all the work is
408 * done at higher levels; just return the address
409 * of the peer, storing through addr.
410 */
411static int
412tcp_usr_accept(struct socket *so, struct sockaddr **nam)
413{
414	int s = splnet();
415	int error = 0;
416	struct inpcb *inp = sotoinpcb(so);
417	struct tcpcb *tp;
418
419	COMMON_START();
420	in_setpeeraddr(so, nam);
421	COMMON_END(PRU_ACCEPT);
422}
423
#ifdef INET6
/*
 * pru_accept() handler for IPv6 sockets: hand back the peer's address
 * through `nam' by calling in6_mapped_peeraddr().  The result of that
 * call is not examined, so the handler returns 0 whenever the socket
 * has an inpcb.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	(void)in6_mapped_peeraddr(so, nam);
	COMMON_END(PRU_ACCEPT);
}
#endif /* INET6 */
438/*
439 * Mark the connection as being incapable of further output.
440 */
441static int
442tcp_usr_shutdown(struct socket *so)
443{
444	int s = splnet();
445	int error = 0;
446	struct inpcb *inp = sotoinpcb(so);
447	struct tcpcb *tp;
448
449	COMMON_START();
450	socantsendmore(so);
451	tp = tcp_usrclosed(tp);
452	if (tp)
453		error = tcp_output(tp);
454	COMMON_END(PRU_SHUTDOWN);
455}
456
457/*
458 * After a receive, possibly send window update to peer.
459 */
460static int
461tcp_usr_rcvd(struct socket *so, int flags)
462{
463	int s = splnet();
464	int error = 0;
465	struct inpcb *inp = sotoinpcb(so);
466	struct tcpcb *tp;
467
468	COMMON_START();
469	tcp_output(tp);
470	COMMON_END(PRU_RCVD);
471}
472
473/*
474 * Do a send by putting data in output queue and updating urgent
475 * marker if URG set.  Possibly send more data.  Unlike the other
476 * pru_*() routines, the mbuf chains are our responsibility.  We
477 * must either enqueue them or free them.  The other pru_* routines
478 * generally are caller-frees.
479 */
480static int
481tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
482	     struct sockaddr *nam, struct mbuf *control, struct proc *p)
483{
484	int s = splnet();
485	int error = 0;
486	struct inpcb *inp = sotoinpcb(so);
487	struct tcpcb *tp;
488#ifdef INET6
489	int isipv6;
490#endif
491	TCPDEBUG0;
492
493	if (inp == NULL) {
494		/*
495		 * OOPS! we lost a race, the TCP session got reset after
496		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
497		 * network interrupt in the non-splnet() section of sosend().
498		 */
499		if (m)
500			m_freem(m);
501		if (control)
502			m_freem(control);
503		error = ECONNRESET;	/* XXX EPIPE? */
504		tp = NULL;
505		TCPDEBUG1();
506		goto out;
507	}
508#ifdef INET6
509	isipv6 = nam && nam->sa_family == AF_INET6;
510#endif /* INET6 */
511	tp = intotcpcb(inp);
512	TCPDEBUG1();
513	if (control) {
514		/* TCP doesn't do control messages (rights, creds, etc) */
515		if (control->m_len) {
516			m_freem(control);
517			if (m)
518				m_freem(m);
519			error = EINVAL;
520			goto out;
521		}
522		m_freem(control);	/* empty control, just free it */
523	}
524	if(!(flags & PRUS_OOB)) {
525		sbappend(&so->so_snd, m);
526		if (nam && tp->t_state < TCPS_SYN_SENT) {
527			/*
528			 * Do implied connect if not yet connected,
529			 * initialize window to default value, and
530			 * initialize maxseg/maxopd using peer's cached
531			 * MSS.
532			 */
533#ifdef INET6
534			if (isipv6)
535				error = tcp6_connect(tp, nam, p);
536			else
537#endif /* INET6 */
538			error = tcp_connect(tp, nam, p);
539			if (error)
540				goto out;
541			tp->snd_wnd = TTCP_CLIENT_SND_WND;
542			tcp_mss(tp, -1);
543		}
544
545		if (flags & PRUS_EOF) {
546			/*
547			 * Close the send side of the connection after
548			 * the data is sent.
549			 */
550			socantsendmore(so);
551			tp = tcp_usrclosed(tp);
552		}
553		if (tp != NULL) {
554			if (flags & PRUS_MORETOCOME)
555				tp->t_flags |= TF_MORETOCOME;
556			error = tcp_output(tp);
557			if (flags & PRUS_MORETOCOME)
558				tp->t_flags &= ~TF_MORETOCOME;
559		}
560	} else {
561		if (sbspace(&so->so_snd) < -512) {
562			m_freem(m);
563			error = ENOBUFS;
564			goto out;
565		}
566		/*
567		 * According to RFC961 (Assigned Protocols),
568		 * the urgent pointer points to the last octet
569		 * of urgent data.  We continue, however,
570		 * to consider it to indicate the first octet
571		 * of data past the urgent section.
572		 * Otherwise, snd_up should be one lower.
573		 */
574		sbappend(&so->so_snd, m);
575		if (nam && tp->t_state < TCPS_SYN_SENT) {
576			/*
577			 * Do implied connect if not yet connected,
578			 * initialize window to default value, and
579			 * initialize maxseg/maxopd using peer's cached
580			 * MSS.
581			 */
582#ifdef INET6
583			if (isipv6)
584				error = tcp6_connect(tp, nam, p);
585			else
586#endif /* INET6 */
587			error = tcp_connect(tp, nam, p);
588			if (error)
589				goto out;
590			tp->snd_wnd = TTCP_CLIENT_SND_WND;
591			tcp_mss(tp, -1);
592		}
593		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
594		tp->t_force = 1;
595		error = tcp_output(tp);
596		tp->t_force = 0;
597	}
598	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
599		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
600}
601
602/*
603 * Abort the TCP.
604 */
605static int
606tcp_usr_abort(struct socket *so)
607{
608	int s = splnet();
609	int error = 0;
610	struct inpcb *inp = sotoinpcb(so);
611	struct tcpcb *tp;
612
613	COMMON_START();
614	tp = tcp_drop(tp, ECONNABORTED);
615	COMMON_END(PRU_ABORT);
616}
617
618/*
619 * Receive out-of-band data.
620 */
621static int
622tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
623{
624	int s = splnet();
625	int error = 0;
626	struct inpcb *inp = sotoinpcb(so);
627	struct tcpcb *tp;
628
629	COMMON_START();
630	if ((so->so_oobmark == 0 &&
631	     (so->so_state & SS_RCVATMARK) == 0) ||
632	    so->so_options & SO_OOBINLINE ||
633	    tp->t_oobflags & TCPOOB_HADDATA) {
634		error = EINVAL;
635		goto out;
636	}
637	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
638		error = EWOULDBLOCK;
639		goto out;
640	}
641	m->m_len = 1;
642	*mtod(m, caddr_t) = tp->t_iobc;
643	if ((flags & MSG_PEEK) == 0)
644		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
645	COMMON_END(PRU_RCVOOB);
646}
647
648/* xxx - should be const */
649struct pr_usrreqs tcp_usrreqs = {
650	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
651	tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
652	tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
653	tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
654	in_setsockaddr, sosend, soreceive, sopoll
655};
656
#ifdef INET6
/*
 * User-request dispatch table for TCP over IPv6.  It shares the
 * address-family independent handlers with tcp_usrreqs and substitutes
 * the tcp6_* / in6_* variants for accept, bind, connect, control,
 * listen, peeraddr and sockaddr.
 *
 * The initializers are positional, so their order has to match the
 * member order of struct pr_usrreqs (declared outside this file).
 */
struct pr_usrreqs tcp6_usrreqs = {
	tcp_usr_abort,
	tcp6_usr_accept,
	tcp_usr_attach,
	tcp6_usr_bind,
	tcp6_usr_connect,
	pru_connect2_notsupp,
	in6_control,
	tcp_usr_detach,
	tcp_usr_disconnect,
	tcp6_usr_listen,
	in6_mapped_peeraddr,
	tcp_usr_rcvd,
	tcp_usr_rcvoob,
	tcp_usr_send,
	pru_sense_null,
	tcp_usr_shutdown,
	in6_mapped_sockaddr,
	sosend,
	soreceive,
	sopoll
};
#endif /* INET6 */
666
667/*
668 * Common subroutine to open a TCP connection to remote host specified
669 * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
670 * port number if needed.  Call in_pcbladdr to do the routing and to choose
671 * a local host address (interface).  If there is an existing incarnation
672 * of the same connection in TIME-WAIT state and if the remote host was
673 * sending CC options and if the connection duration was < MSL, then
674 * truncate the previous TIME-WAIT state and proceed.
675 * Initialize connection parameters and enter SYN-SENT state.
676 */
677static int
678tcp_connect(tp, nam, p)
679	register struct tcpcb *tp;
680	struct sockaddr *nam;
681	struct proc *p;
682{
683	struct inpcb *inp = tp->t_inpcb, *oinp;
684	struct socket *so = inp->inp_socket;
685	struct tcpcb *otp;
686	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
687	struct sockaddr_in *ifaddr;
688	struct rmxp_tao *taop;
689	struct rmxp_tao tao_noncached;
690	int error;
691
692	if (inp->inp_lport == 0) {
693		error = in_pcbbind(inp, (struct sockaddr *)0, p);
694		if (error)
695			return error;
696	}
697
698	/*
699	 * Cannot simply call in_pcbconnect, because there might be an
700	 * earlier incarnation of this same connection still in
701	 * TIME_WAIT state, creating an ADDRINUSE error.
702	 */
703	error = in_pcbladdr(inp, nam, &ifaddr);
704	if (error)
705		return error;
706	oinp = in_pcblookup_hash(inp->inp_pcbinfo,
707	    sin->sin_addr, sin->sin_port,
708	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
709						: ifaddr->sin_addr,
710	    inp->inp_lport,  0, NULL);
711	if (oinp) {
712		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
713		otp->t_state == TCPS_TIME_WAIT &&
714		    (ticks - otp->t_starttime) < tcp_msl &&
715		    (otp->t_flags & TF_RCVD_CC))
716			otp = tcp_close(otp);
717		else
718			return EADDRINUSE;
719	}
720	if (inp->inp_laddr.s_addr == INADDR_ANY)
721		inp->inp_laddr = ifaddr->sin_addr;
722	inp->inp_faddr = sin->sin_addr;
723	inp->inp_fport = sin->sin_port;
724	in_pcbrehash(inp);
725
726	tp->t_template = tcp_template(tp);
727	if (tp->t_template == 0) {
728		in_pcbdisconnect(inp);
729		return ENOBUFS;
730	}
731
732	/* Compute window scaling to request.  */
733	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
734	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
735		tp->request_r_scale++;
736
737	soisconnecting(so);
738	tcpstat.tcps_connattempt++;
739	tp->t_state = TCPS_SYN_SENT;
740	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
741	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
742	tcp_sendseqinit(tp);
743
744	/*
745	 * Generate a CC value for this connection and
746	 * check whether CC or CCnew should be used.
747	 */
748	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
749		taop = &tao_noncached;
750		bzero(taop, sizeof(*taop));
751	}
752
753	tp->cc_send = CC_INC(tcp_ccgen);
754	if (taop->tao_ccsent != 0 &&
755	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
756		taop->tao_ccsent = tp->cc_send;
757	} else {
758		taop->tao_ccsent = 0;
759		tp->t_flags |= TF_SENDCCNEW;
760	}
761
762	return 0;
763}
764
#ifdef INET6
/*
 * IPv6 counterpart of tcp_connect(): open a TCP connection to the
 * remote host given by the struct sockaddr_in6 behind `nam'.
 *
 * Steps, in order:
 *   - in6_pcbbind() assigns a local port if none is set yet;
 *   - in6_pcbladdr() chooses the local address;
 *   - an existing pcb for the same four-tuple is tolerated only when it
 *     is a different pcb in TIME_WAIT that has been open for less than
 *     tcp_msl ticks and whose peer sent CC options; that old connection
 *     is closed.  Any other match yields EADDRINUSE;
 *   - addresses, port and (if any flow bits are set) the flow info are
 *     committed to the pcb, the header template is built, the window
 *     scale to request is computed, and the connection enters SYN_SENT
 *     with the keep timer armed;
 *   - a CC value is generated and CC versus CCnew is selected.
 *
 * Returns 0 on success, the error from in6_pcbbind()/in6_pcbladdr(),
 * EADDRINUSE as described above, or ENOBUFS when no template could be
 * allocated.
 */
static int
tcp6_connect(tp, nam, p)
	register struct tcpcb *tp;
	struct sockaddr *nam;
	struct proc *p;
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct in6_addr *addr6;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error;

	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			return error;
	}

	/*
	 * Cannot simply call in6_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	error = in6_pcbladdr(inp, nam, &addr6);
	if (error)
		return error;
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
				  &sin6->sin6_addr, sin6->sin6_port,
				  IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
				  ? addr6
				  : &inp->in6p_laddr,
				  inp->inp_lport,  0, NULL);
	if (oinp) {
		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    (ticks - otp->t_starttime) < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC))
			otp = tcp_close(otp);
		else
			return EADDRINUSE;
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		inp->in6p_laddr = *addr6;
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/*
	 * The masked flow info is an integer, so it is tested against 0;
	 * comparing it with the null pointer constant is a type error.
	 */
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
		inp->in6p_flowinfo = sin6->sin6_flowinfo;
	in_pcbrehash(inp);

	tp->t_template = tcp_template(tp);
	if (tp->t_template == 0) {
		in6_pcbdisconnect(inp);
		return ENOBUFS;
	}

	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
		tp->request_r_scale++;

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
	tcp_sendseqinit(tp);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 */
	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
		/* No cache entry: work on a zeroed scratch copy. */
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent != 0 &&
	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
		taop->tao_ccsent = tp->cc_send;
	} else {
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	}

	return 0;
}
#endif /* INET6 */
857
858/*
859 * The new sockopt interface makes it possible for us to block in the
860 * copyin/out step (if we take a page fault).  Taking a page fault at
861 * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now
862 * use TSM, there probably isn't any need for this function to run at
863 * splnet() any more.  This needs more examination.)
864 */
865int
866tcp_ctloutput(so, sopt)
867	struct socket *so;
868	struct sockopt *sopt;
869{
870	int	error, opt, optval, s;
871	struct	inpcb *inp;
872	struct	tcpcb *tp;
873
874	error = 0;
875	s = splnet();		/* XXX */
876	inp = sotoinpcb(so);
877	if (inp == NULL) {
878		splx(s);
879		return (ECONNRESET);
880	}
881	if (sopt->sopt_level != IPPROTO_TCP) {
882#ifdef INET6
883		if (INP_CHECK_SOCKAF(so, AF_INET6))
884			error = ip6_ctloutput(so, sopt);
885		else
886#endif /* INET6 */
887		error = ip_ctloutput(so, sopt);
888		splx(s);
889		return (error);
890	}
891	tp = intotcpcb(inp);
892
893	switch (sopt->sopt_dir) {
894	case SOPT_SET:
895		switch (sopt->sopt_name) {
896		case TCP_NODELAY:
897		case TCP_NOOPT:
898			error = sooptcopyin(sopt, &optval, sizeof optval,
899					    sizeof optval);
900			if (error)
901				break;
902
903			switch (sopt->sopt_name) {
904			case TCP_NODELAY:
905				opt = TF_NODELAY;
906				break;
907			case TCP_NOOPT:
908				opt = TF_NOOPT;
909				break;
910			default:
911				opt = 0; /* dead code to fool gcc */
912				break;
913			}
914
915			if (optval)
916				tp->t_flags |= opt;
917			else
918				tp->t_flags &= ~opt;
919			break;
920
921		case TCP_NOPUSH:
922			error = sooptcopyin(sopt, &optval, sizeof optval,
923					    sizeof optval);
924			if (error)
925				break;
926
927			if (optval)
928				tp->t_flags |= TF_NOPUSH;
929			else {
930				tp->t_flags &= ~TF_NOPUSH;
931				error = tcp_output(tp);
932			}
933			break;
934
935		case TCP_MAXSEG:
936			error = sooptcopyin(sopt, &optval, sizeof optval,
937					    sizeof optval);
938			if (error)
939				break;
940
941			if (optval > 0 && optval <= tp->t_maxseg)
942				tp->t_maxseg = optval;
943			else
944				error = EINVAL;
945			break;
946
947		default:
948			error = ENOPROTOOPT;
949			break;
950		}
951		break;
952
953	case SOPT_GET:
954		switch (sopt->sopt_name) {
955		case TCP_NODELAY:
956			optval = tp->t_flags & TF_NODELAY;
957			break;
958		case TCP_MAXSEG:
959			optval = tp->t_maxseg;
960			break;
961		case TCP_NOOPT:
962			optval = tp->t_flags & TF_NOOPT;
963			break;
964		case TCP_NOPUSH:
965			optval = tp->t_flags & TF_NOPUSH;
966			break;
967		default:
968			error = ENOPROTOOPT;
969			break;
970		}
971		if (error == 0)
972			error = sooptcopyout(sopt, &optval, sizeof optval);
973		break;
974	}
975	splx(s);
976	return (error);
977}
978
979/*
980 * tcp_sendspace and tcp_recvspace are the default send and receive window
981 * sizes, respectively.  These are obsolescent (this information should
982 * be set by the route).
983 */
984u_long	tcp_sendspace = 1024*16;
985SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
986    &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
987u_long	tcp_recvspace = 1024*16;
988SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
989    &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
990
991/*
992 * Attach TCP protocol to socket, allocating
993 * internet protocol control block, tcp control block,
994 * bufer space, and entering LISTEN state if to accept connections.
995 */
996static int
997tcp_attach(so, p)
998	struct socket *so;
999	struct proc *p;
1000{
1001	register struct tcpcb *tp;
1002	struct inpcb *inp;
1003	int error;
1004#ifdef INET6
1005	int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != NULL;
1006#endif
1007
1008	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1009		error = soreserve(so, tcp_sendspace, tcp_recvspace);
1010		if (error)
1011			return (error);
1012	}
1013	error = in_pcballoc(so, &tcbinfo, p);
1014	if (error)
1015		return (error);
1016	inp = sotoinpcb(so);
1017#ifdef IPSEC
1018	error = ipsec_init_policy(so, &inp->inp_sp);
1019	if (error) {
1020#ifdef INET6
1021		if (isipv6)
1022			in6_pcbdetach(inp);
1023		else
1024#endif
1025		in_pcbdetach(inp);
1026		return (error);
1027	}
1028#endif /*IPSEC*/
1029#ifdef INET6
1030	if (isipv6) {
1031		inp->inp_vflag |= INP_IPV6;
1032		inp->in6p_hops = -1;	/* use kernel default */
1033	}
1034	else
1035#endif
1036	inp->inp_vflag |= INP_IPV4;
1037	tp = tcp_newtcpcb(inp);
1038	if (tp == 0) {
1039		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
1040
1041		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
1042#ifdef INET6
1043		if (isipv6)
1044			in6_pcbdetach(inp);
1045		else
1046#endif
1047		in_pcbdetach(inp);
1048		so->so_state |= nofd;
1049		return (ENOBUFS);
1050	}
1051	tp->t_state = TCPS_CLOSED;
1052	return (0);
1053}
1054
1055/*
1056 * Initiate (or continue) disconnect.
1057 * If embryonic state, just send reset (once).
1058 * If in ``let data drain'' option and linger null, just drop.
1059 * Otherwise (hard), mark socket disconnecting and drop
1060 * current input data; switch states based on user close, and
1061 * send segment to peer (with FIN).
1062 */
1063static struct tcpcb *
1064tcp_disconnect(tp)
1065	register struct tcpcb *tp;
1066{
1067	struct socket *so = tp->t_inpcb->inp_socket;
1068
1069	if (tp->t_state < TCPS_ESTABLISHED)
1070		tp = tcp_close(tp);
1071	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1072		tp = tcp_drop(tp, 0);
1073	else {
1074		soisdisconnecting(so);
1075		sbflush(&so->so_rcv);
1076		tp = tcp_usrclosed(tp);
1077		if (tp)
1078			(void) tcp_output(tp);
1079	}
1080	return (tp);
1081}
1082
1083/*
1084 * User issued close, and wish to trail through shutdown states:
1085 * if never received SYN, just forget it.  If got a SYN from peer,
1086 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1087 * If already got a FIN from peer, then almost done; go to LAST_ACK
1088 * state.  In all other cases, have already sent FIN to peer (e.g.
1089 * after PRU_SHUTDOWN), and just have to play tedious game waiting
1090 * for peer to send FIN or not respond to keep-alives, etc.
1091 * We can let the user exit from the close as soon as the FIN is acked.
1092 */
1093static struct tcpcb *
1094tcp_usrclosed(tp)
1095	register struct tcpcb *tp;
1096{
1097
1098	switch (tp->t_state) {
1099
1100	case TCPS_CLOSED:
1101	case TCPS_LISTEN:
1102		tp->t_state = TCPS_CLOSED;
1103		tp = tcp_close(tp);
1104		break;
1105
1106	case TCPS_SYN_SENT:
1107	case TCPS_SYN_RECEIVED:
1108		tp->t_flags |= TF_NEEDFIN;
1109		break;
1110
1111	case TCPS_ESTABLISHED:
1112		tp->t_state = TCPS_FIN_WAIT_1;
1113		break;
1114
1115	case TCPS_CLOSE_WAIT:
1116		tp->t_state = TCPS_LAST_ACK;
1117		break;
1118	}
1119	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1120		soisdisconnected(tp->t_inpcb->inp_socket);
1121		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
1122		if (tp->t_state == TCPS_FIN_WAIT_2)
1123			callout_reset(tp->tt_2msl, tcp_maxidle,
1124				      tcp_timer_2msl, tp);
1125	}
1126	return (tp);
1127}
1128
1129