tcp_usrreq.c revision 22719
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
34 *	$FreeBSD: head/sys/netinet/tcp_usrreq.c 22719 1997-02-14 18:15:53Z wollman $
35 */
36
37#include <sys/param.h>
38#include <sys/queue.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/sysctl.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/protosw.h>
47#include <sys/errno.h>
48#include <sys/stat.h>
49
50#include <net/if.h>
51#include <net/route.h>
52
53#include <netinet/in.h>
54#include <netinet/in_systm.h>
55#include <netinet/ip.h>
56#include <netinet/in_pcb.h>
57#include <netinet/in_var.h>
58#include <netinet/ip_var.h>
59#include <netinet/tcp.h>
60#include <netinet/tcp_fsm.h>
61#include <netinet/tcp_seq.h>
62#include <netinet/tcp_timer.h>
63#include <netinet/tcp_var.h>
64#include <netinet/tcpip.h>
65#ifdef TCPDEBUG
66#include <netinet/tcp_debug.h>
67#endif
68
69/*
70 * TCP protocol interface to socket abstraction.
71 */
/*
 * Table declared elsewhere; presumably maps TCP state numbers to names
 * for tracing (verify).  Nothing in the visible part of this file
 * references it.
 */
extern	char *tcpstates[];

/*
 * Forward declarations for the file-local helpers that are defined
 * after the pru_* entry points which call them.
 */
static int	tcp_attach __P((struct socket *));
static int	tcp_connect __P((struct tcpcb *, struct mbuf *));
static struct tcpcb *
		tcp_disconnect __P((struct tcpcb *));
static struct tcpcb *
		tcp_usrclosed __P((struct tcpcb *));
80
/*
 * Tracing hooks used by the pru_* entry points below.
 *
 *   TCPDEBUG0       declares the local `ostate';
 *   TCPDEBUG1()     stores tp->t_state in it, or 0 when tp is null;
 *   TCPDEBUG2(req)  calls tcp_trace() with the saved state when tp is
 *                   non-null and the socket has SO_DEBUG set.
 *
 * The expansions refer to locals named `tp' and `so' in the enclosing
 * function.  Without TCPDEBUG all three expand to nothing.
 *
 * NOTE(review): TCPDEBUG2 expands to an unbraced `if', so it must not be
 * followed directly by an `else'.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	if (tp && (so->so_options & SO_DEBUG)) \
				tcp_trace(TA_USER, ostate, tp, 0, req)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
91
92/*
93 * TCP attaches to socket via pru_attach(), reserving space,
94 * and an internet control block.
95 */
96static int
97tcp_usr_attach(struct socket *so, int proto)
98{
99	int s = splnet();
100	int error;
101	struct inpcb *inp = sotoinpcb(so);
102	struct tcpcb *tp = 0;
103	TCPDEBUG0;
104
105	TCPDEBUG1();
106	if (inp) {
107		error = EISCONN;
108		goto out;
109	}
110
111	error = tcp_attach(so);
112	if (error)
113		goto out;
114
115	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
116		so->so_linger = TCP_LINGERTIME * hz;
117	tp = sototcpcb(so);
118out:
119	TCPDEBUG2(PRU_ATTACH);
120	splx(s);
121	return error;
122}
123
124/*
125 * pru_detach() detaches the TCP protocol from the socket.
126 * If the protocol state is non-embryonic, then can't
127 * do this directly: have to initiate a pru_disconnect(),
128 * which may finish later; embryonic TCB's can just
129 * be discarded here.
130 */
131static int
132tcp_usr_detach(struct socket *so)
133{
134	int s = splnet();
135	int error = 0;
136	struct inpcb *inp = sotoinpcb(so);
137	struct tcpcb *tp;
138	TCPDEBUG0;
139
140	if (inp == 0) {
141		splx(s);
142		return EINVAL;	/* XXX */
143	}
144	tp = intotcpcb(inp);
145	TCPDEBUG1();
146	if (tp->t_state > TCPS_LISTEN)
147		tp = tcp_disconnect(tp);
148	else
149		tp = tcp_close(tp);
150
151	TCPDEBUG2(PRU_DETACH);
152	splx(s);
153	return error;
154}
155
/*
 * Shared prologue for the pru_* handlers below.  It relies on locals of
 * the enclosing function: `s' (the value saved from splnet()), `inp',
 * `tp' and, through the TCPDEBUG macros, `so'.
 *
 * Because TCPDEBUG0 may expand to a declaration, COMMON_START() has to
 * come directly after the function's own declarations.  When the socket
 * has no inpcb it restores the priority level and returns EINVAL from
 * the enclosing function; otherwise it loads `tp' from `inp' and records
 * the pre-request state for tracing.
 */
#define	COMMON_START()	TCPDEBUG0; \
			do { \
				     if (inp == 0) { \
					     splx(s); \
					     return EINVAL; \
				     } \
				     tp = intotcpcb(inp); \
				     TCPDEBUG1(); \
		     } while(0)

/*
 * Shared epilogue: provides the `out:' label that handlers jump to on
 * error, emits the trace record, restores the priority level saved in
 * `s' and returns `error'.  The trailing `goto out' can never execute;
 * presumably it is there so that the label counts as used in handlers
 * that contain no jump of their own (verify against compiler warnings).
 */
#define COMMON_END(req)	out: TCPDEBUG2(req); splx(s); return error; goto out
167
168
169/*
170 * Give the socket an address.
171 */
172static int
173tcp_usr_bind(struct socket *so, struct mbuf *nam)
174{
175	int s = splnet();
176	int error = 0;
177	struct inpcb *inp = sotoinpcb(so);
178	struct tcpcb *tp;
179	struct sockaddr_in *sinp;
180
181	COMMON_START();
182
183	/*
184	 * Must check for multicast addresses and disallow binding
185	 * to them.
186	 */
187	sinp = mtod(nam, struct sockaddr_in *);
188	if (sinp->sin_family == AF_INET &&
189	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
190		error = EAFNOSUPPORT;
191		goto out;
192	}
193	error = in_pcbbind(inp, nam);
194	if (error)
195		goto out;
196	COMMON_END(PRU_BIND);
197
198}
199
200/*
201 * Prepare to accept connections.
202 */
203static int
204tcp_usr_listen(struct socket *so)
205{
206	int s = splnet();
207	int error = 0;
208	struct inpcb *inp = sotoinpcb(so);
209	struct tcpcb *tp;
210
211	COMMON_START();
212	if (inp->inp_lport == 0)
213		error = in_pcbbind(inp, NULL);
214	if (error == 0)
215		tp->t_state = TCPS_LISTEN;
216	COMMON_END(PRU_LISTEN);
217}
218
219/*
220 * Initiate connection to peer.
221 * Create a template for use in transmissions on this connection.
222 * Enter SYN_SENT state, and mark socket as connecting.
223 * Start keep-alive timer, and seed output sequence space.
224 * Send initial segment on connection.
225 */
226static int
227tcp_usr_connect(struct socket *so, struct mbuf *nam)
228{
229	int s = splnet();
230	int error = 0;
231	struct inpcb *inp = sotoinpcb(so);
232	struct tcpcb *tp;
233	struct sockaddr_in *sinp;
234
235	COMMON_START();
236
237	/*
238	 * Must disallow TCP ``connections'' to multicast addresses.
239	 */
240	sinp = mtod(nam, struct sockaddr_in *);
241	if (sinp->sin_family == AF_INET
242	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
243		error = EAFNOSUPPORT;
244		goto out;
245	}
246
247	if ((error = tcp_connect(tp, nam)) != 0)
248		goto out;
249	error = tcp_output(tp);
250	COMMON_END(PRU_CONNECT);
251}
252
/*
 * pru_disconnect() handler: initiate disconnect from peer.
 *
 * All of the work is done by tcp_disconnect(): a connection that never
 * reached ESTABLISHED is closed at once, one with SO_LINGER set and a
 * zero linger time is dropped, and anything else starts the normal
 * shutdown (socket marked disconnecting, receive buffer flushed, state
 * advanced for the user close, segment sent to the peer).
 *
 * Returns EINVAL when the socket has no inpcb; otherwise 0, because the
 * result of tcp_disconnect() is only kept in `tp' for tracing.
 *
 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
 */
static int
tcp_usr_disconnect(struct socket *so)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	/* May return a null tcpcb; TCPDEBUG2 in COMMON_END checks for that. */
	tp = tcp_disconnect(tp);
	COMMON_END(PRU_DISCONNECT);
}
276
/*
 * pru_accept() handler.  Essentially all the work of accepting a
 * connection is done at higher levels; this only hands the peer's
 * address back through `nam' by calling in_setpeeraddr().
 *
 * Returns EINVAL when the socket has no inpcb, 0 otherwise.
 */
static int
tcp_usr_accept(struct socket *so, struct mbuf *nam)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	in_setpeeraddr(inp, nam);
	COMMON_END(PRU_ACCEPT);
}
294
295/*
296 * Mark the connection as being incapable of further output.
297 */
298static int
299tcp_usr_shutdown(struct socket *so)
300{
301	int s = splnet();
302	int error = 0;
303	struct inpcb *inp = sotoinpcb(so);
304	struct tcpcb *tp;
305
306	COMMON_START();
307	socantsendmore(so);
308	tp = tcp_usrclosed(tp);
309	if (tp)
310		error = tcp_output(tp);
311	COMMON_END(PRU_SHUTDOWN);
312}
313
314/*
315 * After a receive, possibly send window update to peer.
316 */
317static int
318tcp_usr_rcvd(struct socket *so, int flags)
319{
320	int s = splnet();
321	int error = 0;
322	struct inpcb *inp = sotoinpcb(so);
323	struct tcpcb *tp;
324
325	COMMON_START();
326	tcp_output(tp);
327	COMMON_END(PRU_RCVD);
328}
329
330/*
331 * Do a send by putting data in output queue and updating urgent
332 * marker if URG set.  Possibly send more data.
333 */
334static int
335tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam,
336	     struct mbuf *control)
337{
338	int s = splnet();
339	int error = 0;
340	struct inpcb *inp = sotoinpcb(so);
341	struct tcpcb *tp;
342
343	COMMON_START();
344	if (control && control->m_len) {
345		m_freem(control); /* XXX shouldn't caller do this??? */
346		if (m)
347			m_freem(m);
348		return EINVAL;
349	}
350
351	if(!(flags & PRUS_OOB)) {
352		sbappend(&so->so_snd, m);
353		if (nam && tp->t_state < TCPS_SYN_SENT) {
354			/*
355			 * Do implied connect if not yet connected,
356			 * initialize window to default value, and
357			 * initialize maxseg/maxopd using peer's cached
358			 * MSS.
359			 */
360			error = tcp_connect(tp, nam);
361			if (error)
362				goto out;
363			tp->snd_wnd = TTCP_CLIENT_SND_WND;
364			tcp_mss(tp, -1);
365		}
366
367		if (flags & PRUS_EOF) {
368			/*
369			 * Close the send side of the connection after
370			 * the data is sent.
371			 */
372			socantsendmore(so);
373			tp = tcp_usrclosed(tp);
374		}
375		if (tp != NULL)
376			error = tcp_output(tp);
377	} else {
378		if (sbspace(&so->so_snd) < -512) {
379			m_freem(m);
380			error = ENOBUFS;
381			goto out;
382		}
383		/*
384		 * According to RFC961 (Assigned Protocols),
385		 * the urgent pointer points to the last octet
386		 * of urgent data.  We continue, however,
387		 * to consider it to indicate the first octet
388		 * of data past the urgent section.
389		 * Otherwise, snd_up should be one lower.
390		 */
391		sbappend(&so->so_snd, m);
392		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
393		tp->t_force = 1;
394		error = tcp_output(tp);
395		tp->t_force = 0;
396	}
397	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
398		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
399}
400
/*
 * pru_abort() handler: abort the TCP connection by passing the tcpcb to
 * tcp_drop() with ECONNABORTED.
 *
 * Returns EINVAL when the socket has no inpcb; otherwise 0, because the
 * result of tcp_drop() is only kept in `tp' for tracing.
 */
static int
tcp_usr_abort(struct socket *so)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	tp = tcp_drop(tp, ECONNABORTED);
	COMMON_END(PRU_ABORT);
}
416
417/*
418 * Fill in st_bklsize for fstat() operations on a socket.
419 */
420static int
421tcp_usr_sense(struct socket *so, struct stat *sb)
422{
423	int s = splnet();
424
425	sb->st_blksize = so->so_snd.sb_hiwat;
426	splx(s);
427	return 0;
428}
429
430/*
431 * Receive out-of-band data.
432 */
433static int
434tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
435{
436	int s = splnet();
437	int error = 0;
438	struct inpcb *inp = sotoinpcb(so);
439	struct tcpcb *tp;
440
441	COMMON_START();
442	if ((so->so_oobmark == 0 &&
443	     (so->so_state & SS_RCVATMARK) == 0) ||
444	    so->so_options & SO_OOBINLINE ||
445	    tp->t_oobflags & TCPOOB_HADDATA) {
446		error = EINVAL;
447		goto out;
448	}
449	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
450		error = EWOULDBLOCK;
451		goto out;
452	}
453	m->m_len = 1;
454	*mtod(m, caddr_t) = tp->t_iobc;
455	if ((flags & MSG_PEEK) == 0)
456		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
457	COMMON_END(PRU_RCVOOB);
458}
459
/*
 * pru_sockaddr() handler: hand `nam' to in_setsockaddr() together with
 * the socket's inpcb (presumably to report the local address -- verify
 * against in_setsockaddr()).
 *
 * Returns EINVAL when the socket has no inpcb, 0 otherwise.
 */
static int
tcp_usr_sockaddr(struct socket *so, struct mbuf *nam)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	in_setsockaddr(inp, nam);
	COMMON_END(PRU_SOCKADDR);
}
472
/*
 * pru_peeraddr() handler: hand `nam' to in_setpeeraddr() together with
 * the socket's inpcb (presumably to report the peer's address -- verify
 * against in_setpeeraddr()).  The body is the same as tcp_usr_accept()
 * apart from the request code used for tracing.
 *
 * Returns EINVAL when the socket has no inpcb, 0 otherwise.
 */
static int
tcp_usr_peeraddr(struct socket *so, struct mbuf *nam)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	in_setpeeraddr(inp, nam);
	COMMON_END(PRU_PEERADDR);
}
485
/*
 * pru_control() handler: forwards the request unchanged to in_control()
 * and returns its result.  Unlike the other handlers it does not call
 * splnet() and does not check for an inpcb itself.
 *
 * XXX - this should just be a call to in_control, but we need to get
 * the types worked out.
 */
static int
tcp_usr_control(struct socket *so, int cmd, caddr_t arg, struct ifnet *ifp)
{
	return in_control(so, cmd, arg, ifp);
}
495
/*
 * Table of TCP's user-request handlers.  The initializer is positional,
 * so the order of the entries has to match the member order of
 * struct pr_usrreqs, which is declared outside this file (presumably in
 * <sys/protosw.h> -- verify before reordering or adding entries).
 * Every slot points at a handler from this file except the sixth, which
 * is filled with pru_connect2_notsupp.
 */
/* xxx - should be const */
struct pr_usrreqs tcp_usrreqs = {
	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
	tcp_usr_connect, pru_connect2_notsupp, tcp_usr_control, tcp_usr_detach,
	tcp_usr_disconnect, tcp_usr_listen, tcp_usr_peeraddr, tcp_usr_rcvd,
	tcp_usr_rcvoob, tcp_usr_send, tcp_usr_sense, tcp_usr_shutdown,
	tcp_usr_sockaddr
};
504
505/*
506 * Common subroutine to open a TCP connection to remote host specified
507 * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
508 * port number if needed.  Call in_pcbladdr to do the routing and to choose
509 * a local host address (interface).  If there is an existing incarnation
510 * of the same connection in TIME-WAIT state and if the remote host was
511 * sending CC options and if the connection duration was < MSL, then
512 * truncate the previous TIME-WAIT state and proceed.
513 * Initialize connection parameters and enter SYN-SENT state.
514 */
515static int
516tcp_connect(tp, nam)
517	register struct tcpcb *tp;
518	struct mbuf *nam;
519{
520	struct inpcb *inp = tp->t_inpcb, *oinp;
521	struct socket *so = inp->inp_socket;
522	struct tcpcb *otp;
523	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
524	struct sockaddr_in *ifaddr;
525	int error;
526	struct rmxp_tao *taop;
527	struct rmxp_tao tao_noncached;
528
529	if (inp->inp_lport == 0) {
530		error = in_pcbbind(inp, NULL);
531		if (error)
532			return error;
533	}
534
535	/*
536	 * Cannot simply call in_pcbconnect, because there might be an
537	 * earlier incarnation of this same connection still in
538	 * TIME_WAIT state, creating an ADDRINUSE error.
539	 */
540	error = in_pcbladdr(inp, nam, &ifaddr);
541	if (error)
542		return error;
543	oinp = in_pcblookuphash(inp->inp_pcbinfo,
544	    sin->sin_addr, sin->sin_port,
545	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
546						: ifaddr->sin_addr,
547	    inp->inp_lport,  0);
548	if (oinp) {
549		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
550		otp->t_state == TCPS_TIME_WAIT &&
551		    otp->t_duration < TCPTV_MSL &&
552		    (otp->t_flags & TF_RCVD_CC))
553			otp = tcp_close(otp);
554		else
555			return EADDRINUSE;
556	}
557	if (inp->inp_laddr.s_addr == INADDR_ANY)
558		inp->inp_laddr = ifaddr->sin_addr;
559	inp->inp_faddr = sin->sin_addr;
560	inp->inp_fport = sin->sin_port;
561	in_pcbrehash(inp);
562
563	tp->t_template = tcp_template(tp);
564	if (tp->t_template == 0) {
565		in_pcbdisconnect(inp);
566		return ENOBUFS;
567	}
568
569	/* Compute window scaling to request.  */
570	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
571	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
572		tp->request_r_scale++;
573
574	soisconnecting(so);
575	tcpstat.tcps_connattempt++;
576	tp->t_state = TCPS_SYN_SENT;
577	tp->t_timer[TCPT_KEEP] = tcp_keepinit;
578	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
579	tcp_sendseqinit(tp);
580
581	/*
582	 * Generate a CC value for this connection and
583	 * check whether CC or CCnew should be used.
584	 */
585	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
586		taop = &tao_noncached;
587		bzero(taop, sizeof(*taop));
588	}
589
590	tp->cc_send = CC_INC(tcp_ccgen);
591	if (taop->tao_ccsent != 0 &&
592	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
593		taop->tao_ccsent = tp->cc_send;
594	} else {
595		taop->tao_ccsent = 0;
596		tp->t_flags |= TF_SENDCCNEW;
597	}
598
599	return 0;
600}
601
602int
603tcp_ctloutput(op, so, level, optname, mp)
604	int op;
605	struct socket *so;
606	int level, optname;
607	struct mbuf **mp;
608{
609	int error = 0, s;
610	struct inpcb *inp;
611	register struct tcpcb *tp;
612	register struct mbuf *m;
613	register int i;
614
615	s = splnet();
616	inp = sotoinpcb(so);
617	if (inp == NULL) {
618		splx(s);
619		if (op == PRCO_SETOPT && *mp)
620			(void) m_free(*mp);
621		return (ECONNRESET);
622	}
623	if (level != IPPROTO_TCP) {
624		error = ip_ctloutput(op, so, level, optname, mp);
625		splx(s);
626		return (error);
627	}
628	tp = intotcpcb(inp);
629
630	switch (op) {
631
632	case PRCO_SETOPT:
633		m = *mp;
634		switch (optname) {
635
636		case TCP_NODELAY:
637			if (m == NULL || m->m_len < sizeof (int))
638				error = EINVAL;
639			else if (*mtod(m, int *))
640				tp->t_flags |= TF_NODELAY;
641			else
642				tp->t_flags &= ~TF_NODELAY;
643			break;
644
645		case TCP_MAXSEG:
646			if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
647				tp->t_maxseg = i;
648			else
649				error = EINVAL;
650			break;
651
652		case TCP_NOOPT:
653			if (m == NULL || m->m_len < sizeof (int))
654				error = EINVAL;
655			else if (*mtod(m, int *))
656				tp->t_flags |= TF_NOOPT;
657			else
658				tp->t_flags &= ~TF_NOOPT;
659			break;
660
661		case TCP_NOPUSH:
662			if (m == NULL || m->m_len < sizeof (int))
663				error = EINVAL;
664			else if (*mtod(m, int *))
665				tp->t_flags |= TF_NOPUSH;
666			else
667				tp->t_flags &= ~TF_NOPUSH;
668			break;
669
670		default:
671			error = ENOPROTOOPT;
672			break;
673		}
674		if (m)
675			(void) m_free(m);
676		break;
677
678	case PRCO_GETOPT:
679		*mp = m = m_get(M_WAIT, MT_SOOPTS);
680		m->m_len = sizeof(int);
681
682		switch (optname) {
683		case TCP_NODELAY:
684			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
685			break;
686		case TCP_MAXSEG:
687			*mtod(m, int *) = tp->t_maxseg;
688			break;
689		case TCP_NOOPT:
690			*mtod(m, int *) = tp->t_flags & TF_NOOPT;
691			break;
692		case TCP_NOPUSH:
693			*mtod(m, int *) = tp->t_flags & TF_NOPUSH;
694			break;
695		default:
696			error = ENOPROTOOPT;
697			break;
698		}
699		break;
700	}
701	splx(s);
702	return (error);
703}
704
/*
 * tcp_sendspace and tcp_recvspace are the default send and receive window
 * sizes, respectively.  These are obsolescent (this information should
 * be set by the route).
 *
 * tcp_attach() passes both to soreserve() when a socket has no buffer
 * space reserved yet.  Each is exported read-write through sysctl under
 * _net_inet_tcp.
 *
 * NOTE(review): the variables are u_long but are exported with
 * SYSCTL_INT; confirm that int and u_long have the same size on every
 * supported platform.
 */
u_long	tcp_sendspace = 1024*16;
SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
	CTLFLAG_RW, &tcp_sendspace , 0, "");
u_long	tcp_recvspace = 1024*16;
SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
	CTLFLAG_RW, &tcp_recvspace , 0, "");
716
717/*
718 * Attach TCP protocol to socket, allocating
719 * internet protocol control block, tcp control block,
720 * bufer space, and entering LISTEN state if to accept connections.
721 */
722static int
723tcp_attach(so)
724	struct socket *so;
725{
726	register struct tcpcb *tp;
727	struct inpcb *inp;
728	int error;
729
730	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
731		error = soreserve(so, tcp_sendspace, tcp_recvspace);
732		if (error)
733			return (error);
734	}
735	error = in_pcballoc(so, &tcbinfo);
736	if (error)
737		return (error);
738	inp = sotoinpcb(so);
739	tp = tcp_newtcpcb(inp);
740	if (tp == 0) {
741		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
742
743		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
744		in_pcbdetach(inp);
745		so->so_state |= nofd;
746		return (ENOBUFS);
747	}
748	tp->t_state = TCPS_CLOSED;
749	return (0);
750}
751
752/*
753 * Initiate (or continue) disconnect.
754 * If embryonic state, just send reset (once).
755 * If in ``let data drain'' option and linger null, just drop.
756 * Otherwise (hard), mark socket disconnecting and drop
757 * current input data; switch states based on user close, and
758 * send segment to peer (with FIN).
759 */
760static struct tcpcb *
761tcp_disconnect(tp)
762	register struct tcpcb *tp;
763{
764	struct socket *so = tp->t_inpcb->inp_socket;
765
766	if (tp->t_state < TCPS_ESTABLISHED)
767		tp = tcp_close(tp);
768	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
769		tp = tcp_drop(tp, 0);
770	else {
771		soisdisconnecting(so);
772		sbflush(&so->so_rcv);
773		tp = tcp_usrclosed(tp);
774		if (tp)
775			(void) tcp_output(tp);
776	}
777	return (tp);
778}
779
780/*
781 * User issued close, and wish to trail through shutdown states:
782 * if never received SYN, just forget it.  If got a SYN from peer,
783 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
784 * If already got a FIN from peer, then almost done; go to LAST_ACK
785 * state.  In all other cases, have already sent FIN to peer (e.g.
786 * after PRU_SHUTDOWN), and just have to play tedious game waiting
787 * for peer to send FIN or not respond to keep-alives, etc.
788 * We can let the user exit from the close as soon as the FIN is acked.
789 */
790static struct tcpcb *
791tcp_usrclosed(tp)
792	register struct tcpcb *tp;
793{
794
795	switch (tp->t_state) {
796
797	case TCPS_CLOSED:
798	case TCPS_LISTEN:
799		tp->t_state = TCPS_CLOSED;
800		tp = tcp_close(tp);
801		break;
802
803	case TCPS_SYN_SENT:
804	case TCPS_SYN_RECEIVED:
805		tp->t_flags |= TF_NEEDFIN;
806		break;
807
808	case TCPS_ESTABLISHED:
809		tp->t_state = TCPS_FIN_WAIT_1;
810		break;
811
812	case TCPS_CLOSE_WAIT:
813		tp->t_state = TCPS_LAST_ACK;
814		break;
815	}
816	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
817		soisdisconnected(tp->t_inpcb->inp_socket);
818		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
819		if (tp->t_state == TCPS_FIN_WAIT_2)
820			tp->t_timer[TCPT_2MSL] = tcp_maxidle;
821	}
822	return (tp);
823}
824
825