tcp_usrreq.c revision 22962
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
34 *	$Id$
35 */
36
37#include <sys/param.h>
38#include <sys/queue.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/sysctl.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/protosw.h>
47#include <sys/errno.h>
48#include <sys/stat.h>
49
50#include <net/if.h>
51#include <net/route.h>
52
53#include <netinet/in.h>
54#include <netinet/in_systm.h>
55#include <netinet/ip.h>
56#include <netinet/in_pcb.h>
57#include <netinet/in_var.h>
58#include <netinet/ip_var.h>
59#include <netinet/tcp.h>
60#include <netinet/tcp_fsm.h>
61#include <netinet/tcp_seq.h>
62#include <netinet/tcp_timer.h>
63#include <netinet/tcp_var.h>
64#include <netinet/tcpip.h>
65#ifdef TCPDEBUG
66#include <netinet/tcp_debug.h>
67#endif
68
69/*
70 * TCP protocol interface to socket abstraction.
71 */
72extern	char *tcpstates[];	/* XXX ??? */
73
74static int	tcp_attach __P((struct socket *));
75static int	tcp_connect __P((struct tcpcb *, struct mbuf *));
76static struct tcpcb *
77		tcp_disconnect __P((struct tcpcb *));
78static struct tcpcb *
79		tcp_usrclosed __P((struct tcpcb *));
80
/*
 * Optional tracing of user requests.  The macros rely on locals named
 * `tp' and `so' in the function that uses them.
 *
 *	TCPDEBUG0	declares `ostate', which holds the state sampled
 *			before the request is processed.
 *	TCPDEBUG1()	stores tp->t_state in ostate (0 when tp is null).
 *	TCPDEBUG2(req)	calls tcp_trace() when tp is non-null and the
 *			socket has SO_DEBUG set.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it is a single
 * statement: as a bare `if', an `else' written after a use of the macro
 * would attach to the macro's own `if'.
 *
 * When TCPDEBUG is not defined all three expand to nothing.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do { \
				if (tp && (so->so_options & SO_DEBUG)) \
					tcp_trace(TA_USER, ostate, tp, 0, (req)); \
			} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
91
92/*
93 * TCP attaches to socket via pru_attach(), reserving space,
94 * and an internet control block.
95 */
96static int
97tcp_usr_attach(struct socket *so, int proto)
98{
99	int s = splnet();
100	int error;
101	struct inpcb *inp = sotoinpcb(so);
102	struct tcpcb *tp = 0;
103	TCPDEBUG0;
104
105	TCPDEBUG1();
106	if (inp) {
107		error = EISCONN;
108		goto out;
109	}
110
111	error = tcp_attach(so);
112	if (error)
113		goto out;
114
115	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
116		so->so_linger = TCP_LINGERTIME * hz;
117	tp = sototcpcb(so);
118out:
119	TCPDEBUG2(PRU_ATTACH);
120	splx(s);
121	return error;
122}
123
124/*
125 * pru_detach() detaches the TCP protocol from the socket.
126 * If the protocol state is non-embryonic, then can't
127 * do this directly: have to initiate a pru_disconnect(),
128 * which may finish later; embryonic TCB's can just
129 * be discarded here.
130 */
131static int
132tcp_usr_detach(struct socket *so)
133{
134	int s = splnet();
135	int error = 0;
136	struct inpcb *inp = sotoinpcb(so);
137	struct tcpcb *tp;
138	TCPDEBUG0;
139
140	if (inp == 0) {
141		splx(s);
142		return EINVAL;	/* XXX */
143	}
144	tp = intotcpcb(inp);
145	TCPDEBUG1();
146	if (tp->t_state > TCPS_LISTEN)
147		tp = tcp_disconnect(tp);
148	else
149		tp = tcp_close(tp);
150
151	TCPDEBUG2(PRU_DETACH);
152	splx(s);
153	return error;
154}
155
/*
 * Prologue and epilogue shared by the tcp_usr_*() handlers below.
 *
 * COMMON_START() expects locals `s' (the saved priority level), `inp'
 * and `tp'.  When inp is null it restores the priority level and makes
 * the enclosing function return EINVAL; otherwise it loads tp from inp
 * and samples the state for tracing.  Because it begins with TCPDEBUG0
 * it has to follow the function's declarations directly.
 *
 * COMMON_END(req) expects `s' and `error'.  It provides the `out:'
 * label, the trace call, splx(s) and `return error'.  The trailing,
 * unreachable `goto out' consumes the semicolon written after the macro
 * and refers to the label (presumably so that handlers without a goto
 * of their own do not provoke an unused-label warning).
 */
#define	COMMON_START() \
	TCPDEBUG0; \
	do { \
		if (inp == 0) { \
			splx(s); \
			return EINVAL; \
		} \
		tp = intotcpcb(inp); \
		TCPDEBUG1(); \
	} while(0)

#define	COMMON_END(req) \
out: \
	TCPDEBUG2(req); \
	splx(s); \
	return error; \
	goto out
167
168
169/*
170 * Give the socket an address.
171 */
172static int
173tcp_usr_bind(struct socket *so, struct mbuf *nam)
174{
175	int s = splnet();
176	int error = 0;
177	struct inpcb *inp = sotoinpcb(so);
178	struct tcpcb *tp;
179	struct sockaddr_in *sinp;
180
181	COMMON_START();
182
183	/*
184	 * Must check for multicast addresses and disallow binding
185	 * to them.
186	 */
187	sinp = mtod(nam, struct sockaddr_in *);
188	if (sinp->sin_family == AF_INET &&
189	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
190		error = EAFNOSUPPORT;
191		goto out;
192	}
193	error = in_pcbbind(inp, nam);
194	if (error)
195		goto out;
196	COMMON_END(PRU_BIND);
197
198}
199
200/*
201 * Prepare to accept connections.
202 */
203static int
204tcp_usr_listen(struct socket *so)
205{
206	int s = splnet();
207	int error = 0;
208	struct inpcb *inp = sotoinpcb(so);
209	struct tcpcb *tp;
210
211	COMMON_START();
212	if (inp->inp_lport == 0)
213		error = in_pcbbind(inp, NULL);
214	if (error == 0)
215		tp->t_state = TCPS_LISTEN;
216	COMMON_END(PRU_LISTEN);
217}
218
219/*
220 * Initiate connection to peer.
221 * Create a template for use in transmissions on this connection.
222 * Enter SYN_SENT state, and mark socket as connecting.
223 * Start keep-alive timer, and seed output sequence space.
224 * Send initial segment on connection.
225 */
226static int
227tcp_usr_connect(struct socket *so, struct mbuf *nam)
228{
229	int s = splnet();
230	int error = 0;
231	struct inpcb *inp = sotoinpcb(so);
232	struct tcpcb *tp;
233	struct sockaddr_in *sinp;
234
235	COMMON_START();
236
237	/*
238	 * Must disallow TCP ``connections'' to multicast addresses.
239	 */
240	sinp = mtod(nam, struct sockaddr_in *);
241	if (sinp->sin_family == AF_INET
242	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
243		error = EAFNOSUPPORT;
244		goto out;
245	}
246
247	if ((error = tcp_connect(tp, nam)) != 0)
248		goto out;
249	error = tcp_output(tp);
250	COMMON_END(PRU_CONNECT);
251}
252
253/*
254 * Initiate disconnect from peer.
255 * If connection never passed embryonic stage, just drop;
256 * else if don't need to let data drain, then can just drop anyways,
257 * else have to begin TCP shutdown process: mark socket disconnecting,
258 * drain unread data, state switch to reflect user close, and
259 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
260 * when peer sends FIN and acks ours.
261 *
262 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
263 */
264static int
265tcp_usr_disconnect(struct socket *so)
266{
267	int s = splnet();
268	int error = 0;
269	struct inpcb *inp = sotoinpcb(so);
270	struct tcpcb *tp;
271
272	COMMON_START();
273	tp = tcp_disconnect(tp);
274	COMMON_END(PRU_DISCONNECT);
275}
276
277/*
278 * Accept a connection.  Essentially all the work is
279 * done at higher levels; just return the address
280 * of the peer, storing through addr.
281 */
282static int
283tcp_usr_accept(struct socket *so, struct mbuf *nam)
284{
285	int s = splnet();
286	int error = 0;
287	struct inpcb *inp = sotoinpcb(so);
288	struct tcpcb *tp;
289
290	COMMON_START();
291	in_setpeeraddr(so, nam);
292	COMMON_END(PRU_ACCEPT);
293}
294
295/*
296 * Mark the connection as being incapable of further output.
297 */
298static int
299tcp_usr_shutdown(struct socket *so)
300{
301	int s = splnet();
302	int error = 0;
303	struct inpcb *inp = sotoinpcb(so);
304	struct tcpcb *tp;
305
306	COMMON_START();
307	socantsendmore(so);
308	tp = tcp_usrclosed(tp);
309	if (tp)
310		error = tcp_output(tp);
311	COMMON_END(PRU_SHUTDOWN);
312}
313
314/*
315 * After a receive, possibly send window update to peer.
316 */
317static int
318tcp_usr_rcvd(struct socket *so, int flags)
319{
320	int s = splnet();
321	int error = 0;
322	struct inpcb *inp = sotoinpcb(so);
323	struct tcpcb *tp;
324
325	COMMON_START();
326	tcp_output(tp);
327	COMMON_END(PRU_RCVD);
328}
329
330/*
331 * Do a send by putting data in output queue and updating urgent
332 * marker if URG set.  Possibly send more data.
333 */
334static int
335tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam,
336	     struct mbuf *control)
337{
338	int s = splnet();
339	int error = 0;
340	struct inpcb *inp = sotoinpcb(so);
341	struct tcpcb *tp;
342
343	COMMON_START();
344	if (control && control->m_len) {
345		m_freem(control); /* XXX shouldn't caller do this??? */
346		if (m)
347			m_freem(m);
348		return EINVAL;
349	}
350
351	if(!(flags & PRUS_OOB)) {
352		sbappend(&so->so_snd, m);
353		if (nam && tp->t_state < TCPS_SYN_SENT) {
354			/*
355			 * Do implied connect if not yet connected,
356			 * initialize window to default value, and
357			 * initialize maxseg/maxopd using peer's cached
358			 * MSS.
359			 */
360			error = tcp_connect(tp, nam);
361			if (error)
362				goto out;
363			tp->snd_wnd = TTCP_CLIENT_SND_WND;
364			tcp_mss(tp, -1);
365		}
366
367		if (flags & PRUS_EOF) {
368			/*
369			 * Close the send side of the connection after
370			 * the data is sent.
371			 */
372			socantsendmore(so);
373			tp = tcp_usrclosed(tp);
374		}
375		if (tp != NULL)
376			error = tcp_output(tp);
377	} else {
378		if (sbspace(&so->so_snd) < -512) {
379			m_freem(m);
380			error = ENOBUFS;
381			goto out;
382		}
383		/*
384		 * According to RFC961 (Assigned Protocols),
385		 * the urgent pointer points to the last octet
386		 * of urgent data.  We continue, however,
387		 * to consider it to indicate the first octet
388		 * of data past the urgent section.
389		 * Otherwise, snd_up should be one lower.
390		 */
391		sbappend(&so->so_snd, m);
392		if (nam && tp->t_state < TCPS_SYN_SENT) {
393			/*
394			 * Do implied connect if not yet connected,
395			 * initialize window to default value, and
396			 * initialize maxseg/maxopd using peer's cached
397			 * MSS.
398			 */
399			error = tcp_connect(tp, nam);
400			if (error)
401				goto out;
402			tp->snd_wnd = TTCP_CLIENT_SND_WND;
403			tcp_mss(tp, -1);
404		}
405		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
406		tp->t_force = 1;
407		error = tcp_output(tp);
408		tp->t_force = 0;
409	}
410	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
411		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
412}
413
414/*
415 * Abort the TCP.
416 */
417static int
418tcp_usr_abort(struct socket *so)
419{
420	int s = splnet();
421	int error = 0;
422	struct inpcb *inp = sotoinpcb(so);
423	struct tcpcb *tp;
424
425	COMMON_START();
426	tp = tcp_drop(tp, ECONNABORTED);
427	COMMON_END(PRU_ABORT);
428}
429
430/*
431 * Receive out-of-band data.
432 */
433static int
434tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
435{
436	int s = splnet();
437	int error = 0;
438	struct inpcb *inp = sotoinpcb(so);
439	struct tcpcb *tp;
440
441	COMMON_START();
442	if ((so->so_oobmark == 0 &&
443	     (so->so_state & SS_RCVATMARK) == 0) ||
444	    so->so_options & SO_OOBINLINE ||
445	    tp->t_oobflags & TCPOOB_HADDATA) {
446		error = EINVAL;
447		goto out;
448	}
449	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
450		error = EWOULDBLOCK;
451		goto out;
452	}
453	m->m_len = 1;
454	*mtod(m, caddr_t) = tp->t_iobc;
455	if ((flags & MSG_PEEK) == 0)
456		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
457	COMMON_END(PRU_RCVOOB);
458}
459
460/* xxx - should be const */
461struct pr_usrreqs tcp_usrreqs = {
462	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
463	tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
464	tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
465	tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
466	in_setsockaddr
467};
468
469/*
470 * Common subroutine to open a TCP connection to remote host specified
471 * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
472 * port number if needed.  Call in_pcbladdr to do the routing and to choose
473 * a local host address (interface).  If there is an existing incarnation
474 * of the same connection in TIME-WAIT state and if the remote host was
475 * sending CC options and if the connection duration was < MSL, then
476 * truncate the previous TIME-WAIT state and proceed.
477 * Initialize connection parameters and enter SYN-SENT state.
478 */
479static int
480tcp_connect(tp, nam)
481	register struct tcpcb *tp;
482	struct mbuf *nam;
483{
484	struct inpcb *inp = tp->t_inpcb, *oinp;
485	struct socket *so = inp->inp_socket;
486	struct tcpcb *otp;
487	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
488	struct sockaddr_in *ifaddr;
489	int error;
490	struct rmxp_tao *taop;
491	struct rmxp_tao tao_noncached;
492
493	if (inp->inp_lport == 0) {
494		error = in_pcbbind(inp, NULL);
495		if (error)
496			return error;
497	}
498
499	/*
500	 * Cannot simply call in_pcbconnect, because there might be an
501	 * earlier incarnation of this same connection still in
502	 * TIME_WAIT state, creating an ADDRINUSE error.
503	 */
504	error = in_pcbladdr(inp, nam, &ifaddr);
505	if (error)
506		return error;
507	oinp = in_pcblookuphash(inp->inp_pcbinfo,
508	    sin->sin_addr, sin->sin_port,
509	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
510						: ifaddr->sin_addr,
511	    inp->inp_lport,  0);
512	if (oinp) {
513		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
514		otp->t_state == TCPS_TIME_WAIT &&
515		    otp->t_duration < TCPTV_MSL &&
516		    (otp->t_flags & TF_RCVD_CC))
517			otp = tcp_close(otp);
518		else
519			return EADDRINUSE;
520	}
521	if (inp->inp_laddr.s_addr == INADDR_ANY)
522		inp->inp_laddr = ifaddr->sin_addr;
523	inp->inp_faddr = sin->sin_addr;
524	inp->inp_fport = sin->sin_port;
525	in_pcbrehash(inp);
526
527	tp->t_template = tcp_template(tp);
528	if (tp->t_template == 0) {
529		in_pcbdisconnect(inp);
530		return ENOBUFS;
531	}
532
533	/* Compute window scaling to request.  */
534	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
535	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
536		tp->request_r_scale++;
537
538	soisconnecting(so);
539	tcpstat.tcps_connattempt++;
540	tp->t_state = TCPS_SYN_SENT;
541	tp->t_timer[TCPT_KEEP] = tcp_keepinit;
542	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
543	tcp_sendseqinit(tp);
544
545	/*
546	 * Generate a CC value for this connection and
547	 * check whether CC or CCnew should be used.
548	 */
549	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
550		taop = &tao_noncached;
551		bzero(taop, sizeof(*taop));
552	}
553
554	tp->cc_send = CC_INC(tcp_ccgen);
555	if (taop->tao_ccsent != 0 &&
556	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
557		taop->tao_ccsent = tp->cc_send;
558	} else {
559		taop->tao_ccsent = 0;
560		tp->t_flags |= TF_SENDCCNEW;
561	}
562
563	return 0;
564}
565
566int
567tcp_ctloutput(op, so, level, optname, mp)
568	int op;
569	struct socket *so;
570	int level, optname;
571	struct mbuf **mp;
572{
573	int error = 0, s;
574	struct inpcb *inp;
575	register struct tcpcb *tp;
576	register struct mbuf *m;
577	register int i;
578
579	s = splnet();
580	inp = sotoinpcb(so);
581	if (inp == NULL) {
582		splx(s);
583		if (op == PRCO_SETOPT && *mp)
584			(void) m_free(*mp);
585		return (ECONNRESET);
586	}
587	if (level != IPPROTO_TCP) {
588		error = ip_ctloutput(op, so, level, optname, mp);
589		splx(s);
590		return (error);
591	}
592	tp = intotcpcb(inp);
593
594	switch (op) {
595
596	case PRCO_SETOPT:
597		m = *mp;
598		switch (optname) {
599
600		case TCP_NODELAY:
601			if (m == NULL || m->m_len < sizeof (int))
602				error = EINVAL;
603			else if (*mtod(m, int *))
604				tp->t_flags |= TF_NODELAY;
605			else
606				tp->t_flags &= ~TF_NODELAY;
607			break;
608
609		case TCP_MAXSEG:
610			if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
611				tp->t_maxseg = i;
612			else
613				error = EINVAL;
614			break;
615
616		case TCP_NOOPT:
617			if (m == NULL || m->m_len < sizeof (int))
618				error = EINVAL;
619			else if (*mtod(m, int *))
620				tp->t_flags |= TF_NOOPT;
621			else
622				tp->t_flags &= ~TF_NOOPT;
623			break;
624
625		case TCP_NOPUSH:
626			if (m == NULL || m->m_len < sizeof (int))
627				error = EINVAL;
628			else if (*mtod(m, int *))
629				tp->t_flags |= TF_NOPUSH;
630			else
631				tp->t_flags &= ~TF_NOPUSH;
632			break;
633
634		default:
635			error = ENOPROTOOPT;
636			break;
637		}
638		if (m)
639			(void) m_free(m);
640		break;
641
642	case PRCO_GETOPT:
643		*mp = m = m_get(M_WAIT, MT_SOOPTS);
644		m->m_len = sizeof(int);
645
646		switch (optname) {
647		case TCP_NODELAY:
648			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
649			break;
650		case TCP_MAXSEG:
651			*mtod(m, int *) = tp->t_maxseg;
652			break;
653		case TCP_NOOPT:
654			*mtod(m, int *) = tp->t_flags & TF_NOOPT;
655			break;
656		case TCP_NOPUSH:
657			*mtod(m, int *) = tp->t_flags & TF_NOPUSH;
658			break;
659		default:
660			error = ENOPROTOOPT;
661			break;
662		}
663		break;
664	}
665	splx(s);
666	return (error);
667}
668
669/*
670 * tcp_sendspace and tcp_recvspace are the default send and receive window
671 * sizes, respectively.  These are obsolescent (this information should
672 * be set by the route).
673 */
674u_long	tcp_sendspace = 1024*16;
675SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
676	CTLFLAG_RW, &tcp_sendspace , 0, "");
677u_long	tcp_recvspace = 1024*16;
678SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
679	CTLFLAG_RW, &tcp_recvspace , 0, "");
680
681/*
682 * Attach TCP protocol to socket, allocating
683 * internet protocol control block, tcp control block,
684 * bufer space, and entering LISTEN state if to accept connections.
685 */
686static int
687tcp_attach(so)
688	struct socket *so;
689{
690	register struct tcpcb *tp;
691	struct inpcb *inp;
692	int error;
693
694	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
695		error = soreserve(so, tcp_sendspace, tcp_recvspace);
696		if (error)
697			return (error);
698	}
699	error = in_pcballoc(so, &tcbinfo);
700	if (error)
701		return (error);
702	inp = sotoinpcb(so);
703	tp = tcp_newtcpcb(inp);
704	if (tp == 0) {
705		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
706
707		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
708		in_pcbdetach(inp);
709		so->so_state |= nofd;
710		return (ENOBUFS);
711	}
712	tp->t_state = TCPS_CLOSED;
713	return (0);
714}
715
716/*
717 * Initiate (or continue) disconnect.
718 * If embryonic state, just send reset (once).
719 * If in ``let data drain'' option and linger null, just drop.
720 * Otherwise (hard), mark socket disconnecting and drop
721 * current input data; switch states based on user close, and
722 * send segment to peer (with FIN).
723 */
724static struct tcpcb *
725tcp_disconnect(tp)
726	register struct tcpcb *tp;
727{
728	struct socket *so = tp->t_inpcb->inp_socket;
729
730	if (tp->t_state < TCPS_ESTABLISHED)
731		tp = tcp_close(tp);
732	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
733		tp = tcp_drop(tp, 0);
734	else {
735		soisdisconnecting(so);
736		sbflush(&so->so_rcv);
737		tp = tcp_usrclosed(tp);
738		if (tp)
739			(void) tcp_output(tp);
740	}
741	return (tp);
742}
743
744/*
745 * User issued close, and wish to trail through shutdown states:
746 * if never received SYN, just forget it.  If got a SYN from peer,
747 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
748 * If already got a FIN from peer, then almost done; go to LAST_ACK
749 * state.  In all other cases, have already sent FIN to peer (e.g.
750 * after PRU_SHUTDOWN), and just have to play tedious game waiting
751 * for peer to send FIN or not respond to keep-alives, etc.
752 * We can let the user exit from the close as soon as the FIN is acked.
753 */
754static struct tcpcb *
755tcp_usrclosed(tp)
756	register struct tcpcb *tp;
757{
758
759	switch (tp->t_state) {
760
761	case TCPS_CLOSED:
762	case TCPS_LISTEN:
763		tp->t_state = TCPS_CLOSED;
764		tp = tcp_close(tp);
765		break;
766
767	case TCPS_SYN_SENT:
768	case TCPS_SYN_RECEIVED:
769		tp->t_flags |= TF_NEEDFIN;
770		break;
771
772	case TCPS_ESTABLISHED:
773		tp->t_state = TCPS_FIN_WAIT_1;
774		break;
775
776	case TCPS_CLOSE_WAIT:
777		tp->t_state = TCPS_LAST_ACK;
778		break;
779	}
780	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
781		soisdisconnected(tp->t_inpcb->inp_socket);
782		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
783		if (tp->t_state == TCPS_FIN_WAIT_2)
784			tp->t_timer[TCPT_2MSL] = tcp_maxidle;
785	}
786	return (tp);
787}
788
789