Deleted Added
full compact
tcp_timewait.c (57576) tcp_timewait.c (58698)
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
34 * $FreeBSD: head/sys/netinet/tcp_timewait.c 57576 2000-02-28 21:18:21Z ps $
34 * $FreeBSD: head/sys/netinet/tcp_timewait.c 58698 2000-03-27 19:14:27Z jlemon $
35 */
36
37#include "opt_compat.h"
38#include "opt_inet6.h"
39#include "opt_ipsec.h"
40#include "opt_tcpdebug.h"
41
35 */
36
37#include "opt_compat.h"
38#include "opt_inet6.h"
39#include "opt_ipsec.h"
40#include "opt_tcpdebug.h"
41
42#include <stddef.h>
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/callout.h>
45#include <sys/kernel.h>
46#include <sys/sysctl.h>
47#include <sys/malloc.h>
48#include <sys/mbuf.h>
49#ifdef INET6
50#include <sys/domain.h>
51#endif
52#include <sys/proc.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55#include <sys/protosw.h>
56
57#include <vm/vm_zone.h>
58
59#include <net/route.h>
60#include <net/if.h>
61
62#define _IP_VHL
63#include <netinet/in.h>
64#include <netinet/in_systm.h>
65#include <netinet/ip.h>
66#ifdef INET6
67#include <netinet/ip6.h>
68#endif
69#include <netinet/in_pcb.h>
70#ifdef INET6
71#include <netinet6/in6_pcb.h>
72#endif
73#include <netinet/in_var.h>
74#include <netinet/ip_var.h>
75#ifdef INET6
76#include <netinet6/ip6_var.h>
77#endif
78#include <netinet/tcp.h>
79#include <netinet/tcp_fsm.h>
80#include <netinet/tcp_seq.h>
81#include <netinet/tcp_timer.h>
82#include <netinet/tcp_var.h>
83#ifdef INET6
84#include <netinet6/tcp6_var.h>
85#endif
86#include <netinet/tcpip.h>
87#ifdef TCPDEBUG
88#include <netinet/tcp_debug.h>
89#endif
90#include <netinet6/ip6protosw.h>
91
92#ifdef IPSEC
93#include <netinet6/ipsec.h>
94#endif /*IPSEC*/
95
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/callout.h>
46#include <sys/kernel.h>
47#include <sys/sysctl.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#ifdef INET6
51#include <sys/domain.h>
52#endif
53#include <sys/proc.h>
54#include <sys/socket.h>
55#include <sys/socketvar.h>
56#include <sys/protosw.h>
57
58#include <vm/vm_zone.h>
59
60#include <net/route.h>
61#include <net/if.h>
62
63#define _IP_VHL
64#include <netinet/in.h>
65#include <netinet/in_systm.h>
66#include <netinet/ip.h>
67#ifdef INET6
68#include <netinet/ip6.h>
69#endif
70#include <netinet/in_pcb.h>
71#ifdef INET6
72#include <netinet6/in6_pcb.h>
73#endif
74#include <netinet/in_var.h>
75#include <netinet/ip_var.h>
76#ifdef INET6
77#include <netinet6/ip6_var.h>
78#endif
79#include <netinet/tcp.h>
80#include <netinet/tcp_fsm.h>
81#include <netinet/tcp_seq.h>
82#include <netinet/tcp_timer.h>
83#include <netinet/tcp_var.h>
84#ifdef INET6
85#include <netinet6/tcp6_var.h>
86#endif
87#include <netinet/tcpip.h>
88#ifdef TCPDEBUG
89#include <netinet/tcp_debug.h>
90#endif
91#include <netinet6/ip6protosw.h>
92
93#ifdef IPSEC
94#include <netinet6/ipsec.h>
95#endif /*IPSEC*/
96
97#include <machine/in_cksum.h>
98
/*
 * Tunables for this file.  Each SYSCTL_INT() below exports the named
 * variable under the given parent node; CTLFLAG_RW entries are writable
 * at run time, CTLFLAG_RD entries are read-only.
 */

/* Default MSS; tcp_newtcpcb() uses it for non-IPv6 control blocks. */
96int tcp_mssdflt = TCP_MSS;
97SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
98 &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
99
100#ifdef INET6
/* Default MSS; tcp_newtcpcb() uses it when the inpcb has INP_IPV6 set. */
101int tcp_v6mssdflt = TCP6_MSS;
102SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
103 CTLFLAG_RW, &tcp_v6mssdflt , 0,
104 "Default TCP Maximum Segment Size for IPv6");
105#endif
106
/* Compiled out: the default-RTT tunable is currently disabled. */
107#if 0
108static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
109SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW,
110 &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time");
111#endif
112
/* When non-zero, tcp_newtcpcb() sets TF_REQ_SCALE|TF_REQ_TSTMP. */
113static int tcp_do_rfc1323 = 1;
114SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
115 &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
116
/* When non-zero, tcp_newtcpcb() additionally sets TF_REQ_CC. */
117static int tcp_do_rfc1644 = 0;
118SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW,
119 &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions");
120
/* Hash table size actually in use; filled in by tcp_init(). */
121static int tcp_tcbhashsize = 0;
122SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
123 &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
124
/* Exported under the debug node rather than net.inet.tcp. */
125static int do_tcpdrain = 1;
126SYSCTL_INT(_debug, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
127 "Enable non Net3 compliant tcp_drain");
128
/* Read-only view of tcbinfo.ipi_count. */
129SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
130 &tcbinfo.ipi_count, 0, "Number of active PCBs");
131
/* Forward declarations of file-local helpers. */
132static void tcp_cleartaocache __P((void));
133static void tcp_notify __P((struct inpcb *, int));
134
135/*
136 * Target size of TCP PCB hash tables. Must be a power of two.
137 *
138 * Note that this can be overridden by the kernel environment
139 * variable net.inet.tcp.tcbhashsize
 *
 * tcp_init() falls back to 512 (with a console warning) if the value
 * it ends up with is not a power of two.
140 */
141#ifndef TCBHASHSIZE
142#define TCBHASHSIZE 512
143#endif
144
145/*
146 * This is the actual shape of what we allocate using the zone
147 * allocator. Doing it this way allows us to protect both structures
148 * using the same generation count, and also eliminates the overhead
149 * of allocating tcpcbs separately. By hiding the structure here,
150 * we avoid changing most of the rest of the code (although it needs
151 * to be changed, eventually, for greater efficiency).
 *
 * Layout notes:
 *  - The inpcb is the first member, so tcp_newtcpcb() can cast the
 *    inpcb pointer it is handed back to a struct inp_tp pointer.
 *  - The union's align[] member is sized to sizeof(struct inpcb)
 *    rounded up to a multiple of ALIGNMENT bytes.
 *  - The five callout structures are the storage that tcp_newtcpcb()
 *    points the tcpcb's tt_rexmt/tt_persist/tt_keep/tt_2msl/tt_delack
 *    timer pointers at.
 *
 * ALIGNMENT and ALIGNM1 exist only for this definition and are
 * undefined again right after it.
152 */
153#define ALIGNMENT 32
154#define ALIGNM1 (ALIGNMENT - 1)
155struct inp_tp {
156 union {
157 struct inpcb inp;
 /* (size + ALIGNM1) & ~ALIGNM1 rounds up to a multiple of ALIGNMENT */
158 char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
159 } inp_tp_u;
160 struct tcpcb tcb;
 /* backing storage for the per-connection timers */
161 struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
162 struct callout inp_tp_delack;
163};
164#undef ALIGNMENT
165#undef ALIGNM1
166
167/*
168 * Tcp initialization
 *
 * Takes no arguments and returns nothing.  In order, it:
 *  - seeds tcp_iss from random(), sets tcp_ccgen to 1 and calls
 *    tcp_cleartaocache();
 *  - loads the timer variables from their TCPTV_* constants;
 *  - initializes the tcb list and records it in tcbinfo;
 *  - picks the PCB hash size and creates both hash tables;
 *  - creates the zone from which struct inp_tp objects are allocated;
 *  - raises max_protohdr if needed and panics if a link header plus
 *    the minimal TCP/IP header would not fit in MHLEN bytes.
169 */
170void
171tcp_init()
172{
173 int hashsize;
174
175 tcp_iss = random(); /* wrong, but better than a constant */
176 tcp_ccgen = 1;
177 tcp_cleartaocache();
178
 /* Timer defaults; note that maxpersistidle reuses TCPTV_KEEP_IDLE. */
179 tcp_delacktime = TCPTV_DELACK;
180 tcp_keepinit = TCPTV_KEEP_INIT;
181 tcp_keepidle = TCPTV_KEEP_IDLE;
182 tcp_keepintvl = TCPTV_KEEPINTVL;
183 tcp_maxpersistidle = TCPTV_KEEP_IDLE;
184 tcp_msl = TCPTV_MSL;
185
186 LIST_INIT(&tcb);
187 tcbinfo.listhead = &tcb;
 /*
  * Hash size comes from the net.inet.tcp.tcbhashsize tunable together
  * with TCBHASHSIZE; anything that is not a power of two is replaced
  * by 512 after a console warning.
  */
188 TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, hashsize);
189 if (!powerof2(hashsize)) {
190 printf("WARNING: TCB hash size not a power of 2\n");
191 hashsize = 512; /* safe default */
192 }
 /* Publish the size in use through the read-only tcbhashsize sysctl. */
193 tcp_tcbhashsize = hashsize;
194 tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
195 tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
196 &tcbinfo.porthashmask);
 /* Zone items are struct inp_tp: inpcb, tcpcb and timers in one piece. */
197 tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
198 ZONE_INTERRUPT, 0);
 /* Smallest protocol header this file must be able to prepend. */
199#ifdef INET6
200#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
201#else /* INET6 */
202#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
203#endif /* INET6 */
204 if (max_protohdr < TCP_MINPROTOHDR)
205 max_protohdr = TCP_MINPROTOHDR;
 /* Link header plus TCP/IP header must fit in MHLEN bytes. */
206 if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
207 panic("tcp_init");
208#undef TCP_MINPROTOHDR
209}
210
211/*
212 * Create template to be used to send tcp packets on a connection.
213 * Call after host entry created, allocates an mbuf and fills
214 * in a skeletal tcp/ip header, minimizing the amount of work
215 * necessary when the connection is used.
216 */
217struct tcptemp *
218tcp_template(tp)
219 struct tcpcb *tp;
220{
221 register struct inpcb *inp = tp->t_inpcb;
222 register struct mbuf *m;
223 register struct tcptemp *n;
224
225 if ((n = tp->t_template) == 0) {
226 m = m_get(M_DONTWAIT, MT_HEADER);
227 if (m == NULL)
228 return (0);
229 m->m_len = sizeof (struct tcptemp);
230 n = mtod(m, struct tcptemp *);
231 }
232#ifdef INET6
233 if ((inp->inp_vflag & INP_IPV6) != 0) {
234 register struct ip6_hdr *ip6;
235
236 ip6 = (struct ip6_hdr *)n->tt_ipgen;
237 ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
238 (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
239 ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
240 (IPV6_VERSION & IPV6_VERSION_MASK);
241 ip6->ip6_nxt = IPPROTO_TCP;
242 ip6->ip6_plen = sizeof(struct tcphdr);
243 ip6->ip6_src = inp->in6p_laddr;
244 ip6->ip6_dst = inp->in6p_faddr;
/*
 * Tunables for this file.  Each SYSCTL_INT() below exports the named
 * variable under the given parent node; CTLFLAG_RW entries are writable
 * at run time, CTLFLAG_RD entries are read-only.
 */

/* Default MSS; tcp_newtcpcb() uses it for non-IPv6 control blocks. */
99int tcp_mssdflt = TCP_MSS;
100SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
101 &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
102
103#ifdef INET6
/* Default MSS; tcp_newtcpcb() uses it when the inpcb has INP_IPV6 set. */
104int tcp_v6mssdflt = TCP6_MSS;
105SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
106 CTLFLAG_RW, &tcp_v6mssdflt , 0,
107 "Default TCP Maximum Segment Size for IPv6");
108#endif
109
/* Compiled out: the default-RTT tunable is currently disabled. */
110#if 0
111static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
112SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW,
113 &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time");
114#endif
115
/* When non-zero, tcp_newtcpcb() sets TF_REQ_SCALE|TF_REQ_TSTMP. */
116static int tcp_do_rfc1323 = 1;
117SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
118 &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
119
/* When non-zero, tcp_newtcpcb() additionally sets TF_REQ_CC. */
120static int tcp_do_rfc1644 = 0;
121SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW,
122 &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions");
123
/* Hash table size actually in use; filled in by tcp_init(). */
124static int tcp_tcbhashsize = 0;
125SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
126 &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
127
/* Exported under the debug node rather than net.inet.tcp. */
128static int do_tcpdrain = 1;
129SYSCTL_INT(_debug, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
130 "Enable non Net3 compliant tcp_drain");
131
/* Read-only view of tcbinfo.ipi_count. */
132SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
133 &tcbinfo.ipi_count, 0, "Number of active PCBs");
134
/* Forward declarations of file-local helpers. */
135static void tcp_cleartaocache __P((void));
136static void tcp_notify __P((struct inpcb *, int));
137
138/*
139 * Target size of TCP PCB hash tables. Must be a power of two.
140 *
141 * Note that this can be overridden by the kernel environment
142 * variable net.inet.tcp.tcbhashsize
 *
 * tcp_init() falls back to 512 (with a console warning) if the value
 * it ends up with is not a power of two.
143 */
144#ifndef TCBHASHSIZE
145#define TCBHASHSIZE 512
146#endif
147
148/*
149 * This is the actual shape of what we allocate using the zone
150 * allocator. Doing it this way allows us to protect both structures
151 * using the same generation count, and also eliminates the overhead
152 * of allocating tcpcbs separately. By hiding the structure here,
153 * we avoid changing most of the rest of the code (although it needs
154 * to be changed, eventually, for greater efficiency).
 *
 * Layout notes:
 *  - The inpcb is the first member, so tcp_newtcpcb() can cast the
 *    inpcb pointer it is handed back to a struct inp_tp pointer.
 *  - The union's align[] member is sized to sizeof(struct inpcb)
 *    rounded up to a multiple of ALIGNMENT bytes.
 *  - The five callout structures are the storage that tcp_newtcpcb()
 *    points the tcpcb's tt_rexmt/tt_persist/tt_keep/tt_2msl/tt_delack
 *    timer pointers at.
 *
 * ALIGNMENT and ALIGNM1 exist only for this definition and are
 * undefined again right after it.
155 */
156#define ALIGNMENT 32
157#define ALIGNM1 (ALIGNMENT - 1)
158struct inp_tp {
159 union {
160 struct inpcb inp;
 /* (size + ALIGNM1) & ~ALIGNM1 rounds up to a multiple of ALIGNMENT */
161 char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
162 } inp_tp_u;
163 struct tcpcb tcb;
 /* backing storage for the per-connection timers */
164 struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
165 struct callout inp_tp_delack;
166};
167#undef ALIGNMENT
168#undef ALIGNM1
169
170/*
171 * Tcp initialization
 *
 * Takes no arguments and returns nothing.  In order, it:
 *  - seeds tcp_iss from random(), sets tcp_ccgen to 1 and calls
 *    tcp_cleartaocache();
 *  - loads the timer variables from their TCPTV_* constants;
 *  - initializes the tcb list and records it in tcbinfo;
 *  - picks the PCB hash size and creates both hash tables;
 *  - creates the zone from which struct inp_tp objects are allocated;
 *  - raises max_protohdr if needed and panics if a link header plus
 *    the minimal TCP/IP header would not fit in MHLEN bytes.
172 */
173void
174tcp_init()
175{
176 int hashsize;
177
178 tcp_iss = random(); /* wrong, but better than a constant */
179 tcp_ccgen = 1;
180 tcp_cleartaocache();
181
 /* Timer defaults; note that maxpersistidle reuses TCPTV_KEEP_IDLE. */
182 tcp_delacktime = TCPTV_DELACK;
183 tcp_keepinit = TCPTV_KEEP_INIT;
184 tcp_keepidle = TCPTV_KEEP_IDLE;
185 tcp_keepintvl = TCPTV_KEEPINTVL;
186 tcp_maxpersistidle = TCPTV_KEEP_IDLE;
187 tcp_msl = TCPTV_MSL;
188
189 LIST_INIT(&tcb);
190 tcbinfo.listhead = &tcb;
 /*
  * Hash size comes from the net.inet.tcp.tcbhashsize tunable together
  * with TCBHASHSIZE; anything that is not a power of two is replaced
  * by 512 after a console warning.
  */
191 TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, hashsize);
192 if (!powerof2(hashsize)) {
193 printf("WARNING: TCB hash size not a power of 2\n");
194 hashsize = 512; /* safe default */
195 }
 /* Publish the size in use through the read-only tcbhashsize sysctl. */
196 tcp_tcbhashsize = hashsize;
197 tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
198 tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
199 &tcbinfo.porthashmask);
 /* Zone items are struct inp_tp: inpcb, tcpcb and timers in one piece. */
200 tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
201 ZONE_INTERRUPT, 0);
 /* Smallest protocol header this file must be able to prepend. */
202#ifdef INET6
203#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
204#else /* INET6 */
205#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
206#endif /* INET6 */
207 if (max_protohdr < TCP_MINPROTOHDR)
208 max_protohdr = TCP_MINPROTOHDR;
 /* Link header plus TCP/IP header must fit in MHLEN bytes. */
209 if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
210 panic("tcp_init");
211#undef TCP_MINPROTOHDR
212}
213
214/*
215 * Create template to be used to send tcp packets on a connection.
216 * Call after host entry created, allocates an mbuf and fills
217 * in a skeletal tcp/ip header, minimizing the amount of work
218 * necessary when the connection is used.
219 */
220struct tcptemp *
221tcp_template(tp)
222 struct tcpcb *tp;
223{
224 register struct inpcb *inp = tp->t_inpcb;
225 register struct mbuf *m;
226 register struct tcptemp *n;
227
228 if ((n = tp->t_template) == 0) {
229 m = m_get(M_DONTWAIT, MT_HEADER);
230 if (m == NULL)
231 return (0);
232 m->m_len = sizeof (struct tcptemp);
233 n = mtod(m, struct tcptemp *);
234 }
235#ifdef INET6
236 if ((inp->inp_vflag & INP_IPV6) != 0) {
237 register struct ip6_hdr *ip6;
238
239 ip6 = (struct ip6_hdr *)n->tt_ipgen;
240 ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
241 (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
242 ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
243 (IPV6_VERSION & IPV6_VERSION_MASK);
244 ip6->ip6_nxt = IPPROTO_TCP;
245 ip6->ip6_plen = sizeof(struct tcphdr);
246 ip6->ip6_src = inp->in6p_laddr;
247 ip6->ip6_dst = inp->in6p_faddr;
248 n->tt_t.th_sum = 0;
245 } else
246#endif
247 {
249 } else
250#endif
251 {
248 register struct ipovly *ipov;
252 struct ip *ip = (struct ip *)n->tt_ipgen;
249
253
250 ipov = (struct ipovly *)n->tt_ipgen;
251 bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
252 ipov->ih_pr = IPPROTO_TCP;
253 ipov->ih_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
254 ipov->ih_src = inp->inp_laddr;
255 ipov->ih_dst = inp->inp_faddr;
254 bzero(ip, sizeof(struct ip)); /* XXX overkill? */
255 ip->ip_vhl = IP_VHL_BORING;
256 ip->ip_p = IPPROTO_TCP;
257 ip->ip_src = inp->inp_laddr;
258 ip->ip_dst = inp->inp_faddr;
259 n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
260 htons(sizeof(struct tcphdr) + IPPROTO_TCP));
256 }
257 n->tt_t.th_sport = inp->inp_lport;
258 n->tt_t.th_dport = inp->inp_fport;
259 n->tt_t.th_seq = 0;
260 n->tt_t.th_ack = 0;
261 n->tt_t.th_x2 = 0;
262 n->tt_t.th_off = 5;
263 n->tt_t.th_flags = 0;
264 n->tt_t.th_win = 0;
261 }
262 n->tt_t.th_sport = inp->inp_lport;
263 n->tt_t.th_dport = inp->inp_fport;
264 n->tt_t.th_seq = 0;
265 n->tt_t.th_ack = 0;
266 n->tt_t.th_x2 = 0;
267 n->tt_t.th_off = 5;
268 n->tt_t.th_flags = 0;
269 n->tt_t.th_win = 0;
265 n->tt_t.th_sum = 0;
266 n->tt_t.th_urp = 0;
267 return (n);
268}
269
270/*
271 * Send a single message to the TCP at address specified by
272 * the given TCP/IP header. If m == 0, then we make a copy
273 * of the tcpiphdr at ti and send directly to the addressed host.
274 * This is used to force keep alive messages out using the TCP
275 * template for a connection tp->t_template. If flags are given
276 * then we send a message back to the TCP which originated the
277 * segment ti, and discard the mbuf containing it and any other
278 * attached mbufs.
279 *
280 * In any case the ack and sequence number of the transmitted
281 * segment are as specified by the parameters.
282 *
283 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
284 */
285void
286tcp_respond(tp, ipgen, th, m, ack, seq, flags)
287 struct tcpcb *tp;
288 void *ipgen;
289 register struct tcphdr *th;
290 register struct mbuf *m;
291 tcp_seq ack, seq;
292 int flags;
293{
294 register int tlen;
295 int win = 0;
296 struct route *ro = 0;
297 struct route sro;
298 struct ip *ip;
270 n->tt_t.th_urp = 0;
271 return (n);
272}
273
274/*
275 * Send a single message to the TCP at address specified by
276 * the given TCP/IP header. If m == 0, then we make a copy
277 * of the headers at ipgen and th and send directly to the addressed host.
278 * This is used to force keep alive messages out using the TCP
279 * template for a connection tp->t_template. If flags are given
280 * then we send a message back to the TCP which originated the
281 * segment th, and discard the mbuf containing it and any other
282 * attached mbufs.
283 *
284 * In any case the ack and sequence number of the transmitted
285 * segment are as specified by the parameters.
286 *
287 * NOTE: If m != NULL, then ipgen must point to *inside* the mbuf.
288 */
289void
290tcp_respond(tp, ipgen, th, m, ack, seq, flags)
291 struct tcpcb *tp;
292 void *ipgen;
293 register struct tcphdr *th;
294 register struct mbuf *m;
295 tcp_seq ack, seq;
296 int flags;
297{
298 register int tlen;
299 int win = 0;
300 struct route *ro = 0;
301 struct route sro;
302 struct ip *ip;
299 struct ipovly *ipov;
300 struct tcphdr *nth;
301#ifdef INET6
302 struct route_in6 *ro6 = 0;
303 struct route_in6 sro6;
304 struct ip6_hdr *ip6;
305 int isipv6;
306#endif /* INET6 */
307 int ipflags = 0;
308
309#ifdef INET6
310 isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
311 ip6 = ipgen;
312#endif /* INET6 */
313 ip = ipgen;
303 struct tcphdr *nth;
304#ifdef INET6
305 struct route_in6 *ro6 = 0;
306 struct route_in6 sro6;
307 struct ip6_hdr *ip6;
308 int isipv6;
309#endif /* INET6 */
310 int ipflags = 0;
311
312#ifdef INET6
313 isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
314 ip6 = ipgen;
315#endif /* INET6 */
316 ip = ipgen;
314 ipov = ipgen;
315
316 if (tp) {
317 if (!(flags & TH_RST)) {
318 win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
319 if (win > (long)TCP_MAXWIN << tp->rcv_scale)
320 win = (long)TCP_MAXWIN << tp->rcv_scale;
321 }
322#ifdef INET6
323 if (isipv6)
324 ro6 = &tp->t_inpcb->in6p_route;
325 else
326#endif /* INET6 */
327 ro = &tp->t_inpcb->inp_route;
328 } else {
329#ifdef INET6
330 if (isipv6) {
331 ro6 = &sro6;
332 bzero(ro6, sizeof *ro6);
333 } else
334#endif /* INET6 */
335 {
336 ro = &sro;
337 bzero(ro, sizeof *ro);
338 }
339 }
340 if (m == 0) {
341 m = m_gethdr(M_DONTWAIT, MT_HEADER);
342 if (m == NULL)
343 return;
344#ifdef TCP_COMPAT_42
345 tlen = 1;
346#else
347 tlen = 0;
348#endif
349 m->m_data += max_linkhdr;
350#ifdef INET6
351 if (isipv6) {
352 bcopy((caddr_t)ip6, mtod(m, caddr_t),
353 sizeof(struct ip6_hdr));
354 ip6 = mtod(m, struct ip6_hdr *);
355 nth = (struct tcphdr *)(ip6 + 1);
356 } else
357#endif /* INET6 */
358 {
359 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
360 ip = mtod(m, struct ip *);
317
318 if (tp) {
319 if (!(flags & TH_RST)) {
320 win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
321 if (win > (long)TCP_MAXWIN << tp->rcv_scale)
322 win = (long)TCP_MAXWIN << tp->rcv_scale;
323 }
324#ifdef INET6
325 if (isipv6)
326 ro6 = &tp->t_inpcb->in6p_route;
327 else
328#endif /* INET6 */
329 ro = &tp->t_inpcb->inp_route;
330 } else {
331#ifdef INET6
332 if (isipv6) {
333 ro6 = &sro6;
334 bzero(ro6, sizeof *ro6);
335 } else
336#endif /* INET6 */
337 {
338 ro = &sro;
339 bzero(ro, sizeof *ro);
340 }
341 }
342 if (m == 0) {
343 m = m_gethdr(M_DONTWAIT, MT_HEADER);
344 if (m == NULL)
345 return;
346#ifdef TCP_COMPAT_42
347 tlen = 1;
348#else
349 tlen = 0;
350#endif
351 m->m_data += max_linkhdr;
352#ifdef INET6
353 if (isipv6) {
354 bcopy((caddr_t)ip6, mtod(m, caddr_t),
355 sizeof(struct ip6_hdr));
356 ip6 = mtod(m, struct ip6_hdr *);
357 nth = (struct tcphdr *)(ip6 + 1);
358 } else
359#endif /* INET6 */
360 {
361 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
362 ip = mtod(m, struct ip *);
361 ipov = mtod(m, struct ipovly *);
362 nth = (struct tcphdr *)(ip + 1);
363 }
364 bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
365 flags = TH_ACK;
366 } else {
367 m_freem(m->m_next);
368 m->m_next = 0;
369 m->m_data = (caddr_t)ipgen;
370 /* m_len is set later */
371 tlen = 0;
372#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
373#ifdef INET6
374 if (isipv6) {
375 xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
376 nth = (struct tcphdr *)(ip6 + 1);
377 } else
378#endif /* INET6 */
379 {
380 xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long);
381 nth = (struct tcphdr *)(ip + 1);
382 }
383 if (th != nth) {
384 /*
385 * this is usually a case when an extension header
386 * exists between the IPv6 header and the
387 * TCP header.
388 */
389 nth->th_sport = th->th_sport;
390 nth->th_dport = th->th_dport;
391 }
392 xchg(nth->th_dport, nth->th_sport, n_short);
393#undef xchg
394 }
395#ifdef INET6
396 if (isipv6) {
397 ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
398 tlen));
399 tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
400 } else
401#endif
402 {
363 nth = (struct tcphdr *)(ip + 1);
364 }
365 bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
366 flags = TH_ACK;
367 } else {
368 m_freem(m->m_next);
369 m->m_next = 0;
370 m->m_data = (caddr_t)ipgen;
371 /* m_len is set later */
372 tlen = 0;
373#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
374#ifdef INET6
375 if (isipv6) {
376 xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
377 nth = (struct tcphdr *)(ip6 + 1);
378 } else
379#endif /* INET6 */
380 {
381 xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long);
382 nth = (struct tcphdr *)(ip + 1);
383 }
384 if (th != nth) {
385 /*
386 * this is usually a case when an extension header
387 * exists between the IPv6 header and the
388 * TCP header.
389 */
390 nth->th_sport = th->th_sport;
391 nth->th_dport = th->th_dport;
392 }
393 xchg(nth->th_dport, nth->th_sport, n_short);
394#undef xchg
395 }
396#ifdef INET6
397 if (isipv6) {
398 ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
399 tlen));
400 tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
401 } else
402#endif
403 {
403 ipov->ih_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
404 tlen += sizeof (struct tcpiphdr);
404 tlen += sizeof (struct tcpiphdr);
405 ip->ip_len = tlen;
406 ip->ip_ttl = ip_defttl;
405 }
406 m->m_len = tlen;
407 m->m_pkthdr.len = tlen;
408 m->m_pkthdr.rcvif = (struct ifnet *) 0;
409 nth->th_seq = htonl(seq);
410 nth->th_ack = htonl(ack);
411 nth->th_x2 = 0;
412 nth->th_off = sizeof (struct tcphdr) >> 2;
413 nth->th_flags = flags;
414 if (tp)
415 nth->th_win = htons((u_short) (win >> tp->rcv_scale));
416 else
417 nth->th_win = htons((u_short)win);
418 nth->th_urp = 0;
407 }
408 m->m_len = tlen;
409 m->m_pkthdr.len = tlen;
410 m->m_pkthdr.rcvif = (struct ifnet *) 0;
411 nth->th_seq = htonl(seq);
412 nth->th_ack = htonl(ack);
413 nth->th_x2 = 0;
414 nth->th_off = sizeof (struct tcphdr) >> 2;
415 nth->th_flags = flags;
416 if (tp)
417 nth->th_win = htons((u_short) (win >> tp->rcv_scale));
418 else
419 nth->th_win = htons((u_short)win);
420 nth->th_urp = 0;
419 nth->th_sum = 0;
420#ifdef INET6
421 if (isipv6) {
422 nth->th_sum = in6_cksum(m, IPPROTO_TCP,
423 sizeof(struct ip6_hdr),
424 tlen - sizeof(struct ip6_hdr));
425 ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL,
426 ro6 && ro6->ro_rt ?
427 ro6->ro_rt->rt_ifp :
428 NULL);
429 } else
430#endif /* INET6 */
431 {
421#ifdef INET6
422 if (isipv6) {
423 nth->th_sum = in6_cksum(m, IPPROTO_TCP,
424 sizeof(struct ip6_hdr),
425 tlen - sizeof(struct ip6_hdr));
426 ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL,
427 ro6 && ro6->ro_rt ?
428 ro6->ro_rt->rt_ifp :
429 NULL);
430 } else
431#endif /* INET6 */
432 {
432 bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
433 nth->th_sum = in_cksum(m, tlen);
434#ifdef INET6
435 /* Re-initialization for later version check */
436 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, 0);
437#endif /* INET6 */
438 ip->ip_len = tlen;
439 ip->ip_ttl = ip_defttl;
433 nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
434 htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
435 m->m_pkthdr.csum_flags = CSUM_TCP;
436 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
440 }
441#ifdef TCPDEBUG
442 if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
443 tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
444#endif
445#ifdef IPSEC
446 if (tp != NULL) {
447 m->m_pkthdr.rcvif = (struct ifnet *)tp->t_inpcb->inp_socket;
448 ipflags |=
449#ifdef INET6
450 isipv6 ? IPV6_SOCKINMRCVIF :
451#endif
452 IP_SOCKINMRCVIF;
453 }
454#endif
455#ifdef INET6
456 if (isipv6) {
457 (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL);
458 if (ro6 == &sro6 && ro6->ro_rt) {
459 RTFREE(ro6->ro_rt);
460 ro6->ro_rt = NULL;
461 }
462 } else
463#endif /* INET6 */
464 {
465 (void) ip_output(m, NULL, ro, ipflags, NULL);
466 if (ro == &sro && ro->ro_rt) {
467 RTFREE(ro->ro_rt);
468 ro->ro_rt = NULL;
469 }
470 }
471}
472
473/*
474 * Create a new TCP control block, making an
475 * empty reassembly queue and hooking it to the argument
476 * protocol control block. The `inp' parameter must have
477 * come from the zone allocator set up in tcp_init().
 *
 * No memory is allocated here: the tcpcb and its five callout
 * structures live in the struct inp_tp that contains `inp', which is
 * why `inp' is cast to that type below.
 *
 * Returns a pointer to the initialized tcpcb; there is no failure
 * path in this function.
478 */
479struct tcpcb *
480tcp_newtcpcb(inp)
481 struct inpcb *inp;
482{
483 struct inp_tp *it;
484 register struct tcpcb *tp;
485#ifdef INET6
486 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
487#endif /* INET6 */
488
 /* inp is the first member of struct inp_tp, so the cast is valid. */
489 it = (struct inp_tp *)inp;
490 tp = &it->tcb;
 /* Start from an all-zero tcpcb; only non-zero fields are set below. */
491 bzero((char *) tp, sizeof(struct tcpcb));
492 LIST_INIT(&tp->t_segq);
 /* Initial segment size is the per-address-family default MSS. */
493 tp->t_maxseg = tp->t_maxopd =
494#ifdef INET6
495 isipv6 ? tcp_v6mssdflt :
496#endif /* INET6 */
497 tcp_mssdflt;
498
499 /* Set up our timeouts. */
500 callout_init(tp->tt_rexmt = &it->inp_tp_rexmt);
501 callout_init(tp->tt_persist = &it->inp_tp_persist);
502 callout_init(tp->tt_keep = &it->inp_tp_keep);
503 callout_init(tp->tt_2msl = &it->inp_tp_2msl);
504 callout_init(tp->tt_delack = &it->inp_tp_delack);
505
 /*
  * t_flags is zero here (bzero above), so the plain assignment for
  * rfc1323 loses nothing; the rfc1644 flag is OR-ed in afterwards.
  */
506 if (tcp_do_rfc1323)
507 tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
508 if (tcp_do_rfc1644)
509 tp->t_flags |= TF_REQ_CC;
510 tp->t_inpcb = inp; /* XXX */
511 /*
512 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
513 * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
514 * reasonable initial retransmit time.
515 */
516 tp->t_srtt = TCPTV_SRTTBASE;
517 tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
518 tp->t_rttmin = TCPTV_MIN;
519 tp->t_rxtcur = TCPTV_RTOBASE;
 /* Congestion window and threshold both start at the same maximum. */
520 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
521 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
522 tp->t_rcvtime = ticks;
523 /*
524 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
525 * because the socket may be bound to an IPv6 wildcard address,
526 * which may match an IPv4-mapped IPv6 address.
527 */
528 inp->inp_ip_ttl = ip_defttl;
 /* Link the inpcb back to its tcpcb. */
529 inp->inp_ppcb = (caddr_t)tp;
530 return (tp); /* XXX */
531}
532
533/*
534 * Drop a TCP connection, reporting
535 * the specified error. If connection is synchronized,
536 * then send a RST to peer.
 *
 * tp    - control block of the connection to drop.
 * errno - error to store in the socket's so_error.  ETIMEDOUT is
 *         replaced by tp->t_softerror when that is non-zero.
 *
 * Counts the event in tcpstat.tcps_drops when a SYN had been received
 * and in tcpstat.tcps_conndrops otherwise.  Returns whatever
 * tcp_close(tp) returns.
537 */
538struct tcpcb *
539tcp_drop(tp, errno)
540 register struct tcpcb *tp;
541 int errno;
542{
 /* Look the socket up first; tp is handed to tcp_close() at the end. */
543 struct socket *so = tp->t_inpcb->inp_socket;
544
545 if (TCPS_HAVERCVDSYN(tp->t_state)) {
 /* Go to CLOSED first, then let tcp_output() run for that state. */
546 tp->t_state = TCPS_CLOSED;
547 (void) tcp_output(tp);
548 tcpstat.tcps_drops++;
549 } else
550 tcpstat.tcps_conndrops++;
 /* Prefer a recorded soft error over a bare timeout. */
551 if (errno == ETIMEDOUT && tp->t_softerror)
552 errno = tp->t_softerror;
553 so->so_error = errno;
554 return (tcp_close(tp));
555}
556
557/*
558 * Close a TCP control block:
559 * discard all space held by the tcp
560 * discard internet protocol block
561 * wake up any sleepers
 *
 * In order, this routine: stops the five per-connection callouts;
 * if at least 16 RTT samples were taken and the cached route is usable,
 * folds the smoothed rtt, rtt variance and ssthresh into the route
 * metrics; frees the reassembly queue and the header template; clears
 * inp_ppcb; calls soisdisconnected(); detaches the inpcb; and bumps
 * tcpstat.tcps_closed.
 *
 * Always returns a null tcpcb pointer.
562 */
563struct tcpcb *
564tcp_close(tp)
565 register struct tcpcb *tp;
566{
567 register struct tseg_qent *q;
568 struct inpcb *inp = tp->t_inpcb;
569 struct socket *so = inp->inp_socket;
570#ifdef INET6
571 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
572#endif /* INET6 */
573 register struct rtentry *rt;
574 int dosavessthresh;
575
576 /*
577 * Make sure that all of our timers are stopped before we
578 * delete the PCB.
579 */
580 callout_stop(tp->tt_rexmt);
581 callout_stop(tp->tt_persist);
582 callout_stop(tp->tt_keep);
583 callout_stop(tp->tt_2msl);
584 callout_stop(tp->tt_delack);
585
586 /*
587 * If we got enough samples through the srtt filter,
588 * save the rtt and rttvar in the routing entry.
589 * 'Enough' is arbitrarily defined as the 16 samples.
590 * 16 samples is enough for the srtt filter to converge
591 * to within 5% of the correct value; fewer samples and
592 * we could save a very bogus rtt.
593 *
594 * Don't update the default route's characteristics and don't
595 * update anything that the user "locked".
596 */
597 if (tp->t_rttupdated >= 16) {
598 register u_long i = 0;
	/*
	 * Pick the cached route for the address family in use and bail
	 * out to no_valid_rt if there is none or its key is the
	 * unspecified/any address.
	 */
599#ifdef INET6
600 if (isipv6) {
601 struct sockaddr_in6 *sin6;
602
603 if ((rt = inp->in6p_route.ro_rt) == NULL)
604 goto no_valid_rt;
605 sin6 = (struct sockaddr_in6 *)rt_key(rt);
606 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
607 goto no_valid_rt;
608 }
609 else
610#endif /* INET6 */
611 if ((rt = inp->inp_route.ro_rt) == NULL ||
612 ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr
613 == INADDR_ANY)
614 goto no_valid_rt;
615
	/* Save the smoothed RTT unless the RTV_RTT metric is locked. */
616 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
617 i = tp->t_srtt *
618 (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
619 if (rt->rt_rmx.rmx_rtt && i)
620 /*
621 * filter this update to half the old & half
622 * the new values, converting scale.
623 * See route.h and tcp_var.h for a
624 * description of the scaling constants.
625 */
626 rt->rt_rmx.rmx_rtt =
627 (rt->rt_rmx.rmx_rtt + i) / 2;
628 else
629 rt->rt_rmx.rmx_rtt = i;
630 tcpstat.tcps_cachedrtt++;
631 }
	/* Same averaging for the RTT variance, gated by RTV_RTTVAR. */
632 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
633 i = tp->t_rttvar *
634 (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
635 if (rt->rt_rmx.rmx_rttvar && i)
636 rt->rt_rmx.rmx_rttvar =
637 (rt->rt_rmx.rmx_rttvar + i) / 2;
638 else
639 rt->rt_rmx.rmx_rttvar = i;
640 tcpstat.tcps_cachedrttvar++;
641 }
642 /*
643 * The old comment here said:
644 * update the pipelimit (ssthresh) if it has been updated
645 * already or if a pipesize was specified & the threshhold
646 * got below half the pipesize. I.e., wait for bad news
647 * before we start updating, then update on both good
648 * and bad news.
649 *
650 * But we want to save the ssthresh even if no pipesize is
651 * specified explicitly in the route, because such
652 * connections still have an implicit pipesize specified
653 * by the global tcp_sendspace. In the absence of a reliable
654 * way to calculate the pipesize, it will have to do.
655 */
656 i = tp->snd_ssthresh;
657 if (rt->rt_rmx.rmx_sendpipe != 0)
658 dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
659 else
660 dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
	/*
	 * NOTE(review): as parenthesised, dosavessthresh is OR-ed with the
	 * whole lock test, so a true dosavessthresh updates rmx_ssthresh
	 * even when RTV_SSTHRESH is locked -- confirm this is intended.
	 */
661 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
662 i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
663 || dosavessthresh) {
664 /*
665 * convert the limit from user data bytes to
666 * packets then to packet data bytes.
667 */
668 i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
669 if (i < 2)
670 i = 2;
671 i *= (u_long)(tp->t_maxseg +
672#ifdef INET6
673 (isipv6 ? sizeof (struct ip6_hdr) +
674 sizeof (struct tcphdr) :
675#endif
676 sizeof (struct tcpiphdr)
677#ifdef INET6
678 )
679#endif
680 );
681 if (rt->rt_rmx.rmx_ssthresh)
682 rt->rt_rmx.rmx_ssthresh =
683 (rt->rt_rmx.rmx_ssthresh + i) / 2;
684 else
685 rt->rt_rmx.rmx_ssthresh = i;
686 tcpstat.tcps_cachedssthresh++;
687 }
688 }
689 no_valid_rt:
690 /* free the reassembly queue, if any */
691 while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
692 LIST_REMOVE(q, tqe_q);
693 m_freem(q->tqe_m);
694 FREE(q, M_TSEGQ);
695 }
	/* Release the mbuf that holds the header template, if one exists. */
696 if (tp->t_template)
697 (void) m_free(dtom(tp->t_template));
	/* Unhook the tcpcb from the inpcb before the inpcb is detached. */
698 inp->inp_ppcb = NULL;
699 soisdisconnected(so);
700#ifdef INET6
701 if (INP_CHECK_SOCKAF(so, AF_INET6))
702 in6_pcbdetach(inp);
703 else
704#endif /* INET6 */
705 in_pcbdetach(inp);
706 tcpstat.tcps_closed++;
707 return ((struct tcpcb *)0);
708}
709
710void
711tcp_drain()
712{
713 if (do_tcpdrain)
714 {
715 struct inpcb *inpb;
716 struct tcpcb *tcpb;
717 struct tseg_qent *te;
718
719 /*
720 * Walk the tcpbs, if existing, and flush the reassembly queue,
721 * if there is one...
722 * XXX: The "Net/3" implementation doesn't imply that the TCP
723 * reassembly queue should be flushed, but in a situation
724 * where we're really low on mbufs, this is potentially
725 * usefull.
726 */
727 for (inpb = tcbinfo.listhead->lh_first; inpb;
728 inpb = inpb->inp_list.le_next) {
729 if ((tcpb = intotcpcb(inpb))) {
730 while ((te = LIST_FIRST(&tcpb->t_segq))
731 != NULL) {
732 LIST_REMOVE(te, tqe_q);
733 m_freem(te->tqe_m);
734 FREE(te, M_TSEGQ);
735 }
736 }
737 }
738
739 }
740}
741
742/*
743 * Notify a tcp user of an asynchronous error;
744 * store error as soft error, but wake up user
745 * (for now, won't do anything until can select for soft error).
746 */
747static void
748tcp_notify(inp, error)
749 struct inpcb *inp;
750 int error;
751{
752 register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
753 register struct socket *so = inp->inp_socket;
754
755 /*
756 * Ignore some errors if we are hooked up.
757 * If connection hasn't completed, has retransmitted several times,
758 * and receives a second error, give up now. This is better
759 * than waiting a long time to establish a connection that
760 * can never complete.
761 */
762 if (tp->t_state == TCPS_ESTABLISHED &&
763 (error == EHOSTUNREACH || error == ENETUNREACH ||
764 error == EHOSTDOWN)) {
765 return;
766 } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
767 tp->t_softerror)
768 so->so_error = error;
769 else
770 tp->t_softerror = error;
771 wakeup((caddr_t) &so->so_timeo);
772 sorwakeup(so);
773 sowwakeup(so);
774}
775
776static int
777tcp_pcblist SYSCTL_HANDLER_ARGS
778{
779 int error, i, n, s;
780 struct inpcb *inp, **inp_list;
781 inp_gen_t gencnt;
782 struct xinpgen xig;
783
784 /*
785 * The process of preparing the TCB list is too time-consuming and
786 * resource-intensive to repeat twice on every request.
787 */
788 if (req->oldptr == 0) {
789 n = tcbinfo.ipi_count;
790 req->oldidx = 2 * (sizeof xig)
791 + (n + n/8) * sizeof(struct xtcpcb);
792 return 0;
793 }
794
795 if (req->newptr != 0)
796 return EPERM;
797
798 /*
799 * OK, now we're committed to doing something.
800 */
801 s = splnet();
802 gencnt = tcbinfo.ipi_gencnt;
803 n = tcbinfo.ipi_count;
804 splx(s);
805
806 xig.xig_len = sizeof xig;
807 xig.xig_count = n;
808 xig.xig_gen = gencnt;
809 xig.xig_sogen = so_gencnt;
810 error = SYSCTL_OUT(req, &xig, sizeof xig);
811 if (error)
812 return error;
813
814 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
815 if (inp_list == 0)
816 return ENOMEM;
817
818 s = splnet();
819 for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n;
820 inp = inp->inp_list.le_next) {
821 if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp))
822 inp_list[i++] = inp;
823 }
824 splx(s);
825 n = i;
826
827 error = 0;
828 for (i = 0; i < n; i++) {
829 inp = inp_list[i];
830 if (inp->inp_gencnt <= gencnt) {
831 struct xtcpcb xt;
832 caddr_t inp_ppcb;
833 xt.xt_len = sizeof xt;
834 /* XXX should avoid extra copy */
835 bcopy(inp, &xt.xt_inp, sizeof *inp);
836 inp_ppcb = inp->inp_ppcb;
837 if (inp_ppcb != NULL)
838 bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
839 else
840 bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
841 if (inp->inp_socket)
842 sotoxsocket(inp->inp_socket, &xt.xt_socket);
843 error = SYSCTL_OUT(req, &xt, sizeof xt);
844 }
845 }
846 if (!error) {
847 /*
848 * Give the user an updated idea of our state.
849 * If the generation differs from what we told
850 * her before, she knows that something happened
851 * while we were processing this request, and it
852 * might be necessary to retry.
853 */
854 s = splnet();
855 xig.xig_gen = tcbinfo.ipi_gencnt;
856 xig.xig_sogen = so_gencnt;
857 xig.xig_count = tcbinfo.ipi_count;
858 splx(s);
859 error = SYSCTL_OUT(req, &xig, sizeof xig);
860 }
861 free(inp_list, M_TEMP);
862 return error;
863}
864
865SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
866 tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
867
868static int
869tcp_getcred SYSCTL_HANDLER_ARGS
870{
871 struct sockaddr_in addrs[2];
872 struct inpcb *inp;
873 int error, s;
874
875 error = suser(req->p);
876 if (error)
877 return (error);
878 error = SYSCTL_IN(req, addrs, sizeof(addrs));
879 if (error)
880 return (error);
881 s = splnet();
882 inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
883 addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
884 if (inp == NULL || inp->inp_socket == NULL) {
885 error = ENOENT;
886 goto out;
887 }
888 error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred));
889out:
890 splx(s);
891 return (error);
892}
893
894SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
895 0, 0, tcp_getcred, "S,ucred", "Get the ucred of a TCP connection");
896
897#ifdef INET6
898static int
899tcp6_getcred SYSCTL_HANDLER_ARGS
900{
901 struct sockaddr_in6 addrs[2];
902 struct inpcb *inp;
903 int error, s, mapped = 0;
904
905 error = suser(req->p);
906 if (error)
907 return (error);
908 error = SYSCTL_IN(req, addrs, sizeof(addrs));
909 if (error)
910 return (error);
911 if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
912 if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
913 mapped = 1;
914 else
915 return (EINVAL);
916 }
917 s = splnet();
918 if (mapped == 1)
919 inp = in_pcblookup_hash(&tcbinfo,
920 *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
921 addrs[1].sin6_port,
922 *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
923 addrs[0].sin6_port,
924 0, NULL);
925 else
926 inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr,
927 addrs[1].sin6_port,
928 &addrs[0].sin6_addr, addrs[0].sin6_port,
929 0, NULL);
930 if (inp == NULL || inp->inp_socket == NULL) {
931 error = ENOENT;
932 goto out;
933 }
934 error = SYSCTL_OUT(req, inp->inp_socket->so_cred,
935 sizeof(struct ucred));
936out:
937 splx(s);
938 return (error);
939}
940
941SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
942 0, 0,
943 tcp6_getcred, "S,ucred", "Get the ucred of a TCP6 connection");
944#endif
945
946
947void
948tcp_ctlinput(cmd, sa, vip)
949 int cmd;
950 struct sockaddr *sa;
951 void *vip;
952{
953 register struct ip *ip = vip;
954 register struct tcphdr *th;
955 void (*notify) __P((struct inpcb *, int)) = tcp_notify;
956
957 if (cmd == PRC_QUENCH)
958 notify = tcp_quench;
959 else if (cmd == PRC_MSGSIZE)
960 notify = tcp_mtudisc;
961 else if (!PRC_IS_REDIRECT(cmd) &&
962 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
963 return;
964 if (ip) {
965 th = (struct tcphdr *)((caddr_t)ip
966 + (IP_VHL_HL(ip->ip_vhl) << 2));
967 in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
968 cmd, notify);
969 } else
970 in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
971}
972
973#ifdef INET6
974void
975tcp6_ctlinput(cmd, sa, d)
976 int cmd;
977 struct sockaddr *sa;
978 void *d;
979{
980 register struct tcphdr *thp;
981 struct tcphdr th;
982 void (*notify) __P((struct inpcb *, int)) = tcp_notify;
983 struct sockaddr_in6 sa6;
984 struct ip6_hdr *ip6;
985 struct mbuf *m;
986 int off;
987
988 if (sa->sa_family != AF_INET6 ||
989 sa->sa_len != sizeof(struct sockaddr_in6))
990 return;
991
992 if (cmd == PRC_QUENCH)
993 notify = tcp_quench;
994 else if (cmd == PRC_MSGSIZE)
995 notify = tcp_mtudisc;
996 else if (!PRC_IS_REDIRECT(cmd) &&
997 ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
998 return;
999
1000 /* if the parameter is from icmp6, decode it. */
1001 if (d != NULL) {
1002 struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
1003 m = ip6cp->ip6c_m;
1004 ip6 = ip6cp->ip6c_ip6;
1005 off = ip6cp->ip6c_off;
1006 } else {
1007 m = NULL;
1008 ip6 = NULL;
1009 }
1010
1011 /*
1012 * Translate addresses into internal form.
1013 * Sa check if it is AF_INET6 is done at the top of this funciton.
1014 */
1015 sa6 = *(struct sockaddr_in6 *)sa;
1016 if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL &&
1017 m->m_pkthdr.rcvif != NULL)
1018 sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
1019
1020 if (ip6) {
1021 /*
1022 * XXX: We assume that when IPV6 is non NULL,
1023 * M and OFF are valid.
1024 */
1025 struct in6_addr s;
1026
1027 /* translate addresses into internal form */
1028 memcpy(&s, &ip6->ip6_src, sizeof(s));
1029 if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 && m != NULL &&
1030 m->m_pkthdr.rcvif != NULL)
1031 s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
1032
1033 if (m->m_len < off + sizeof(*thp)) {
1034 /*
1035 * this should be rare case
1036 * because now MINCLSIZE is "(MHLEN + 1)",
1037 * so we compromise on this copy...
1038 */
1039 m_copydata(m, off, sizeof(th), (caddr_t)&th);
1040 thp = &th;
1041 } else
1042 thp = (struct tcphdr *)(mtod(m, caddr_t) + off);
1043 in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport,
1044 &s, thp->th_sport, cmd, notify);
1045 } else
1046 in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr,
1047 0, cmd, notify);
1048}
1049#endif /* INET6 */
1050
1051/*
1052 * When a source quench is received, close congestion window
1053 * to one segment. We will gradually open it again as we proceed.
1054 */
1055void
1056tcp_quench(inp, errno)
1057 struct inpcb *inp;
1058 int errno;
1059{
1060 struct tcpcb *tp = intotcpcb(inp);
1061
1062 if (tp)
1063 tp->snd_cwnd = tp->t_maxseg;
1064}
1065
1066/*
1067 * When `need fragmentation' ICMP is received, update our idea of the MSS
1068 * based on the new value in the route. Also nudge TCP to send something,
1069 * since we know the packet we just sent was dropped.
1070 * This duplicates some code in the tcp_mss() function in tcp_input.c.
1071 */
1072void
1073tcp_mtudisc(inp, errno)
1074 struct inpcb *inp;
1075 int errno;
1076{
1077 struct tcpcb *tp = intotcpcb(inp);
1078 struct rtentry *rt;
1079 struct rmxp_tao *taop;
1080 struct socket *so = inp->inp_socket;
1081 int offered;
1082 int mss;
1083#ifdef INET6
1084 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
1085#endif /* INET6 */
1086
1087 if (tp) {
1088#ifdef INET6
1089 if (isipv6)
1090 rt = tcp_rtlookup6(inp);
1091 else
1092#endif /* INET6 */
1093 rt = tcp_rtlookup(inp);
1094 if (!rt || !rt->rt_rmx.rmx_mtu) {
1095 tp->t_maxopd = tp->t_maxseg =
1096#ifdef INET6
1097 isipv6 ? tcp_v6mssdflt :
1098#endif /* INET6 */
1099 tcp_mssdflt;
1100 return;
1101 }
1102 taop = rmx_taop(rt->rt_rmx);
1103 offered = taop->tao_mssopt;
1104 mss = rt->rt_rmx.rmx_mtu -
1105#ifdef INET6
1106 (isipv6 ?
1107 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
1108#endif /* INET6 */
1109 sizeof(struct tcpiphdr)
1110#ifdef INET6
1111 )
1112#endif /* INET6 */
1113 ;
1114
1115 if (offered)
1116 mss = min(mss, offered);
1117 /*
1118 * XXX - The above conditional probably violates the TCP
1119 * spec. The problem is that, since we don't know the
1120 * other end's MSS, we are supposed to use a conservative
1121 * default. But, if we do that, then MTU discovery will
1122 * never actually take place, because the conservative
1123 * default is much less than the MTUs typically seen
1124 * on the Internet today. For the moment, we'll sweep
1125 * this under the carpet.
1126 *
1127 * The conservative default might not actually be a problem
1128 * if the only case this occurs is when sending an initial
1129 * SYN with options and data to a host we've never talked
1130 * to before. Then, they will reply with an MSS value which
1131 * will get recorded and the new parameters should get
1132 * recomputed. For Further Study.
1133 */
1134 if (tp->t_maxopd <= mss)
1135 return;
1136 tp->t_maxopd = mss;
1137
1138 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
1139 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
1140 mss -= TCPOLEN_TSTAMP_APPA;
1141 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
1142 (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
1143 mss -= TCPOLEN_CC_APPA;
1144#if (MCLBYTES & (MCLBYTES - 1)) == 0
1145 if (mss > MCLBYTES)
1146 mss &= ~(MCLBYTES-1);
1147#else
1148 if (mss > MCLBYTES)
1149 mss = mss / MCLBYTES * MCLBYTES;
1150#endif
1151 if (so->so_snd.sb_hiwat < mss)
1152 mss = so->so_snd.sb_hiwat;
1153
1154 tp->t_maxseg = mss;
1155
1156 tcpstat.tcps_mturesent++;
1157 tp->t_rtttime = 0;
1158 tp->snd_nxt = tp->snd_una;
1159 tcp_output(tp);
1160 }
1161}
1162
1163/*
1164 * Look-up the routing entry to the peer of this inpcb. If no route
1165 * is found and it cannot be allocated the return NULL. This routine
1166 * is called by TCP routines that access the rmx structure and by tcp_mss
1167 * to get the interface MTU.
1168 */
1169struct rtentry *
1170tcp_rtlookup(inp)
1171 struct inpcb *inp;
1172{
1173 struct route *ro;
1174 struct rtentry *rt;
1175
1176 ro = &inp->inp_route;
1177 rt = ro->ro_rt;
1178 if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
1179 /* No route yet, so try to acquire one */
1180 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1181 ro->ro_dst.sa_family = AF_INET;
1182 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1183 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
1184 inp->inp_faddr;
1185 rtalloc(ro);
1186 rt = ro->ro_rt;
1187 }
1188 }
1189 return rt;
1190}
1191
1192#ifdef INET6
1193struct rtentry *
1194tcp_rtlookup6(inp)
1195 struct inpcb *inp;
1196{
1197 struct route_in6 *ro6;
1198 struct rtentry *rt;
1199
1200 ro6 = &inp->in6p_route;
1201 rt = ro6->ro_rt;
1202 if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
1203 /* No route yet, so try to acquire one */
1204 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
1205 ro6->ro_dst.sin6_family = AF_INET6;
1206 ro6->ro_dst.sin6_len = sizeof(ro6->ro_dst);
1207 ro6->ro_dst.sin6_addr = inp->in6p_faddr;
1208 rtalloc((struct route *)ro6);
1209 rt = ro6->ro_rt;
1210 }
1211 }
1212 return rt;
1213}
1214#endif /* INET6 */
1215
1216#ifdef IPSEC
1217/* compute ESP/AH header size for TCP, including outer IP header. */
1218size_t
1219ipsec_hdrsiz_tcp(tp)
1220 struct tcpcb *tp;
1221{
1222 struct inpcb *inp;
1223 struct mbuf *m;
1224 size_t hdrsiz;
1225 struct ip *ip;
1226#ifdef INET6
1227 struct ip6_hdr *ip6;
1228#endif /* INET6 */
1229 struct tcphdr *th;
1230
1231 if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
1232 return 0;
1233 MGETHDR(m, M_DONTWAIT, MT_DATA);
1234 if (!m)
1235 return 0;
1236
1237#ifdef INET6
1238 if ((inp->inp_vflag & INP_IPV6) != 0) {
1239 ip6 = mtod(m, struct ip6_hdr *);
1240 th = (struct tcphdr *)(ip6 + 1);
1241 m->m_pkthdr.len = m->m_len =
1242 sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
1243 bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
1244 sizeof(struct ip6_hdr));
1245 bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
1246 sizeof(struct tcphdr));
1247 hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
1248 } else
1249#endif /* INET6 */
1250 {
1251 ip = mtod(m, struct ip *);
1252 th = (struct tcphdr *)(ip + 1);
1253 m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr);
1254 bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
1255 sizeof(struct ip));
1256 bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
1257 sizeof(struct tcphdr));
1258 hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
1259 }
1260
1261 m_free(m);
1262 return hdrsiz;
1263}
1264#endif /*IPSEC*/
1265
1266/*
1267 * Return a pointer to the cached information about the remote host.
1268 * The cached information is stored in the protocol specific part of
1269 * the route metrics.
1270 */
1271struct rmxp_tao *
1272tcp_gettaocache(inp)
1273 struct inpcb *inp;
1274{
1275 struct rtentry *rt;
1276
1277#ifdef INET6
1278 if ((inp->inp_vflag & INP_IPV6) != 0)
1279 rt = tcp_rtlookup6(inp);
1280 else
1281#endif /* INET6 */
1282 rt = tcp_rtlookup(inp);
1283
1284 /* Make sure this is a host route and is up. */
1285 if (rt == NULL ||
1286 (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
1287 return NULL;
1288
1289 return rmx_taop(rt->rt_rmx);
1290}
1291
1292/*
1293 * Clear all the TAO cache entries, called from tcp_init.
1294 *
1295 * XXX
1296 * This routine is intentionally empty: we assume that the
1297 * routing tables are initialized at the same time as TCP, so there is
1298 * nothing left over in the cache to clear.
1299 */
1300static void
1301tcp_cleartaocache()
1302{
1303}
437 }
438#ifdef TCPDEBUG
439 if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
440 tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
441#endif
442#ifdef IPSEC
443 if (tp != NULL) {
444 m->m_pkthdr.rcvif = (struct ifnet *)tp->t_inpcb->inp_socket;
445 ipflags |=
446#ifdef INET6
447 isipv6 ? IPV6_SOCKINMRCVIF :
448#endif
449 IP_SOCKINMRCVIF;
450 }
451#endif
452#ifdef INET6
453 if (isipv6) {
454 (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL);
455 if (ro6 == &sro6 && ro6->ro_rt) {
456 RTFREE(ro6->ro_rt);
457 ro6->ro_rt = NULL;
458 }
459 } else
460#endif /* INET6 */
461 {
462 (void) ip_output(m, NULL, ro, ipflags, NULL);
463 if (ro == &sro && ro->ro_rt) {
464 RTFREE(ro->ro_rt);
465 ro->ro_rt = NULL;
466 }
467 }
468}
469
470/*
471 * Create a new TCP control block, making an
472 * empty reassembly queue and hooking it to the argument
473 * protocol control block. The `inp' parameter must have
474 * come from the zone allocator set up in tcp_init().
475 */
476struct tcpcb *
477tcp_newtcpcb(inp)
478 struct inpcb *inp;
479{
480 struct inp_tp *it;
481 register struct tcpcb *tp;
482#ifdef INET6
483 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
484#endif /* INET6 */
485
486 it = (struct inp_tp *)inp;
487 tp = &it->tcb;
488 bzero((char *) tp, sizeof(struct tcpcb));
489 LIST_INIT(&tp->t_segq);
490 tp->t_maxseg = tp->t_maxopd =
491#ifdef INET6
492 isipv6 ? tcp_v6mssdflt :
493#endif /* INET6 */
494 tcp_mssdflt;
495
496 /* Set up our timeouts. */
497 callout_init(tp->tt_rexmt = &it->inp_tp_rexmt);
498 callout_init(tp->tt_persist = &it->inp_tp_persist);
499 callout_init(tp->tt_keep = &it->inp_tp_keep);
500 callout_init(tp->tt_2msl = &it->inp_tp_2msl);
501 callout_init(tp->tt_delack = &it->inp_tp_delack);
502
503 if (tcp_do_rfc1323)
504 tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
505 if (tcp_do_rfc1644)
506 tp->t_flags |= TF_REQ_CC;
507 tp->t_inpcb = inp; /* XXX */
508 /*
509 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
510 * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
511 * reasonable initial retransmit time.
512 */
513 tp->t_srtt = TCPTV_SRTTBASE;
514 tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
515 tp->t_rttmin = TCPTV_MIN;
516 tp->t_rxtcur = TCPTV_RTOBASE;
517 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
518 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
519 tp->t_rcvtime = ticks;
520 /*
521 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
522 * because the socket may be bound to an IPv6 wildcard address,
523 * which may match an IPv4-mapped IPv6 address.
524 */
525 inp->inp_ip_ttl = ip_defttl;
526 inp->inp_ppcb = (caddr_t)tp;
527 return (tp); /* XXX */
528}
529
530/*
531 * Drop a TCP connection, reporting
532 * the specified error. If connection is synchronized,
533 * then send a RST to peer.
534 */
535struct tcpcb *
536tcp_drop(tp, errno)
537 register struct tcpcb *tp;
538 int errno;
539{
540 struct socket *so = tp->t_inpcb->inp_socket;
541
542 if (TCPS_HAVERCVDSYN(tp->t_state)) {
543 tp->t_state = TCPS_CLOSED;
544 (void) tcp_output(tp);
545 tcpstat.tcps_drops++;
546 } else
547 tcpstat.tcps_conndrops++;
548 if (errno == ETIMEDOUT && tp->t_softerror)
549 errno = tp->t_softerror;
550 so->so_error = errno;
551 return (tcp_close(tp));
552}
553
554/*
555 * Close a TCP control block:
556 * discard all space held by the tcp
557 * discard internet protocol block
558 * wake up any sleepers
 *
 * In order, this routine: stops the five per-connection callouts;
 * if at least 16 RTT samples were taken and the cached route is usable,
 * folds the smoothed rtt, rtt variance and ssthresh into the route
 * metrics; frees the reassembly queue and the header template; clears
 * inp_ppcb; calls soisdisconnected(); detaches the inpcb; and bumps
 * tcpstat.tcps_closed.
 *
 * Always returns a null tcpcb pointer.
559 */
560struct tcpcb *
561tcp_close(tp)
562 register struct tcpcb *tp;
563{
564 register struct tseg_qent *q;
565 struct inpcb *inp = tp->t_inpcb;
566 struct socket *so = inp->inp_socket;
567#ifdef INET6
568 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
569#endif /* INET6 */
570 register struct rtentry *rt;
571 int dosavessthresh;
572
573 /*
574 * Make sure that all of our timers are stopped before we
575 * delete the PCB.
576 */
577 callout_stop(tp->tt_rexmt);
578 callout_stop(tp->tt_persist);
579 callout_stop(tp->tt_keep);
580 callout_stop(tp->tt_2msl);
581 callout_stop(tp->tt_delack);
582
583 /*
584 * If we got enough samples through the srtt filter,
585 * save the rtt and rttvar in the routing entry.
586 * 'Enough' is arbitrarily defined as the 16 samples.
587 * 16 samples is enough for the srtt filter to converge
588 * to within 5% of the correct value; fewer samples and
589 * we could save a very bogus rtt.
590 *
591 * Don't update the default route's characteristics and don't
592 * update anything that the user "locked".
593 */
594 if (tp->t_rttupdated >= 16) {
595 register u_long i = 0;
	/*
	 * Pick the cached route for the address family in use and bail
	 * out to no_valid_rt if there is none or its key is the
	 * unspecified/any address.
	 */
596#ifdef INET6
597 if (isipv6) {
598 struct sockaddr_in6 *sin6;
599
600 if ((rt = inp->in6p_route.ro_rt) == NULL)
601 goto no_valid_rt;
602 sin6 = (struct sockaddr_in6 *)rt_key(rt);
603 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
604 goto no_valid_rt;
605 }
606 else
607#endif /* INET6 */
608 if ((rt = inp->inp_route.ro_rt) == NULL ||
609 ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr
610 == INADDR_ANY)
611 goto no_valid_rt;
612
	/* Save the smoothed RTT unless the RTV_RTT metric is locked. */
613 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
614 i = tp->t_srtt *
615 (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
616 if (rt->rt_rmx.rmx_rtt && i)
617 /*
618 * filter this update to half the old & half
619 * the new values, converting scale.
620 * See route.h and tcp_var.h for a
621 * description of the scaling constants.
622 */
623 rt->rt_rmx.rmx_rtt =
624 (rt->rt_rmx.rmx_rtt + i) / 2;
625 else
626 rt->rt_rmx.rmx_rtt = i;
627 tcpstat.tcps_cachedrtt++;
628 }
	/* Same averaging for the RTT variance, gated by RTV_RTTVAR. */
629 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
630 i = tp->t_rttvar *
631 (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
632 if (rt->rt_rmx.rmx_rttvar && i)
633 rt->rt_rmx.rmx_rttvar =
634 (rt->rt_rmx.rmx_rttvar + i) / 2;
635 else
636 rt->rt_rmx.rmx_rttvar = i;
637 tcpstat.tcps_cachedrttvar++;
638 }
639 /*
640 * The old comment here said:
641 * update the pipelimit (ssthresh) if it has been updated
642 * already or if a pipesize was specified & the threshhold
643 * got below half the pipesize. I.e., wait for bad news
644 * before we start updating, then update on both good
645 * and bad news.
646 *
647 * But we want to save the ssthresh even if no pipesize is
648 * specified explicitly in the route, because such
649 * connections still have an implicit pipesize specified
650 * by the global tcp_sendspace. In the absence of a reliable
651 * way to calculate the pipesize, it will have to do.
652 */
653 i = tp->snd_ssthresh;
654 if (rt->rt_rmx.rmx_sendpipe != 0)
655 dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
656 else
657 dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
	/*
	 * NOTE(review): as parenthesised, dosavessthresh is OR-ed with the
	 * whole lock test, so a true dosavessthresh updates rmx_ssthresh
	 * even when RTV_SSTHRESH is locked -- confirm this is intended.
	 */
658 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
659 i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
660 || dosavessthresh) {
661 /*
662 * convert the limit from user data bytes to
663 * packets then to packet data bytes.
664 */
665 i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
666 if (i < 2)
667 i = 2;
668 i *= (u_long)(tp->t_maxseg +
669#ifdef INET6
670 (isipv6 ? sizeof (struct ip6_hdr) +
671 sizeof (struct tcphdr) :
672#endif
673 sizeof (struct tcpiphdr)
674#ifdef INET6
675 )
676#endif
677 );
678 if (rt->rt_rmx.rmx_ssthresh)
679 rt->rt_rmx.rmx_ssthresh =
680 (rt->rt_rmx.rmx_ssthresh + i) / 2;
681 else
682 rt->rt_rmx.rmx_ssthresh = i;
683 tcpstat.tcps_cachedssthresh++;
684 }
685 }
686 no_valid_rt:
687 /* free the reassembly queue, if any */
688 while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
689 LIST_REMOVE(q, tqe_q);
690 m_freem(q->tqe_m);
691 FREE(q, M_TSEGQ);
692 }
	/* Release the mbuf that holds the header template, if one exists. */
693 if (tp->t_template)
694 (void) m_free(dtom(tp->t_template));
	/* Unhook the tcpcb from the inpcb before the inpcb is detached. */
695 inp->inp_ppcb = NULL;
696 soisdisconnected(so);
697#ifdef INET6
698 if (INP_CHECK_SOCKAF(so, AF_INET6))
699 in6_pcbdetach(inp);
700 else
701#endif /* INET6 */
702 in_pcbdetach(inp);
703 tcpstat.tcps_closed++;
704 return ((struct tcpcb *)0);
705}
706
707void
708tcp_drain()
709{
710 if (do_tcpdrain)
711 {
712 struct inpcb *inpb;
713 struct tcpcb *tcpb;
714 struct tseg_qent *te;
715
716 /*
717 * Walk the tcpbs, if existing, and flush the reassembly queue,
718 * if there is one...
719 * XXX: The "Net/3" implementation doesn't imply that the TCP
720 * reassembly queue should be flushed, but in a situation
721 * where we're really low on mbufs, this is potentially
722 * usefull.
723 */
724 for (inpb = tcbinfo.listhead->lh_first; inpb;
725 inpb = inpb->inp_list.le_next) {
726 if ((tcpb = intotcpcb(inpb))) {
727 while ((te = LIST_FIRST(&tcpb->t_segq))
728 != NULL) {
729 LIST_REMOVE(te, tqe_q);
730 m_freem(te->tqe_m);
731 FREE(te, M_TSEGQ);
732 }
733 }
734 }
735
736 }
737}
738
739/*
740 * Notify a tcp user of an asynchronous error;
741 * store error as soft error, but wake up user
742 * (for now, won't do anything until can select for soft error).
743 */
744static void
745tcp_notify(inp, error)
746 struct inpcb *inp;
747 int error;
748{
749 register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
750 register struct socket *so = inp->inp_socket;
751
752 /*
753 * Ignore some errors if we are hooked up.
754 * If connection hasn't completed, has retransmitted several times,
755 * and receives a second error, give up now. This is better
756 * than waiting a long time to establish a connection that
757 * can never complete.
758 */
759 if (tp->t_state == TCPS_ESTABLISHED &&
760 (error == EHOSTUNREACH || error == ENETUNREACH ||
761 error == EHOSTDOWN)) {
762 return;
763 } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
764 tp->t_softerror)
765 so->so_error = error;
766 else
767 tp->t_softerror = error;
768 wakeup((caddr_t) &so->so_timeo);
769 sorwakeup(so);
770 sowwakeup(so);
771}
772
773static int
774tcp_pcblist SYSCTL_HANDLER_ARGS
775{
776 int error, i, n, s;
777 struct inpcb *inp, **inp_list;
778 inp_gen_t gencnt;
779 struct xinpgen xig;
780
781 /*
782 * The process of preparing the TCB list is too time-consuming and
783 * resource-intensive to repeat twice on every request.
784 */
785 if (req->oldptr == 0) {
786 n = tcbinfo.ipi_count;
787 req->oldidx = 2 * (sizeof xig)
788 + (n + n/8) * sizeof(struct xtcpcb);
789 return 0;
790 }
791
792 if (req->newptr != 0)
793 return EPERM;
794
795 /*
796 * OK, now we're committed to doing something.
797 */
798 s = splnet();
799 gencnt = tcbinfo.ipi_gencnt;
800 n = tcbinfo.ipi_count;
801 splx(s);
802
803 xig.xig_len = sizeof xig;
804 xig.xig_count = n;
805 xig.xig_gen = gencnt;
806 xig.xig_sogen = so_gencnt;
807 error = SYSCTL_OUT(req, &xig, sizeof xig);
808 if (error)
809 return error;
810
811 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
812 if (inp_list == 0)
813 return ENOMEM;
814
815 s = splnet();
816 for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n;
817 inp = inp->inp_list.le_next) {
818 if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp))
819 inp_list[i++] = inp;
820 }
821 splx(s);
822 n = i;
823
824 error = 0;
825 for (i = 0; i < n; i++) {
826 inp = inp_list[i];
827 if (inp->inp_gencnt <= gencnt) {
828 struct xtcpcb xt;
829 caddr_t inp_ppcb;
830 xt.xt_len = sizeof xt;
831 /* XXX should avoid extra copy */
832 bcopy(inp, &xt.xt_inp, sizeof *inp);
833 inp_ppcb = inp->inp_ppcb;
834 if (inp_ppcb != NULL)
835 bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
836 else
837 bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
838 if (inp->inp_socket)
839 sotoxsocket(inp->inp_socket, &xt.xt_socket);
840 error = SYSCTL_OUT(req, &xt, sizeof xt);
841 }
842 }
843 if (!error) {
844 /*
845 * Give the user an updated idea of our state.
846 * If the generation differs from what we told
847 * her before, she knows that something happened
848 * while we were processing this request, and it
849 * might be necessary to retry.
850 */
851 s = splnet();
852 xig.xig_gen = tcbinfo.ipi_gencnt;
853 xig.xig_sogen = so_gencnt;
854 xig.xig_count = tcbinfo.ipi_count;
855 splx(s);
856 error = SYSCTL_OUT(req, &xig, sizeof xig);
857 }
858 free(inp_list, M_TEMP);
859 return error;
860}
861
862SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
863 tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
864
865static int
866tcp_getcred SYSCTL_HANDLER_ARGS
867{
868 struct sockaddr_in addrs[2];
869 struct inpcb *inp;
870 int error, s;
871
872 error = suser(req->p);
873 if (error)
874 return (error);
875 error = SYSCTL_IN(req, addrs, sizeof(addrs));
876 if (error)
877 return (error);
878 s = splnet();
879 inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
880 addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
881 if (inp == NULL || inp->inp_socket == NULL) {
882 error = ENOENT;
883 goto out;
884 }
885 error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred));
886out:
887 splx(s);
888 return (error);
889}
890
891SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
892 0, 0, tcp_getcred, "S,ucred", "Get the ucred of a TCP connection");
893
#ifdef INET6
/*
 * Sysctl handler: hand back the credentials of the socket that owns a
 * given TCP connection described with IPv6 socket addresses.
 *
 * The caller must pass suser(); it then supplies two sockaddr_in6
 * structures as the new value.  When the first address is IPv4-mapped
 * the second must be as well (EINVAL otherwise); in that case the
 * trailing four bytes of each address are used for an IPv4 lookup.
 * In every other case an IPv6 lookup is performed.
 *
 * Returns the suser()/SYSCTL_IN()/SYSCTL_OUT() error if one occurs,
 * ENOENT when no PCB (or no socket behind it) is found, otherwise 0.
 */
static int
tcp6_getcred SYSCTL_HANDLER_ARGS
{
	struct sockaddr_in6 endpoints[2];
	struct inpcb *pcb;
	int rc, ipl, v4mapped = 0;

	rc = suser(req->p);
	if (rc != 0)
		return (rc);

	rc = SYSCTL_IN(req, endpoints, sizeof(endpoints));
	if (rc != 0)
		return (rc);

	if (IN6_IS_ADDR_V4MAPPED(&endpoints[0].sin6_addr)) {
		if (!IN6_IS_ADDR_V4MAPPED(&endpoints[1].sin6_addr))
			return (EINVAL);
		v4mapped = 1;
	}

	ipl = splnet();
	if (v4mapped) {
		/* The embedded IPv4 address is the last 4 bytes. */
		pcb = in_pcblookup_hash(&tcbinfo,
		    *(struct in_addr *)&endpoints[1].sin6_addr.s6_addr[12],
		    endpoints[1].sin6_port,
		    *(struct in_addr *)&endpoints[0].sin6_addr.s6_addr[12],
		    endpoints[0].sin6_port,
		    0, NULL);
	} else {
		pcb = in6_pcblookup_hash(&tcbinfo,
		    &endpoints[1].sin6_addr, endpoints[1].sin6_port,
		    &endpoints[0].sin6_addr, endpoints[0].sin6_port,
		    0, NULL);
	}

	if (pcb != NULL && pcb->inp_socket != NULL)
		rc = SYSCTL_OUT(req, pcb->inp_socket->so_cred,
		    sizeof(struct ucred));
	else
		rc = ENOENT;
	splx(ipl);

	return (rc);
}

SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
	0, 0,
	tcp6_getcred, "S,ucred", "Get the ucred of a TCP6 connection");
#endif
942
943
944void
945tcp_ctlinput(cmd, sa, vip)
946 int cmd;
947 struct sockaddr *sa;
948 void *vip;
949{
950 register struct ip *ip = vip;
951 register struct tcphdr *th;
952 void (*notify) __P((struct inpcb *, int)) = tcp_notify;
953
954 if (cmd == PRC_QUENCH)
955 notify = tcp_quench;
956 else if (cmd == PRC_MSGSIZE)
957 notify = tcp_mtudisc;
958 else if (!PRC_IS_REDIRECT(cmd) &&
959 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
960 return;
961 if (ip) {
962 th = (struct tcphdr *)((caddr_t)ip
963 + (IP_VHL_HL(ip->ip_vhl) << 2));
964 in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
965 cmd, notify);
966 } else
967 in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
968}
969
#ifdef INET6
/*
 * Control-input entry point for TCP over IPv6.
 *
 * cmd - PRC_* command describing the event.
 * sa  - address the event refers to; must be an AF_INET6 sockaddr of
 *       exactly sizeof(struct sockaddr_in6), otherwise the call is
 *       ignored.
 * d   - if non-NULL, a struct ip6ctlparam carrying the offending mbuf,
 *       its IPv6 header and the offset of the upper-layer header.
 *
 * PRC_QUENCH is dispatched to tcp_quench, PRC_MSGSIZE to tcp_mtudisc and
 * everything else to tcp_notify.  Commands that are neither redirects
 * nor mapped to an error in inet6ctlerrmap[] are ignored.
 */
void
tcp6_ctlinput(cmd, sa, d)
	int cmd;
	struct sockaddr *sa;
	void *d;
{
	register struct tcphdr *thp;
	struct tcphdr th;
	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
	struct sockaddr_in6 sa6;
	struct ip6_hdr *ip6;
	struct mbuf *m;
	int off;

	if (sa->sa_family != AF_INET6 ||
	    sa->sa_len != sizeof(struct sockaddr_in6))
		return;

	if (cmd == PRC_QUENCH)
		notify = tcp_quench;
	else if (cmd == PRC_MSGSIZE)
		notify = tcp_mtudisc;
	/*
	 * Valid indices into inet6ctlerrmap[] are 0 .. PRC_NCMDS - 1
	 * (NOTE(review): table is expected to have PRC_NCMDS entries;
	 * confirm against its definition), so PRC_NCMDS itself must be
	 * rejected before the table is consulted.
	 */
	else if (!PRC_IS_REDIRECT(cmd) &&
		 ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
		return;

	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
		m = ip6cp->ip6c_m;
		ip6 = ip6cp->ip6c_ip6;
		off = ip6cp->ip6c_off;
	} else {
		m = NULL;
		ip6 = NULL;
		off = 0;	/* not read when ip6 is NULL; keep it defined */
	}

	/*
	 * Translate addresses into internal form.
	 * The check that sa is AF_INET6 is done at the top of this function.
	 */
	sa6 = *(struct sockaddr_in6 *)sa;
	if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL &&
	    m->m_pkthdr.rcvif != NULL)
		sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);

	if (ip6) {
		struct in6_addr s;

		/*
		 * The TCP ports are read from the mbuf below, so it has to
		 * exist and the packet must be long enough to contain a
		 * complete TCP header at OFF.  Without this check a
		 * truncated packet would make m_copydata() run off the end
		 * of the chain.
		 */
		if (m == NULL || m->m_pkthdr.len < off + sizeof(*thp))
			return;

		/* translate addresses into internal form */
		memcpy(&s, &ip6->ip6_src, sizeof(s));
		if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 &&
		    m->m_pkthdr.rcvif != NULL)
			s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);

		if (m->m_len < off + sizeof(*thp)) {
			/*
			 * this should be rare case
			 * because now MINCLSIZE is "(MHLEN + 1)",
			 * so we compromise on this copy...
			 */
			m_copydata(m, off, sizeof(th), (caddr_t)&th);
			thp = &th;
		} else
			thp = (struct tcphdr *)(mtod(m, caddr_t) + off);
		in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport,
			      &s, thp->th_sport, cmd, notify);
	} else
		in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr,
			      0, cmd, notify);
}
#endif /* INET6 */
1047
1048/*
1049 * When a source quench is received, close congestion window
1050 * to one segment. We will gradually open it again as we proceed.
1051 */
1052void
1053tcp_quench(inp, errno)
1054 struct inpcb *inp;
1055 int errno;
1056{
1057 struct tcpcb *tp = intotcpcb(inp);
1058
1059 if (tp)
1060 tp->snd_cwnd = tp->t_maxseg;
1061}
1062
1063/*
1064 * When `need fragmentation' ICMP is received, update our idea of the MSS
1065 * based on the new value in the route. Also nudge TCP to send something,
1066 * since we know the packet we just sent was dropped.
1067 * This duplicates some code in the tcp_mss() function in tcp_input.c.
1068 */
1069void
1070tcp_mtudisc(inp, errno)
1071 struct inpcb *inp;
1072 int errno;
1073{
1074 struct tcpcb *tp = intotcpcb(inp);
1075 struct rtentry *rt;
1076 struct rmxp_tao *taop;
1077 struct socket *so = inp->inp_socket;
1078 int offered;
1079 int mss;
1080#ifdef INET6
1081 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
1082#endif /* INET6 */
1083
1084 if (tp) {
1085#ifdef INET6
1086 if (isipv6)
1087 rt = tcp_rtlookup6(inp);
1088 else
1089#endif /* INET6 */
1090 rt = tcp_rtlookup(inp);
1091 if (!rt || !rt->rt_rmx.rmx_mtu) {
1092 tp->t_maxopd = tp->t_maxseg =
1093#ifdef INET6
1094 isipv6 ? tcp_v6mssdflt :
1095#endif /* INET6 */
1096 tcp_mssdflt;
1097 return;
1098 }
1099 taop = rmx_taop(rt->rt_rmx);
1100 offered = taop->tao_mssopt;
1101 mss = rt->rt_rmx.rmx_mtu -
1102#ifdef INET6
1103 (isipv6 ?
1104 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
1105#endif /* INET6 */
1106 sizeof(struct tcpiphdr)
1107#ifdef INET6
1108 )
1109#endif /* INET6 */
1110 ;
1111
1112 if (offered)
1113 mss = min(mss, offered);
1114 /*
1115 * XXX - The above conditional probably violates the TCP
1116 * spec. The problem is that, since we don't know the
1117 * other end's MSS, we are supposed to use a conservative
1118 * default. But, if we do that, then MTU discovery will
1119 * never actually take place, because the conservative
1120 * default is much less than the MTUs typically seen
1121 * on the Internet today. For the moment, we'll sweep
1122 * this under the carpet.
1123 *
1124 * The conservative default might not actually be a problem
1125 * if the only case this occurs is when sending an initial
1126 * SYN with options and data to a host we've never talked
1127 * to before. Then, they will reply with an MSS value which
1128 * will get recorded and the new parameters should get
1129 * recomputed. For Further Study.
1130 */
1131 if (tp->t_maxopd <= mss)
1132 return;
1133 tp->t_maxopd = mss;
1134
1135 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
1136 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
1137 mss -= TCPOLEN_TSTAMP_APPA;
1138 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
1139 (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
1140 mss -= TCPOLEN_CC_APPA;
1141#if (MCLBYTES & (MCLBYTES - 1)) == 0
1142 if (mss > MCLBYTES)
1143 mss &= ~(MCLBYTES-1);
1144#else
1145 if (mss > MCLBYTES)
1146 mss = mss / MCLBYTES * MCLBYTES;
1147#endif
1148 if (so->so_snd.sb_hiwat < mss)
1149 mss = so->so_snd.sb_hiwat;
1150
1151 tp->t_maxseg = mss;
1152
1153 tcpstat.tcps_mturesent++;
1154 tp->t_rtttime = 0;
1155 tp->snd_nxt = tp->snd_una;
1156 tcp_output(tp);
1157 }
1158}
1159
1160/*
1161 * Look-up the routing entry to the peer of this inpcb. If no route
1162 * is found and it cannot be allocated the return NULL. This routine
1163 * is called by TCP routines that access the rmx structure and by tcp_mss
1164 * to get the interface MTU.
1165 */
1166struct rtentry *
1167tcp_rtlookup(inp)
1168 struct inpcb *inp;
1169{
1170 struct route *ro;
1171 struct rtentry *rt;
1172
1173 ro = &inp->inp_route;
1174 rt = ro->ro_rt;
1175 if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
1176 /* No route yet, so try to acquire one */
1177 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1178 ro->ro_dst.sa_family = AF_INET;
1179 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1180 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
1181 inp->inp_faddr;
1182 rtalloc(ro);
1183 rt = ro->ro_rt;
1184 }
1185 }
1186 return rt;
1187}
1188
#ifdef INET6
/*
 * Return the routing entry cached in INP's IPv6 route.
 *
 * A cached entry that is marked RTF_UP is returned directly.  Otherwise,
 * provided the foreign address is not the unspecified address, the route
 * destination is filled in from that address and rtalloc() is asked for
 * a route; whatever ends up cached afterwards is returned (possibly
 * NULL).  With an unspecified foreign address the currently cached
 * pointer is returned untouched.
 */
struct rtentry *
tcp_rtlookup6(inp)
	struct inpcb *inp;
{
	struct route_in6 *cached = &inp->in6p_route;

	/* Usable route already cached. */
	if (cached->ro_rt != NULL && (cached->ro_rt->rt_flags & RTF_UP))
		return cached->ro_rt;

	/* No peer address to route to: nothing more can be done. */
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
		return cached->ro_rt;

	/* No route yet, so try to acquire one. */
	cached->ro_dst.sin6_family = AF_INET6;
	cached->ro_dst.sin6_len = sizeof(cached->ro_dst);
	cached->ro_dst.sin6_addr = inp->in6p_faddr;
	rtalloc((struct route *)cached);

	return cached->ro_rt;
}
#endif /* INET6 */
1212
#ifdef IPSEC
/*
 * Compute the ESP/AH header size for TCP, including the outer IP header.
 *
 * A scratch mbuf is filled with the IP (or IPv6) and TCP headers from the
 * connection's header template and passed to ipsec4_hdrsiz() or
 * ipsec6_hdrsiz() for the outbound direction; the mbuf is released
 * before returning.
 *
 * Returns 0 when TP is NULL, has no template or no PCB, or when no mbuf
 * could be obtained; otherwise the size reported by the IPsec routine.
 */
size_t
ipsec_hdrsiz_tcp(tp)
	struct tcpcb *tp;
{
	struct inpcb *inp;
	struct mbuf *m;
	size_t hdrsiz;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif /* INET6 */
	struct tcphdr *th;

	if (!tp || !tp->t_template)
		return 0;
	inp = tp->t_inpcb;
	if (!inp)
		return 0;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (!m)
		return 0;

#ifdef INET6
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		/* IPv6 header immediately followed by the TCP header. */
		m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
		m->m_pkthdr.len = m->m_len;
		ip6 = mtod(m, struct ip6_hdr *);
		th = (struct tcphdr *)(ip6 + 1);
		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
		      sizeof(struct ip6_hdr));
		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
		      sizeof(struct tcphdr));
		hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
	} else
#endif /* INET6 */
	{
		/* IPv4 header immediately followed by the TCP header. */
		m->m_len = sizeof(struct tcpiphdr);
		m->m_pkthdr.len = m->m_len;
		ip = mtod(m, struct ip *);
		th = (struct tcphdr *)(ip + 1);
		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
		      sizeof(struct ip));
		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
		      sizeof(struct tcphdr));
		hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
	}

	m_free(m);
	return hdrsiz;
}
#endif /*IPSEC*/
1262
1263/*
1264 * Return a pointer to the cached information about the remote host.
1265 * The cached information is stored in the protocol specific part of
1266 * the route metrics.
1267 */
1268struct rmxp_tao *
1269tcp_gettaocache(inp)
1270 struct inpcb *inp;
1271{
1272 struct rtentry *rt;
1273
1274#ifdef INET6
1275 if ((inp->inp_vflag & INP_IPV6) != 0)
1276 rt = tcp_rtlookup6(inp);
1277 else
1278#endif /* INET6 */
1279 rt = tcp_rtlookup(inp);
1280
1281 /* Make sure this is a host route and is up. */
1282 if (rt == NULL ||
1283 (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
1284 return NULL;
1285
1286 return rmx_taop(rt->rt_rmx);
1287}
1288
/*
 * Clear all the TAO cache entries, called from tcp_init.
 *
 * XXX
 * This routine is intentionally empty: we assume that the routing
 * tables are initialized at the same time as TCP, so there is nothing
 * left over in the cache to clear.
 */
static void
tcp_cleartaocache()
{
}