/* tcp_var.h revision 37183 */
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1993, 1994, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 3314482Shsu * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 3450477Speter * $Id: tcp_var.h,v 1.44 1998/05/15 20:11:35 wollman Exp $ 351541Srgrimes */ 361541Srgrimes 372165Spaul#ifndef _NETINET_TCP_VAR_H_ 382165Spaul#define _NETINET_TCP_VAR_H_ 392165Spaul/* 40102325Smike * Kernel variables for tcp. 41102325Smike */ 4234319Sdufault 43102325Smike/* 441541Srgrimes * Tcp control block, one per tcp; fields: 4582295Sdillon * Organized for 16 byte cacheline efficiency. 
4682295Sdillon */ 4782295Sdillonstruct tcpcb { 4882295Sdillon struct tcpiphdr *seg_next; /* sequencing queue */ 4982295Sdillon struct tcpiphdr *seg_prev; 50102325Smike int t_dupacks; /* consecutive dup acks recd */ 5182295Sdillon struct tcpiphdr *t_template; /* skeletal packet for transmit */ 5282295Sdillon 531541Srgrimes int t_timer[TCPT_NTIMERS]; /* tcp timers */ 541541Srgrimes 5514482Shsu struct inpcb *t_inpcb; /* back pointer to internet pcb */ 561541Srgrimes int t_state; /* state of this connection */ 571541Srgrimes u_int t_flags; 581541Srgrimes#define TF_ACKNOW 0x0001 /* ack peer immediately */ 591541Srgrimes#define TF_DELACK 0x0002 /* ack, but try to delay it */ 601541Srgrimes#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ 611541Srgrimes#define TF_NOOPT 0x0008 /* don't use tcp options */ 621541Srgrimes#define TF_SENTFIN 0x0010 /* have sent FIN */ 631541Srgrimes#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ 649507Sdg#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ 659507Sdg#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ 669507Sdg#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ 671541Srgrimes#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ 68102325Smike#define TF_NEEDSYN 0x0400 /* send SYN (implicit state) */ 691541Srgrimes#define TF_NEEDFIN 0x0800 /* send FIN (implicit state) */ 701541Srgrimes#define TF_NOPUSH 0x1000 /* don't push */ 711541Srgrimes#define TF_REQ_CC 0x2000 /* have/will request CC */ 721541Srgrimes#define TF_RCVD_CC 0x4000 /* a CC was received in SYN */ 731541Srgrimes#define TF_SENDCCNEW 0x8000 /* send CCnew instead of CC in SYN */ 741541Srgrimes int t_force; /* 1 if forcing out a byte */ 7582295Sdillon 7682285Sdillon tcp_seq snd_una; /* send unacknowledged */ 771541Srgrimes tcp_seq snd_max; /* highest sequence number sent; 7842360Sjulian * used to recognize retransmits 7957550Sps */ 801541Srgrimes tcp_seq snd_nxt; /* send next */ 8157550Sps 
tcp_seq snd_up; /* send urgent pointer */ 82102325Smike 83102325Smike tcp_seq snd_wl1; /* window update seg seq number */ 84102325Smike tcp_seq snd_wl2; /* window update seg ack number */ 85102325Smike tcp_seq iss; /* initial send sequence number */ 86102325Smike tcp_seq irs; /* initial receive sequence number */ 87102325Smike 8857550Sps tcp_seq rcv_nxt; /* receive next */ 8957550Sps tcp_seq rcv_adv; /* advertised window */ 9057550Sps u_long rcv_wnd; /* receive window */ 91102325Smike tcp_seq rcv_up; /* receive urgent pointer */ 9257550Sps 93102325Smike u_long snd_wnd; /* send window */ 941541Srgrimes u_long snd_cwnd; /* congestion-controlled window */ 9534030Sdufault u_long snd_ssthresh; /* snd_cwnd size threshold for 9634030Sdufault * for slow start exponential to 9734030Sdufault * linear switch 9834030Sdufault */ 99102325Smike u_int t_maxopd; /* mss plus options */ 10034030Sdufault 10134030Sdufault u_int t_idle; /* inactivity time */ 10220346Salex u_long t_duration; /* connection duration */ 10320346Salex int t_rtt; /* round trip time */ 10432131Salex tcp_seq t_rtseq; /* sequence number being timed */ 10520346Salex 10620346Salex int t_rxtcur; /* current retransmit value */ 1077358Sdg u_int t_maxseg; /* maximum segment size */ 1087358Sdg int t_srtt; /* smoothed round-trip time */ 10931497Sdyson int t_rttvar; /* variance in round-trip time */ 1107358Sdg 1117358Sdg int t_rxtshift; /* log(2) of rexmt exp. 
backoff */ 1127358Sdg u_int t_rttmin; /* minimum rtt allowed */ 113102325Smike u_long t_rttupdated; /* number of times rtt sampled */ 1147358Sdg u_long max_sndwnd; /* largest window peer has offered */ 1151541Srgrimes 1161541Srgrimes int t_softerror; /* possible error not yet reported */ 1171541Srgrimes/* out-of-band data */ 1181541Srgrimes char t_oobflags; /* have some */ 1191541Srgrimes char t_iobc; /* input character */ 1201541Srgrimes#define TCPOOB_HAVEDATA 0x01 1211541Srgrimes#define TCPOOB_HADDATA 0x02 12215873Sdyson/* RFC 1323 variables */ 12354467Sdillon u_char snd_scale; /* window scaling for send window */ 12454467Sdillon u_char rcv_scale; /* window scaling for recv window */ 12557550Sps u_char request_r_scale; /* pending window scaling */ 12657550Sps u_char requested_s_scale; 127112881Swes u_long ts_recent; /* timestamp echo data */ 1281541Srgrimes 12915819Sdyson u_long ts_recent_age; /* when last updated */ 13015819Sdyson tcp_seq last_ack_sent; 13115819Sdyson/* RFC 1644 variables */ 13215819Sdyson tcp_cc cc_send; /* send connection count */ 13315819Sdyson tcp_cc cc_recv; /* receive connection count */ 13415819Sdyson}; 13515819Sdyson 13615819Sdyson/* 137102325Smike * Structure to hold TCP options that are only used during segment 13815819Sdyson * processing (in tcp_input), but not held in the tcpcb. 139102325Smike * It's basically used to reduce the number of parameters 140118684Sbms * to tcp_dooptions. 
141102325Smike */ 142102325Smikestruct tcpopt { 143118684Sbms u_long to_flag; /* which options are present */ 144118684Sbms#define TOF_TS 0x0001 /* timestamp */ 145118684Sbms#define TOF_CC 0x0002 /* CC and CCnew are exclusive */ 146118684Sbms#define TOF_CCNEW 0x0004 147118684Sbms#define TOF_CCECHO 0x0008 148118684Sbms u_long to_tsval; 149118684Sbms u_long to_tsecr; 150102325Smike tcp_cc to_cc; /* holds CC or CCnew */ 151102325Smike tcp_cc to_ccecho; 152102325Smike}; 153102325Smike 154102325Smike/* 155102325Smike * The TAO cache entry which is stored in the protocol family specific 156102325Smike * portion of the route metrics. 157102325Smike */ 158102325Smikestruct rmxp_tao { 159102325Smike tcp_cc tao_cc; /* latest CC in valid SYN */ 160102325Smike tcp_cc tao_ccsent; /* latest CC sent to peer */ 161102325Smike u_short tao_mssopt; /* peer's cached MSS */ 162102325Smike#ifdef notyet 163102325Smike u_short tao_flags; /* cache status flags */ 164102325Smike#define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */ 165102325Smike#define TAOF_OK 0x0002 /* peer does understand rfc1644 */ 16655205Speter#define TAOF_UNDEF 0 /* we don't know yet */ 1671541Srgrimes#endif /* notyet */ 1681541Srgrimes}; 169102325Smike#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler) 170118684Sbms 171103304Smike#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) 172102325Smike#define sototcpcb(so) (intotcpcb(sotoinpcb(so))) 173102325Smike 174102325Smike/* 175102325Smike * The smoothed round-trip time and estimated variance 176102325Smike * are stored as fixed point numbers scaled by the values below. 177102325Smike * For convenience, these scales are also used in smoothing the average 178102325Smike * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). 17924896Sbde * With these scales, srtt has 3 bits to the right of the binary point, 18024896Sbde * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the 18192719Salfred * binary point, and is smoothed with an ALPHA of 0.75. 18224896Sbde */ 18392719Salfred#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ 18492719Salfred#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */ 18592719Salfred#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ 18692719Salfred#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ 187118684Sbms#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ 188118684Sbms 189118684Sbms/* 190102325Smike * The initial retransmission should happen at rtt + 4 * rttvar. 191118771Sbms * Because of the way we do the smoothing, srtt and rttvar 192118771Sbms * will each average +1/2 tick of bias. When we compute 193102325Smike * the retransmit timer, we want 1/2 tick of rounding and 194102325Smike * 1 extra tick because of +-1/2 tick uncertainty in the 19534030Sdufault * firing of the timer. The bias will give us exactly the 1961541Srgrimes * 1.5 tick we need. But, because the bias is 1971541Srgrimes * statistical, we have to test that we don't drop below 19855205Speter * the minimum feasible timer (which is 2 ticks). 1992165Spaul * This version of the macro adapted from a paper by Lawrence 200102325Smike * Brakmo and Larry Peterson which outlines a problem caused 201 * by insufficient precision in the original implementation, 202 * which results in inappropriately large RTO values for very 203 * fast networks. 204 */ 205#define TCP_REXMTVAL(tp) \ 206 max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ 207 + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) 208 209/* XXX 210 * We want to avoid doing m_pullup on incoming packets but that 211 * means avoiding dtom on the tcp reassembly code. That in turn means 212 * keeping an mbuf pointer in the reassembly queue (since we might 213 * have a cluster). 
As a quick hack, the source & destination 214 * port numbers (which are no longer needed once we've located the 215 * tcpcb) are overlayed with an mbuf pointer. 216 */ 217#define REASS_MBUF(ti) (*(struct mbuf **)&((ti)->ti_t)) 218 219/* 220 * TCP statistics. 221 * Many of these should be kept per connection, 222 * but that's inconvenient at the moment. 223 */ 224struct tcpstat { 225 u_long tcps_connattempt; /* connections initiated */ 226 u_long tcps_accepts; /* connections accepted */ 227 u_long tcps_connects; /* connections established */ 228 u_long tcps_drops; /* connections dropped */ 229 u_long tcps_conndrops; /* embryonic connections dropped */ 230 u_long tcps_closed; /* conn. closed (includes drops) */ 231 u_long tcps_segstimed; /* segs where we tried to get rtt */ 232 u_long tcps_rttupdated; /* times we succeeded */ 233 u_long tcps_delack; /* delayed acks sent */ 234 u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ 235 u_long tcps_rexmttimeo; /* retransmit timeouts */ 236 u_long tcps_persisttimeo; /* persist timeouts */ 237 u_long tcps_keeptimeo; /* keepalive timeouts */ 238 u_long tcps_keepprobe; /* keepalive probes sent */ 239 u_long tcps_keepdrops; /* connections dropped in keepalive */ 240 241 u_long tcps_sndtotal; /* total packets sent */ 242 u_long tcps_sndpack; /* data packets sent */ 243 u_long tcps_sndbyte; /* data bytes sent */ 244 u_long tcps_sndrexmitpack; /* data packets retransmitted */ 245 u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ 246 u_long tcps_sndacks; /* ack-only packets sent */ 247 u_long tcps_sndprobe; /* window probes sent */ 248 u_long tcps_sndurg; /* packets sent with URG only */ 249 u_long tcps_sndwinup; /* window update-only packets sent */ 250 u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ 251 252 u_long tcps_rcvtotal; /* total packets received */ 253 u_long tcps_rcvpack; /* packets received in sequence */ 254 u_long tcps_rcvbyte; /* bytes received in sequence */ 255 u_long 
tcps_rcvbadsum; /* packets received with ccksum errs */ 256 u_long tcps_rcvbadoff; /* packets received with bad offset */ 257 u_long tcps_rcvshort; /* packets received too short */ 258 u_long tcps_rcvduppack; /* duplicate-only packets received */ 259 u_long tcps_rcvdupbyte; /* duplicate-only bytes received */ 260 u_long tcps_rcvpartduppack; /* packets with some duplicate data */ 261 u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ 262 u_long tcps_rcvoopack; /* out-of-order packets received */ 263 u_long tcps_rcvoobyte; /* out-of-order bytes received */ 264 u_long tcps_rcvpackafterwin; /* packets with data after window */ 265 u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */ 266 u_long tcps_rcvafterclose; /* packets rcvd after "close" */ 267 u_long tcps_rcvwinprobe; /* rcvd window probe packets */ 268 u_long tcps_rcvdupack; /* rcvd duplicate acks */ 269 u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */ 270 u_long tcps_rcvackpack; /* rcvd ack packets */ 271 u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */ 272 u_long tcps_rcvwinupd; /* rcvd window update packets */ 273 u_long tcps_pawsdrop; /* segments dropped due to PAWS */ 274 u_long tcps_predack; /* times hdr predict ok for acks */ 275 u_long tcps_preddat; /* times hdr predict ok for data pkts */ 276 u_long tcps_pcbcachemiss; 277 u_long tcps_cachedrtt; /* times cached RTT in route updated */ 278 u_long tcps_cachedrttvar; /* times cached rttvar updated */ 279 u_long tcps_cachedssthresh; /* times cached ssthresh updated */ 280 u_long tcps_usedrtt; /* times RTT initialized from route */ 281 u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */ 282 u_long tcps_usedssthresh; /* times ssthresh initialized from rt*/ 283 u_long tcps_persistdrop; /* timeout in persist state */ 284 u_long tcps_badsyn; /* bogus SYN, e.g. 
premature ACK */ 285 u_long tcps_mturesent; /* resends due to MTU discovery */ 286 u_long tcps_listendrop; /* listen queue overflows */ 287}; 288 289/* 290 * TCB structure exported to user-land via sysctl(3). 291 * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been 292 * included. Not all of our clients do. 293 */ 294#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) 295struct xtcpcb { 296 size_t xt_len; 297 struct inpcb xt_inp; 298 struct tcpcb xt_tp; 299 struct xsocket xt_socket; 300 u_quad_t xt_alignment_hack; 301}; 302#endif 303 304/* 305 * Names for TCP sysctl objects 306 */ 307#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ 308#define TCPCTL_DO_RFC1644 2 /* use RFC-1644 extensions */ 309#define TCPCTL_MSSDFLT 3 /* MSS default */ 310#define TCPCTL_STATS 4 /* statistics (read-only) */ 311#define TCPCTL_RTTDFLT 5 /* default RTT estimate */ 312#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */ 313#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */ 314#define TCPCTL_SENDSPACE 8 /* send buffer space */ 315#define TCPCTL_RECVSPACE 9 /* receive buffer space */ 316#define TCPCTL_KEEPINIT 10 /* receive buffer space */ 317#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ 318#define TCPCTL_MAXID 12 319 320#define TCPCTL_NAMES { \ 321 { 0, 0 }, \ 322 { "rfc1323", CTLTYPE_INT }, \ 323 { "rfc1644", CTLTYPE_INT }, \ 324 { "mssdflt", CTLTYPE_INT }, \ 325 { "stats", CTLTYPE_STRUCT }, \ 326 { "rttdflt", CTLTYPE_INT }, \ 327 { "keepidle", CTLTYPE_INT }, \ 328 { "keepintvl", CTLTYPE_INT }, \ 329 { "sendspace", CTLTYPE_INT }, \ 330 { "recvspace", CTLTYPE_INT }, \ 331 { "keepinit", CTLTYPE_INT }, \ 332 { "pcblist", CTLTYPE_STRUCT }, \ 333} 334 335#ifdef KERNEL 336extern struct inpcbhead tcb; /* head of queue of active tcpcb's */ 337extern struct inpcbinfo tcbinfo; 338extern struct tcpstat tcpstat; /* tcp statistics */ 339extern int tcp_mssdflt; /* XXX */ 340extern u_long tcp_now; /* for RFC 1323 timestamps */ 341extern int 
tcp_delack_enabled; 342 343void tcp_canceltimers __P((struct tcpcb *)); 344struct tcpcb * 345 tcp_close __P((struct tcpcb *)); 346void tcp_ctlinput __P((int, struct sockaddr *, void *)); 347int tcp_ctloutput __P((int, struct socket *, int, int, struct mbuf **, 348 struct proc *)); 349struct tcpcb * 350 tcp_drop __P((struct tcpcb *, int)); 351void tcp_drain __P((void)); 352void tcp_fasttimo __P((void)); 353struct rmxp_tao * 354 tcp_gettaocache __P((struct inpcb *)); 355void tcp_init __P((void)); 356void tcp_input __P((struct mbuf *, int)); 357void tcp_mss __P((struct tcpcb *, int)); 358int tcp_mssopt __P((struct tcpcb *)); 359void tcp_mtudisc __P((struct inpcb *, int)); 360struct tcpcb * 361 tcp_newtcpcb __P((struct inpcb *)); 362int tcp_output __P((struct tcpcb *)); 363void tcp_quench __P((struct inpcb *, int)); 364void tcp_respond __P((struct tcpcb *, 365 struct tcpiphdr *, struct mbuf *, u_long, u_long, int)); 366struct rtentry * 367 tcp_rtlookup __P((struct inpcb *)); 368void tcp_setpersist __P((struct tcpcb *)); 369void tcp_slowtimo __P((void)); 370struct tcpiphdr * 371 tcp_template __P((struct tcpcb *)); 372struct tcpcb * 373 tcp_timers __P((struct tcpcb *, int)); 374void tcp_trace __P((int, int, struct tcpcb *, struct tcpiphdr *, int)); 375 376extern struct pr_usrreqs tcp_usrreqs; 377extern u_long tcp_sendspace; 378extern u_long tcp_recvspace; 379 380#endif /* KERNEL */ 381 382#endif /* _NETINET_TCP_VAR_H_ */ 383