tcp_hostcache.c revision 186222
1139823Simp/*- 2122922Sandre * Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG 3122922Sandre * All rights reserved. 4122922Sandre * 5122922Sandre * Redistribution and use in source and binary forms, with or without 6122922Sandre * modification, are permitted provided that the following conditions 7122922Sandre * are met: 8122922Sandre * 1. Redistributions of source code must retain the above copyright 9122922Sandre * notice, this list of conditions and the following disclaimer. 10122922Sandre * 2. Redistributions in binary form must reproduce the above copyright 11122922Sandre * notice, this list of conditions and the following disclaimer in the 12122922Sandre * documentation and/or other materials provided with the distribution. 13122922Sandre * 3. The name of the author may not be used to endorse or promote 14122922Sandre * products derived from this software without specific prior written 15122922Sandre * permission. 16122922Sandre * 17122922Sandre * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18122922Sandre * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19122922Sandre * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20122922Sandre * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21122922Sandre * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22122922Sandre * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23122922Sandre * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24122922Sandre * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25122922Sandre * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26122922Sandre * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27122922Sandre * SUCH DAMAGE. 
 */

/*
 * The tcp_hostcache moves the tcp-specific cached metrics from the routing
 * table to a dedicated structure indexed by the remote IP address.  It keeps
 * information on the measured TCP parameters of past TCP sessions to allow
 * better initial start values to be used with later connections to/from the
 * same source.  Depending on the network parameters (delay, bandwidth, max
 * MTU, congestion window) between local and remote sites, this can lead to
 * significant speed-ups for new TCP connections after the first one.
 *
 * Due to the tcp_hostcache, all TCP-specific metrics information in the
 * routing table has been removed.  The inpcb no longer keeps a pointer to
 * the routing entry, and protocol-initiated route cloning has been removed
 * as well.  With these changes, the routing table has gone back to being
 * more lightweight and only carries information related to packet forwarding.
 *
 * tcp_hostcache is designed for multiple concurrent access in SMP
 * environments and high contention.  All bucket rows have their own lock and
 * thus multiple lookups and modifies can be done at the same time as long as
 * they are in different bucket rows.  If a request for insertion of a new
 * record can't be satisfied, it simply returns an empty structure.  Nobody
 * and nothing outside of tcp_hostcache.c will ever point directly to any
 * entry in the tcp_hostcache.  All communication is done in an
 * object-oriented way and only functions of tcp_hostcache will manipulate
 * hostcache entries.
Otherwise, we are unable to achieve good behaviour in 54170030Srwatson * concurrent access situations. Since tcp_hostcache is only caching 55170030Srwatson * information, there are no fatal consequences if we either can't satisfy 56170030Srwatson * any particular request or have to drop/overwrite an existing entry because 57170030Srwatson * of bucket limit memory constrains. 58122922Sandre */ 59122922Sandre 60122922Sandre/* 61122922Sandre * Many thanks to jlemon for basic structure of tcp_syncache which is being 62122922Sandre * followed here. 63122922Sandre */ 64122922Sandre 65172467Ssilby#include <sys/cdefs.h> 66172467Ssilby__FBSDID("$FreeBSD: head/sys/netinet/tcp_hostcache.c 186222 2008-12-17 12:52:34Z bz $"); 67172467Ssilby 68122922Sandre#include "opt_inet6.h" 69122922Sandre 70122922Sandre#include <sys/param.h> 71122922Sandre#include <sys/systm.h> 72122922Sandre#include <sys/kernel.h> 73122922Sandre#include <sys/lock.h> 74122922Sandre#include <sys/mutex.h> 75122922Sandre#include <sys/malloc.h> 76122922Sandre#include <sys/socket.h> 77122922Sandre#include <sys/socketvar.h> 78122922Sandre#include <sys/sysctl.h> 79181803Sbz#include <sys/vimage.h> 80122922Sandre 81122922Sandre#include <net/if.h> 82122922Sandre 83122922Sandre#include <netinet/in.h> 84122922Sandre#include <netinet/in_systm.h> 85122922Sandre#include <netinet/ip.h> 86122922Sandre#include <netinet/in_var.h> 87122922Sandre#include <netinet/in_pcb.h> 88122922Sandre#include <netinet/ip_var.h> 89122922Sandre#ifdef INET6 90122922Sandre#include <netinet/ip6.h> 91122922Sandre#include <netinet6/ip6_var.h> 92122922Sandre#endif 93122922Sandre#include <netinet/tcp.h> 94122922Sandre#include <netinet/tcp_var.h> 95185571Sbz#include <netinet/tcp_hostcache.h> 96185571Sbz#include <netinet/vinet.h> 97122922Sandre#ifdef INET6 98122922Sandre#include <netinet6/tcp6_var.h> 99122922Sandre#endif 100122922Sandre 101122922Sandre#include <vm/uma.h> 102122922Sandre 103122922Sandre/* Arbitrary values */ 104122922Sandre#define 
TCP_HOSTCACHE_HASHSIZE 512 105122922Sandre#define TCP_HOSTCACHE_BUCKETLIMIT 30 106122922Sandre#define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */ 107122922Sandre#define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */ 108122922Sandre 109185088Szec#ifdef VIMAGE_GLOBALS 110122922Sandrestatic struct tcp_hostcache tcp_hostcache; 111122922Sandrestatic struct callout tcp_hc_callout; 112185088Szec#endif 113122922Sandre 114122922Sandrestatic struct hc_metrics *tcp_hc_lookup(struct in_conninfo *); 115122922Sandrestatic struct hc_metrics *tcp_hc_insert(struct in_conninfo *); 116122922Sandrestatic int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS); 117122922Sandrestatic void tcp_hc_purge(void *); 118122922Sandre 119182633SbrooksSYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, 120182633Sbrooks "TCP Host cache"); 121122922Sandre 122183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, cachelimit, 123183550Szec CTLFLAG_RDTUN, tcp_hostcache.cache_limit, 0, 124183550Szec "Overall entry limit for hostcache"); 125122922Sandre 126183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, hashsize, 127183550Szec CTLFLAG_RDTUN, tcp_hostcache.hashsize, 0, 128183550Szec "Size of TCP hostcache hashtable"); 129122922Sandre 130183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, bucketlimit, 131183550Szec CTLFLAG_RDTUN, tcp_hostcache.bucket_limit, 0, 132183550Szec "Per-bucket hash limit for hostcache"); 133122922Sandre 134183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, count, 135183550Szec CTLFLAG_RD, tcp_hostcache.cache_count, 0, 136183550Szec "Current number of entries in hostcache"); 137122922Sandre 138183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, expire, 139183550Szec CTLFLAG_RW, tcp_hostcache.expire, 0, 140183550Szec "Expire time of TCP hostcache entries"); 141122922Sandre 142183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, prune, 
143183550Szec CTLFLAG_RW, tcp_hostcache.prune, 0, "Time between purge runs"); 144170434Syar 145183550SzecSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, purge, 146183550Szec CTLFLAG_RW, tcp_hostcache.purgeall, 0, 147183550Szec "Expire all entires on next purge run"); 148122922Sandre 149122922SandreSYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list, 150167784Sandre CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0, 151167784Sandre sysctl_tcp_hc_list, "A", "List of all hostcache entries"); 152122922Sandre 153122922Sandre 154122922Sandrestatic MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache"); 155122922Sandre 156122922Sandre#define HOSTCACHE_HASH(ip) \ 157133874Srwatson (((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) & \ 158181803Sbz V_tcp_hostcache.hashmask) 159122922Sandre 160122922Sandre/* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */ 161133874Srwatson#define HOSTCACHE_HASH6(ip6) \ 162122922Sandre (((ip6)->s6_addr32[0] ^ \ 163122922Sandre (ip6)->s6_addr32[1] ^ \ 164122922Sandre (ip6)->s6_addr32[2] ^ \ 165122922Sandre (ip6)->s6_addr32[3]) & \ 166181803Sbz V_tcp_hostcache.hashmask) 167122922Sandre 168122922Sandre#define THC_LOCK(lp) mtx_lock(lp) 169122922Sandre#define THC_UNLOCK(lp) mtx_unlock(lp) 170122922Sandre 171122922Sandrevoid 172122922Sandretcp_hc_init(void) 173122922Sandre{ 174183550Szec INIT_VNET_INET(curvnet); 175122922Sandre int i; 176122922Sandre 177122922Sandre /* 178170030Srwatson * Initialize hostcache structures. 
179122922Sandre */ 180181803Sbz V_tcp_hostcache.cache_count = 0; 181181803Sbz V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; 182181803Sbz V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT; 183181803Sbz V_tcp_hostcache.cache_limit = 184181803Sbz V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit; 185181803Sbz V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE; 186181803Sbz V_tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE; 187122922Sandre 188133874Srwatson TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize", 189181803Sbz &V_tcp_hostcache.hashsize); 190133874Srwatson TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit", 191181803Sbz &V_tcp_hostcache.cache_limit); 192133874Srwatson TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit", 193181803Sbz &V_tcp_hostcache.bucket_limit); 194181803Sbz if (!powerof2(V_tcp_hostcache.hashsize)) { 195133874Srwatson printf("WARNING: hostcache hash size is not a power of 2.\n"); 196181803Sbz V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */ 197133874Srwatson } 198181803Sbz V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1; 199122922Sandre 200122922Sandre /* 201170030Srwatson * Allocate the hash table. 202122922Sandre */ 203181803Sbz V_tcp_hostcache.hashbase = (struct hc_head *) 204181803Sbz malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head), 205122922Sandre M_HOSTCACHE, M_WAITOK | M_ZERO); 206122922Sandre 207122922Sandre /* 208170030Srwatson * Initialize the hash buckets. 209122922Sandre */ 210181803Sbz for (i = 0; i < V_tcp_hostcache.hashsize; i++) { 211181803Sbz TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket); 212181803Sbz V_tcp_hostcache.hashbase[i].hch_length = 0; 213181803Sbz mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry", 214122922Sandre NULL, MTX_DEF); 215122922Sandre } 216122922Sandre 217122922Sandre /* 218122922Sandre * Allocate the hostcache entries. 
219122922Sandre */ 220181887Sjulian V_tcp_hostcache.zone = 221181888Sjulian uma_zcreate("hostcache", sizeof(struct hc_metrics), 222181888Sjulian NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 223181803Sbz uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit); 224122922Sandre 225122922Sandre /* 226122922Sandre * Set up periodic cache cleanup. 227122922Sandre */ 228181803Sbz callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE); 229181887Sjulian callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz, 230181887Sjulian tcp_hc_purge, 0); 231122922Sandre} 232122922Sandre 233122922Sandre/* 234170030Srwatson * Internal function: look up an entry in the hostcache or return NULL. 235122922Sandre * 236122922Sandre * If an entry has been returned, the caller becomes responsible for 237122922Sandre * unlocking the bucket row after he is done reading/modifying the entry. 238122922Sandre */ 239122922Sandrestatic struct hc_metrics * 240122922Sandretcp_hc_lookup(struct in_conninfo *inc) 241122922Sandre{ 242183550Szec INIT_VNET_INET(curvnet); 243122922Sandre int hash; 244122922Sandre struct hc_head *hc_head; 245122922Sandre struct hc_metrics *hc_entry; 246122922Sandre 247122922Sandre KASSERT(inc != NULL, ("tcp_hc_lookup with NULL in_conninfo pointer")); 248122922Sandre 249122922Sandre /* 250122922Sandre * Hash the foreign ip address. 251122922Sandre */ 252186222Sbz if (inc->inc_flags & INC_ISIPV6) 253122922Sandre hash = HOSTCACHE_HASH6(&inc->inc6_faddr); 254122922Sandre else 255122922Sandre hash = HOSTCACHE_HASH(&inc->inc_faddr); 256122922Sandre 257181803Sbz hc_head = &V_tcp_hostcache.hashbase[hash]; 258122922Sandre 259122922Sandre /* 260170030Srwatson * Acquire lock for this bucket row; we release the lock if we don't 261170030Srwatson * find an entry, otherwise the caller has to unlock after he is 262170030Srwatson * done. 
263122922Sandre */ 264122922Sandre THC_LOCK(&hc_head->hch_mtx); 265122922Sandre 266122922Sandre /* 267170030Srwatson * Iterate through entries in bucket row looking for a match. 268122922Sandre */ 269122922Sandre TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) { 270186222Sbz if (inc->inc_flags & INC_ISIPV6) { 271122922Sandre if (memcmp(&inc->inc6_faddr, &hc_entry->ip6, 272122922Sandre sizeof(inc->inc6_faddr)) == 0) 273122922Sandre return hc_entry; 274122922Sandre } else { 275122922Sandre if (memcmp(&inc->inc_faddr, &hc_entry->ip4, 276122922Sandre sizeof(inc->inc_faddr)) == 0) 277122922Sandre return hc_entry; 278122922Sandre } 279122922Sandre } 280122922Sandre 281122922Sandre /* 282170030Srwatson * We were unsuccessful and didn't find anything. 283122922Sandre */ 284122922Sandre THC_UNLOCK(&hc_head->hch_mtx); 285122922Sandre return NULL; 286122922Sandre} 287122922Sandre 288122922Sandre/* 289170030Srwatson * Internal function: insert an entry into the hostcache or return NULL if 290170030Srwatson * unable to allocate a new one. 291133874Srwatson * 292122922Sandre * If an entry has been returned, the caller becomes responsible for 293122922Sandre * unlocking the bucket row after he is done reading/modifying the entry. 294122922Sandre */ 295122922Sandrestatic struct hc_metrics * 296122922Sandretcp_hc_insert(struct in_conninfo *inc) 297122922Sandre{ 298183550Szec INIT_VNET_INET(curvnet); 299122922Sandre int hash; 300122922Sandre struct hc_head *hc_head; 301122922Sandre struct hc_metrics *hc_entry; 302122922Sandre 303122922Sandre KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer")); 304122922Sandre 305122922Sandre /* 306170030Srwatson * Hash the foreign ip address. 
307122922Sandre */ 308186222Sbz if (inc->inc_flags & INC_ISIPV6) 309122922Sandre hash = HOSTCACHE_HASH6(&inc->inc6_faddr); 310122922Sandre else 311122922Sandre hash = HOSTCACHE_HASH(&inc->inc_faddr); 312122922Sandre 313181803Sbz hc_head = &V_tcp_hostcache.hashbase[hash]; 314122922Sandre 315122922Sandre /* 316170030Srwatson * Acquire lock for this bucket row; we release the lock if we don't 317170030Srwatson * find an entry, otherwise the caller has to unlock after he is 318170030Srwatson * done. 319122922Sandre */ 320122922Sandre THC_LOCK(&hc_head->hch_mtx); 321122922Sandre 322122922Sandre /* 323170030Srwatson * If the bucket limit is reached, reuse the least-used element. 324122922Sandre */ 325181803Sbz if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit || 326181803Sbz V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) { 327122922Sandre hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead); 328122922Sandre /* 329122922Sandre * At first we were dropping the last element, just to 330170030Srwatson * reacquire it in the next two lines again, which isn't very 331170030Srwatson * efficient. Instead just reuse the least used element. 332170030Srwatson * We may drop something that is still "in-use" but we can be 333170030Srwatson * "lossy". 334170405Sandre * Just give up if this bucket row is empty and we don't have 335170405Sandre * anything to replace. 336122922Sandre */ 337170405Sandre if (hc_entry == NULL) { 338170405Sandre THC_UNLOCK(&hc_head->hch_mtx); 339170405Sandre return NULL; 340170405Sandre } 341122922Sandre TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q); 342181803Sbz V_tcp_hostcache.hashbase[hash].hch_length--; 343181803Sbz V_tcp_hostcache.cache_count--; 344181803Sbz V_tcpstat.tcps_hc_bucketoverflow++; 345123028Sandre#if 0 346181803Sbz uma_zfree(V_tcp_hostcache.zone, hc_entry); 347122922Sandre#endif 348122922Sandre } else { 349122922Sandre /* 350170030Srwatson * Allocate a new entry, or balk if not possible. 
351122922Sandre */ 352181803Sbz hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT); 353122922Sandre if (hc_entry == NULL) { 354122922Sandre THC_UNLOCK(&hc_head->hch_mtx); 355122922Sandre return NULL; 356122922Sandre } 357122922Sandre } 358122922Sandre 359122922Sandre /* 360170030Srwatson * Initialize basic information of hostcache entry. 361122922Sandre */ 362122922Sandre bzero(hc_entry, sizeof(*hc_entry)); 363186222Sbz if (inc->inc_flags & INC_ISIPV6) 364123113Sandre bcopy(&inc->inc6_faddr, &hc_entry->ip6, sizeof(hc_entry->ip6)); 365122922Sandre else 366122922Sandre hc_entry->ip4 = inc->inc_faddr; 367122922Sandre hc_entry->rmx_head = hc_head; 368181803Sbz hc_entry->rmx_expire = V_tcp_hostcache.expire; 369122922Sandre 370122922Sandre /* 371170030Srwatson * Put it upfront. 372122922Sandre */ 373122922Sandre TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q); 374181803Sbz V_tcp_hostcache.hashbase[hash].hch_length++; 375181803Sbz V_tcp_hostcache.cache_count++; 376181803Sbz V_tcpstat.tcps_hc_added++; 377122922Sandre 378122922Sandre return hc_entry; 379122922Sandre} 380122922Sandre 381122922Sandre/* 382170030Srwatson * External function: look up an entry in the hostcache and fill out the 383170030Srwatson * supplied TCP metrics structure. Fills in NULL when no entry was found or 384170030Srwatson * a value is not set. 385122922Sandre */ 386122922Sandrevoid 387122922Sandretcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite) 388122922Sandre{ 389183550Szec INIT_VNET_INET(curvnet); 390122922Sandre struct hc_metrics *hc_entry; 391122922Sandre 392122922Sandre /* 393170030Srwatson * Find the right bucket. 394122922Sandre */ 395122922Sandre hc_entry = tcp_hc_lookup(inc); 396122922Sandre 397122922Sandre /* 398170030Srwatson * If we don't have an existing object. 
399122922Sandre */ 400122922Sandre if (hc_entry == NULL) { 401122922Sandre bzero(hc_metrics_lite, sizeof(*hc_metrics_lite)); 402122922Sandre return; 403122922Sandre } 404122922Sandre hc_entry->rmx_hits++; 405181803Sbz hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ 406122922Sandre 407122922Sandre hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu; 408122922Sandre hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh; 409122922Sandre hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt; 410122922Sandre hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar; 411122922Sandre hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth; 412122922Sandre hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd; 413122922Sandre hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe; 414122922Sandre hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe; 415122922Sandre 416122922Sandre /* 417170030Srwatson * Unlock bucket row. 418122922Sandre */ 419122922Sandre THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); 420122922Sandre} 421122922Sandre 422122922Sandre/* 423170030Srwatson * External function: look up an entry in the hostcache and return the 424170030Srwatson * discovered path MTU. Returns NULL if no entry is found or value is not 425138409Srwatson * set. 
426122922Sandre */ 427122922Sandreu_long 428122922Sandretcp_hc_getmtu(struct in_conninfo *inc) 429122922Sandre{ 430183550Szec INIT_VNET_INET(curvnet); 431122922Sandre struct hc_metrics *hc_entry; 432122922Sandre u_long mtu; 433122922Sandre 434122922Sandre hc_entry = tcp_hc_lookup(inc); 435122922Sandre if (hc_entry == NULL) { 436122922Sandre return 0; 437122922Sandre } 438122922Sandre hc_entry->rmx_hits++; 439181803Sbz hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ 440122922Sandre 441122922Sandre mtu = hc_entry->rmx_mtu; 442122922Sandre THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); 443122922Sandre return mtu; 444122922Sandre} 445122922Sandre 446122922Sandre/* 447170030Srwatson * External function: update the MTU value of an entry in the hostcache. 448122922Sandre * Creates a new entry if none was found. 449122922Sandre */ 450122922Sandrevoid 451122922Sandretcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu) 452122922Sandre{ 453183550Szec INIT_VNET_INET(curvnet); 454122922Sandre struct hc_metrics *hc_entry; 455122922Sandre 456122922Sandre /* 457170030Srwatson * Find the right bucket. 458122922Sandre */ 459122922Sandre hc_entry = tcp_hc_lookup(inc); 460122922Sandre 461122922Sandre /* 462170030Srwatson * If we don't have an existing object, try to insert a new one. 463122922Sandre */ 464122922Sandre if (hc_entry == NULL) { 465122922Sandre hc_entry = tcp_hc_insert(inc); 466122922Sandre if (hc_entry == NULL) 467122922Sandre return; 468122922Sandre } 469122922Sandre hc_entry->rmx_updates++; 470181803Sbz hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ 471122922Sandre 472122922Sandre hc_entry->rmx_mtu = mtu; 473122922Sandre 474122922Sandre /* 475170030Srwatson * Put it upfront so we find it faster next time. 
476122922Sandre */ 477122922Sandre TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); 478122922Sandre TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); 479122922Sandre 480122922Sandre /* 481170030Srwatson * Unlock bucket row. 482122922Sandre */ 483122922Sandre THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); 484122922Sandre} 485122922Sandre 486122922Sandre/* 487170030Srwatson * External function: update the TCP metrics of an entry in the hostcache. 488122922Sandre * Creates a new entry if none was found. 489122922Sandre */ 490122922Sandrevoid 491122922Sandretcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) 492122922Sandre{ 493183550Szec INIT_VNET_INET(curvnet); 494122922Sandre struct hc_metrics *hc_entry; 495122922Sandre 496122922Sandre hc_entry = tcp_hc_lookup(inc); 497122922Sandre if (hc_entry == NULL) { 498122922Sandre hc_entry = tcp_hc_insert(inc); 499122922Sandre if (hc_entry == NULL) 500122922Sandre return; 501122922Sandre } 502122922Sandre hc_entry->rmx_updates++; 503181803Sbz hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ 504122922Sandre 505122922Sandre if (hcml->rmx_rtt != 0) { 506122922Sandre if (hc_entry->rmx_rtt == 0) 507122922Sandre hc_entry->rmx_rtt = hcml->rmx_rtt; 508122922Sandre else 509122922Sandre hc_entry->rmx_rtt = 510122922Sandre (hc_entry->rmx_rtt + hcml->rmx_rtt) / 2; 511181803Sbz V_tcpstat.tcps_cachedrtt++; 512122922Sandre } 513122922Sandre if (hcml->rmx_rttvar != 0) { 514122922Sandre if (hc_entry->rmx_rttvar == 0) 515133874Srwatson hc_entry->rmx_rttvar = hcml->rmx_rttvar; 516122922Sandre else 517122922Sandre hc_entry->rmx_rttvar = 518122922Sandre (hc_entry->rmx_rttvar + hcml->rmx_rttvar) / 2; 519181803Sbz V_tcpstat.tcps_cachedrttvar++; 520122922Sandre } 521122922Sandre if (hcml->rmx_ssthresh != 0) { 522122922Sandre if (hc_entry->rmx_ssthresh == 0) 523122922Sandre hc_entry->rmx_ssthresh = hcml->rmx_ssthresh; 524122922Sandre else 525122922Sandre hc_entry->rmx_ssthresh = 
526122922Sandre (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2; 527181803Sbz V_tcpstat.tcps_cachedssthresh++; 528122922Sandre } 529122922Sandre if (hcml->rmx_bandwidth != 0) { 530122922Sandre if (hc_entry->rmx_bandwidth == 0) 531122922Sandre hc_entry->rmx_bandwidth = hcml->rmx_bandwidth; 532122922Sandre else 533122922Sandre hc_entry->rmx_bandwidth = 534122922Sandre (hc_entry->rmx_bandwidth + hcml->rmx_bandwidth) / 2; 535181803Sbz /* V_tcpstat.tcps_cachedbandwidth++; */ 536122922Sandre } 537122922Sandre if (hcml->rmx_cwnd != 0) { 538122922Sandre if (hc_entry->rmx_cwnd == 0) 539122922Sandre hc_entry->rmx_cwnd = hcml->rmx_cwnd; 540122922Sandre else 541122922Sandre hc_entry->rmx_cwnd = 542122922Sandre (hc_entry->rmx_cwnd + hcml->rmx_cwnd) / 2; 543181803Sbz /* V_tcpstat.tcps_cachedcwnd++; */ 544122922Sandre } 545122922Sandre if (hcml->rmx_sendpipe != 0) { 546122922Sandre if (hc_entry->rmx_sendpipe == 0) 547122922Sandre hc_entry->rmx_sendpipe = hcml->rmx_sendpipe; 548122922Sandre else 549122922Sandre hc_entry->rmx_sendpipe = 550122922Sandre (hc_entry->rmx_sendpipe + hcml->rmx_sendpipe) /2; 551181803Sbz /* V_tcpstat.tcps_cachedsendpipe++; */ 552133874Srwatson } 553122922Sandre if (hcml->rmx_recvpipe != 0) { 554122922Sandre if (hc_entry->rmx_recvpipe == 0) 555122922Sandre hc_entry->rmx_recvpipe = hcml->rmx_recvpipe; 556122922Sandre else 557122922Sandre hc_entry->rmx_recvpipe = 558122922Sandre (hc_entry->rmx_recvpipe + hcml->rmx_recvpipe) /2; 559181803Sbz /* V_tcpstat.tcps_cachedrecvpipe++; */ 560122922Sandre } 561122922Sandre 562122922Sandre TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); 563122922Sandre TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); 564122922Sandre THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); 565122922Sandre} 566122922Sandre 567122922Sandre/* 568122922Sandre * Sysctl function: prints the list and values of all hostcache entries in 569122922Sandre * unsorted order. 
570122922Sandre */ 571122922Sandrestatic int 572122922Sandresysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) 573122922Sandre{ 574183550Szec INIT_VNET_INET(curvnet); 575122922Sandre int bufsize; 576122922Sandre int linesize = 128; 577122922Sandre char *p, *buf; 578122922Sandre int len, i, error; 579122922Sandre struct hc_metrics *hc_entry; 580165118Sbz#ifdef INET6 581165118Sbz char ip6buf[INET6_ADDRSTRLEN]; 582165118Sbz#endif 583122922Sandre 584181803Sbz bufsize = linesize * (V_tcp_hostcache.cache_count + 1); 585122922Sandre 586122922Sandre p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); 587122922Sandre 588122922Sandre len = snprintf(p, linesize, 589122922Sandre "\nIP address MTU SSTRESH RTT RTTVAR BANDWIDTH " 590122922Sandre " CWND SENDPIPE RECVPIPE HITS UPD EXP\n"); 591122922Sandre p += len; 592122922Sandre 593122922Sandre#define msec(u) (((u) + 500) / 1000) 594181803Sbz for (i = 0; i < V_tcp_hostcache.hashsize; i++) { 595181803Sbz THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); 596181803Sbz TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket, 597122922Sandre rmx_q) { 598122922Sandre len = snprintf(p, linesize, 599122922Sandre "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu " 600122922Sandre "%4lu %4lu %4i\n", 601122922Sandre hc_entry->ip4.s_addr ? 
inet_ntoa(hc_entry->ip4) : 602122922Sandre#ifdef INET6 603165118Sbz ip6_sprintf(ip6buf, &hc_entry->ip6), 604122922Sandre#else 605122922Sandre "IPv6?", 606122922Sandre#endif 607122922Sandre hc_entry->rmx_mtu, 608122922Sandre hc_entry->rmx_ssthresh, 609122922Sandre msec(hc_entry->rmx_rtt * 610122922Sandre (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), 611122922Sandre msec(hc_entry->rmx_rttvar * 612122922Sandre (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), 613133477Sandre hc_entry->rmx_bandwidth * 8, 614122922Sandre hc_entry->rmx_cwnd, 615122922Sandre hc_entry->rmx_sendpipe, 616122922Sandre hc_entry->rmx_recvpipe, 617122922Sandre hc_entry->rmx_hits, 618122922Sandre hc_entry->rmx_updates, 619122922Sandre hc_entry->rmx_expire); 620122922Sandre p += len; 621122922Sandre } 622181803Sbz THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); 623122922Sandre } 624122922Sandre#undef msec 625122922Sandre error = SYSCTL_OUT(req, buf, p - buf); 626122922Sandre free(buf, M_TEMP); 627122922Sandre return(error); 628122922Sandre} 629122922Sandre 630122922Sandre/* 631170030Srwatson * Expire and purge (old|all) entries in the tcp_hostcache. Runs 632170030Srwatson * periodically from the callout. 
633122922Sandre */ 634122922Sandrestatic void 635122922Sandretcp_hc_purge(void *arg) 636122922Sandre{ 637183550Szec INIT_VNET_INET(curvnet); 638128574Sandre struct hc_metrics *hc_entry, *hc_next; 639122922Sandre int all = (intptr_t)arg; 640122922Sandre int i; 641122922Sandre 642181803Sbz if (V_tcp_hostcache.purgeall) { 643122922Sandre all = 1; 644181803Sbz V_tcp_hostcache.purgeall = 0; 645122922Sandre } 646122922Sandre 647181803Sbz for (i = 0; i < V_tcp_hostcache.hashsize; i++) { 648181803Sbz THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); 649181887Sjulian TAILQ_FOREACH_SAFE(hc_entry, 650181888Sjulian &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q, hc_next) { 651122922Sandre if (all || hc_entry->rmx_expire <= 0) { 652181803Sbz TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket, 653122922Sandre hc_entry, rmx_q); 654181803Sbz uma_zfree(V_tcp_hostcache.zone, hc_entry); 655181803Sbz V_tcp_hostcache.hashbase[i].hch_length--; 656181803Sbz V_tcp_hostcache.cache_count--; 657122922Sandre } else 658181803Sbz hc_entry->rmx_expire -= V_tcp_hostcache.prune; 659122922Sandre } 660181803Sbz THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); 661122922Sandre } 662181887Sjulian 663181887Sjulian callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz, 664181887Sjulian tcp_hc_purge, arg); 665122922Sandre} 666