/* nfs_nfsdcache.c revision 331722 */
1176730Sjeff/*- 2176730Sjeff * Copyright (c) 1989, 1993 3176730Sjeff * The Regents of the University of California. All rights reserved. 4176730Sjeff * 5176730Sjeff * This code is derived from software contributed to Berkeley by 6176730Sjeff * Rick Macklem at The University of Guelph. 7176730Sjeff * 8176730Sjeff * Redistribution and use in source and binary forms, with or without 9176730Sjeff * modification, are permitted provided that the following conditions 10176730Sjeff * are met: 11176730Sjeff * 1. Redistributions of source code must retain the above copyright 12176730Sjeff * notice, this list of conditions and the following disclaimer. 13176730Sjeff * 2. Redistributions in binary form must reproduce the above copyright 14176730Sjeff * notice, this list of conditions and the following disclaimer in the 15176730Sjeff * documentation and/or other materials provided with the distribution. 16176730Sjeff * 4. Neither the name of the University nor the names of its contributors 17176730Sjeff * may be used to endorse or promote products derived from this software 18176730Sjeff * without specific prior written permission. 19176730Sjeff * 20176730Sjeff * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21176730Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22176730Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23176730Sjeff * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24176730Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25176730Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26176730Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27176730Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28176730Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29176730Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30176730Sjeff * SUCH DAMAGE. 31176730Sjeff * 32176730Sjeff */ 33176730Sjeff 34176730Sjeff#include <sys/cdefs.h> 35176730Sjeff__FBSDID("$FreeBSD: stable/11/sys/fs/nfsserver/nfs_nfsdcache.c 331722 2018-03-29 02:50:57Z eadler $"); 36176730Sjeff 37176730Sjeff/* 38176730Sjeff * Here is the basic algorithm: 39176730Sjeff * First, some design criteria I used: 40176730Sjeff * - I think a false hit is more serious than a false miss 41176730Sjeff * - A false hit for an RPC that has Op(s) that order via seqid# must be 42176730Sjeff * avoided at all cost 43176730Sjeff * - A valid hit will probably happen a long time after the original reply 44176730Sjeff * and the TCP socket that the original request was received on will no 45176730Sjeff * longer be active 46176730Sjeff * (The long time delay implies to me that LRU is not appropriate.) 47176730Sjeff * - The mechanism will satisfy the requirements of ordering Ops with seqid#s 48176730Sjeff * in them as well as minimizing the risk of redoing retried non-idempotent 49177738Sjeff * Ops. 50177738Sjeff * Because it is biased towards avoiding false hits, multiple entries with 51176730Sjeff * the same xid are to be expected, especially for the case of the entry 52176730Sjeff * in the cache being related to a seqid# sequenced Op. 
53176730Sjeff * 54176730Sjeff * The basic algorithm I'm about to code up: 55176730Sjeff * - Null RPCs bypass the cache and are just done 56176730Sjeff * For TCP 57176730Sjeff * - key on <xid, NFS version> (as noted above, there can be several 58176730Sjeff * entries with the same key) 59176730Sjeff * When a request arrives: 60176730Sjeff * For all that match key 61176730Sjeff * - if RPC# != OR request_size != 62176730Sjeff * - not a match with this one 63176730Sjeff * - if NFSv4 and received on same TCP socket OR 64176730Sjeff * received on a TCP connection created before the 65176730Sjeff * entry was cached 66176730Sjeff * - not a match with this one 67176730Sjeff * (V2,3 clients might retry on same TCP socket) 68176730Sjeff * - calculate checksum on first N bytes of NFS XDR 69176730Sjeff * - if checksum != 70176730Sjeff * - not a match for this one 71176730Sjeff * If any of the remaining ones that match has a 72176730Sjeff * seqid_refcnt > 0 73176730Sjeff * - not a match (go do RPC, using new cache entry) 74176730Sjeff * If one match left 75176730Sjeff * - a hit (reply from cache) 76176730Sjeff * else 77176730Sjeff * - miss (go do RPC, using new cache entry) 78176730Sjeff * 79176730Sjeff * During processing of NFSv4 request: 80176730Sjeff * - set a flag when a non-idempotent Op is processed 81176730Sjeff * - when an Op that uses a seqid# (Open,...) 
is processed 82176730Sjeff * - if same seqid# as referenced entry in cache 83176730Sjeff * - free new cache entry 84176730Sjeff * - reply from referenced cache entry 85176730Sjeff * else if next seqid# in order 86176730Sjeff * - free referenced cache entry 87176730Sjeff * - increment seqid_refcnt on new cache entry 88176730Sjeff * - set pointer from Openowner/Lockowner to 89176730Sjeff * new cache entry (aka reference it) 90176730Sjeff * else if first seqid# in sequence 91176730Sjeff * - increment seqid_refcnt on new cache entry 92176730Sjeff * - set pointer from Openowner/Lockowner to 93176730Sjeff * new cache entry (aka reference it) 94176730Sjeff * 95176730Sjeff * At end of RPC processing: 96176730Sjeff * - if seqid_refcnt > 0 OR flagged non-idempotent on new 97176730Sjeff * cache entry 98176730Sjeff * - save reply in cache entry 99177738Sjeff * - calculate checksum on first N bytes of NFS XDR 100176730Sjeff * request 101177738Sjeff * - note op and length of XDR request (in bytes) 102177738Sjeff * - timestamp it 103176730Sjeff * else 104176730Sjeff * - free new cache entry 105176730Sjeff * - Send reply (noting info for socket activity check, below) 106176730Sjeff * 107176730Sjeff * For cache entries saved above: 108176730Sjeff * - if saved since seqid_refcnt was > 0 109176730Sjeff * - free when seqid_refcnt decrements to 0 110176730Sjeff * (when next one in sequence is processed above, or 111176730Sjeff * when Openowner/Lockowner is discarded) 112176730Sjeff * else { non-idempotent Op(s) } 113176730Sjeff * - free when 114176730Sjeff * - some further activity observed on same 115176730Sjeff * socket 116176730Sjeff * (I'm not yet sure how I'm going to do 117176730Sjeff * this. Maybe look at the TCP connection 118176730Sjeff * to see if the send_tcp_sequence# is well 119176730Sjeff * past sent reply OR K additional RPCs 120176730Sjeff * replied on same socket OR?) 121176730Sjeff * OR 122176730Sjeff * - when very old (hours, days, weeks?) 
123176730Sjeff * 124176730Sjeff * For UDP (v2, 3 only), pretty much the old way: 125176730Sjeff * - key on <xid, NFS version, RPC#, Client host ip#> 126176730Sjeff * (at most one entry for each key) 127176730Sjeff * 128176730Sjeff * When a Request arrives: 129176730Sjeff * - if a match with entry via key 130176730Sjeff * - if RPC marked In_progress 131176730Sjeff * - discard request (don't send reply) 132176730Sjeff * else 133176730Sjeff * - reply from cache 134176730Sjeff * - timestamp cache entry 135176730Sjeff * else 136176730Sjeff * - add entry to cache, marked In_progress 137176730Sjeff * - do RPC 138176730Sjeff * - when RPC done 139176730Sjeff * - if RPC# non-idempotent 140176730Sjeff * - mark entry Done (not In_progress) 141176730Sjeff * - save reply 142176730Sjeff * - timestamp cache entry 143176730Sjeff * else 144176730Sjeff * - free cache entry 145176730Sjeff * - send reply 146176730Sjeff * 147176730Sjeff * Later, entries with saved replies are free'd a short time (few minutes) 148176730Sjeff * after reply sent (timestamp). 149176730Sjeff * Reference: Chet Juszczak, "Improving the Performance and Correctness 150176730Sjeff * of an NFS Server", in Proc. Winter 1989 USENIX Conference, 151176730Sjeff * pages 53-63. San Diego, February 1989. 152176730Sjeff * for the UDP case. 153176730Sjeff * nfsrc_floodlevel is set to the allowable upper limit for saved replies 154176730Sjeff * for TCP. For V3, a reply won't be saved when the flood level is 155176730Sjeff * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in 156176730Sjeff * that case. This level should be set high enough that this almost 157176730Sjeff * never happens. 
 */
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>

/* Globals shared with the rest of the NFS server. */
extern struct nfsstatsv1 nfsstatsv1;
extern struct mtx nfsrc_udpmtx;
extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
/*
 * nfsrc_floodlevel: upper bound on saved TCP replies (see comment above).
 * nfsrc_tcpsavedreplies: current count of saved TCP replies.
 */
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif	/* !APPLEKEXT */

SYSCTL_DECL(_vfs_nfsd);

static u_int nfsrc_tcphighwater = 0;
/*
 * Sysctl handler for vfs.nfsd.tcphighwater.
 * Rejects negative values and, when the new high water mark reaches the
 * current flood level, raises the flood level to 20% above the new mark so
 * that the flood level always stays above the high water mark.
 */
static int
sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
{
	int error, newhighwater;

	newhighwater = nfsrc_tcphighwater;
	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (newhighwater < 0)
		return (EINVAL);
	if (newhighwater >= nfsrc_floodlevel)
		nfsrc_floodlevel = newhighwater + newhighwater / 5;
	nfsrc_tcphighwater = newhighwater;
	return (0);
}
SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    "High water mark for TCP cache entries");

static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    &nfsrc_udphighwater, 0,
    "High water mark for UDP cache entries");
static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    &nfsrc_tcptimeout, 0,
    "Timeout for TCP entries in the DRC");
static u_int nfsrc_tcpnonidempotent = 1;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    &nfsrc_tcpnonidempotent, 0,
    "Enable the DRC for NFS over TCP");

/* UDP cache entry count and LRU list (both protected by nfsrc_udpmtx). */
static int nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];

/*
 * and the reverse mapping from generic to Version 2 procedure numbers
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

/* Hash an xid into one of the NFSRVCACHE_HASHSIZE buckets. */
#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define	NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[nfsrc_hash(xid)])
#define	NFSRCHASH(xid) \
	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
#define	NFSRCAHASH(xid)	(&nfsrcahash_table[nfsrc_hash(xid)])
#define	TRUE	1
#define	FALSE	0
/* Number of leading NFS XDR bytes covered by the request checksum. */
#define	NFSRVCACHE_CHECKLEN	100

/* True iff the rpc reply is an nfs status ONLY!
 */
static int nfsv2_repstat[NFS_V3NPROCS] = {
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	TRUE,
	TRUE,
	TRUE,
	TRUE,
	FALSE,
	TRUE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
};

/*
 * Will NFS want to work over IPv6 someday?
 */
#define	NETFAMILY(rp) \
		(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

/* local functions */
static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static void nfsrc_lock(struct nfsrvcache *rp);
static void nfsrc_unlock(struct nfsrvcache *rp);
static void nfsrc_wanted(struct nfsrvcache *rp);
static void nfsrc_freecache(struct nfsrvcache *rp);
static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);

/*
 * Return the correct mutex for this cache entry.
291176730Sjeff */ 292176730Sjeffstatic __inline struct mtx * 293176730Sjeffnfsrc_cachemutex(struct nfsrvcache *rp) 294176730Sjeff{ 295176811Sjeff 296176811Sjeff if ((rp->rc_flag & RC_UDP) != 0) 297176811Sjeff return (&nfsrc_udpmtx); 298176811Sjeff return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx); 299176811Sjeff} 300176811Sjeff 301176811Sjeff/* 302176730Sjeff * Initialize the server request cache list 303176730Sjeff */ 304176730SjeffAPPLESTATIC void 305176730Sjeffnfsrvd_initcache(void) 306176730Sjeff{ 307176730Sjeff int i; 308176730Sjeff static int inited = 0; 309176730Sjeff 310176730Sjeff if (inited) 311176730Sjeff return; 312176730Sjeff inited = 1; 313176730Sjeff for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 314176730Sjeff LIST_INIT(&nfsrvudphashtbl[i]); 315176730Sjeff LIST_INIT(&nfsrchash_table[i].tbl); 316176730Sjeff LIST_INIT(&nfsrcahash_table[i].tbl); 317176730Sjeff } 318176730Sjeff TAILQ_INIT(&nfsrvudplru); 319177738Sjeff nfsrc_tcpsavedreplies = 0; 320176730Sjeff nfsrc_udpcachesize = 0; 321176730Sjeff nfsstatsv1.srvcache_tcppeak = 0; 322176730Sjeff nfsstatsv1.srvcache_size = 0; 323176730Sjeff} 324176730Sjeff 325176730Sjeff/* 326176730Sjeff * Get a cache entry for this request. Basically just malloc a new one 327176730Sjeff * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest. 
 */
APPLESTATIC int
nfsrvd_getcache(struct nfsrv_descript *nd)
{
	struct nfsrvcache *newrp;
	int ret;

	/* Null RPCs bypass the cache entirely (see algorithm above). */
	if (nd->nd_procnum == NFSPROC_NULL)
		panic("nfsd cache null");
	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
	    M_NFSRVCACHE, M_WAITOK);
	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
	/* Record the NFS protocol version of the request. */
	if (nd->nd_flag & ND_NFSV4)
		newrp->rc_flag = RC_NFSV4;
	else if (nd->nd_flag & ND_NFSV3)
		newrp->rc_flag = RC_NFSV3;
	else
		newrp->rc_flag = RC_NFSV2;
	newrp->rc_xid = nd->nd_retxid;
	newrp->rc_proc = nd->nd_procnum;
	newrp->rc_sockref = nd->nd_sockref;
	newrp->rc_cachetime = nd->nd_tcpconntime;
	if (nd->nd_flag & ND_SAMETCPCONN)
		newrp->rc_flag |= RC_SAMETCPCONN;
	/* nd_nam2 != NULL distinguishes UDP requests from TCP ones. */
	if (nd->nd_nam2 != NULL) {
		newrp->rc_flag |= RC_UDP;
		ret = nfsrc_getudp(nd, newrp);
	} else {
		ret = nfsrc_gettcp(nd, newrp);
	}
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * For UDP (v2, v3):
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *	(at most one entry for each key)
 *
 * Returns RC_DROPIT, RC_REPLY or RC_DOIT; on a hit the caller's newrp is
 * freed, on a miss it is inserted into the cache marked RC_INPROG and
 * handed back via nd->nd_rp.
 */
static int
nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp;
	struct sockaddr_in *saddr;
	struct sockaddr_in6 *saddr6;
	struct nfsrvhashhead *hp;
	int ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
	mtx_lock(mutex);
	LIST_FOREACH(rp, hp, rc_hash) {
	    if (newrp->rc_xid == rp->rc_xid &&
		newrp->rc_proc == rp->rc_proc &&
		(newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			/*
			 * If another thread holds the entry locked, note
			 * that we want it and sleep; PDROP releases the
			 * mutex, so restart the scan from the top.
			 */
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
				    "nfsrc", 10 * hz);
				goto loop;
			}
			if (rp->rc_flag == 0)
				panic("nfs udp cache0");
			rp->rc_flag |= RC_LOCKED;
			/* Move the entry to the tail of the LRU list. */
			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
			if (rp->rc_flag & RC_INPROG) {
				/* Retry of an RPC still in progress: drop. */
				nfsstatsv1.srvcache_inproghits++;
				mtx_unlock(mutex);
				ret = RC_DROPIT;
			} else if (rp->rc_flag & RC_REPSTATUS) {
				/*
				 * V2 only.
				 */
				nfsstatsv1.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nfsrvd_rephead(nd);
				*(nd->nd_errp) = rp->rc_status;
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
					NFSRVCACHE_UDPTIMEOUT;
			} else if (rp->rc_flag & RC_REPMBUF) {
				/* Reply from the saved mbuf chain copy. */
				nfsstatsv1.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nd->nd_mreq = m_copym(rp->rc_reply, 0,
					M_COPYALL, M_WAITOK);
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
					NFSRVCACHE_UDPTIMEOUT;
			} else {
				panic("nfs udp cache1");
			}
			nfsrc_unlock(rp);
			free((caddr_t)newrp, M_NFSRVCACHE);
			goto out;
		}
	}
	/* Cache miss: insert the new entry, marked in progress. */
	nfsstatsv1.srvcache_misses++;
	atomic_add_int(&nfsstatsv1.srvcache_size, 1);
	nfsrc_udpcachesize++;

	newrp->rc_flag |= RC_INPROG;
	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
	if (saddr->sin_family == AF_INET)
		newrp->rc_inet = saddr->sin_addr.s_addr;
	else if (saddr->sin_family == AF_INET6) {
		saddr6 = (struct sockaddr_in6 *)saddr;
		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
		    sizeof (struct in6_addr));
		newrp->rc_flag |= RC_INETIPV6;
	}
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Update a request cache entry after the rpc has been done
 */
APPLESTATIC struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript *nd)
{
	struct nfsrvcache *rp;
	struct nfsrvcache *retrp = NULL;
	mbuf_t m;
	struct mtx *mutex;

	rp = nd->nd_rp;
	if (!rp)
		panic("nfsrvd_updatecache null rp");
	nd->nd_rp = NULL;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	nfsrc_lock(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_updatecache not inprog");
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_flag & RC_UDP) {
		/* Move the UDP entry to the tail of the LRU list. */
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
	}

	/*
	 * Reply from cache is a special case returned by nfsrv_checkseqid().
	 */
	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
		nfsstatsv1.srvcache_nonidemdonehits++;
		mtx_unlock(mutex);
		nd->nd_repstat = 0;
		/* Discard the generated reply; use the cached one instead. */
		if (nd->nd_mreq)
			mbuf_freem(nd->nd_mreq);
		if (!(rp->rc_flag & RC_REPMBUF))
			panic("reply from cache");
		nd->nd_mreq = m_copym(rp->rc_reply, 0,
		    M_COPYALL, M_WAITOK);
		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		nfsrc_unlock(rp);
		goto out;
	}

	/*
	 * If rc_refcnt > 0, save it
	 * For UDP, save it if ND_SAVEREPLY is set
	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
	 */
	if (nd->nd_repstat != NFSERR_DONTREPLY &&
	    (rp->rc_refcnt > 0 ||
	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
	      nfsrc_tcpnonidempotent))) {
		if (rp->rc_refcnt > 0) {
			/* seqid#-referenced entries only exist for NFSv4. */
			if (!(rp->rc_flag & RC_NFSV4))
				panic("update_cache refcnt");
			rp->rc_flag |= RC_REFCNT;
		}
		if ((nd->nd_flag & ND_NFSV2) &&
		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
			/* V2 status-only reply: just record the status. */
			rp->rc_status = nd->nd_repstat;
			rp->rc_flag |= RC_REPSTATUS;
			mtx_unlock(mutex);
		} else {
			if (!(rp->rc_flag & RC_UDP)) {
				/* Track the peak number of saved replies. */
				atomic_add_int(&nfsrc_tcpsavedreplies, 1);
				if (nfsrc_tcpsavedreplies >
				    nfsstatsv1.srvcache_tcppeak)
					nfsstatsv1.srvcache_tcppeak =
					    nfsrc_tcpsavedreplies;
			}
			/*
			 * Drop the mutex around m_copym(), which can sleep
			 * with M_WAITOK, then reacquire it to attach the
			 * copy to the entry.
			 */
			mtx_unlock(mutex);
			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
			mtx_lock(mutex);
			rp->rc_reply = m;
			rp->rc_flag |= RC_REPMBUF;
			mtx_unlock(mutex);
		}
		if (rp->rc_flag & RC_UDP) {
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_UDPTIMEOUT;
			nfsrc_unlock(rp);
		} else {
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
			/*
			 * A TCP entry without a seqid reference is returned
			 * still locked; nfsrvd_sentcache() unlocks it after
			 * the reply has been sent.
			 */
			if (rp->rc_refcnt > 0)
				nfsrc_unlock(rp);
			else
				retrp = rp;
		}
	} else {
		/* Nothing worth saving: toss the entry. */
		nfsrc_freecache(rp);
		mtx_unlock(mutex);
	}

out:
	NFSEXITCODE2(0, nd);
	return (retrp);
}

/*
 * Invalidate and, if possible, free an in prog cache entry.
 * Must not sleep.
 */
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_delcache not in prog");
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_INPROG;
	/* Only free it when nothing else references or holds it locked. */
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Called after nfsrvd_updatecache() once the reply is sent, to update
 * the entry's sequence number and unlock it. The argument is
 * the pointer returned by nfsrvd_updatecache().
 */
APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
{
	struct nfsrchash_bucket *hbp;

	KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
	if (have_seq) {
		/* Record the TCP sequence number for later ack checks. */
		hbp = NFSRCAHASH(rp->rc_sockref);
		mtx_lock(&hbp->mtx);
		rp->rc_tcpseq = seq;
		if (rp->rc_acked != RC_NO_ACK)
			LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
		rp->rc_acked = RC_NO_ACK;
		mtx_unlock(&hbp->mtx);
	}
	nfsrc_unlock(rp);
}

/*
 * Get a cache entry for TCP
 * - key on <xid, nfs version>
 *     (allow multiple entries for a given key)
 */
static int
nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp, *nextrp;
	int i;
	struct nfsrvcache *hitrp;
	struct nfsrvhashhead *hp, nfsrc_templist;
	int hit, ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCHASH(newrp->rc_xid);
	/* Checksum the first NFSRVCACHE_CHECKLEN bytes of the request. */
	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
	mtx_lock(mutex);
	hit = 1;
	LIST_INIT(&nfsrc_templist);
	/*
	 * Get all the matches and put them on the temp list.
	 */
	rp = LIST_FIRST(hp);
	while (rp != LIST_END(hp)) {
		nextrp = LIST_NEXT(rp, rc_hash);
		/*
		 * NOTE(review): the RC_NFSV4 clause below requires the new
		 * request to be NFSv4 for any match at all — confirm against
		 * upstream; V2/3 TCP retries would then never match here.
		 */
		if (newrp->rc_xid == rp->rc_xid &&
		    (!(rp->rc_flag & RC_INPROG) ||
		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
		      newrp->rc_sockref == rp->rc_sockref)) &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    newrp->rc_proc == rp->rc_proc &&
		    ((newrp->rc_flag & RC_NFSV4) &&
		     newrp->rc_sockref != rp->rc_sockref &&
		     newrp->rc_cachetime >= rp->rc_cachetime)
		    && newrp->rc_reqlen == rp->rc_reqlen &&
		    newrp->rc_cksum == rp->rc_cksum) {
			LIST_REMOVE(rp, rc_hash);
			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
		}
		rp = nextrp;
	}

	/*
	 * Now, use nfsrc_templist to decide if there is a match.
	 */
	i = 0;
	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
		i++;
		/* Any seqid#-referenced match forces a miss (see above). */
		if (rp->rc_refcnt > 0) {
			hit = 0;
			break;
		}
	}
	/*
	 * Can be a hit only if one entry left.
	 * Note possible hit entry and put nfsrc_templist back on hash
	 * list.
	 */
	if (i != 1)
		hit = 0;
	hitrp = rp = LIST_FIRST(&nfsrc_templist);
	while (rp != LIST_END(&nfsrc_templist)) {
		nextrp = LIST_NEXT(rp, rc_hash);
		LIST_REMOVE(rp, rc_hash);
		LIST_INSERT_HEAD(hp, rp, rc_hash);
		rp = nextrp;
	}
	if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist))
		panic("nfs gettcp cache templist");

	if (hit) {
		rp = hitrp;
		/*
		 * Sleep and rescan if the hit entry is locked; PDROP
		 * releases the mutex before sleeping.
		 */
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
			    "nfsrc", 10 * hz);
			goto tryagain;
		}
		if (rp->rc_flag == 0)
			panic("nfs tcp cache0");
		rp->rc_flag |= RC_LOCKED;
		if (rp->rc_flag & RC_INPROG) {
			nfsstatsv1.srvcache_inproghits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_DROPIT;
		} else if (rp->rc_flag & RC_REPSTATUS) {
			/*
			 * V2 only.
			 */
			nfsstatsv1.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nfsrvd_rephead(nd);
			*(nd->nd_errp) = rp->rc_status;
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else if (rp->rc_flag & RC_REPMBUF) {
			/* Reply from the saved mbuf chain copy. */
			nfsstatsv1.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nd->nd_mreq = m_copym(rp->rc_reply, 0,
			    M_COPYALL, M_WAITOK);
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else {
			panic("nfs tcp cache1");
		}
		nfsrc_unlock(rp);
		free((caddr_t)newrp, M_NFSRVCACHE);
		goto out;
	}
	nfsstatsv1.srvcache_misses++;
	atomic_add_int(&nfsstatsv1.srvcache_size, 1);

	/*
	 * For TCP, multiple entries for a key are allowed, so don't
	 * chain it into the hash table until done.
	 */
	newrp->rc_cachetime = NFSD_MONOSEC;
	newrp->rc_flag |= RC_INPROG;
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Lock a cache entry.
735176730Sjeff */ 736176730Sjeffstatic void 737176730Sjeffnfsrc_lock(struct nfsrvcache *rp) 738176730Sjeff{ 739176730Sjeff struct mtx *mutex; 740176730Sjeff 741176730Sjeff mutex = nfsrc_cachemutex(rp); 742176730Sjeff mtx_assert(mutex, MA_OWNED); 743176730Sjeff while ((rp->rc_flag & RC_LOCKED) != 0) { 744176730Sjeff rp->rc_flag |= RC_WANTED; 745176730Sjeff (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0); 746176730Sjeff } 747176730Sjeff rp->rc_flag |= RC_LOCKED; 748176730Sjeff} 749176730Sjeff 750176730Sjeff/* 751176730Sjeff * Unlock a cache entry. 752176730Sjeff */ 753176730Sjeffstatic void 754177738Sjeffnfsrc_unlock(struct nfsrvcache *rp) 755176730Sjeff{ 756176730Sjeff struct mtx *mutex; 757176730Sjeff 758176730Sjeff mutex = nfsrc_cachemutex(rp); 759176730Sjeff mtx_lock(mutex); 760176730Sjeff rp->rc_flag &= ~RC_LOCKED; 761176730Sjeff nfsrc_wanted(rp); 762176730Sjeff mtx_unlock(mutex); 763177738Sjeff} 764176730Sjeff 765176730Sjeff/* 766176730Sjeff * Wakeup anyone wanting entry. 767176730Sjeff */ 768176730Sjeffstatic void 769176730Sjeffnfsrc_wanted(struct nfsrvcache *rp) 770176730Sjeff{ 771176730Sjeff if (rp->rc_flag & RC_WANTED) { 772176730Sjeff rp->rc_flag &= ~RC_WANTED; 773176730Sjeff wakeup((caddr_t)rp); 774176730Sjeff } 775176730Sjeff} 776176730Sjeff 777176730Sjeff/* 778176730Sjeff * Free up the entry. 779176730Sjeff * Must not sleep. 
780176730Sjeff */ 781176730Sjeffstatic void 782177597Srunfsrc_freecache(struct nfsrvcache *rp) 783177597Sru{ 784177597Sru struct nfsrchash_bucket *hbp; 785177597Sru 786177597Sru LIST_REMOVE(rp, rc_hash); 787176730Sjeff if (rp->rc_flag & RC_UDP) { 788176730Sjeff TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 789176730Sjeff nfsrc_udpcachesize--; 790176730Sjeff } else if (rp->rc_acked != RC_NO_SEQ) { 791176730Sjeff hbp = NFSRCAHASH(rp->rc_sockref); 792176730Sjeff mtx_lock(&hbp->mtx); 793176730Sjeff if (rp->rc_acked == RC_NO_ACK) 794176730Sjeff LIST_REMOVE(rp, rc_ahash); 795176730Sjeff mtx_unlock(&hbp->mtx); 796176730Sjeff } 797176730Sjeff nfsrc_wanted(rp); 798177597Sru if (rp->rc_flag & RC_REPMBUF) { 799176730Sjeff mbuf_freem(rp->rc_reply); 800176811Sjeff if (!(rp->rc_flag & RC_UDP)) 801176811Sjeff atomic_add_int(&nfsrc_tcpsavedreplies, -1); 802176730Sjeff } 803176811Sjeff FREE((caddr_t)rp, M_NFSRVCACHE); 804176730Sjeff atomic_add_int(&nfsstatsv1.srvcache_size, -1); 805176730Sjeff} 806176730Sjeff 807176730Sjeff/* 808176730Sjeff * Clean out the cache. Called when nfsserver module is unloaded. 
809176730Sjeff */ 810176730SjeffAPPLESTATIC void 811176730Sjeffnfsrvd_cleancache(void) 812176730Sjeff{ 813176730Sjeff struct nfsrvcache *rp, *nextrp; 814176730Sjeff int i; 815176730Sjeff 816176730Sjeff for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 817176730Sjeff mtx_lock(&nfsrchash_table[i].mtx); 818176730Sjeff LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) 819176730Sjeff nfsrc_freecache(rp); 820176730Sjeff mtx_unlock(&nfsrchash_table[i].mtx); 821176730Sjeff } 822177738Sjeff mtx_lock(&nfsrc_udpmtx); 823176730Sjeff for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 824177738Sjeff LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) { 825176730Sjeff nfsrc_freecache(rp); 826176730Sjeff } 827176730Sjeff } 828176730Sjeff nfsstatsv1.srvcache_size = 0; 829176730Sjeff mtx_unlock(&nfsrc_udpmtx); 830176730Sjeff nfsrc_tcpsavedreplies = 0; 831176730Sjeff} 832176730Sjeff 833176730Sjeff#define HISTSIZE 16 834176730Sjeff/* 835176730Sjeff * The basic rule is to get rid of entries that are expired. 
 */
void
nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
{
	struct nfsrchash_bucket *hbp;
	struct nfsrvcache *rp, *nextrp;
	int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
	time_t thisstamp;
	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
	static int onethread = 0, oneslot = 0;

	/*
	 * If called on behalf of a TCP connection (sockref != 0), first mark
	 * cached replies for that connection as acknowledged (snd_una has
	 * passed their sequence number) or, on the final call for the
	 * connection, as never-to-be-acknowledged, removing them from the
	 * ack hash chain either way.
	 */
	if (sockref != 0) {
		hbp = NFSRCAHASH(sockref);
		mtx_lock(&hbp->mtx);
		LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
			if (sockref == rp->rc_sockref) {
				if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
					rp->rc_acked = RC_ACK;
					LIST_REMOVE(rp, rc_ahash);
				} else if (final) {
					rp->rc_acked = RC_NACK;
					LIST_REMOVE(rp, rc_ahash);
				}
			}
		}
		mtx_unlock(&hbp->mtx);
	}

	/* Allow at most one thread at a time to perform the trimming below. */
	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
		return;
	/*
	 * Trim the UDP cache at most once per second, unless it has grown
	 * to 150% of its high-water mark.
	 */
	if (NFSD_MONOSEC != udp_lasttrim ||
	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
	    nfsrc_udphighwater / 2)) {
		mtx_lock(&nfsrc_udpmtx);
		udp_lasttrim = NFSD_MONOSEC;
		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
			/* Free idle entries that are expired or over limit. */
			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
			     && rp->rc_refcnt == 0
			     && ((rp->rc_flag & RC_REFCNT) ||
				 udp_lasttrim > rp->rc_timestamp ||
				 nfsrc_udpcachesize > nfsrc_udphighwater))
				nfsrc_freecache(rp);
		}
		mtx_unlock(&nfsrc_udpmtx);
	}
	/*
	 * Trim the TCP cache at most once per second, unless the number of
	 * saved replies has reached the high-water mark.
	 */
	if (NFSD_MONOSEC != tcp_lasttrim ||
	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
		force = nfsrc_tcphighwater / 4;
		if (force > 0 &&
		    nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) {
			/*
			 * Within 25% of the high-water mark: scan every hash
			 * slot and build a timeout histogram for a second,
			 * more aggressive trimming pass below.
			 */
			for (i = 0; i < HISTSIZE; i++)
				time_histo[i] = 0;
			i = 0;
			lastslot = NFSRVCACHE_HASHSIZE - 1;
		} else {
			force = 0;
			if (NFSD_MONOSEC != tcp_lasttrim) {
				/* New second: scan all hash slots once. */
				i = 0;
				lastslot = NFSRVCACHE_HASHSIZE - 1;
			} else {
				/*
				 * Same second: scan just one slot, rotating
				 * round-robin through the table via oneslot.
				 */
				lastslot = i = oneslot;
				if (++oneslot >= NFSRVCACHE_HASHSIZE)
					oneslot = 0;
			}
		}
		tto = nfsrc_tcptimeout;
		tcp_lasttrim = NFSD_MONOSEC;
		for (; i <= lastslot; i++) {
			mtx_lock(&nfsrchash_table[i].mtx);
			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
			    nextrp) {
				if (!(rp->rc_flag &
				     (RC_INPROG|RC_LOCKED|RC_WANTED))
				     && rp->rc_refcnt == 0) {
					if ((rp->rc_flag & RC_REFCNT) ||
					    tcp_lasttrim > rp->rc_timestamp ||
					    rp->rc_acked == RC_ACK) {
						nfsrc_freecache(rp);
						continue;
					}

					if (force == 0)
						continue;
					/*
					 * The timestamps range from roughly the
					 * present (tcp_lasttrim) to the present
					 * + nfsrc_tcptimeout. Generate a simple
					 * histogram of where the timeouts fall.
					 */
					j = rp->rc_timestamp - tcp_lasttrim;
					if (j >= tto)
						j = HISTSIZE - 1;
					else if (j < 0)
						j = 0;
					else
						j = j * HISTSIZE / tto;
					time_histo[j]++;
				}
			}
			mtx_unlock(&nfsrchash_table[i].mtx);
		}
		if (force) {
			/*
			 * Trim some more with a smaller timeout of as little
			 * as 20% of nfsrc_tcptimeout to try and get below
			 * 80% of the nfsrc_tcphighwater.
			 */
			k = 0;
			for (i = 0; i < (HISTSIZE - 2); i++) {
				k += time_histo[i];
				if (k > force)
					break;
			}
			/* Convert the chosen histogram bucket to seconds. */
			k = tto * (i + 1) / HISTSIZE;
			if (k < 1)
				k = 1;
			thisstamp = tcp_lasttrim + k;
			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
				mtx_lock(&nfsrchash_table[i].mtx);
				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
				    rc_hash, nextrp) {
					if (!(rp->rc_flag &
					     (RC_INPROG|RC_LOCKED|RC_WANTED))
					     && rp->rc_refcnt == 0
					     && ((rp->rc_flag & RC_REFCNT) ||
						 thisstamp > rp->rc_timestamp ||
						 rp->rc_acked == RC_ACK))
						nfsrc_freecache(rp);
				}
				mtx_unlock(&nfsrchash_table[i].mtx);
			}
		}
	}
	/* Release the single-trimmer flag set by the cmpset above. */
	atomic_store_rel_int(&onethread, 0);
}

/*
 * Add a seqid# reference to the cache entry.
 */
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	if (rp == NULL)
		/* For NFSv4.1, there is no cache entry. */
		return;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	/* A negative count indicates corruption; refuse to continue. */
	if (rp->rc_refcnt < 0)
		panic("nfs cache refcnt");
	rp->rc_refcnt++;
	mtx_unlock(mutex);
}

/*
 * Dereference a seqid# cache entry.
 */
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	/* A non-positive count here means an unbalanced ref/deref; panic. */
	if (rp->rc_refcnt <= 0)
		panic("nfs cache derefcnt");
	rp->rc_refcnt--;
	/*
	 * Free the entry when the last reference is dropped, unless it is
	 * still locked or has an RPC in progress against it.
	 */
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Calculate the length of the mbuf list and a checksum on the first up to
 * NFSRVCACHE_CHECKLEN bytes.
 * Returns the total length of the mbuf chain; the checksum is returned
 * via *cksum.
 */
static int
nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
{
	int len = 0, cklen;
	mbuf_t m;

	/* Walk the chain once to total the data length. */
	m = m1;
	while (m) {
		len += mbuf_len(m);
		m = mbuf_next(m);
	}
	/* Checksum only the first NFSRVCACHE_CHECKLEN bytes (or all, if less). */
	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
	*cksum = in_cksum(m1, cklen);
	return (len);
}

/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 * Currently a no-op placeholder; kept so callers have a single hook point.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{
}