1191783Srmacklem/*- 2191783Srmacklem * Copyright (c) 1989, 1993 3191783Srmacklem * The Regents of the University of California. All rights reserved. 4191783Srmacklem * 5191783Srmacklem * This code is derived from software contributed to Berkeley by 6191783Srmacklem * Rick Macklem at The University of Guelph. 7191783Srmacklem * 8191783Srmacklem * Redistribution and use in source and binary forms, with or without 9191783Srmacklem * modification, are permitted provided that the following conditions 10191783Srmacklem * are met: 11191783Srmacklem * 1. Redistributions of source code must retain the above copyright 12191783Srmacklem * notice, this list of conditions and the following disclaimer. 13191783Srmacklem * 2. Redistributions in binary form must reproduce the above copyright 14191783Srmacklem * notice, this list of conditions and the following disclaimer in the 15191783Srmacklem * documentation and/or other materials provided with the distribution. 16191783Srmacklem * 4. Neither the name of the University nor the names of its contributors 17191783Srmacklem * may be used to endorse or promote products derived from this software 18191783Srmacklem * without specific prior written permission. 19191783Srmacklem * 20191783Srmacklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21191783Srmacklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22191783Srmacklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23191783Srmacklem * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24191783Srmacklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25191783Srmacklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26191783Srmacklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27191783Srmacklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28191783Srmacklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29191783Srmacklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30191783Srmacklem * SUCH DAMAGE. 31191783Srmacklem * 32191783Srmacklem */ 33191783Srmacklem 34191783Srmacklem#include <sys/cdefs.h> 35191783Srmacklem__FBSDID("$FreeBSD$"); 36191783Srmacklem 37191783Srmacklem/* 38191783Srmacklem * Here is the basic algorithm: 39191783Srmacklem * First, some design criteria I used: 40191783Srmacklem * - I think a false hit is more serious than a false miss 41191783Srmacklem * - A false hit for an RPC that has Op(s) that order via seqid# must be 42191783Srmacklem * avoided at all cost 43191783Srmacklem * - A valid hit will probably happen a long time after the original reply 44191783Srmacklem * and the TCP socket that the original request was received on will no 45191783Srmacklem * longer be active 46191783Srmacklem * (The long time delay implies to me that LRU is not appropriate.) 47191783Srmacklem * - The mechanism will satisfy the requirements of ordering Ops with seqid#s 48191783Srmacklem * in them as well as minimizing the risk of redoing retried non-idempotent 49191783Srmacklem * Ops. 50191783Srmacklem * Because it is biased towards avoiding false hits, multiple entries with 51191783Srmacklem * the same xid are to be expected, especially for the case of the entry 52191783Srmacklem * in the cache being related to a seqid# sequenced Op. 53191783Srmacklem * 54191783Srmacklem * The basic algorithm I'm about to code up: 55191783Srmacklem * - Null RPCs bypass the cache and are just done 56191783Srmacklem * For TCP 57191783Srmacklem * - key on <xid, NFS version> (as noted above, there can be several 58191783Srmacklem * entries with the same key) 59191783Srmacklem * When a request arrives: 60191783Srmacklem * For all that match key 61191783Srmacklem * - if RPC# != OR request_size != 62191783Srmacklem * - not a match with this one 63191783Srmacklem * - if NFSv4 and received on same TCP socket OR 64191783Srmacklem * received on a TCP connection created before the 65191783Srmacklem * entry was cached 66191783Srmacklem * - not a match with this one 67191783Srmacklem * (V2,3 clients might retry on same TCP socket) 68191783Srmacklem * - calculate checksum on first N bytes of NFS XDR 69191783Srmacklem * - if checksum != 70191783Srmacklem * - not a match for this one 71191783Srmacklem * If any of the remaining ones that match has a 72191783Srmacklem * seqid_refcnt > 0 73191783Srmacklem * - not a match (go do RPC, using new cache entry) 74191783Srmacklem * If one match left 75191783Srmacklem * - a hit (reply from cache) 76191783Srmacklem * else 77191783Srmacklem * - miss (go do RPC, using new cache entry) 78191783Srmacklem * 79191783Srmacklem * During processing of NFSv4 request: 80191783Srmacklem * - set a flag when a non-idempotent Op is processed 81191783Srmacklem * - when an Op that uses a seqid# (Open,...) is processed 82191783Srmacklem * - if same seqid# as referenced entry in cache 83191783Srmacklem * - free new cache entry 84191783Srmacklem * - reply from referenced cache entry 85191783Srmacklem * else if next seqid# in order 86191783Srmacklem * - free referenced cache entry 87191783Srmacklem * - increment seqid_refcnt on new cache entry 88191783Srmacklem * - set pointer from Openowner/Lockowner to 89191783Srmacklem * new cache entry (aka reference it) 90191783Srmacklem * else if first seqid# in sequence 91191783Srmacklem * - increment seqid_refcnt on new cache entry 92191783Srmacklem * - set pointer from Openowner/Lockowner to 93191783Srmacklem * new cache entry (aka reference it) 94191783Srmacklem * 95191783Srmacklem * At end of RPC processing: 96191783Srmacklem * - if seqid_refcnt > 0 OR flagged non-idempotent on new 97191783Srmacklem * cache entry 98191783Srmacklem * - save reply in cache entry 99191783Srmacklem * - calculate checksum on first N bytes of NFS XDR 100191783Srmacklem * request 101191783Srmacklem * - note op and length of XDR request (in bytes) 102191783Srmacklem * - timestamp it 103191783Srmacklem * else 104191783Srmacklem * - free new cache entry 105191783Srmacklem * - Send reply (noting info for socket activity check, below) 106191783Srmacklem * 107191783Srmacklem * For cache entries saved above: 108191783Srmacklem * - if saved since seqid_refcnt was > 0 109191783Srmacklem * - free when seqid_refcnt decrements to 0 110191783Srmacklem * (when next one in sequence is processed above, or 111191783Srmacklem * when Openowner/Lockowner is discarded) 112191783Srmacklem * else { non-idempotent Op(s) } 113191783Srmacklem * - free when 114191783Srmacklem * - some further activity observed on same 115191783Srmacklem * socket 116191783Srmacklem * (I'm not yet sure how I'm going to do 117191783Srmacklem * this. Maybe look at the TCP connection 118191783Srmacklem * to see if the send_tcp_sequence# is well 119191783Srmacklem * past sent reply OR K additional RPCs 120191783Srmacklem * replied on same socket OR?) 121191783Srmacklem * OR 122191783Srmacklem * - when very old (hours, days, weeks?) 123191783Srmacklem * 124191783Srmacklem * For UDP (v2, 3 only), pretty much the old way: 125191783Srmacklem * - key on <xid, NFS version, RPC#, Client host ip#> 126191783Srmacklem * (at most one entry for each key) 127191783Srmacklem * 128191783Srmacklem * When a Request arrives: 129191783Srmacklem * - if a match with entry via key 130191783Srmacklem * - if RPC marked In_progress 131191783Srmacklem * - discard request (don't send reply) 132191783Srmacklem * else 133191783Srmacklem * - reply from cache 134191783Srmacklem * - timestamp cache entry 135191783Srmacklem * else 136191783Srmacklem * - add entry to cache, marked In_progress 137191783Srmacklem * - do RPC 138191783Srmacklem * - when RPC done 139191783Srmacklem * - if RPC# non-idempotent 140191783Srmacklem * - mark entry Done (not In_progress) 141191783Srmacklem * - save reply 142191783Srmacklem * - timestamp cache entry 143191783Srmacklem * else 144191783Srmacklem * - free cache entry 145191783Srmacklem * - send reply 146191783Srmacklem * 147191783Srmacklem * Later, entries with saved replies are free'd a short time (few minutes) 148191783Srmacklem * after reply sent (timestamp). 149191783Srmacklem * Reference: Chet Juszczak, "Improving the Performance and Correctness 150191783Srmacklem * of an NFS Server", in Proc. Winter 1989 USENIX Conference, 151191783Srmacklem * pages 53-63. San Diego, February 1989. 152191783Srmacklem * for the UDP case. 153191783Srmacklem * nfsrc_floodlevel is set to the allowable upper limit for saved replies 154191783Srmacklem * for TCP. For V3, a reply won't be saved when the flood level is 155191783Srmacklem * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in 156191783Srmacklem * that case. This level should be set high enough that this almost 157191783Srmacklem * never happens. 158191783Srmacklem */ 159191783Srmacklem#ifndef APPLEKEXT 160191783Srmacklem#include <fs/nfs/nfsport.h> 161191783Srmacklem 162191783Srmacklemextern struct nfsstats newnfsstats; 163255532Srmacklemextern struct mtx nfsrc_udpmtx; 164255532Srmacklemextern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 165261067Smavextern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 166191783Srmacklemint nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; 167191783Srmacklem#endif /* !APPLEKEXT */ 168191783Srmacklem 169255532SrmacklemSYSCTL_DECL(_vfs_nfsd); 170255532Srmacklem 171255532Srmacklemstatic u_int nfsrc_tcphighwater = 0; 172255532Srmacklemstatic int 173255532Srmacklemsysctl_tcphighwater(SYSCTL_HANDLER_ARGS) 174255532Srmacklem{ 175255532Srmacklem int error, newhighwater; 176255532Srmacklem 177255532Srmacklem newhighwater = nfsrc_tcphighwater; 178255532Srmacklem error = sysctl_handle_int(oidp, &newhighwater, 0, req); 179255532Srmacklem if (error != 0 || req->newptr == NULL) 180255532Srmacklem return (error); 181255532Srmacklem if (newhighwater < 0) 182255532Srmacklem return (EINVAL); 183255532Srmacklem if (newhighwater >= nfsrc_floodlevel) 184255532Srmacklem nfsrc_floodlevel = newhighwater + newhighwater / 5; 185255532Srmacklem nfsrc_tcphighwater = newhighwater; 186255532Srmacklem return (0); 187255532Srmacklem} 188255532SrmacklemSYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0, 189255532Srmacklem sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU", 190255532Srmacklem "High water mark for TCP cache entries"); 191255532Srmacklem 192255532Srmacklemstatic u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER; 193255532SrmacklemSYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW, 194255532Srmacklem &nfsrc_udphighwater, 0, 195255532Srmacklem "High water mark for UDP cache entries"); 196255532Srmacklemstatic u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT; 197255532SrmacklemSYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW, 198255532Srmacklem &nfsrc_tcptimeout, 0, 199255532Srmacklem "Timeout for TCP entries in the DRC"); 200255532Srmacklemstatic u_int nfsrc_tcpnonidempotent = 1; 201255532SrmacklemSYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW, 202255532Srmacklem &nfsrc_tcpnonidempotent, 0, 203255532Srmacklem "Enable the DRC for NFS over TCP"); 204255532Srmacklem 205255532Srmacklemstatic int nfsrc_udpcachesize = 0; 206191783Srmacklemstatic TAILQ_HEAD(, nfsrvcache) nfsrvudplru; 207255532Srmacklemstatic struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; 208255532Srmacklem 209191783Srmacklem/* 210191783Srmacklem * and the reverse mapping from generic to Version 2 procedure numbers 211191783Srmacklem */ 212191783Srmacklemstatic int newnfsv2_procid[NFS_V3NPROCS] = { 213191783Srmacklem NFSV2PROC_NULL, 214191783Srmacklem NFSV2PROC_GETATTR, 215191783Srmacklem NFSV2PROC_SETATTR, 216191783Srmacklem NFSV2PROC_LOOKUP, 217191783Srmacklem NFSV2PROC_NOOP, 218191783Srmacklem NFSV2PROC_READLINK, 219191783Srmacklem NFSV2PROC_READ, 220191783Srmacklem NFSV2PROC_WRITE, 221191783Srmacklem NFSV2PROC_CREATE, 222191783Srmacklem NFSV2PROC_MKDIR, 223191783Srmacklem NFSV2PROC_SYMLINK, 224191783Srmacklem NFSV2PROC_CREATE, 225191783Srmacklem NFSV2PROC_REMOVE, 226191783Srmacklem NFSV2PROC_RMDIR, 227191783Srmacklem NFSV2PROC_RENAME, 228191783Srmacklem NFSV2PROC_LINK, 229191783Srmacklem NFSV2PROC_READDIR, 230191783Srmacklem NFSV2PROC_NOOP, 231191783Srmacklem NFSV2PROC_STATFS, 232191783Srmacklem NFSV2PROC_NOOP, 233191783Srmacklem NFSV2PROC_NOOP, 234191783Srmacklem NFSV2PROC_NOOP, 235191783Srmacklem}; 236191783Srmacklem 237255532Srmacklem#define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE) 238191783Srmacklem#define NFSRCUDPHASH(xid) \ 239255532Srmacklem (&nfsrvudphashtbl[nfsrc_hash(xid)]) 240191783Srmacklem#define NFSRCHASH(xid) \ 241255532Srmacklem (&nfsrchash_table[nfsrc_hash(xid)].tbl) 242261067Smav#define NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)]) 243191783Srmacklem#define TRUE 1 244191783Srmacklem#define FALSE 0 245191783Srmacklem#define NFSRVCACHE_CHECKLEN 100 246191783Srmacklem 247191783Srmacklem/* True iff the rpc reply is an nfs status ONLY! */ 248191783Srmacklemstatic int nfsv2_repstat[NFS_V3NPROCS] = { 249191783Srmacklem FALSE, 250191783Srmacklem FALSE, 251191783Srmacklem FALSE, 252191783Srmacklem FALSE, 253191783Srmacklem FALSE, 254191783Srmacklem FALSE, 255191783Srmacklem FALSE, 256191783Srmacklem FALSE, 257191783Srmacklem FALSE, 258191783Srmacklem FALSE, 259191783Srmacklem TRUE, 260191783Srmacklem TRUE, 261191783Srmacklem TRUE, 262191783Srmacklem TRUE, 263191783Srmacklem FALSE, 264191783Srmacklem TRUE, 265191783Srmacklem FALSE, 266191783Srmacklem FALSE, 267191783Srmacklem FALSE, 268191783Srmacklem FALSE, 269191783Srmacklem FALSE, 270191783Srmacklem FALSE, 271191783Srmacklem}; 272191783Srmacklem 273191783Srmacklem/* 274191783Srmacklem * Will NFS want to work over IPv6 someday? 275191783Srmacklem */ 276191783Srmacklem#define NETFAMILY(rp) \ 277191783Srmacklem (((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET) 278191783Srmacklem 279191783Srmacklem/* local functions */ 280191783Srmacklemstatic int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); 281191783Srmacklemstatic int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); 282191783Srmacklemstatic void nfsrc_lock(struct nfsrvcache *rp); 283191783Srmacklemstatic void nfsrc_unlock(struct nfsrvcache *rp); 284191783Srmacklemstatic void nfsrc_wanted(struct nfsrvcache *rp); 285191783Srmacklemstatic void nfsrc_freecache(struct nfsrvcache *rp); 286191783Srmacklemstatic int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum); 287191783Srmacklemstatic void nfsrc_marksametcpconn(u_int64_t); 288191783Srmacklem 289191783Srmacklem/* 290255532Srmacklem * Return the correct mutex for this cache entry. 291255532Srmacklem */ 292255532Srmacklemstatic __inline struct mtx * 293255532Srmacklemnfsrc_cachemutex(struct nfsrvcache *rp) 294255532Srmacklem{ 295255532Srmacklem 296255532Srmacklem if ((rp->rc_flag & RC_UDP) != 0) 297255532Srmacklem return (&nfsrc_udpmtx); 298255532Srmacklem return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx); 299255532Srmacklem} 300255532Srmacklem 301255532Srmacklem/* 302191783Srmacklem * Initialize the server request cache list 303191783Srmacklem */ 304191783SrmacklemAPPLESTATIC void 305191783Srmacklemnfsrvd_initcache(void) 306191783Srmacklem{ 307191783Srmacklem int i; 308191783Srmacklem static int inited = 0; 309191783Srmacklem 310191783Srmacklem if (inited) 311191783Srmacklem return; 312191783Srmacklem inited = 1; 313191783Srmacklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 314191783Srmacklem LIST_INIT(&nfsrvudphashtbl[i]); 315255532Srmacklem LIST_INIT(&nfsrchash_table[i].tbl); 316261067Smav LIST_INIT(&nfsrcahash_table[i].tbl); 317191783Srmacklem } 318191783Srmacklem TAILQ_INIT(&nfsrvudplru); 319191783Srmacklem nfsrc_tcpsavedreplies = 0; 320191783Srmacklem nfsrc_udpcachesize = 0; 321191783Srmacklem newnfsstats.srvcache_tcppeak = 0; 322191783Srmacklem newnfsstats.srvcache_size = 0; 323191783Srmacklem} 324191783Srmacklem 325191783Srmacklem/* 326191783Srmacklem * Get a cache entry for this request. Basically just malloc a new one 327191783Srmacklem * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest. 328191783Srmacklem */ 329191783SrmacklemAPPLESTATIC int 330261067Smavnfsrvd_getcache(struct nfsrv_descript *nd) 331191783Srmacklem{ 332191783Srmacklem struct nfsrvcache *newrp; 333191783Srmacklem int ret; 334191783Srmacklem 335191783Srmacklem if (nd->nd_procnum == NFSPROC_NULL) 336191783Srmacklem panic("nfsd cache null"); 337191783Srmacklem MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache), 338191783Srmacklem M_NFSRVCACHE, M_WAITOK); 339191783Srmacklem NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache)); 340191783Srmacklem if (nd->nd_flag & ND_NFSV4) 341191783Srmacklem newrp->rc_flag = RC_NFSV4; 342191783Srmacklem else if (nd->nd_flag & ND_NFSV3) 343191783Srmacklem newrp->rc_flag = RC_NFSV3; 344191783Srmacklem else 345191783Srmacklem newrp->rc_flag = RC_NFSV2; 346191783Srmacklem newrp->rc_xid = nd->nd_retxid; 347191783Srmacklem newrp->rc_proc = nd->nd_procnum; 348191783Srmacklem newrp->rc_sockref = nd->nd_sockref; 349191783Srmacklem newrp->rc_cachetime = nd->nd_tcpconntime; 350191783Srmacklem if (nd->nd_flag & ND_SAMETCPCONN) 351191783Srmacklem newrp->rc_flag |= RC_SAMETCPCONN; 352191783Srmacklem if (nd->nd_nam2 != NULL) { 353191783Srmacklem newrp->rc_flag |= RC_UDP; 354191783Srmacklem ret = nfsrc_getudp(nd, newrp); 355191783Srmacklem } else { 356191783Srmacklem ret = nfsrc_gettcp(nd, newrp); 357191783Srmacklem } 358224086Szack NFSEXITCODE2(0, nd); 359191783Srmacklem return (ret); 360191783Srmacklem} 361191783Srmacklem 362191783Srmacklem/* 363191783Srmacklem * For UDP (v2, v3): 364191783Srmacklem * - key on <xid, NFS version, RPC#, Client host ip#> 365191783Srmacklem * (at most one entry for each key) 366191783Srmacklem */ 367191783Srmacklemstatic int 368191783Srmacklemnfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) 369191783Srmacklem{ 370191783Srmacklem struct nfsrvcache *rp; 371191783Srmacklem struct sockaddr_in *saddr; 372191783Srmacklem struct sockaddr_in6 *saddr6; 373191783Srmacklem struct nfsrvhashhead *hp; 374191783Srmacklem int ret = 0; 375255532Srmacklem struct mtx *mutex; 376191783Srmacklem 377255532Srmacklem mutex = nfsrc_cachemutex(newrp); 378191783Srmacklem hp = NFSRCUDPHASH(newrp->rc_xid); 379191783Srmacklemloop: 380255532Srmacklem mtx_lock(mutex); 381191783Srmacklem LIST_FOREACH(rp, hp, rc_hash) { 382191783Srmacklem if (newrp->rc_xid == rp->rc_xid && 383191783Srmacklem newrp->rc_proc == rp->rc_proc && 384191783Srmacklem (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && 385191783Srmacklem nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { 386191783Srmacklem if ((rp->rc_flag & RC_LOCKED) != 0) { 387191783Srmacklem rp->rc_flag |= RC_WANTED; 388255532Srmacklem (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, 389255532Srmacklem "nfsrc", 10 * hz); 390191783Srmacklem goto loop; 391191783Srmacklem } 392191783Srmacklem if (rp->rc_flag == 0) 393191783Srmacklem panic("nfs udp cache0"); 394191783Srmacklem rp->rc_flag |= RC_LOCKED; 395191783Srmacklem TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 396191783Srmacklem TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); 397191783Srmacklem if (rp->rc_flag & RC_INPROG) { 398191783Srmacklem newnfsstats.srvcache_inproghits++; 399255532Srmacklem mtx_unlock(mutex); 400191783Srmacklem ret = RC_DROPIT; 401191783Srmacklem } else if (rp->rc_flag & RC_REPSTATUS) { 402191783Srmacklem /* 403191783Srmacklem * V2 only. 404191783Srmacklem */ 405191783Srmacklem newnfsstats.srvcache_nonidemdonehits++; 406255532Srmacklem mtx_unlock(mutex); 407191783Srmacklem nfsrvd_rephead(nd); 408191783Srmacklem *(nd->nd_errp) = rp->rc_status; 409191783Srmacklem ret = RC_REPLY; 410191783Srmacklem rp->rc_timestamp = NFSD_MONOSEC + 411191783Srmacklem NFSRVCACHE_UDPTIMEOUT; 412191783Srmacklem } else if (rp->rc_flag & RC_REPMBUF) { 413191783Srmacklem newnfsstats.srvcache_nonidemdonehits++; 414255532Srmacklem mtx_unlock(mutex); 415191783Srmacklem nd->nd_mreq = m_copym(rp->rc_reply, 0, 416191783Srmacklem M_COPYALL, M_WAIT); 417191783Srmacklem ret = RC_REPLY; 418191783Srmacklem rp->rc_timestamp = NFSD_MONOSEC + 419191783Srmacklem NFSRVCACHE_UDPTIMEOUT; 420191783Srmacklem } else { 421191783Srmacklem panic("nfs udp cache1"); 422191783Srmacklem } 423191783Srmacklem nfsrc_unlock(rp); 424191783Srmacklem free((caddr_t)newrp, M_NFSRVCACHE); 425224086Szack goto out; 426191783Srmacklem } 427191783Srmacklem } 428191783Srmacklem newnfsstats.srvcache_misses++; 429255532Srmacklem atomic_add_int(&newnfsstats.srvcache_size, 1); 430191783Srmacklem nfsrc_udpcachesize++; 431191783Srmacklem 432191783Srmacklem newrp->rc_flag |= RC_INPROG; 433191783Srmacklem saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 434191783Srmacklem if (saddr->sin_family == AF_INET) 435191783Srmacklem newrp->rc_inet = saddr->sin_addr.s_addr; 436191783Srmacklem else if (saddr->sin_family == AF_INET6) { 437191783Srmacklem saddr6 = (struct sockaddr_in6 *)saddr; 438203848Srmacklem NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6, 439203848Srmacklem sizeof (struct in6_addr)); 440203848Srmacklem newrp->rc_flag |= RC_INETIPV6; 441191783Srmacklem } 442191783Srmacklem LIST_INSERT_HEAD(hp, newrp, rc_hash); 443191783Srmacklem TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); 444255532Srmacklem mtx_unlock(mutex); 445191783Srmacklem nd->nd_rp = newrp; 446224086Szack ret = RC_DOIT; 447224086Szack 448224086Szackout: 449224086Szack NFSEXITCODE2(0, nd); 450224086Szack return (ret); 451191783Srmacklem} 452191783Srmacklem 453191783Srmacklem/* 454191783Srmacklem * Update a request cache entry after the rpc has been done 455191783Srmacklem */ 456191783SrmacklemAPPLESTATIC struct nfsrvcache * 457261067Smavnfsrvd_updatecache(struct nfsrv_descript *nd) 458191783Srmacklem{ 459191783Srmacklem struct nfsrvcache *rp; 460191783Srmacklem struct nfsrvcache *retrp = NULL; 461223312Srmacklem mbuf_t m; 462255532Srmacklem struct mtx *mutex; 463191783Srmacklem 464191783Srmacklem rp = nd->nd_rp; 465191783Srmacklem if (!rp) 466191783Srmacklem panic("nfsrvd_updatecache null rp"); 467191783Srmacklem nd->nd_rp = NULL; 468255532Srmacklem mutex = nfsrc_cachemutex(rp); 469255532Srmacklem mtx_lock(mutex); 470191783Srmacklem nfsrc_lock(rp); 471191783Srmacklem if (!(rp->rc_flag & RC_INPROG)) 472191783Srmacklem panic("nfsrvd_updatecache not inprog"); 473191783Srmacklem rp->rc_flag &= ~RC_INPROG; 474191783Srmacklem if (rp->rc_flag & RC_UDP) { 475191783Srmacklem TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 476191783Srmacklem TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); 477191783Srmacklem } 478191783Srmacklem 479191783Srmacklem /* 480191783Srmacklem * Reply from cache is a special case returned by nfsrv_checkseqid(). 481191783Srmacklem */ 482191783Srmacklem if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { 483191783Srmacklem newnfsstats.srvcache_nonidemdonehits++; 484255532Srmacklem mtx_unlock(mutex); 485191783Srmacklem nd->nd_repstat = 0; 486191783Srmacklem if (nd->nd_mreq) 487191783Srmacklem mbuf_freem(nd->nd_mreq); 488191783Srmacklem if (!(rp->rc_flag & RC_REPMBUF)) 489191783Srmacklem panic("reply from cache"); 490191783Srmacklem nd->nd_mreq = m_copym(rp->rc_reply, 0, 491191783Srmacklem M_COPYALL, M_WAIT); 492255532Srmacklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 493191783Srmacklem nfsrc_unlock(rp); 494224086Szack goto out; 495191783Srmacklem } 496191783Srmacklem 497191783Srmacklem /* 498191783Srmacklem * If rc_refcnt > 0, save it 499191783Srmacklem * For UDP, save it if ND_SAVEREPLY is set 500191783Srmacklem * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set 501191783Srmacklem */ 502191783Srmacklem if (nd->nd_repstat != NFSERR_DONTREPLY && 503191783Srmacklem (rp->rc_refcnt > 0 || 504191783Srmacklem ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) || 505191783Srmacklem ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) && 506191783Srmacklem nfsrc_tcpsavedreplies <= nfsrc_floodlevel && 507191783Srmacklem nfsrc_tcpnonidempotent))) { 508191783Srmacklem if (rp->rc_refcnt > 0) { 509191783Srmacklem if (!(rp->rc_flag & RC_NFSV4)) 510191783Srmacklem panic("update_cache refcnt"); 511191783Srmacklem rp->rc_flag |= RC_REFCNT; 512191783Srmacklem } 513191783Srmacklem if ((nd->nd_flag & ND_NFSV2) && 514191783Srmacklem nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) { 515191783Srmacklem rp->rc_status = nd->nd_repstat; 516191783Srmacklem rp->rc_flag |= RC_REPSTATUS; 517255532Srmacklem mtx_unlock(mutex); 518191783Srmacklem } else { 519191783Srmacklem if (!(rp->rc_flag & RC_UDP)) { 520255532Srmacklem atomic_add_int(&nfsrc_tcpsavedreplies, 1); 521191783Srmacklem if (nfsrc_tcpsavedreplies > 522191783Srmacklem newnfsstats.srvcache_tcppeak) 523191783Srmacklem newnfsstats.srvcache_tcppeak = 524191783Srmacklem nfsrc_tcpsavedreplies; 525191783Srmacklem } 526255532Srmacklem mtx_unlock(mutex); 527223312Srmacklem m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAIT); 528255532Srmacklem mtx_lock(mutex); 529223312Srmacklem rp->rc_reply = m; 530191783Srmacklem rp->rc_flag |= RC_REPMBUF; 531255532Srmacklem mtx_unlock(mutex); 532191783Srmacklem } 533191783Srmacklem if (rp->rc_flag & RC_UDP) { 534191783Srmacklem rp->rc_timestamp = NFSD_MONOSEC + 535191783Srmacklem NFSRVCACHE_UDPTIMEOUT; 536191783Srmacklem nfsrc_unlock(rp); 537191783Srmacklem } else { 538255532Srmacklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 539191783Srmacklem if (rp->rc_refcnt > 0) 540191783Srmacklem nfsrc_unlock(rp); 541191783Srmacklem else 542191783Srmacklem retrp = rp; 543191783Srmacklem } 544191783Srmacklem } else { 545191783Srmacklem nfsrc_freecache(rp); 546255532Srmacklem mtx_unlock(mutex); 547191783Srmacklem } 548224086Szack 549224086Szackout: 550224086Szack NFSEXITCODE2(0, nd); 551191783Srmacklem return (retrp); 552191783Srmacklem} 553191783Srmacklem 554191783Srmacklem/* 555191783Srmacklem * Invalidate and, if possible, free an in prog cache entry. 556191783Srmacklem * Must not sleep. 557191783Srmacklem */ 558191783SrmacklemAPPLESTATIC void 559191783Srmacklemnfsrvd_delcache(struct nfsrvcache *rp) 560191783Srmacklem{ 561255532Srmacklem struct mtx *mutex; 562191783Srmacklem 563255532Srmacklem mutex = nfsrc_cachemutex(rp); 564191783Srmacklem if (!(rp->rc_flag & RC_INPROG)) 565191783Srmacklem panic("nfsrvd_delcache not in prog"); 566255532Srmacklem mtx_lock(mutex); 567191783Srmacklem rp->rc_flag &= ~RC_INPROG; 568191783Srmacklem if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED)) 569191783Srmacklem nfsrc_freecache(rp); 570255532Srmacklem mtx_unlock(mutex); 571191783Srmacklem} 572191783Srmacklem 573191783Srmacklem/* 574191783Srmacklem * Called after nfsrvd_updatecache() once the reply is sent, to update 575261067Smav * the entry's sequence number and unlock it. The argument is 576191783Srmacklem * the pointer returned by nfsrvd_updatecache(). 577191783Srmacklem */ 578191783SrmacklemAPPLESTATIC void 579261067Smavnfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq) 580191783Srmacklem{ 581261067Smav struct nfsrchash_bucket *hbp; 582191783Srmacklem 583261067Smav KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked")); 584261067Smav if (have_seq) { 585261067Smav hbp = NFSRCAHASH(rp->rc_sockref); 586261067Smav mtx_lock(&hbp->mtx); 587261067Smav rp->rc_tcpseq = seq; 588261067Smav if (rp->rc_acked != RC_NO_ACK) 589261067Smav LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash); 590261067Smav rp->rc_acked = RC_NO_ACK; 591261067Smav mtx_unlock(&hbp->mtx); 592191783Srmacklem } 593191783Srmacklem nfsrc_unlock(rp); 594191783Srmacklem} 595191783Srmacklem 596191783Srmacklem/* 597191783Srmacklem * Get a cache entry for TCP 598191783Srmacklem * - key on <xid, nfs version> 599191783Srmacklem * (allow multiple entries for a given key) 600191783Srmacklem */ 601191783Srmacklemstatic int 602191783Srmacklemnfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) 603191783Srmacklem{ 604191783Srmacklem struct nfsrvcache *rp, *nextrp; 605191783Srmacklem int i; 606191783Srmacklem struct nfsrvcache *hitrp; 607191783Srmacklem struct nfsrvhashhead *hp, nfsrc_templist; 608191783Srmacklem int hit, ret = 0; 609255532Srmacklem struct mtx *mutex; 610191783Srmacklem 611255532Srmacklem mutex = nfsrc_cachemutex(newrp); 612191783Srmacklem hp = NFSRCHASH(newrp->rc_xid); 613191783Srmacklem newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum); 614191783Srmacklemtryagain: 615255532Srmacklem mtx_lock(mutex); 616191783Srmacklem hit = 1; 617191783Srmacklem LIST_INIT(&nfsrc_templist); 618191783Srmacklem /* 619191783Srmacklem * Get all the matches and put them on the temp list. 620191783Srmacklem */ 621191783Srmacklem rp = LIST_FIRST(hp); 622191783Srmacklem while (rp != LIST_END(hp)) { 623191783Srmacklem nextrp = LIST_NEXT(rp, rc_hash); 624191783Srmacklem if (newrp->rc_xid == rp->rc_xid && 625191783Srmacklem (!(rp->rc_flag & RC_INPROG) || 626191783Srmacklem ((newrp->rc_flag & RC_SAMETCPCONN) && 627191783Srmacklem newrp->rc_sockref == rp->rc_sockref)) && 628191783Srmacklem (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && 629191783Srmacklem newrp->rc_proc == rp->rc_proc && 630191783Srmacklem ((newrp->rc_flag & RC_NFSV4) && 631191783Srmacklem newrp->rc_sockref != rp->rc_sockref && 632191783Srmacklem newrp->rc_cachetime >= rp->rc_cachetime) 633191783Srmacklem && newrp->rc_reqlen == rp->rc_reqlen && 634191783Srmacklem newrp->rc_cksum == rp->rc_cksum) { 635191783Srmacklem LIST_REMOVE(rp, rc_hash); 636191783Srmacklem LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash); 637191783Srmacklem } 638191783Srmacklem rp = nextrp; 639191783Srmacklem } 640191783Srmacklem 641191783Srmacklem /* 642191783Srmacklem * Now, use nfsrc_templist to decide if there is a match. 643191783Srmacklem */ 644191783Srmacklem i = 0; 645191783Srmacklem LIST_FOREACH(rp, &nfsrc_templist, rc_hash) { 646191783Srmacklem i++; 647191783Srmacklem if (rp->rc_refcnt > 0) { 648191783Srmacklem hit = 0; 649191783Srmacklem break; 650191783Srmacklem } 651191783Srmacklem } 652191783Srmacklem /* 653191783Srmacklem * Can be a hit only if one entry left. 654191783Srmacklem * Note possible hit entry and put nfsrc_templist back on hash 655191783Srmacklem * list. 656191783Srmacklem */ 657191783Srmacklem if (i != 1) 658191783Srmacklem hit = 0; 659191783Srmacklem hitrp = rp = LIST_FIRST(&nfsrc_templist); 660191783Srmacklem while (rp != LIST_END(&nfsrc_templist)) { 661191783Srmacklem nextrp = LIST_NEXT(rp, rc_hash); 662191783Srmacklem LIST_REMOVE(rp, rc_hash); 663191783Srmacklem LIST_INSERT_HEAD(hp, rp, rc_hash); 664191783Srmacklem rp = nextrp; 665191783Srmacklem } 666191783Srmacklem if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist)) 667191783Srmacklem panic("nfs gettcp cache templist"); 668191783Srmacklem 669191783Srmacklem if (hit) { 670191783Srmacklem rp = hitrp; 671191783Srmacklem if ((rp->rc_flag & RC_LOCKED) != 0) { 672191783Srmacklem rp->rc_flag |= RC_WANTED; 673255532Srmacklem (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, 674255532Srmacklem "nfsrc", 10 * hz); 675191783Srmacklem goto tryagain; 676191783Srmacklem } 677191783Srmacklem if (rp->rc_flag == 0) 678191783Srmacklem panic("nfs tcp cache0"); 679191783Srmacklem rp->rc_flag |= RC_LOCKED; 680191783Srmacklem if (rp->rc_flag & RC_INPROG) { 681191783Srmacklem newnfsstats.srvcache_inproghits++; 682255532Srmacklem mtx_unlock(mutex); 683191783Srmacklem if (newrp->rc_sockref == rp->rc_sockref) 684191783Srmacklem nfsrc_marksametcpconn(rp->rc_sockref); 685191783Srmacklem ret = RC_DROPIT; 686191783Srmacklem } else if (rp->rc_flag & RC_REPSTATUS) { 687191783Srmacklem /* 688191783Srmacklem * V2 only. 689191783Srmacklem */ 690191783Srmacklem newnfsstats.srvcache_nonidemdonehits++; 691255532Srmacklem mtx_unlock(mutex); 692191783Srmacklem if (newrp->rc_sockref == rp->rc_sockref) 693191783Srmacklem nfsrc_marksametcpconn(rp->rc_sockref); 694191783Srmacklem ret = RC_REPLY; 695191783Srmacklem nfsrvd_rephead(nd); 696191783Srmacklem *(nd->nd_errp) = rp->rc_status; 697255532Srmacklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 698191783Srmacklem } else if (rp->rc_flag & RC_REPMBUF) { 699191783Srmacklem newnfsstats.srvcache_nonidemdonehits++; 700255532Srmacklem mtx_unlock(mutex); 701191783Srmacklem if (newrp->rc_sockref == rp->rc_sockref) 702191783Srmacklem nfsrc_marksametcpconn(rp->rc_sockref); 703191783Srmacklem ret = RC_REPLY; 704191783Srmacklem nd->nd_mreq = m_copym(rp->rc_reply, 0, 705191783Srmacklem M_COPYALL, M_WAIT); 706255532Srmacklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 707191783Srmacklem } else { 708191783Srmacklem panic("nfs tcp cache1"); 709191783Srmacklem } 710191783Srmacklem nfsrc_unlock(rp); 711191783Srmacklem free((caddr_t)newrp, M_NFSRVCACHE); 712224086Szack goto out; 713191783Srmacklem } 714191783Srmacklem newnfsstats.srvcache_misses++; 715255532Srmacklem atomic_add_int(&newnfsstats.srvcache_size, 1); 716191783Srmacklem 717191783Srmacklem /* 718191783Srmacklem * For TCP, multiple entries for a key are allowed, so don't 719191783Srmacklem * chain it into the hash table until done. 720191783Srmacklem */ 721191783Srmacklem newrp->rc_cachetime = NFSD_MONOSEC; 722191783Srmacklem newrp->rc_flag |= RC_INPROG; 723191783Srmacklem LIST_INSERT_HEAD(hp, newrp, rc_hash); 724255532Srmacklem mtx_unlock(mutex); 725191783Srmacklem nd->nd_rp = newrp; 726224086Szack ret = RC_DOIT; 727224086Szack 728224086Szackout: 729224086Szack NFSEXITCODE2(0, nd); 730224086Szack return (ret); 731191783Srmacklem} 732191783Srmacklem 733191783Srmacklem/* 734191783Srmacklem * Lock a cache entry. 735191783Srmacklem */ 736191783Srmacklemstatic void 737191783Srmacklemnfsrc_lock(struct nfsrvcache *rp) 738191783Srmacklem{ 739255532Srmacklem struct mtx *mutex; 740255532Srmacklem 741255532Srmacklem mutex = nfsrc_cachemutex(rp); 742255532Srmacklem mtx_assert(mutex, MA_OWNED); 743191783Srmacklem while ((rp->rc_flag & RC_LOCKED) != 0) { 744191783Srmacklem rp->rc_flag |= RC_WANTED; 745255532Srmacklem (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0); 746191783Srmacklem } 747191783Srmacklem rp->rc_flag |= RC_LOCKED; 748191783Srmacklem} 749191783Srmacklem 750191783Srmacklem/* 751191783Srmacklem * Unlock a cache entry. 752191783Srmacklem */ 753191783Srmacklemstatic void 754191783Srmacklemnfsrc_unlock(struct nfsrvcache *rp) 755191783Srmacklem{ 756255532Srmacklem struct mtx *mutex; 757223312Srmacklem 758255532Srmacklem mutex = nfsrc_cachemutex(rp); 759255532Srmacklem mtx_lock(mutex); 760191783Srmacklem rp->rc_flag &= ~RC_LOCKED; 761191783Srmacklem nfsrc_wanted(rp); 762255532Srmacklem mtx_unlock(mutex); 763191783Srmacklem} 764191783Srmacklem 765191783Srmacklem/* 766191783Srmacklem * Wakeup anyone wanting entry. 767191783Srmacklem */ 768191783Srmacklemstatic void 769191783Srmacklemnfsrc_wanted(struct nfsrvcache *rp) 770191783Srmacklem{ 771191783Srmacklem if (rp->rc_flag & RC_WANTED) { 772191783Srmacklem rp->rc_flag &= ~RC_WANTED; 773191783Srmacklem wakeup((caddr_t)rp); 774191783Srmacklem } 775191783Srmacklem} 776191783Srmacklem 777191783Srmacklem/* 778191783Srmacklem * Free up the entry. 779191783Srmacklem * Must not sleep. 780191783Srmacklem */ 781191783Srmacklemstatic void 782191783Srmacklemnfsrc_freecache(struct nfsrvcache *rp) 783191783Srmacklem{ 784261067Smav struct nfsrchash_bucket *hbp; 785191783Srmacklem 786191783Srmacklem LIST_REMOVE(rp, rc_hash); 787191783Srmacklem if (rp->rc_flag & RC_UDP) { 788191783Srmacklem TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 789191783Srmacklem nfsrc_udpcachesize--; 790261067Smav } else if (rp->rc_acked != RC_NO_SEQ) { 791261067Smav hbp = NFSRCAHASH(rp->rc_sockref); 792261067Smav mtx_lock(&hbp->mtx); 793261067Smav if (rp->rc_acked == RC_NO_ACK) 794261067Smav LIST_REMOVE(rp, rc_ahash); 795261067Smav mtx_unlock(&hbp->mtx); 796191783Srmacklem } 797191783Srmacklem nfsrc_wanted(rp); 798191783Srmacklem if (rp->rc_flag & RC_REPMBUF) { 799191783Srmacklem mbuf_freem(rp->rc_reply); 800191783Srmacklem if (!(rp->rc_flag & RC_UDP)) 801255532Srmacklem atomic_add_int(&nfsrc_tcpsavedreplies, -1); 802191783Srmacklem } 803191783Srmacklem FREE((caddr_t)rp, M_NFSRVCACHE); 804255532Srmacklem atomic_add_int(&newnfsstats.srvcache_size, -1); 805191783Srmacklem} 806191783Srmacklem 807191783Srmacklem/* 808217335Szack * Clean out the cache. Called when nfsserver module is unloaded. 809191783Srmacklem */ 810191783SrmacklemAPPLESTATIC void 811191783Srmacklemnfsrvd_cleancache(void) 812191783Srmacklem{ 813191783Srmacklem struct nfsrvcache *rp, *nextrp; 814191783Srmacklem int i; 815191783Srmacklem 816191783Srmacklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 817255532Srmacklem mtx_lock(&nfsrchash_table[i].mtx); 818255532Srmacklem LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) 819191783Srmacklem nfsrc_freecache(rp); 820255532Srmacklem mtx_unlock(&nfsrchash_table[i].mtx); 821191783Srmacklem } 822255532Srmacklem mtx_lock(&nfsrc_udpmtx); 823191783Srmacklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 824191783Srmacklem LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) { 825191783Srmacklem nfsrc_freecache(rp); 826191783Srmacklem } 827191783Srmacklem } 828191783Srmacklem newnfsstats.srvcache_size = 0; 829255532Srmacklem mtx_unlock(&nfsrc_udpmtx); 830191783Srmacklem nfsrc_tcpsavedreplies = 0; 831191783Srmacklem} 832191783Srmacklem 833261063Smav#define HISTSIZE 16 834191783Srmacklem/* 835191783Srmacklem * The basic rule is to get rid of entries that are expired. 836191783Srmacklem */ 837261067Smavvoid 838261067Smavnfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) 839191783Srmacklem{ 840261067Smav struct nfsrchash_bucket *hbp; 841191783Srmacklem struct nfsrvcache *rp, *nextrp; 842261067Smav int force, lastslot, i, j, k, tto, time_histo[HISTSIZE]; 843255532Srmacklem time_t thisstamp; 844255532Srmacklem static time_t udp_lasttrim = 0, tcp_lasttrim = 0; 845261067Smav static int onethread = 0, oneslot = 0; 846191783Srmacklem 847261067Smav if (sockref != 0) { 848261067Smav hbp = NFSRCAHASH(sockref); 849261067Smav mtx_lock(&hbp->mtx); 850261067Smav LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) { 851261067Smav if (sockref == rp->rc_sockref) { 852261067Smav if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) { 853261067Smav rp->rc_acked = RC_ACK; 854261067Smav LIST_REMOVE(rp, rc_ahash); 855261067Smav } else if (final) { 856261067Smav rp->rc_acked = RC_NACK; 857261067Smav LIST_REMOVE(rp, rc_ahash); 858261067Smav } 859261067Smav } 860261067Smav } 861261067Smav mtx_unlock(&hbp->mtx); 862261067Smav } 863261067Smav 864255532Srmacklem if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0) 865255532Srmacklem return; 866255532Srmacklem if (NFSD_MONOSEC != udp_lasttrim || 867255532Srmacklem nfsrc_udpcachesize >= (nfsrc_udphighwater + 868255532Srmacklem nfsrc_udphighwater / 2)) { 869255532Srmacklem mtx_lock(&nfsrc_udpmtx); 870255532Srmacklem udp_lasttrim = NFSD_MONOSEC; 871255532Srmacklem TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { 872191783Srmacklem if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) 873191783Srmacklem && rp->rc_refcnt == 0 874191783Srmacklem && ((rp->rc_flag & RC_REFCNT) || 875255532Srmacklem udp_lasttrim > rp->rc_timestamp || 876255532Srmacklem nfsrc_udpcachesize > nfsrc_udphighwater)) 877191783Srmacklem nfsrc_freecache(rp); 878191783Srmacklem } 879255532Srmacklem mtx_unlock(&nfsrc_udpmtx); 880191783Srmacklem } 881255532Srmacklem if (NFSD_MONOSEC != tcp_lasttrim || 882255532Srmacklem nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) { 883261067Smav force = nfsrc_tcphighwater / 4; 884261067Smav if (force > 0 && 885261067Smav nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) { 886261067Smav for (i = 0; i < HISTSIZE; i++) 887261067Smav time_histo[i] = 0; 888261067Smav i = 0; 889261067Smav lastslot = NFSRVCACHE_HASHSIZE - 1; 890261067Smav } else { 891261067Smav force = 0; 892261067Smav if (NFSD_MONOSEC != tcp_lasttrim) { 893261067Smav i = 0; 894261067Smav lastslot = NFSRVCACHE_HASHSIZE - 1; 895261067Smav } else { 896261067Smav lastslot = i = oneslot; 897261067Smav if (++oneslot >= NFSRVCACHE_HASHSIZE) 898261067Smav oneslot = 0; 899261067Smav } 900261067Smav } 901261063Smav tto = nfsrc_tcptimeout; 902261067Smav tcp_lasttrim = NFSD_MONOSEC; 903261067Smav for (; i <= lastslot; i++) { 904255532Srmacklem mtx_lock(&nfsrchash_table[i].mtx); 905255532Srmacklem LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, 906255532Srmacklem nextrp) { 907255532Srmacklem if (!(rp->rc_flag & 908255532Srmacklem (RC_INPROG|RC_LOCKED|RC_WANTED)) 909255532Srmacklem && rp->rc_refcnt == 0) { 910261063Smav if ((rp->rc_flag & RC_REFCNT) || 911261063Smav tcp_lasttrim > rp->rc_timestamp || 912261067Smav rp->rc_acked == RC_ACK) { 913261063Smav nfsrc_freecache(rp); 914261063Smav continue; 915261063Smav } 916261063Smav 917261067Smav if (force == 0) 918261063Smav continue; 919255532Srmacklem /* 920255532Srmacklem * The timestamps range from roughly the 921255532Srmacklem * present (tcp_lasttrim) to the present 922255532Srmacklem * + nfsrc_tcptimeout. Generate a simple 923255532Srmacklem * histogram of where the timeouts fall. 924255532Srmacklem */ 925255532Srmacklem j = rp->rc_timestamp - tcp_lasttrim; 926261063Smav if (j >= tto) 927261063Smav j = HISTSIZE - 1; 928261063Smav else if (j < 0) 929255532Srmacklem j = 0; 930261063Smav else 931261063Smav j = j * HISTSIZE / tto; 932255532Srmacklem time_histo[j]++; 933255532Srmacklem } 934255532Srmacklem } 935255532Srmacklem mtx_unlock(&nfsrchash_table[i].mtx); 936255532Srmacklem } 937261067Smav if (force) { 938255532Srmacklem /* 939255532Srmacklem * Trim some more with a smaller timeout of as little 940255532Srmacklem * as 20% of nfsrc_tcptimeout to try and get below 941255532Srmacklem * 80% of the nfsrc_tcphighwater. 942255532Srmacklem */ 943255532Srmacklem k = 0; 944261063Smav for (i = 0; i < (HISTSIZE - 2); i++) { 945255532Srmacklem k += time_histo[i]; 946261067Smav if (k > force) 947255532Srmacklem break; 948255532Srmacklem } 949261063Smav k = tto * (i + 1) / HISTSIZE; 950255532Srmacklem if (k < 1) 951255532Srmacklem k = 1; 952255532Srmacklem thisstamp = tcp_lasttrim + k; 953255532Srmacklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 954255532Srmacklem mtx_lock(&nfsrchash_table[i].mtx); 955255532Srmacklem LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, 956255532Srmacklem rc_hash, nextrp) { 957255532Srmacklem if (!(rp->rc_flag & 958255532Srmacklem (RC_INPROG|RC_LOCKED|RC_WANTED)) 959255532Srmacklem && rp->rc_refcnt == 0 960255532Srmacklem && ((rp->rc_flag & RC_REFCNT) || 961255532Srmacklem thisstamp > rp->rc_timestamp || 962261067Smav rp->rc_acked == RC_ACK)) 963255532Srmacklem nfsrc_freecache(rp); 964255532Srmacklem } 965255532Srmacklem mtx_unlock(&nfsrchash_table[i].mtx); 966255532Srmacklem } 967255532Srmacklem } 968255532Srmacklem } 969255532Srmacklem atomic_store_rel_int(&onethread, 0); 970191783Srmacklem} 971191783Srmacklem 972191783Srmacklem/* 973191783Srmacklem * Add a seqid# reference to the cache entry. 974191783Srmacklem */ 975191783SrmacklemAPPLESTATIC void 976191783Srmacklemnfsrvd_refcache(struct nfsrvcache *rp) 977191783Srmacklem{ 978255532Srmacklem struct mtx *mutex; 979191783Srmacklem 980255532Srmacklem mutex = nfsrc_cachemutex(rp); 981255532Srmacklem mtx_lock(mutex); 982191783Srmacklem if (rp->rc_refcnt < 0) 983191783Srmacklem panic("nfs cache refcnt"); 984191783Srmacklem rp->rc_refcnt++; 985255532Srmacklem mtx_unlock(mutex); 986191783Srmacklem} 987191783Srmacklem 988191783Srmacklem/* 989191783Srmacklem * Dereference a seqid# cache entry. 990191783Srmacklem */ 991191783SrmacklemAPPLESTATIC void 992191783Srmacklemnfsrvd_derefcache(struct nfsrvcache *rp) 993191783Srmacklem{ 994255532Srmacklem struct mtx *mutex; 995191783Srmacklem 996255532Srmacklem mutex = nfsrc_cachemutex(rp); 997255532Srmacklem mtx_lock(mutex); 998191783Srmacklem if (rp->rc_refcnt <= 0) 999191783Srmacklem panic("nfs cache derefcnt"); 1000191783Srmacklem rp->rc_refcnt--; 1001191783Srmacklem if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG))) 1002191783Srmacklem nfsrc_freecache(rp); 1003255532Srmacklem mtx_unlock(mutex); 1004191783Srmacklem} 1005191783Srmacklem 1006191783Srmacklem/* 1007191783Srmacklem * Calculate the length of the mbuf list and a checksum on the first up to 1008191783Srmacklem * NFSRVCACHE_CHECKLEN bytes. 1009191783Srmacklem */ 1010191783Srmacklemstatic int 1011191783Srmacklemnfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum) 1012191783Srmacklem{ 1013191783Srmacklem int len = 0, cklen; 1014191783Srmacklem mbuf_t m; 1015191783Srmacklem 1016191783Srmacklem m = m1; 1017191783Srmacklem while (m) { 1018191783Srmacklem len += mbuf_len(m); 1019191783Srmacklem m = mbuf_next(m); 1020191783Srmacklem } 1021191783Srmacklem cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len; 1022191783Srmacklem *cksum = in_cksum(m1, cklen); 1023191783Srmacklem return (len); 1024191783Srmacklem} 1025191783Srmacklem 1026191783Srmacklem/* 1027191783Srmacklem * Mark a TCP connection that is seeing retries. Should never happen for 1028191783Srmacklem * NFSv4. 1029191783Srmacklem */ 1030191783Srmacklemstatic void 1031191783Srmacklemnfsrc_marksametcpconn(u_int64_t sockref) 1032191783Srmacklem{ 1033191783Srmacklem} 1034191783Srmacklem 1035