1/* $NetBSD: bufcache.c,v 1.12 2008/04/28 20:23:08 martin Exp $ */ 2/*- 3 * Copyright (c) 2003 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/queue.h>
#include <sys/mount.h>

#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>

#include "bufcache.h"
#include "vnode.h"

/*
 * Definitions for the buffer free lists.
 */
#define BQUEUES		3	/* number of free buffer queues */

#define BQ_LOCKED	0	/* super-blocks &c */
#define BQ_LRU		1	/* lru, useful buffers */
#define BQ_AGE		2	/* rubbish */

/* Buffer free queues, indexed by the BQ_* constants above. */
TAILQ_HEAD(bqueues, ubuf) bufqueues[BQUEUES];

/* Hash table over all cached buffers, chained through b_hash. */
struct bufhash_struct *bufhash;

#define HASH_MAX 1024		/* default hash width when bufinit(0) */
int hashmax = HASH_MAX;		/* number of hash chains; always a power of two */
int hashmask = (HASH_MAX - 1);	/* hashmax - 1; mask used by vl_hash() */

int maxbufs = BUF_CACHE_SIZE;	/* soft cap on unlocked cached buffers */
int nbufs = 0;			/* current number of buffers in the cache */
int cachehits = 0;		/* stats: bread() satisfied from the cache */
int cachemisses = 0;		/* stats: bread() had to map/read the block */
int max_depth = 0;		/* stats: longest hash chain walked by incore() */
off_t locked_queue_bytes = 0;	/* total bytes parked on BQ_LOCKED */
int locked_queue_count = 0;	/* number of buffers parked on BQ_LOCKED */

/*
 * Simple buffer hash function: fold the vnode pointer and the logical
 * block number together, then mask down to the table width.  The
 * truncation through "unsigned long"/"int" is harmless here because
 * only the low hashmask bits survive.
 */
static int
vl_hash(struct uvnode * vp, daddr_t lbn)
{
	return (int)((unsigned long) vp + lbn) & hashmask;
}

/*
 * Initialize the buffer cache: empty free queues and an empty hash
 * table.  A nonzero max requests a hash width of max rounded up to a
 * power of two; max == 0 keeps the default HASH_MAX width.
 */
void
bufinit(int max)
{
	int i;

	if (max) {
		/* Round the requested width up to a power of two. */
		for (hashmax = 1; hashmax < max; hashmax <<= 1)
			;
		hashmask = hashmax - 1;
	}

	for (i = 0; i < BQUEUES; i++) {
		TAILQ_INIT(&bufqueues[i]);
	}
	bufhash = emalloc(hashmax * sizeof(*bufhash));
	for (i = 0; i < hashmax; i++)
		LIST_INIT(&bufhash[i]);
}

/* Widen the hash table.
*/ 102void bufrehash(int max) 103{ 104 int i, newhashmax, newhashmask; 105 struct ubuf *bp, *nbp; 106 struct bufhash_struct *np; 107 108 if (max < 0 || max < hashmax) 109 return; 110 111 /* Round up to a power of two */ 112 for (newhashmax = 1; newhashmax < max; newhashmax <<= 1) 113 ; 114 newhashmask = newhashmax - 1; 115 116 /* Allocate new empty hash table, if we can */ 117 np = emalloc(newhashmax * sizeof(*bufhash)); 118 for (i = 0; i < newhashmax; i++) 119 LIST_INIT(&np[i]); 120 121 /* Now reassign all existing buffers to their new hash chains. */ 122 for (i = 0; i < hashmax; i++) { 123 bp = LIST_FIRST(&bufhash[i]); 124 while(bp) { 125 nbp = LIST_NEXT(bp, b_hash); 126 LIST_REMOVE(bp, b_hash); 127 bp->b_hashval = vl_hash(bp->b_vp, bp->b_lblkno); 128 LIST_INSERT_HEAD(&np[bp->b_hashval], bp, b_hash); 129 bp = nbp; 130 } 131 } 132 133 /* Switch over and clean up */ 134 free(bufhash); 135 bufhash = np; 136 hashmax = newhashmax; 137 hashmask = newhashmask; 138} 139 140/* Print statistics of buffer cache usage */ 141void 142bufstats(void) 143{ 144 printf("buffer cache: %d hits %d misses (%2.2f%%); hash width %d, depth %d\n", 145 cachehits, cachemisses, 146 (cachehits * 100.0) / (cachehits + cachemisses), 147 hashmax, max_depth); 148} 149 150/* 151 * Remove a buffer from the cache. 152 * Caller must remove the buffer from its free list. 153 */ 154void 155buf_destroy(struct ubuf * bp) 156{ 157 bp->b_flags |= B_NEEDCOMMIT; 158 LIST_REMOVE(bp, b_vnbufs); 159 LIST_REMOVE(bp, b_hash); 160 if (!(bp->b_flags & B_DONTFREE)) 161 free(bp->b_data); 162 free(bp); 163 --nbufs; 164} 165 166/* Remove a buffer from its free list. */ 167void 168bremfree(struct ubuf * bp) 169{ 170 struct bqueues *dp = NULL; 171 172 /* 173 * We only calculate the head of the freelist when removing 174 * the last element of the list as that is the only time that 175 * it is needed (e.g. to reset the tail pointer). 176 * 177 * NB: This makes an assumption about how tailq's are implemented. 
178 */ 179 if (bp->b_flags & B_LOCKED) { 180 locked_queue_bytes -= bp->b_bcount; 181 --locked_queue_count; 182 } 183 if (TAILQ_NEXT(bp, b_freelist) == NULL) { 184 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 185 if (dp->tqh_last == &bp->b_freelist.tqe_next) 186 break; 187 if (dp == &bufqueues[BQUEUES]) 188 errx(1, "bremfree: lost tail"); 189 } 190 ++bp->b_vp->v_usecount; 191 TAILQ_REMOVE(dp, bp, b_freelist); 192} 193 194/* Return a buffer if it is in the cache, otherwise return NULL. */ 195struct ubuf * 196incore(struct uvnode * vp, int lbn) 197{ 198 struct ubuf *bp; 199 int hash, depth; 200 201 hash = vl_hash(vp, lbn); 202 /* XXX use a real hash instead. */ 203 depth = 0; 204 LIST_FOREACH(bp, &bufhash[hash], b_hash) { 205 if (++depth > max_depth) 206 max_depth = depth; 207 assert(depth <= nbufs); 208 if (bp->b_vp == vp && bp->b_lblkno == lbn) { 209 return bp; 210 } 211 } 212 return NULL; 213} 214 215/* 216 * Return a buffer of the given size, lbn and uvnode. 217 * If none is in core, make a new one. 218 */ 219struct ubuf * 220getblk(struct uvnode * vp, daddr_t lbn, int size) 221{ 222 struct ubuf *bp; 223#ifdef DEBUG 224 static int warned; 225#endif 226 227 /* 228 * First check the buffer cache lists. 229 * We might sometimes need to resize a buffer. If we are growing 230 * the buffer, its contents are invalid; but shrinking is okay. 231 */ 232 if ((bp = incore(vp, lbn)) != NULL) { 233 assert(!(bp->b_flags & B_NEEDCOMMIT)); 234 assert(!(bp->b_flags & B_BUSY)); 235 bp->b_flags |= B_BUSY; 236 bremfree(bp); 237 if (bp->b_bcount == size) 238 return bp; 239 else if (bp->b_bcount > size) { 240 assert(!(bp->b_flags & B_DELWRI)); 241 bp->b_bcount = size; 242 bp->b_data = erealloc(bp->b_data, size); 243 return bp; 244 } 245 246 buf_destroy(bp); 247 bp = NULL; 248 } 249 250 /* 251 * Not on the list. 252 * Get a new block of the appropriate size and use that. 253 * If not enough space, free blocks from the AGE and LRU lists 254 * to make room. 
255 */ 256 while (nbufs >= maxbufs + locked_queue_count) { 257 bp = TAILQ_FIRST(&bufqueues[BQ_AGE]); 258 if (bp) 259 TAILQ_REMOVE(&bufqueues[BQ_AGE], bp, b_freelist); 260 if (bp == NULL) { 261 bp = TAILQ_FIRST(&bufqueues[BQ_LRU]); 262 if (bp) 263 TAILQ_REMOVE(&bufqueues[BQ_LRU], bp, 264 b_freelist); 265 } 266 if (bp) { 267 if (bp->b_flags & B_DELWRI) 268 VOP_STRATEGY(bp); 269 buf_destroy(bp); 270 break; 271 } 272#ifdef DEBUG 273 else if (!warned) { 274 warnx("allocating more than %d buffers", maxbufs); 275 ++warned; 276 } 277#endif 278 break; 279 } 280 ++nbufs; 281 bp = ecalloc(1, sizeof(*bp)); 282 bp->b_data = ecalloc(1, size); 283 bp->b_vp = vp; 284 bp->b_blkno = bp->b_lblkno = lbn; 285 bp->b_bcount = size; 286 bp->b_hashval = vl_hash(vp, lbn); 287 LIST_INSERT_HEAD(&bufhash[bp->b_hashval], bp, b_hash); 288 LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs); 289 bp->b_flags = B_BUSY; 290 291 return bp; 292} 293 294/* Write a buffer to disk according to its strategy routine. */ 295void 296bwrite(struct ubuf * bp) 297{ 298 bp->b_flags &= ~(B_READ | B_DONE | B_DELWRI | B_LOCKED); 299 VOP_STRATEGY(bp); 300 bp->b_flags |= B_DONE; 301 reassignbuf(bp, bp->b_vp); 302 brelse(bp, 0); 303} 304 305/* Put a buffer back on its free list, clear B_BUSY. 
 */
void
brelse(struct ubuf * bp, int set)
{
	int age;

	assert(!(bp->b_flags & B_NEEDCOMMIT));
	assert(bp->b_flags & B_BUSY);

	/* Apply the caller-requested flags (e.g. B_AGE, B_INVAL) first. */
	bp->b_flags |= set;

	/* B_AGE is consumed here: latch it, then clear it with B_BUSY. */
	age = bp->b_flags & B_AGE;
	bp->b_flags &= ~(B_BUSY | B_AGE);
	if (bp->b_flags & B_INVAL) {
		/* Contents declared invalid: drop the buffer entirely. */
		buf_destroy(bp);
		return;
	}
	if (bp->b_flags & B_LOCKED) {
		/* Locked buffers are accounted so getblk() won't evict
		 * past them (bremfree() undoes this). */
		locked_queue_bytes += bp->b_bcount;
		++locked_queue_count;
		TAILQ_INSERT_TAIL(&bufqueues[BQ_LOCKED], bp, b_freelist);
	} else if (age) {
		/* Aged buffers are first in line for eviction. */
		TAILQ_INSERT_TAIL(&bufqueues[BQ_AGE], bp, b_freelist);
	} else {
		TAILQ_INSERT_TAIL(&bufqueues[BQ_LRU], bp, b_freelist);
	}
	--bp->b_vp->v_usecount;

	/* Move to the front of the hash chain (MRU keeps incore() fast). */
	if (LIST_FIRST(&bufhash[bp->b_hashval]) != bp) {
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&bufhash[bp->b_hashval], bp, b_hash);
	}
}

/*
 * Read the given block from disk, return it B_BUSY.
 * The "unused" and "flags" parameters exist for bread(9) signature
 * compatibility and are ignored here.
 */
int
bread(struct uvnode * vp, daddr_t lbn, int size, void * unused,
    int flags, struct ubuf ** bpp)
{
	struct ubuf *bp;
	daddr_t daddr;
	int error;

	bp = getblk(vp, lbn, size);
	*bpp = bp;
	if (bp->b_flags & (B_DELWRI | B_DONE)) {
		/* A valid (dirty or previously read) copy is cached. */
		++cachehits;
		return 0;
	}
	++cachemisses;

	/*
	 * Not found.  Need to find that block's location on disk,
	 * and load it in.
	 */
	daddr = -1;
	error = VOP_BMAP(vp, lbn, &daddr);
	/* NOTE(review): error is never examined — a failing VOP_BMAP is
	 * indistinguishable from an unmapped hole and silently yields a
	 * zero-filled buffer; confirm this is intended. */
	bp->b_blkno = daddr;
	if (daddr >= 0) {
		bp->b_flags |= B_READ;
		return VOP_STRATEGY(bp);
	}
	/* Unmapped block (hole): hand back a zero-filled buffer. */
	memset(bp->b_data, 0, bp->b_bcount);
	return 0;
}

/* Move a buffer between dirty and clean block lists.
*/ 373void 374reassignbuf(struct ubuf * bp, struct uvnode * vp) 375{ 376 LIST_REMOVE(bp, b_vnbufs); 377 if (bp->b_flags & B_DELWRI) { 378 LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs); 379 } else { 380 LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs); 381 } 382} 383 384#ifdef DEBUG 385void 386dump_free_lists(void) 387{ 388 struct ubuf *bp; 389 int i; 390 391 for (i = 0; i <= BQ_LOCKED; i++) { 392 printf("==> free list %d:\n", i); 393 TAILQ_FOREACH(bp, &bufqueues[i], b_freelist) { 394 printf("vp %p lbn %" PRId64 " flags %lx\n", 395 bp->b_vp, bp->b_lblkno, bp->b_flags); 396 } 397 } 398} 399#endif 400