mpool.c revision 8870
1/*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34#if defined(LIBC_SCCS) && !defined(lint) 35static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94"; 36#endif /* LIBC_SCCS and not lint */ 37 38#include <sys/param.h> 39#include <sys/stat.h> 40 41#include <errno.h> 42#include <stdio.h> 43#include <stdlib.h> 44#include <string.h> 45#include <unistd.h> 46 47#include <db.h> 48#define __MPOOLINTERFACE_PRIVATE 49#include "mpool.h" 50 51static BKT *mpool_bkt __P((MPOOL *)); 52static BKT *mpool_look __P((MPOOL *, pgno_t)); 53static int mpool_write __P((MPOOL *, BKT *)); 54#ifdef DEBUG 55static void __mpoolerr __P((const char *fmt, ...)); 56#endif 57 58/* 59 * MPOOL_OPEN -- initialize a memory pool. 60 * 61 * Parameters: 62 * key: Shared buffer key. 63 * fd: File descriptor. 64 * pagesize: File page size. 65 * maxcache: Max number of cached pages. 66 * 67 * Returns: 68 * MPOOL pointer, NULL on error. 69 */ 70MPOOL * 71mpool_open(key, fd, pagesize, maxcache) 72 DBT *key; 73 int fd; 74 pgno_t pagesize, maxcache; 75{ 76 struct stat sb; 77 MPOOL *mp; 78 int entry; 79 80 if (fstat(fd, &sb)) 81 return (NULL); 82 /* XXX 83 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 84 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 85 * fairly close. 86 */ 87 if (!S_ISREG(sb.st_mode)) { 88 errno = ESPIPE; 89 return (NULL); 90 } 91 92 if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL) 93 return (NULL); 94 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 95 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 96 for (entry = 0; entry < HASHSIZE; ++entry) 97 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 98 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 99 (BKT *)&mp->hashtable[entry]; 100 mp->curcache = 0; 101 mp->maxcache = maxcache; 102 mp->pagesize = pagesize; 103 mp->npages = sb.st_size / pagesize; 104 mp->fd = fd; 105 mp->pgcookie = NULL; 106 mp->pgin = mp->pgout = NULL; 107 108#ifdef STATISTICS 109 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 110 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 111 mp->pagewrite = 0; 112#endif 113 return (mp); 114} 115 116/* 117 * MPOOL_FILTER -- initialize input/output filters. 118 * 119 * Parameters: 120 * pgin: Page in conversion routine. 121 * pgout: Page out conversion routine. 122 * pgcookie: Cookie for page in/out routines. 123 */ 124void 125mpool_filter(mp, pgin, pgout, pgcookie) 126 MPOOL *mp; 127 void (*pgin) __P((void *, pgno_t, void *)); 128 void (*pgout) __P((void *, pgno_t, void *)); 129 void *pgcookie; 130{ 131 mp->pgin = pgin; 132 mp->pgout = pgout; 133 mp->pgcookie = pgcookie; 134} 135 136/* 137 * MPOOL_NEW -- get a new page 138 * 139 * Parameters: 140 * mp: mpool cookie 141 * pgnoadddr: place to store new page number 142 * Returns: 143 * RET_ERROR, RET_SUCCESS 144 */ 145void * 146mpool_new(mp, pgnoaddr) 147 MPOOL *mp; 148 pgno_t *pgnoaddr; 149{ 150 BKT *b; 151 BKTHDR *hp; 152 153#ifdef STATISTICS 154 ++mp->pagenew; 155#endif 156 /* 157 * Get a BKT from the cache. Assign a new page number, attach it to 158 * the hash and lru chains and return. 159 */ 160 if ((b = mpool_bkt(mp)) == NULL) 161 return (NULL); 162 *pgnoaddr = b->pgno = mp->npages++; 163 b->flags = MPOOL_PINNED; 164 inshash(b, b->pgno); 165 inschain(b, &mp->lru); 166 return (b->page); 167} 168 169/* 170 * MPOOL_GET -- get a page from the pool 171 * 172 * Parameters: 173 * mp: mpool cookie 174 * pgno: page number 175 * flags: not used 176 * 177 * Returns: 178 * RET_ERROR, RET_SUCCESS 179 */ 180void * 181mpool_get(mp, pgno, flags) 182 MPOOL *mp; 183 pgno_t pgno; 184 u_int flags; /* XXX not used? */ 185{ 186 BKT *b; 187 BKTHDR *hp; 188 off_t off; 189 int nr; 190 191 /* 192 * If asking for a specific page that is already in the cache, find 193 * it and return it. 194 */ 195 if (b = mpool_look(mp, pgno)) { 196#ifdef STATISTICS 197 ++mp->pageget; 198#endif 199#ifdef DEBUG 200 if (b->flags & MPOOL_PINNED) 201 __mpoolerr("mpool_get: page %d already pinned", 202 b->pgno); 203#endif 204 rmchain(b); 205 inschain(b, &mp->lru); 206 b->flags |= MPOOL_PINNED; 207 return (b->page); 208 } 209 210 /* Not allowed to retrieve a non-existent page. */ 211 if (pgno >= mp->npages) { 212 errno = EINVAL; 213 return (NULL); 214 } 215 216 /* Get a page from the cache. */ 217 if ((b = mpool_bkt(mp)) == NULL) 218 return (NULL); 219 b->pgno = pgno; 220 b->flags = MPOOL_PINNED; 221 222#ifdef STATISTICS 223 ++mp->pageread; 224#endif 225 /* Read in the contents. */ 226 off = mp->pagesize * pgno; 227 if (lseek(mp->fd, off, SEEK_SET) != off) 228 return (NULL); 229 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 230 if (nr >= 0) 231 errno = EFTYPE; 232 return (NULL); 233 } 234 if (mp->pgin) 235 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 236 237 inshash(b, b->pgno); 238 inschain(b, &mp->lru); 239#ifdef STATISTICS 240 ++mp->pageget; 241#endif 242 return (b->page); 243} 244 245/* 246 * MPOOL_PUT -- return a page to the pool 247 * 248 * Parameters: 249 * mp: mpool cookie 250 * page: page pointer 251 * pgno: page number 252 * 253 * Returns: 254 * RET_ERROR, RET_SUCCESS 255 */ 256int 257mpool_put(mp, page, flags) 258 MPOOL *mp; 259 void *page; 260 u_int flags; 261{ 262 BKT *baddr; 263#ifdef DEBUG 264 BKT *b; 265#endif 266 267#ifdef STATISTICS 268 ++mp->pageput; 269#endif 270 baddr = (BKT *)((char *)page - sizeof(BKT)); 271#ifdef DEBUG 272 if (!(baddr->flags & MPOOL_PINNED)) 273 __mpoolerr("mpool_put: page %d not pinned", b->pgno); 274 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 275 if (b == (BKT *)&mp->lru) 276 __mpoolerr("mpool_put: %0x: bad address", baddr); 277 if (b == baddr) 278 break; 279 } 280#endif 281 baddr->flags &= ~MPOOL_PINNED; 282 baddr->flags |= flags & MPOOL_DIRTY; 283 return (RET_SUCCESS); 284} 285 286/* 287 * MPOOL_CLOSE -- close the buffer pool 288 * 289 * Parameters: 290 * mp: mpool cookie 291 * 292 * Returns: 293 * RET_ERROR, RET_SUCCESS 294 */ 295int 296mpool_close(mp) 297 MPOOL *mp; 298{ 299 BKT *b, *next; 300 301 /* Free up any space allocated to the lru pages. */ 302 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 303 next = b->cprev; 304 free(b); 305 } 306 free(mp); 307 return (RET_SUCCESS); 308} 309 310/* 311 * MPOOL_SYNC -- sync the file to disk. 312 * 313 * Parameters: 314 * mp: mpool cookie 315 * 316 * Returns: 317 * RET_ERROR, RET_SUCCESS 318 */ 319int 320mpool_sync(mp) 321 MPOOL *mp; 322{ 323 BKT *b; 324 325 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 326 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 327 return (RET_ERROR); 328 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 329} 330 331/* 332 * MPOOL_BKT -- get/create a BKT from the cache 333 * 334 * Parameters: 335 * mp: mpool cookie 336 * 337 * Returns: 338 * NULL on failure and a pointer to the BKT on success 339 */ 340static BKT * 341mpool_bkt(mp) 342 MPOOL *mp; 343{ 344 BKT *b; 345 346 if (mp->curcache < mp->maxcache) 347 goto new; 348 349 /* 350 * If the cache is maxxed out, search the lru list for a buffer we 351 * can flush. If we find one, write it if necessary and take it off 352 * any lists. If we don't find anything we grow the cache anyway. 353 * The cache never shrinks. 354 */ 355 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 356 if (!(b->flags & MPOOL_PINNED)) { 357 if (b->flags & MPOOL_DIRTY && 358 mpool_write(mp, b) == RET_ERROR) 359 return (NULL); 360 rmhash(b); 361 rmchain(b); 362#ifdef STATISTICS 363 ++mp->pageflush; 364#endif 365#ifdef DEBUG 366 { 367 void *spage; 368 spage = b->page; 369 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 370 b->page = spage; 371 } 372#endif 373 return (b); 374 } 375 376new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 377 return (NULL); 378#ifdef STATISTICS 379 ++mp->pagealloc; 380#endif 381#ifdef DEBUG 382 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 383#endif 384 b->page = (char *)b + sizeof(BKT); 385 ++mp->curcache; 386 return (b); 387} 388 389/* 390 * MPOOL_WRITE -- sync a page to disk 391 * 392 * Parameters: 393 * mp: mpool cookie 394 * 395 * Returns: 396 * RET_ERROR, RET_SUCCESS 397 */ 398static int 399mpool_write(mp, b) 400 MPOOL *mp; 401 BKT *b; 402{ 403 off_t off; 404 405 if (mp->pgout) 406 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 407 408#ifdef STATISTICS 409 ++mp->pagewrite; 410#endif 411 off = mp->pagesize * b->pgno; 412 if (lseek(mp->fd, off, SEEK_SET) != off) 413 return (RET_ERROR); 414 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 415 return (RET_ERROR); 416 b->flags &= ~MPOOL_DIRTY; 417 return (RET_SUCCESS); 418} 419 420/* 421 * MPOOL_LOOK -- lookup a page 422 * 423 * Parameters: 424 * mp: mpool cookie 425 * pgno: page number 426 * 427 * Returns: 428 * NULL on failure and a pointer to the BKT on success 429 */ 430static BKT * 431mpool_look(mp, pgno) 432 MPOOL *mp; 433 pgno_t pgno; 434{ 435 register BKT *b; 436 register BKTHDR *tb; 437 438 /* XXX 439 * If find the buffer, put it first on the hash chain so can 440 * find it again quickly. 441 */ 442 tb = &mp->hashtable[HASHKEY(pgno)]; 443 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 444 if (b->pgno == pgno) { 445#ifdef STATISTICS 446 ++mp->cachehit; 447#endif 448 return (b); 449 } 450#ifdef STATISTICS 451 ++mp->cachemiss; 452#endif 453 return (NULL); 454} 455 456#ifdef STATISTICS 457/* 458 * MPOOL_STAT -- cache statistics 459 * 460 * Parameters: 461 * mp: mpool cookie 462 */ 463void 464mpool_stat(mp) 465 MPOOL *mp; 466{ 467 BKT *b; 468 int cnt; 469 char *sep; 470 471 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 472 (void)fprintf(stderr, 473 "page size %lu, cacheing %lu pages of %lu page max cache\n", 474 mp->pagesize, mp->curcache, mp->maxcache); 475 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 476 mp->pageput, mp->pageget, mp->pagenew); 477 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 478 mp->pagealloc, mp->pageflush); 479 if (mp->cachehit + mp->cachemiss) 480 (void)fprintf(stderr, 481 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 482 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 483 * 100, mp->cachehit, mp->cachemiss); 484 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 485 mp->pageread, mp->pagewrite); 486 487 sep = ""; 488 cnt = 0; 489 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 490 (void)fprintf(stderr, "%s%d", sep, b->pgno); 491 if (b->flags & MPOOL_DIRTY) 492 (void)fprintf(stderr, "d"); 493 if (b->flags & MPOOL_PINNED) 494 (void)fprintf(stderr, "P"); 495 if (++cnt == 10) { 496 sep = "\n"; 497 cnt = 0; 498 } else 499 sep = ", "; 500 501 } 502 (void)fprintf(stderr, "\n"); 503} 504#endif 505 506#ifdef DEBUG 507#if __STDC__ 508#include <stdarg.h> 509#else 510#include <varargs.h> 511#endif 512 513static void 514#if __STDC__ 515__mpoolerr(const char *fmt, ...) 516#else 517__mpoolerr(fmt, va_alist) 518 char *fmt; 519 va_dcl 520#endif 521{ 522 va_list ap; 523#if __STDC__ 524 va_start(ap, fmt); 525#else 526 va_start(ap); 527#endif 528 (void)vfprintf(stderr, fmt, ap); 529 va_end(ap); 530 (void)fprintf(stderr, "\n"); 531 abort(); 532 /* NOTREACHED */ 533} 534#endif 535