lfs_subr.c revision 1.43
1/* $NetBSD: lfs_subr.c,v 1.43 2003/08/07 16:34:38 agc Exp $ */ 2 3/*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38/* 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)lfs_subr.c 8.4 (Berkeley) 5/8/95 67 */ 68 69#include <sys/cdefs.h> 70__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.43 2003/08/07 16:34:38 agc Exp $"); 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/namei.h> 75#include <sys/vnode.h> 76#include <sys/buf.h> 77#include <sys/mount.h> 78#include <sys/malloc.h> 79#include <sys/proc.h> 80 81#include <ufs/ufs/inode.h> 82#include <ufs/lfs/lfs.h> 83#include <ufs/lfs/lfs_extern.h> 84 85#include <uvm/uvm.h> 86 87/* 88 * Return buffer with the contents of block "offset" from the beginning of 89 * directory "ip". If "res" is non-zero, fill it in with a pointer to the 90 * remaining space in the directory. 91 */ 92int 93lfs_blkatoff(void *v) 94{ 95 struct vop_blkatoff_args /* { 96 struct vnode *a_vp; 97 off_t a_offset; 98 char **a_res; 99 struct buf **a_bpp; 100 } */ *ap = v; 101 struct lfs *fs; 102 struct inode *ip; 103 struct buf *bp; 104 daddr_t lbn; 105 int bsize, error; 106 107 ip = VTOI(ap->a_vp); 108 fs = ip->i_lfs; 109 lbn = lblkno(fs, ap->a_offset); 110 bsize = blksize(fs, ip, lbn); 111 112 *ap->a_bpp = NULL; 113 if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) { 114 brelse(bp); 115 return (error); 116 } 117 if (ap->a_res) 118 *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); 119 *ap->a_bpp = bp; 120 return (0); 121} 122 123#ifdef LFS_DEBUG_MALLOC 124char *lfs_res_names[LFS_NB_COUNT] = { 125 "summary", 126 "superblock", 127 "ifile block", 128 "cluster", 129 "clean", 130}; 131#endif 132 133int lfs_res_qty[LFS_NB_COUNT] = { 134 LFS_N_SUMMARIES, 135 LFS_N_SBLOCKS, 136 LFS_N_IBLOCKS, 137 LFS_N_CLUSTERS, 138 LFS_N_CLEAN, 139}; 140 141void 142lfs_setup_resblks(struct lfs *fs) 143{ 144 int i, j; 145 int maxbpp; 146 147 fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT, 148 M_WAITOK); 149 for (i = 0; i < LFS_N_TOTAL; i++) { 150 fs->lfs_resblk[i].inuse = 0; 151 fs->lfs_resblk[i].p = NULL; 152 } 153 for (i = 0; i < LFS_RESHASH_WIDTH; i++) 154 LIST_INIT(fs->lfs_reshash + i); 155 156 /* 157 * These types of allocations can be larger than a page, 158 * so we can't use the pool subsystem for them. 159 */ 160 for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++) 161 fs->lfs_resblk[i].size = fs->lfs_sumsize; 162 for (j = 0; j < LFS_N_SBLOCKS; j++, i++) 163 fs->lfs_resblk[i].size = LFS_SBPAD; 164 for (j = 0; j < LFS_N_IBLOCKS; j++, i++) 165 fs->lfs_resblk[i].size = fs->lfs_bsize; 166 for (j = 0; j < LFS_N_CLUSTERS; j++, i++) 167 fs->lfs_resblk[i].size = MAXPHYS; 168 for (j = 0; j < LFS_N_CLEAN; j++, i++) 169 fs->lfs_resblk[i].size = MAXPHYS; 170 171 for (i = 0; i < LFS_N_TOTAL; i++) { 172 fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size, 173 M_SEGMENT, M_WAITOK); 174 } 175 176 /* 177 * Initialize pools for small types (XXX is BPP small?) 178 */ 179 pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 180 LFS_N_CL, "lfsclpl", &pool_allocator_nointr); 181 pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 182 LFS_N_SEG, "lfssegpool", &pool_allocator_nointr); 183 maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2); 184 maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2); 185 pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 186 LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr); 187} 188 189void 190lfs_free_resblks(struct lfs *fs) 191{ 192 int i; 193 194 pool_destroy(&fs->lfs_bpppool); 195 pool_destroy(&fs->lfs_segpool); 196 pool_destroy(&fs->lfs_clpool); 197 198 for (i = 0; i < LFS_N_TOTAL; i++) { 199 while (fs->lfs_resblk[i].inuse) 200 tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0); 201 if (fs->lfs_resblk[i].p != NULL) 202 free(fs->lfs_resblk[i].p, M_SEGMENT); 203 } 204 free(fs->lfs_resblk, M_SEGMENT); 205} 206 207static unsigned int 208lfs_mhash(void *vp) 209{ 210 return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH; 211} 212 213/* 214 * Return memory of the given size for the given purpose, or use one of a 215 * number of spare last-resort buffers, if malloc returns NULL. 216 */ 217void * 218lfs_malloc(struct lfs *fs, size_t size, int type) 219{ 220 struct lfs_res_blk *re; 221 void *r; 222 int i, s, start; 223 unsigned int h; 224 225 r = NULL; 226 227 /* If no mem allocated for this type, it just waits */ 228 if (lfs_res_qty[type] == 0) { 229 r = malloc(size, M_SEGMENT, M_WAITOK); 230 return r; 231 } 232 233 /* Otherwise try a quick malloc, and if it works, great */ 234 if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) { 235 return r; 236 } 237 238 /* 239 * If malloc returned NULL, we are forced to use one of our 240 * reserve blocks. We have on hand at least one summary block, 241 * at least one cluster block, at least one superblock, 242 * and several indirect blocks. 243 */ 244 /* skip over blocks of other types */ 245 for (i = 0, start = 0; i < type; i++) 246 start += lfs_res_qty[i]; 247 while (r == NULL) { 248 for (i = 0; i < lfs_res_qty[type]; i++) { 249 if (fs->lfs_resblk[start + i].inuse == 0) { 250 re = fs->lfs_resblk + start + i; 251 re->inuse = 1; 252 r = re->p; 253 KASSERT(re->size >= size); 254 h = lfs_mhash(r); 255 s = splbio(); 256 LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); 257 splx(s); 258 return r; 259 } 260 } 261#ifdef LFS_DEBUG_MALLOC 262 printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]); 263#endif 264 tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0); 265#ifdef LFS_DEBUG_MALLOC 266 printf("done sleeping on %s\n", lfs_res_names[type]); 267#endif 268 } 269 /* NOTREACHED */ 270 return r; 271} 272 273void 274lfs_free(struct lfs *fs, void *p, int type) 275{ 276 int s; 277 unsigned int h; 278 res_t *re; 279#ifdef DEBUG 280 int i; 281#endif 282 283 h = lfs_mhash(p); 284 s = splbio(); 285 LIST_FOREACH(re, &fs->lfs_reshash[h], res) { 286 if (re->p == p) { 287 KASSERT(re->inuse == 1); 288 LIST_REMOVE(re, res); 289 re->inuse = 0; 290 wakeup(&fs->lfs_resblk); 291 splx(s); 292 return; 293 } 294 } 295#ifdef DEBUG 296 for (i = 0; i < LFS_N_TOTAL; i++) { 297 if (fs->lfs_resblk[i].p == p) 298 panic("lfs_free: inconsistent reserved block"); 299 } 300#endif 301 splx(s); 302 303 /* 304 * If we didn't find it, free it. 305 */ 306 free(p, M_SEGMENT); 307} 308 309/* 310 * lfs_seglock -- 311 * Single thread the segment writer. 312 */ 313int 314lfs_seglock(struct lfs *fs, unsigned long flags) 315{ 316 struct segment *sp; 317 318 simple_lock(&fs->lfs_interlock); 319 if (fs->lfs_seglock) { 320 if (fs->lfs_lockpid == curproc->p_pid) { 321 simple_unlock(&fs->lfs_interlock); 322 ++fs->lfs_seglock; 323 fs->lfs_sp->seg_flags |= flags; 324 return 0; 325 } else if (flags & SEGM_PAGEDAEMON) { 326 simple_unlock(&fs->lfs_interlock); 327 return EWOULDBLOCK; 328 } else while (fs->lfs_seglock) 329 (void)ltsleep(&fs->lfs_seglock, PRIBIO + 1, 330 "lfs seglock", 0, &fs->lfs_interlock); 331 } 332 333 fs->lfs_seglock = 1; 334 fs->lfs_lockpid = curproc->p_pid; 335 simple_unlock(&fs->lfs_interlock); 336 fs->lfs_cleanind = 0; 337 338 /* Drain fragment size changes out */ 339 lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0); 340 341 sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK); 342 sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK); 343 sp->seg_flags = flags; 344 sp->vp = NULL; 345 sp->seg_iocount = 0; 346 (void) lfs_initseg(fs); 347 348 /* 349 * Keep a cumulative count of the outstanding I/O operations. If the 350 * disk drive catches up with us it could go to zero before we finish, 351 * so we artificially increment it by one until we've scheduled all of 352 * the writes we intend to do. 353 */ 354 ++fs->lfs_iocount; 355 return 0; 356} 357 358static void lfs_unmark_dirop(struct lfs *); 359 360static void 361lfs_unmark_dirop(struct lfs *fs) 362{ 363 struct inode *ip, *nip; 364 struct vnode *vp; 365 int doit; 366 367 simple_lock(&fs->lfs_interlock); 368 doit = !(fs->lfs_flags & LFS_UNDIROP); 369 if (doit) 370 fs->lfs_flags |= LFS_UNDIROP; 371 simple_unlock(&fs->lfs_interlock); 372 if (!doit) 373 return; 374 375 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 376 nip = TAILQ_NEXT(ip, i_lfs_dchain); 377 vp = ITOV(ip); 378 379 if (VOP_ISLOCKED(vp) && 380 vp->v_lock.lk_lockholder != curproc->p_pid) { 381 continue; 382 } 383 if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) { 384 --lfs_dirvcount; 385 vp->v_flag &= ~VDIROP; 386 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 387 wakeup(&lfs_dirvcount); 388 fs->lfs_unlockvp = vp; 389 vrele(vp); 390 fs->lfs_unlockvp = NULL; 391 } 392 } 393 394 simple_lock(&fs->lfs_interlock); 395 fs->lfs_flags &= ~LFS_UNDIROP; 396 simple_unlock(&fs->lfs_interlock); 397} 398 399static void 400lfs_auto_segclean(struct lfs *fs) 401{ 402 int i, error; 403 404 /* 405 * Now that we've swapped lfs_activesb, but while we still 406 * hold the segment lock, run through the segment list marking 407 * the empty ones clean. 408 * XXX - do we really need to do them all at once? 409 */ 410 for (i = 0; i < fs->lfs_nseg; i++) { 411 if ((fs->lfs_suflags[0][i] & 412 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 413 (SEGUSE_DIRTY | SEGUSE_EMPTY) && 414 (fs->lfs_suflags[1][i] & 415 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 416 (SEGUSE_DIRTY | SEGUSE_EMPTY)) { 417 418 if ((error = lfs_do_segclean(fs, i)) != 0) { 419#ifdef DEBUG 420 printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i); 421#endif /* DEBUG */ 422 } 423 } 424 fs->lfs_suflags[1 - fs->lfs_activesb][i] = 425 fs->lfs_suflags[fs->lfs_activesb][i]; 426 } 427} 428 429/* 430 * lfs_segunlock -- 431 * Single thread the segment writer. 432 */ 433void 434lfs_segunlock(struct lfs *fs) 435{ 436 struct segment *sp; 437 unsigned long sync, ckp; 438 struct buf *bp; 439 int do_unmark_dirop = 0; 440 441 sp = fs->lfs_sp; 442 443 simple_lock(&fs->lfs_interlock); 444 if (fs->lfs_seglock == 1) { 445 if ((sp->seg_flags & SEGM_PROT) == 0) 446 do_unmark_dirop = 1; 447 simple_unlock(&fs->lfs_interlock); 448 sync = sp->seg_flags & SEGM_SYNC; 449 ckp = sp->seg_flags & SEGM_CKP; 450 if (sp->bpp != sp->cbpp) { 451 /* Free allocated segment summary */ 452 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 453 bp = *sp->bpp; 454 lfs_freebuf(fs, bp); 455 } else 456 printf ("unlock to 0 with no summary"); 457 458 pool_put(&fs->lfs_bpppool, sp->bpp); 459 sp->bpp = NULL; 460 461 /* 462 * If we're not sync, we're done with sp, get rid of it. 463 * Otherwise, we keep a local copy around but free 464 * fs->lfs_sp so another process can use it (we have to 465 * wait but they don't have to wait for us). 466 */ 467 if (!sync) 468 pool_put(&fs->lfs_segpool, sp); 469 fs->lfs_sp = NULL; 470 471 /* 472 * If the I/O count is non-zero, sleep until it reaches zero. 473 * At the moment, the user's process hangs around so we can 474 * sleep. 475 */ 476 if (--fs->lfs_iocount == 0) { 477 lfs_countlocked(&locked_queue_count, 478 &locked_queue_bytes, "lfs_segunlock"); 479 wakeup(&locked_queue_count); 480 } 481 if (fs->lfs_iocount <= 1) 482 wakeup(&fs->lfs_iocount); 483 /* 484 * If we're not checkpointing, we don't have to block 485 * other processes to wait for a synchronous write 486 * to complete. 487 */ 488 if (!ckp) { 489 simple_lock(&fs->lfs_interlock); 490 --fs->lfs_seglock; 491 fs->lfs_lockpid = 0; 492 simple_unlock(&fs->lfs_interlock); 493 wakeup(&fs->lfs_seglock); 494 } 495 /* 496 * We let checkpoints happen asynchronously. That means 497 * that during recovery, we have to roll forward between 498 * the two segments described by the first and second 499 * superblocks to make sure that the checkpoint described 500 * by a superblock completed. 501 */ 502 while (ckp && sync && fs->lfs_iocount) 503 (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, 504 "lfs_iocount", 0); 505 while (sync && sp->seg_iocount) { 506 (void)tsleep(&sp->seg_iocount, PRIBIO + 1, 507 "seg_iocount", 0); 508 /* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */ 509 } 510 if (sync) 511 pool_put(&fs->lfs_segpool, sp); 512 513 if (ckp) { 514 fs->lfs_nactive = 0; 515 /* If we *know* everything's on disk, write both sbs */ 516 /* XXX should wait for this one */ 517 if (sync) 518 lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]); 519 lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]); 520 if (!(fs->lfs_ivnode->v_mount->mnt_flag & MNT_UNMOUNT)) 521 lfs_auto_segclean(fs); 522 fs->lfs_activesb = 1 - fs->lfs_activesb; 523 simple_lock(&fs->lfs_interlock); 524 --fs->lfs_seglock; 525 fs->lfs_lockpid = 0; 526 simple_unlock(&fs->lfs_interlock); 527 wakeup(&fs->lfs_seglock); 528 } 529 /* Reenable fragment size changes */ 530 lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); 531 if (do_unmark_dirop) 532 lfs_unmark_dirop(fs); 533 } else if (fs->lfs_seglock == 0) { 534 simple_unlock(&fs->lfs_interlock); 535 panic ("Seglock not held"); 536 } else { 537 --fs->lfs_seglock; 538 simple_unlock(&fs->lfs_interlock); 539 } 540} 541 542/* 543 * drain dirops and start writer. 544 */ 545int 546lfs_writer_enter(struct lfs *fs, const char *wmesg) 547{ 548 int error = 0; 549 550 simple_lock(&fs->lfs_interlock); 551 552 /* disallow dirops during flush */ 553 fs->lfs_writer++; 554 555 while (fs->lfs_dirops > 0) { 556 ++fs->lfs_diropwait; 557 error = ltsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, 558 &fs->lfs_interlock); 559 --fs->lfs_diropwait; 560 } 561 562 if (error) 563 fs->lfs_writer--; 564 565 simple_unlock(&fs->lfs_interlock); 566 567 return error; 568} 569 570void 571lfs_writer_leave(struct lfs *fs) 572{ 573 boolean_t dowakeup; 574 575 simple_lock(&fs->lfs_interlock); 576 dowakeup = !(--fs->lfs_writer); 577 simple_unlock(&fs->lfs_interlock); 578 if (dowakeup) 579 wakeup(&fs->lfs_dirops); 580} 581