lfs_subr.c revision 1.42
1/* $NetBSD: lfs_subr.c,v 1.42 2003/07/12 16:17:08 yamt Exp $ */ 2 3/*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38/* 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the University of 53 * California, Berkeley and its contributors. 54 * 4. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * @(#)lfs_subr.c 8.4 (Berkeley) 5/8/95 71 */ 72 73#include <sys/cdefs.h> 74__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.42 2003/07/12 16:17:08 yamt Exp $"); 75 76#include <sys/param.h> 77#include <sys/systm.h> 78#include <sys/namei.h> 79#include <sys/vnode.h> 80#include <sys/buf.h> 81#include <sys/mount.h> 82#include <sys/malloc.h> 83#include <sys/proc.h> 84 85#include <ufs/ufs/inode.h> 86#include <ufs/lfs/lfs.h> 87#include <ufs/lfs/lfs_extern.h> 88 89#include <uvm/uvm.h> 90 91/* 92 * Return buffer with the contents of block "offset" from the beginning of 93 * directory "ip". If "res" is non-zero, fill it in with a pointer to the 94 * remaining space in the directory. 95 */ 96int 97lfs_blkatoff(void *v) 98{ 99 struct vop_blkatoff_args /* { 100 struct vnode *a_vp; 101 off_t a_offset; 102 char **a_res; 103 struct buf **a_bpp; 104 } */ *ap = v; 105 struct lfs *fs; 106 struct inode *ip; 107 struct buf *bp; 108 daddr_t lbn; 109 int bsize, error; 110 111 ip = VTOI(ap->a_vp); 112 fs = ip->i_lfs; 113 lbn = lblkno(fs, ap->a_offset); 114 bsize = blksize(fs, ip, lbn); 115 116 *ap->a_bpp = NULL; 117 if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) { 118 brelse(bp); 119 return (error); 120 } 121 if (ap->a_res) 122 *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); 123 *ap->a_bpp = bp; 124 return (0); 125} 126 127#ifdef LFS_DEBUG_MALLOC 128char *lfs_res_names[LFS_NB_COUNT] = { 129 "summary", 130 "superblock", 131 "ifile block", 132 "cluster", 133 "clean", 134}; 135#endif 136 137int lfs_res_qty[LFS_NB_COUNT] = { 138 LFS_N_SUMMARIES, 139 LFS_N_SBLOCKS, 140 LFS_N_IBLOCKS, 141 LFS_N_CLUSTERS, 142 LFS_N_CLEAN, 143}; 144 145void 146lfs_setup_resblks(struct lfs *fs) 147{ 148 int i, j; 149 int maxbpp; 150 151 fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT, 152 M_WAITOK); 153 for (i = 0; i < LFS_N_TOTAL; i++) { 154 fs->lfs_resblk[i].inuse = 0; 155 fs->lfs_resblk[i].p = NULL; 156 } 157 for (i = 0; i < LFS_RESHASH_WIDTH; i++) 158 LIST_INIT(fs->lfs_reshash + i); 159 160 /* 161 * These types of allocations can be larger than a page, 162 * so we can't use the pool subsystem for them. 163 */ 164 for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++) 165 fs->lfs_resblk[i].size = fs->lfs_sumsize; 166 for (j = 0; j < LFS_N_SBLOCKS; j++, i++) 167 fs->lfs_resblk[i].size = LFS_SBPAD; 168 for (j = 0; j < LFS_N_IBLOCKS; j++, i++) 169 fs->lfs_resblk[i].size = fs->lfs_bsize; 170 for (j = 0; j < LFS_N_CLUSTERS; j++, i++) 171 fs->lfs_resblk[i].size = MAXPHYS; 172 for (j = 0; j < LFS_N_CLEAN; j++, i++) 173 fs->lfs_resblk[i].size = MAXPHYS; 174 175 for (i = 0; i < LFS_N_TOTAL; i++) { 176 fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size, 177 M_SEGMENT, M_WAITOK); 178 } 179 180 /* 181 * Initialize pools for small types (XXX is BPP small?) 182 */ 183 pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 184 LFS_N_CL, "lfsclpl", &pool_allocator_nointr); 185 pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 186 LFS_N_SEG, "lfssegpool", &pool_allocator_nointr); 187 maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2); 188 maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2); 189 pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 190 LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr); 191} 192 193void 194lfs_free_resblks(struct lfs *fs) 195{ 196 int i; 197 198 pool_destroy(&fs->lfs_bpppool); 199 pool_destroy(&fs->lfs_segpool); 200 pool_destroy(&fs->lfs_clpool); 201 202 for (i = 0; i < LFS_N_TOTAL; i++) { 203 while (fs->lfs_resblk[i].inuse) 204 tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0); 205 if (fs->lfs_resblk[i].p != NULL) 206 free(fs->lfs_resblk[i].p, M_SEGMENT); 207 } 208 free(fs->lfs_resblk, M_SEGMENT); 209} 210 211static unsigned int 212lfs_mhash(void *vp) 213{ 214 return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH; 215} 216 217/* 218 * Return memory of the given size for the given purpose, or use one of a 219 * number of spare last-resort buffers, if malloc returns NULL. 220 */ 221void * 222lfs_malloc(struct lfs *fs, size_t size, int type) 223{ 224 struct lfs_res_blk *re; 225 void *r; 226 int i, s, start; 227 unsigned int h; 228 229 r = NULL; 230 231 /* If no mem allocated for this type, it just waits */ 232 if (lfs_res_qty[type] == 0) { 233 r = malloc(size, M_SEGMENT, M_WAITOK); 234 return r; 235 } 236 237 /* Otherwise try a quick malloc, and if it works, great */ 238 if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) { 239 return r; 240 } 241 242 /* 243 * If malloc returned NULL, we are forced to use one of our 244 * reserve blocks. We have on hand at least one summary block, 245 * at least one cluster block, at least one superblock, 246 * and several indirect blocks. 247 */ 248 /* skip over blocks of other types */ 249 for (i = 0, start = 0; i < type; i++) 250 start += lfs_res_qty[i]; 251 while (r == NULL) { 252 for (i = 0; i < lfs_res_qty[type]; i++) { 253 if (fs->lfs_resblk[start + i].inuse == 0) { 254 re = fs->lfs_resblk + start + i; 255 re->inuse = 1; 256 r = re->p; 257 KASSERT(re->size >= size); 258 h = lfs_mhash(r); 259 s = splbio(); 260 LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); 261 splx(s); 262 return r; 263 } 264 } 265#ifdef LFS_DEBUG_MALLOC 266 printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]); 267#endif 268 tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0); 269#ifdef LFS_DEBUG_MALLOC 270 printf("done sleeping on %s\n", lfs_res_names[type]); 271#endif 272 } 273 /* NOTREACHED */ 274 return r; 275} 276 277void 278lfs_free(struct lfs *fs, void *p, int type) 279{ 280 int s; 281 unsigned int h; 282 res_t *re; 283#ifdef DEBUG 284 int i; 285#endif 286 287 h = lfs_mhash(p); 288 s = splbio(); 289 LIST_FOREACH(re, &fs->lfs_reshash[h], res) { 290 if (re->p == p) { 291 KASSERT(re->inuse == 1); 292 LIST_REMOVE(re, res); 293 re->inuse = 0; 294 wakeup(&fs->lfs_resblk); 295 splx(s); 296 return; 297 } 298 } 299#ifdef DEBUG 300 for (i = 0; i < LFS_N_TOTAL; i++) { 301 if (fs->lfs_resblk[i].p == p) 302 panic("lfs_free: inconsistent reserved block"); 303 } 304#endif 305 splx(s); 306 307 /* 308 * If we didn't find it, free it. 309 */ 310 free(p, M_SEGMENT); 311} 312 313/* 314 * lfs_seglock -- 315 * Single thread the segment writer. 316 */ 317int 318lfs_seglock(struct lfs *fs, unsigned long flags) 319{ 320 struct segment *sp; 321 322 simple_lock(&fs->lfs_interlock); 323 if (fs->lfs_seglock) { 324 if (fs->lfs_lockpid == curproc->p_pid) { 325 simple_unlock(&fs->lfs_interlock); 326 ++fs->lfs_seglock; 327 fs->lfs_sp->seg_flags |= flags; 328 return 0; 329 } else if (flags & SEGM_PAGEDAEMON) { 330 simple_unlock(&fs->lfs_interlock); 331 return EWOULDBLOCK; 332 } else while (fs->lfs_seglock) 333 (void)ltsleep(&fs->lfs_seglock, PRIBIO + 1, 334 "lfs seglock", 0, &fs->lfs_interlock); 335 } 336 337 fs->lfs_seglock = 1; 338 fs->lfs_lockpid = curproc->p_pid; 339 simple_unlock(&fs->lfs_interlock); 340 fs->lfs_cleanind = 0; 341 342 /* Drain fragment size changes out */ 343 lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0); 344 345 sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK); 346 sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK); 347 sp->seg_flags = flags; 348 sp->vp = NULL; 349 sp->seg_iocount = 0; 350 (void) lfs_initseg(fs); 351 352 /* 353 * Keep a cumulative count of the outstanding I/O operations. If the 354 * disk drive catches up with us it could go to zero before we finish, 355 * so we artificially increment it by one until we've scheduled all of 356 * the writes we intend to do. 357 */ 358 ++fs->lfs_iocount; 359 return 0; 360} 361 362static void lfs_unmark_dirop(struct lfs *); 363 364static void 365lfs_unmark_dirop(struct lfs *fs) 366{ 367 struct inode *ip, *nip; 368 struct vnode *vp; 369 int doit; 370 371 simple_lock(&fs->lfs_interlock); 372 doit = !(fs->lfs_flags & LFS_UNDIROP); 373 if (doit) 374 fs->lfs_flags |= LFS_UNDIROP; 375 simple_unlock(&fs->lfs_interlock); 376 if (!doit) 377 return; 378 379 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 380 nip = TAILQ_NEXT(ip, i_lfs_dchain); 381 vp = ITOV(ip); 382 383 if (VOP_ISLOCKED(vp) && 384 vp->v_lock.lk_lockholder != curproc->p_pid) { 385 continue; 386 } 387 if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) { 388 --lfs_dirvcount; 389 vp->v_flag &= ~VDIROP; 390 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 391 wakeup(&lfs_dirvcount); 392 fs->lfs_unlockvp = vp; 393 vrele(vp); 394 fs->lfs_unlockvp = NULL; 395 } 396 } 397 398 simple_lock(&fs->lfs_interlock); 399 fs->lfs_flags &= ~LFS_UNDIROP; 400 simple_unlock(&fs->lfs_interlock); 401} 402 403static void 404lfs_auto_segclean(struct lfs *fs) 405{ 406 int i, error; 407 408 /* 409 * Now that we've swapped lfs_activesb, but while we still 410 * hold the segment lock, run through the segment list marking 411 * the empty ones clean. 412 * XXX - do we really need to do them all at once? 413 */ 414 for (i = 0; i < fs->lfs_nseg; i++) { 415 if ((fs->lfs_suflags[0][i] & 416 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 417 (SEGUSE_DIRTY | SEGUSE_EMPTY) && 418 (fs->lfs_suflags[1][i] & 419 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 420 (SEGUSE_DIRTY | SEGUSE_EMPTY)) { 421 422 if ((error = lfs_do_segclean(fs, i)) != 0) { 423#ifdef DEBUG 424 printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i); 425#endif /* DEBUG */ 426 } 427 } 428 fs->lfs_suflags[1 - fs->lfs_activesb][i] = 429 fs->lfs_suflags[fs->lfs_activesb][i]; 430 } 431} 432 433/* 434 * lfs_segunlock -- 435 * Single thread the segment writer. 436 */ 437void 438lfs_segunlock(struct lfs *fs) 439{ 440 struct segment *sp; 441 unsigned long sync, ckp; 442 struct buf *bp; 443 int do_unmark_dirop = 0; 444 445 sp = fs->lfs_sp; 446 447 simple_lock(&fs->lfs_interlock); 448 if (fs->lfs_seglock == 1) { 449 if ((sp->seg_flags & SEGM_PROT) == 0) 450 do_unmark_dirop = 1; 451 simple_unlock(&fs->lfs_interlock); 452 sync = sp->seg_flags & SEGM_SYNC; 453 ckp = sp->seg_flags & SEGM_CKP; 454 if (sp->bpp != sp->cbpp) { 455 /* Free allocated segment summary */ 456 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 457 bp = *sp->bpp; 458 lfs_freebuf(fs, bp); 459 } else 460 printf ("unlock to 0 with no summary"); 461 462 pool_put(&fs->lfs_bpppool, sp->bpp); 463 sp->bpp = NULL; 464 465 /* 466 * If we're not sync, we're done with sp, get rid of it. 467 * Otherwise, we keep a local copy around but free 468 * fs->lfs_sp so another process can use it (we have to 469 * wait but they don't have to wait for us). 470 */ 471 if (!sync) 472 pool_put(&fs->lfs_segpool, sp); 473 fs->lfs_sp = NULL; 474 475 /* 476 * If the I/O count is non-zero, sleep until it reaches zero. 477 * At the moment, the user's process hangs around so we can 478 * sleep. 479 */ 480 if (--fs->lfs_iocount == 0) { 481 lfs_countlocked(&locked_queue_count, 482 &locked_queue_bytes, "lfs_segunlock"); 483 wakeup(&locked_queue_count); 484 } 485 if (fs->lfs_iocount <= 1) 486 wakeup(&fs->lfs_iocount); 487 /* 488 * If we're not checkpointing, we don't have to block 489 * other processes to wait for a synchronous write 490 * to complete. 491 */ 492 if (!ckp) { 493 simple_lock(&fs->lfs_interlock); 494 --fs->lfs_seglock; 495 fs->lfs_lockpid = 0; 496 simple_unlock(&fs->lfs_interlock); 497 wakeup(&fs->lfs_seglock); 498 } 499 /* 500 * We let checkpoints happen asynchronously. That means 501 * that during recovery, we have to roll forward between 502 * the two segments described by the first and second 503 * superblocks to make sure that the checkpoint described 504 * by a superblock completed. 505 */ 506 while (ckp && sync && fs->lfs_iocount) 507 (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, 508 "lfs_iocount", 0); 509 while (sync && sp->seg_iocount) { 510 (void)tsleep(&sp->seg_iocount, PRIBIO + 1, 511 "seg_iocount", 0); 512 /* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */ 513 } 514 if (sync) 515 pool_put(&fs->lfs_segpool, sp); 516 517 if (ckp) { 518 fs->lfs_nactive = 0; 519 /* If we *know* everything's on disk, write both sbs */ 520 /* XXX should wait for this one */ 521 if (sync) 522 lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]); 523 lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]); 524 if (!(fs->lfs_ivnode->v_mount->mnt_flag & MNT_UNMOUNT)) 525 lfs_auto_segclean(fs); 526 fs->lfs_activesb = 1 - fs->lfs_activesb; 527 simple_lock(&fs->lfs_interlock); 528 --fs->lfs_seglock; 529 fs->lfs_lockpid = 0; 530 simple_unlock(&fs->lfs_interlock); 531 wakeup(&fs->lfs_seglock); 532 } 533 /* Reenable fragment size changes */ 534 lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); 535 if (do_unmark_dirop) 536 lfs_unmark_dirop(fs); 537 } else if (fs->lfs_seglock == 0) { 538 simple_unlock(&fs->lfs_interlock); 539 panic ("Seglock not held"); 540 } else { 541 --fs->lfs_seglock; 542 simple_unlock(&fs->lfs_interlock); 543 } 544} 545 546/* 547 * drain dirops and start writer. 548 */ 549int 550lfs_writer_enter(struct lfs *fs, const char *wmesg) 551{ 552 int error = 0; 553 554 simple_lock(&fs->lfs_interlock); 555 556 /* disallow dirops during flush */ 557 fs->lfs_writer++; 558 559 while (fs->lfs_dirops > 0) { 560 ++fs->lfs_diropwait; 561 error = ltsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, 562 &fs->lfs_interlock); 563 --fs->lfs_diropwait; 564 } 565 566 if (error) 567 fs->lfs_writer--; 568 569 simple_unlock(&fs->lfs_interlock); 570 571 return error; 572} 573 574void 575lfs_writer_leave(struct lfs *fs) 576{ 577 boolean_t dowakeup; 578 579 simple_lock(&fs->lfs_interlock); 580 dowakeup = !(--fs->lfs_writer); 581 simple_unlock(&fs->lfs_interlock); 582 if (dowakeup) 583 wakeup(&fs->lfs_dirops); 584} 585