lfs_subr.c revision 1.36
1/* $NetBSD: lfs_subr.c,v 1.36 2003/03/08 02:55:49 perseant Exp $ */ 2 3/*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38/* 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the University of 53 * California, Berkeley and its contributors. 54 * 4. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * @(#)lfs_subr.c 8.4 (Berkeley) 5/8/95 71 */ 72 73#include <sys/cdefs.h> 74__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.36 2003/03/08 02:55:49 perseant Exp $"); 75 76#include <sys/param.h> 77#include <sys/systm.h> 78#include <sys/namei.h> 79#include <sys/vnode.h> 80#include <sys/buf.h> 81#include <sys/mount.h> 82#include <sys/malloc.h> 83#include <sys/proc.h> 84 85#include <ufs/ufs/inode.h> 86#include <ufs/lfs/lfs.h> 87#include <ufs/lfs/lfs_extern.h> 88 89#include <uvm/uvm.h> 90 91/* 92 * Return buffer with the contents of block "offset" from the beginning of 93 * directory "ip". If "res" is non-zero, fill it in with a pointer to the 94 * remaining space in the directory. 95 */ 96int 97lfs_blkatoff(void *v) 98{ 99 struct vop_blkatoff_args /* { 100 struct vnode *a_vp; 101 off_t a_offset; 102 char **a_res; 103 struct buf **a_bpp; 104 } */ *ap = v; 105 struct lfs *fs; 106 struct inode *ip; 107 struct buf *bp; 108 daddr_t lbn; 109 int bsize, error; 110 111 ip = VTOI(ap->a_vp); 112 fs = ip->i_lfs; 113 lbn = lblkno(fs, ap->a_offset); 114 bsize = blksize(fs, ip, lbn); 115 116 *ap->a_bpp = NULL; 117 if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) { 118 brelse(bp); 119 return (error); 120 } 121 if (ap->a_res) 122 *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); 123 *ap->a_bpp = bp; 124 return (0); 125} 126 127#ifdef LFS_DEBUG_MALLOC 128char *lfs_res_names[LFS_NB_COUNT] = { 129 "summary", 130 "superblock", 131 "ifile block", 132 "cluster", 133 "clean", 134}; 135#endif 136 137int lfs_res_qty[LFS_NB_COUNT] = { 138 LFS_N_SUMMARIES, 139 LFS_N_SBLOCKS, 140 LFS_N_IBLOCKS, 141 LFS_N_CLUSTERS, 142 LFS_N_CLEAN, 143}; 144 145void 146lfs_setup_resblks(struct lfs *fs) 147{ 148 int i, j; 149 int maxbpp; 150 151 fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT, 152 M_WAITOK); 153 for (i = 0; i < LFS_N_TOTAL; i++) { 154 fs->lfs_resblk[i].inuse = 0; 155 fs->lfs_resblk[i].p = NULL; 156 } 157 for (i = 0; i < LFS_RESHASH_WIDTH; i++) 158 LIST_INIT(fs->lfs_reshash + i); 159 160 /* 161 * These types of allocations can be larger than a page, 162 * so we can't use the pool subsystem for them. 163 */ 164 for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++) 165 fs->lfs_resblk[i].size = fs->lfs_sumsize; 166 for (j = 0; j < LFS_N_SBLOCKS; j++, i++) 167 fs->lfs_resblk[i].size = LFS_SBPAD; 168 for (j = 0; j < LFS_N_IBLOCKS; j++, i++) 169 fs->lfs_resblk[i].size = fs->lfs_bsize; 170 for (j = 0; j < LFS_N_CLUSTERS; j++, i++) 171 fs->lfs_resblk[i].size = MAXPHYS; 172 for (j = 0; j < LFS_N_CLEAN; j++, i++) 173 fs->lfs_resblk[i].size = MAXPHYS; 174 175 for (i = 0; i < LFS_N_TOTAL; i++) { 176 fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size, 177 M_SEGMENT, M_WAITOK); 178 } 179 180 /* 181 * Initialize pools for small types (XXX is BPP small?) 182 */ 183 pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 184 LFS_N_CL, "lfsclpl", &pool_allocator_nointr); 185 pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 186 LFS_N_SEG, "lfssegpool", &pool_allocator_nointr); 187 maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2); 188 maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2); 189 pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 190 LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr); 191} 192 193void 194lfs_free_resblks(struct lfs *fs) 195{ 196 int i; 197 198 pool_destroy(&fs->lfs_bpppool); 199 pool_destroy(&fs->lfs_segpool); 200 pool_destroy(&fs->lfs_clpool); 201 202 for (i = 0; i < LFS_N_TOTAL; i++) { 203 while(fs->lfs_resblk[i].inuse) 204 tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0); 205 if (fs->lfs_resblk[i].p != NULL) 206 free(fs->lfs_resblk[i].p, M_SEGMENT); 207 } 208 free(fs->lfs_resblk, M_SEGMENT); 209} 210 211static unsigned int 212lfs_mhash(void *vp) 213{ 214 return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH; 215} 216 217/* 218 * Return memory of the given size for the given purpose, or use one of a 219 * number of spare last-resort buffers, if malloc returns NULL. 220 */ 221void * 222lfs_malloc(struct lfs *fs, size_t size, int type) 223{ 224 struct lfs_res_blk *re; 225 void *r; 226 int i, s, start; 227 unsigned int h; 228 229 r = NULL; 230 231 /* If no mem allocated for this type, it just waits */ 232 if (lfs_res_qty[type] == 0) { 233 r = malloc(size, M_SEGMENT, M_WAITOK); 234 return r; 235 } 236 237 /* Otherwise try a quick malloc, and if it works, great */ 238 if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) { 239 return r; 240 } 241 242 /* 243 * If malloc returned NULL, we are forced to use one of our 244 * reserve blocks. We have on hand at least one summary block, 245 * at least one cluster block, at least one superblock, 246 * and several indirect blocks. 247 */ 248 /* skip over blocks of other types */ 249 for (i = 0, start = 0; i < type; i++) 250 start += lfs_res_qty[i]; 251 while (r == NULL) { 252 for (i = 0; i < lfs_res_qty[type]; i++) { 253 if (fs->lfs_resblk[start + i].inuse == 0) { 254 re = fs->lfs_resblk + start + i; 255 re->inuse = 1; 256 r = re->p; 257 KASSERT(re->size >= size); 258 h = lfs_mhash(r); 259 s = splbio(); 260 LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); 261 splx(s); 262 return r; 263 } 264 } 265#ifdef LFS_DEBUG_MALLOC 266 printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]); 267#endif 268 tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0); 269#ifdef LFS_DEBUG_MALLOC 270 printf("done sleeping on %s\n", lfs_res_names[type]); 271#endif 272 } 273 /* NOTREACHED */ 274 return r; 275} 276 277void 278lfs_free(struct lfs *fs, void *p, int type) 279{ 280 int s; 281 unsigned int h; 282 res_t *re; 283#ifdef DEBUG 284 int i; 285#endif 286 287 h = lfs_mhash(p); 288 s = splbio(); 289 LIST_FOREACH(re, &fs->lfs_reshash[h], res) { 290 if (re->p == p) { 291 KASSERT(re->inuse == 1); 292 LIST_REMOVE(re, res); 293 re->inuse = 0; 294 wakeup(&fs->lfs_resblk); 295 splx(s); 296 return; 297 } 298 } 299#ifdef DEBUG 300 for (i = 0; i < LFS_N_TOTAL; i++) { 301 if (fs->lfs_resblk[i].p == p) 302 panic("lfs_free: inconsistent reserved block"); 303 } 304#endif 305 splx(s); 306 307 /* 308 * If we didn't find it, free it. 309 */ 310 free(p, M_SEGMENT); 311} 312 313/* 314 * lfs_seglock -- 315 * Single thread the segment writer. 316 */ 317int 318lfs_seglock(struct lfs *fs, unsigned long flags) 319{ 320 struct segment *sp; 321 322 if (fs->lfs_seglock) { 323 if (fs->lfs_lockpid == curproc->p_pid) { 324 ++fs->lfs_seglock; 325 fs->lfs_sp->seg_flags |= flags; 326 return 0; 327 } else if (flags & SEGM_PAGEDAEMON) 328 return EWOULDBLOCK; 329 else while (fs->lfs_seglock) 330 (void)tsleep(&fs->lfs_seglock, PRIBIO + 1, 331 "lfs seglock", 0); 332 } 333 334 fs->lfs_seglock = 1; 335 fs->lfs_lockpid = curproc->p_pid; 336 fs->lfs_cleanind = 0; 337 338 /* Drain fragment size changes out */ 339 lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0); 340 341 sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK); 342 sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK); 343 sp->seg_flags = flags; 344 sp->vp = NULL; 345 sp->seg_iocount = 0; 346 (void) lfs_initseg(fs); 347 348 /* 349 * Keep a cumulative count of the outstanding I/O operations. If the 350 * disk drive catches up with us it could go to zero before we finish, 351 * so we artificially increment it by one until we've scheduled all of 352 * the writes we intend to do. 353 */ 354 ++fs->lfs_iocount; 355 return 0; 356} 357 358static void lfs_unmark_dirop(struct lfs *); 359 360static void 361lfs_unmark_dirop(struct lfs *fs) 362{ 363 struct inode *ip, *nip; 364 struct vnode *vp; 365 extern int lfs_dirvcount; 366 367 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 368 nip = TAILQ_NEXT(ip, i_lfs_dchain); 369 vp = ITOV(ip); 370 371 if (VOP_ISLOCKED(vp) && 372 vp->v_lock.lk_lockholder != curproc->p_pid) { 373 continue; 374 } 375 if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) { 376 --lfs_dirvcount; 377 vp->v_flag &= ~VDIROP; 378 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 379 wakeup(&lfs_dirvcount); 380 fs->lfs_unlockvp = vp; 381 vrele(vp); 382 fs->lfs_unlockvp = NULL; 383 } 384 } 385} 386 387static void 388lfs_auto_segclean(struct lfs *fs) 389{ 390 int i, error; 391 392 /* 393 * Now that we've swapped lfs_activesb, but while we still 394 * hold the segment lock, run through the segment list marking 395 * the empty ones clean. 396 * XXX - do we really need to do them all at once? 397 */ 398 for (i = 0; i < fs->lfs_nseg; i++) { 399 if ((fs->lfs_suflags[0][i] & 400 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 401 (SEGUSE_DIRTY | SEGUSE_EMPTY) && 402 (fs->lfs_suflags[1][i] & 403 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 404 (SEGUSE_DIRTY | SEGUSE_EMPTY)) { 405 406 if ((error = lfs_do_segclean(fs, i)) != 0) { 407#ifdef DEBUG 408 printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i); 409#endif /* DEBUG */ 410 } 411 } 412 fs->lfs_suflags[1 - fs->lfs_activesb][i] = 413 fs->lfs_suflags[fs->lfs_activesb][i]; 414 } 415} 416 417/* 418 * lfs_segunlock -- 419 * Single thread the segment writer. 420 */ 421void 422lfs_segunlock(struct lfs *fs) 423{ 424 struct segment *sp; 425 unsigned long sync, ckp; 426 struct buf *bp; 427#ifdef LFS_MALLOC_SUMMARY 428 extern int locked_queue_count; 429 extern long locked_queue_bytes; 430#endif 431 432 sp = fs->lfs_sp; 433 434 if (fs->lfs_seglock == 1) { 435 if ((sp->seg_flags & SEGM_PROT) == 0) 436 lfs_unmark_dirop(fs); 437 sync = sp->seg_flags & SEGM_SYNC; 438 ckp = sp->seg_flags & SEGM_CKP; 439 if (sp->bpp != sp->cbpp) { 440 /* Free allocated segment summary */ 441 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 442 bp = *sp->bpp; 443#ifdef LFS_MALLOC_SUMMARY 444 lfs_freebuf(fs, bp); 445#else 446 s = splbio(); 447 bremfree(bp); 448 bp->b_flags |= B_DONE|B_INVAL; 449 bp->b_flags &= ~B_DELWRI; 450 reassignbuf(bp,bp->b_vp); 451 splx(s); 452 brelse(bp); 453#endif 454 } else 455 printf ("unlock to 0 with no summary"); 456 457 pool_put(&fs->lfs_bpppool, sp->bpp); 458 sp->bpp = NULL; 459 460 /* 461 * If we're not sync, we're done with sp, get rid of it. 462 * Otherwise, we keep a local copy around but free 463 * fs->lfs_sp so another process can use it (we have to 464 * wait but they don't have to wait for us). 465 */ 466 if (!sync) 467 pool_put(&fs->lfs_segpool, sp); 468 fs->lfs_sp = NULL; 469 470 /* 471 * If the I/O count is non-zero, sleep until it reaches zero. 472 * At the moment, the user's process hangs around so we can 473 * sleep. 474 */ 475 if (--fs->lfs_iocount == 0) { 476 lfs_countlocked(&locked_queue_count, 477 &locked_queue_bytes, "lfs_segunlock"); 478 wakeup(&locked_queue_count); 479 } 480 if (fs->lfs_iocount <= 1) 481 wakeup(&fs->lfs_iocount); 482 /* 483 * If we're not checkpointing, we don't have to block 484 * other processes to wait for a synchronous write 485 * to complete. 486 */ 487 if (!ckp) { 488 --fs->lfs_seglock; 489 fs->lfs_lockpid = 0; 490 wakeup(&fs->lfs_seglock); 491 } 492 /* 493 * We let checkpoints happen asynchronously. That means 494 * that during recovery, we have to roll forward between 495 * the two segments described by the first and second 496 * superblocks to make sure that the checkpoint described 497 * by a superblock completed. 498 */ 499 while (ckp && sync && fs->lfs_iocount) 500 (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, 501 "lfs_iocount", 0); 502 while (sync && sp->seg_iocount) { 503 (void)tsleep(&sp->seg_iocount, PRIBIO + 1, 504 "seg_iocount", 0); 505 /* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */ 506 } 507 if (sync) 508 pool_put(&fs->lfs_segpool, sp); 509 510 if (ckp) { 511 fs->lfs_nactive = 0; 512 /* If we *know* everything's on disk, write both sbs */ 513 /* XXX should wait for this one */ 514 if (sync) 515 lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]); 516 lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]); 517 if (!(fs->lfs_ivnode->v_mount->mnt_flag & MNT_UNMOUNT)) 518 lfs_auto_segclean(fs); 519 fs->lfs_activesb = 1 - fs->lfs_activesb; 520 --fs->lfs_seglock; 521 fs->lfs_lockpid = 0; 522 wakeup(&fs->lfs_seglock); 523 } 524 /* Reenable fragment size changes */ 525 lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); 526 } else if (fs->lfs_seglock == 0) { 527 panic ("Seglock not held"); 528 } else { 529 --fs->lfs_seglock; 530 } 531} 532