/* lfs_subr.c revision 1.32 */
1/* $NetBSD: lfs_subr.c,v 1.32 2003/02/19 12:58:53 yamt Exp $ */ 2 3/*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38/* 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the University of 53 * California, Berkeley and its contributors. 54 * 4. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * @(#)lfs_subr.c 8.4 (Berkeley) 5/8/95 71 */ 72 73#include <sys/cdefs.h> 74__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.32 2003/02/19 12:58:53 yamt Exp $"); 75 76#include <sys/param.h> 77#include <sys/systm.h> 78#include <sys/namei.h> 79#include <sys/vnode.h> 80#include <sys/buf.h> 81#include <sys/mount.h> 82#include <sys/malloc.h> 83#include <sys/proc.h> 84 85#include <ufs/ufs/inode.h> 86#include <ufs/lfs/lfs.h> 87#include <ufs/lfs/lfs_extern.h> 88 89#include <uvm/uvm.h> 90 91/* 92 * Return buffer with the contents of block "offset" from the beginning of 93 * directory "ip". If "res" is non-zero, fill it in with a pointer to the 94 * remaining space in the directory. 
95 */ 96int 97lfs_blkatoff(void *v) 98{ 99 struct vop_blkatoff_args /* { 100 struct vnode *a_vp; 101 off_t a_offset; 102 char **a_res; 103 struct buf **a_bpp; 104 } */ *ap = v; 105 struct lfs *fs; 106 struct inode *ip; 107 struct buf *bp; 108 daddr_t lbn; 109 int bsize, error; 110 111 ip = VTOI(ap->a_vp); 112 fs = ip->i_lfs; 113 lbn = lblkno(fs, ap->a_offset); 114 bsize = blksize(fs, ip, lbn); 115 116 *ap->a_bpp = NULL; 117 if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) { 118 brelse(bp); 119 return (error); 120 } 121 if (ap->a_res) 122 *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); 123 *ap->a_bpp = bp; 124 return (0); 125} 126 127#ifdef LFS_DEBUG_MALLOC 128char *lfs_res_names[LFS_NB_COUNT] = { 129 "summary", 130 "superblock", 131 "ifile block", 132 "cluster", 133 "clean", 134}; 135#endif 136 137int lfs_res_qty[LFS_NB_COUNT] = { 138 LFS_N_SUMMARIES, 139 LFS_N_SBLOCKS, 140 LFS_N_IBLOCKS, 141 LFS_N_CLUSTERS, 142 LFS_N_CLEAN, 143}; 144 145void 146lfs_setup_resblks(struct lfs *fs) 147{ 148 int i, j; 149 int maxbpp; 150 151 fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT, 152 M_WAITOK); 153 for (i = 0; i < LFS_N_TOTAL; i++) { 154 fs->lfs_resblk[i].inuse = 0; 155 fs->lfs_resblk[i].p = NULL; 156 } 157 for (i = 0; i < LFS_RESHASH_WIDTH; i++) 158 LIST_INIT(fs->lfs_reshash + i); 159 160 /* 161 * These types of allocations can be larger than a page, 162 * so we can't use the pool subsystem for them. 
163 */ 164 for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++) 165 fs->lfs_resblk[i].p = malloc(fs->lfs_sumsize, M_SEGMENT, 166 M_WAITOK); 167 for (j = 0; j < LFS_N_SBLOCKS; j++, i++) 168 fs->lfs_resblk[i].p = malloc(LFS_SBPAD, M_SEGMENT, M_WAITOK); 169 for (j = 0; j < LFS_N_IBLOCKS; j++, i++) 170 fs->lfs_resblk[i].p = malloc(fs->lfs_bsize, M_SEGMENT, M_WAITOK); 171 for (j = 0; j < LFS_N_CLUSTERS; j++, i++) 172 fs->lfs_resblk[i].p = malloc(MAXPHYS, M_SEGMENT, M_WAITOK); 173 for (j = 0; j < LFS_N_CLEAN; j++, i++) 174 fs->lfs_resblk[i].p = malloc(MAXPHYS, M_SEGMENT, M_WAITOK); 175 176 /* 177 * Initialize pools for small types (XXX is BPP small?) 178 */ 179 maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2); 180 maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2); 181 pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 182 LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr); 183 pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 184 LFS_N_CL, "lfsclpl", &pool_allocator_nointr); 185 pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 186 LFS_N_SEG, "lfssegpool", &pool_allocator_nointr); 187} 188 189void 190lfs_free_resblks(struct lfs *fs) 191{ 192 int i; 193 194 pool_destroy(&fs->lfs_bpppool); 195 pool_destroy(&fs->lfs_segpool); 196 pool_destroy(&fs->lfs_clpool); 197 198 for (i = 0; i < LFS_N_TOTAL; i++) { 199 while(fs->lfs_resblk[i].inuse) 200 tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0); 201 if (fs->lfs_resblk[i].p != NULL) 202 free(fs->lfs_resblk[i].p, M_SEGMENT); 203 } 204 free(fs->lfs_resblk, M_SEGMENT); 205} 206 207static unsigned int 208lfs_mhash(void *vp) 209{ 210 return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH; 211} 212 213/* 214 * Return memory of the given size for the given purpose, or use one of a 215 * number of spare last-resort buffers, if malloc returns NULL. 
216 */ 217void * 218lfs_malloc(struct lfs *fs, size_t size, int type) 219{ 220 struct lfs_res_blk *re; 221 void *r; 222 int i, s, start; 223 unsigned int h; 224 225 /* If no mem allocated for this type, it just waits */ 226 if (lfs_res_qty[type] == 0) 227 return malloc(size, M_SEGMENT, M_WAITOK); 228 229 /* Otherwise try a quick malloc, and if it works, great */ 230 if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) 231 return r; 232 233 /* 234 * If malloc returned NULL, we are forced to use one of our 235 * reserve blocks. We have on hand at least one summary block, 236 * at least one cluster block, at least one superblock, 237 * and several indirect blocks. 238 */ 239 /* skip over blocks of other types */ 240 for (i = 0, start = 0; i < type; i++) 241 start += lfs_res_qty[i]; 242 while (r == NULL) { 243 for (i = 0; i < lfs_res_qty[type]; i++) { 244 if (fs->lfs_resblk[start + i].inuse == 0) { 245 re = fs->lfs_resblk + start + i; 246 re->inuse = 1; 247 r = re->p; 248 h = lfs_mhash(r); 249 s = splbio(); 250 LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); 251 splx(s); 252 return r; 253 } 254 } 255#ifdef LFS_DEBUG_MALLOC 256 printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]); 257#endif 258 tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0); 259#ifdef LFS_DEBUG_MALLOC 260 printf("done sleeping on %s\n", lfs_res_names[type]); 261#endif 262 } 263 /* NOTREACHED */ 264 return r; 265} 266 267void 268lfs_free(struct lfs *fs, void *p, int type) 269{ 270 int s; 271 unsigned int h; 272 res_t *re; 273#ifdef DEBUG 274 int i; 275#endif 276 277 h = lfs_mhash(p); 278 s = splbio(); 279 LIST_FOREACH(re, &fs->lfs_reshash[h], res) { 280 if (re->p == p) { 281 KASSERT(re->inuse == 1); 282 LIST_REMOVE(re, res); 283 re->inuse = 0; 284 wakeup(&fs->lfs_resblk); 285 splx(s); 286 return; 287 } 288 } 289#ifdef DEBUG 290 for (i = 0; i < LFS_N_TOTAL; i++) { 291 if (fs->lfs_resblk[i].p == p) 292 panic("lfs_free: inconsist reserved block"); 293 } 294#endif 295 splx(s); 296 297 /* 
298 * If we didn't find it, free it. 299 */ 300 free(p, M_SEGMENT); 301} 302 303/* 304 * lfs_seglock -- 305 * Single thread the segment writer. 306 */ 307int 308lfs_seglock(struct lfs *fs, unsigned long flags) 309{ 310 struct segment *sp; 311 312 if (fs->lfs_seglock) { 313 if (fs->lfs_lockpid == curproc->p_pid) { 314 ++fs->lfs_seglock; 315 fs->lfs_sp->seg_flags |= flags; 316 return 0; 317 } else if (flags & SEGM_PAGEDAEMON) 318 return EWOULDBLOCK; 319 else while (fs->lfs_seglock) 320 (void)tsleep(&fs->lfs_seglock, PRIBIO + 1, 321 "lfs seglock", 0); 322 } 323 324 fs->lfs_seglock = 1; 325 fs->lfs_lockpid = curproc->p_pid; 326 327 /* Drain fragment size changes out */ 328 lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0); 329 330 sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK); 331 sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK); 332 sp->seg_flags = flags; 333 sp->vp = NULL; 334 sp->seg_iocount = 0; 335 (void) lfs_initseg(fs); 336 337 /* 338 * Keep a cumulative count of the outstanding I/O operations. If the 339 * disk drive catches up with us it could go to zero before we finish, 340 * so we artificially increment it by one until we've scheduled all of 341 * the writes we intend to do. 
342 */ 343 ++fs->lfs_iocount; 344 return 0; 345} 346 347static void lfs_unmark_dirop(struct lfs *); 348 349static void 350lfs_unmark_dirop(struct lfs *fs) 351{ 352 struct inode *ip, *nip; 353 struct vnode *vp; 354 extern int lfs_dirvcount; 355 356 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 357 nip = TAILQ_NEXT(ip, i_lfs_dchain); 358 vp = ITOV(ip); 359 360 if (VOP_ISLOCKED(vp) && 361 vp->v_lock.lk_lockholder != curproc->p_pid) { 362 continue; 363 } 364 if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) { 365 --lfs_dirvcount; 366 vp->v_flag &= ~VDIROP; 367 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 368 wakeup(&lfs_dirvcount); 369 fs->lfs_unlockvp = vp; 370 vrele(vp); 371 fs->lfs_unlockvp = NULL; 372 } 373 } 374} 375 376#ifndef LFS_NO_AUTO_SEGCLEAN 377static void 378lfs_auto_segclean(struct lfs *fs) 379{ 380 int i, error; 381 382 /* 383 * Now that we've swapped lfs_activesb, but while we still 384 * hold the segment lock, run through the segment list marking 385 * the empty ones clean. 386 * XXX - do we really need to do them all at once? 387 */ 388 for (i = 0; i < fs->lfs_nseg; i++) { 389 if ((fs->lfs_suflags[0][i] & 390 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 391 (SEGUSE_DIRTY | SEGUSE_EMPTY) && 392 (fs->lfs_suflags[1][i] & 393 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 394 (SEGUSE_DIRTY | SEGUSE_EMPTY)) { 395 396 if ((error = lfs_do_segclean(fs, i)) != 0) { 397#ifdef DEBUG 398 printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i); 399#endif /* DEBUG */ 400 } 401 } 402 fs->lfs_suflags[1 - fs->lfs_activesb][i] = 403 fs->lfs_suflags[fs->lfs_activesb][i]; 404 } 405} 406#endif /* LFS_AUTO_SEGCLEAN */ 407 408/* 409 * lfs_segunlock -- 410 * Single thread the segment writer. 
411 */ 412void 413lfs_segunlock(struct lfs *fs) 414{ 415 struct segment *sp; 416 unsigned long sync, ckp; 417 struct buf *bp; 418#ifdef LFS_MALLOC_SUMMARY 419 extern int locked_queue_count; 420 extern long locked_queue_bytes; 421#endif 422 423 sp = fs->lfs_sp; 424 425 if (fs->lfs_seglock == 1) { 426 if ((sp->seg_flags & SEGM_PROT) == 0) 427 lfs_unmark_dirop(fs); 428 sync = sp->seg_flags & SEGM_SYNC; 429 ckp = sp->seg_flags & SEGM_CKP; 430 if (sp->bpp != sp->cbpp) { 431 /* Free allocated segment summary */ 432 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 433 bp = *sp->bpp; 434#ifdef LFS_MALLOC_SUMMARY 435 lfs_freebuf(fs, bp); 436#else 437 s = splbio(); 438 bremfree(bp); 439 bp->b_flags |= B_DONE|B_INVAL; 440 bp->b_flags &= ~B_DELWRI; 441 reassignbuf(bp,bp->b_vp); 442 splx(s); 443 brelse(bp); 444#endif 445 } else 446 printf ("unlock to 0 with no summary"); 447 448 pool_put(&fs->lfs_bpppool, sp->bpp); 449 sp->bpp = NULL; 450 /* The sync case holds a reference in `sp' to be freed below */ 451 if (!sync) 452 pool_put(&fs->lfs_segpool, sp); 453 fs->lfs_sp = NULL; 454 455 /* 456 * If the I/O count is non-zero, sleep until it reaches zero. 457 * At the moment, the user's process hangs around so we can 458 * sleep. 459 */ 460 if (--fs->lfs_iocount == 0) { 461 lfs_countlocked(&locked_queue_count, 462 &locked_queue_bytes, "lfs_segunlock"); 463 wakeup(&locked_queue_count); 464 wakeup(&fs->lfs_iocount); 465 } 466 /* 467 * If we're not checkpointing, we don't have to block 468 * other processes to wait for a synchronous write 469 * to complete. 470 */ 471 if (!ckp) { 472 --fs->lfs_seglock; 473 fs->lfs_lockpid = 0; 474 wakeup(&fs->lfs_seglock); 475 } 476 /* 477 * We let checkpoints happen asynchronously. That means 478 * that during recovery, we have to roll forward between 479 * the two segments described by the first and second 480 * superblocks to make sure that the checkpoint described 481 * by a superblock completed. 
482 */ 483 while (ckp && sync && fs->lfs_iocount) 484 (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, 485 "lfs_iocount", 0); 486 while (sync && sp->seg_iocount) { 487 (void)tsleep(&sp->seg_iocount, PRIBIO + 1, 488 "seg_iocount", 0); 489 /* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */ 490 } 491 if (sync) 492 pool_put(&fs->lfs_segpool, sp); 493 if (ckp) { 494 fs->lfs_nactive = 0; 495 /* If we *know* everything's on disk, write both sbs */ 496 /* XXX should wait for this one */ 497 if (sync) 498 lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]); 499 lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]); 500#ifndef LFS_NO_AUTO_SEGCLEAN 501 lfs_auto_segclean(fs); 502#endif 503 fs->lfs_activesb = 1 - fs->lfs_activesb; 504 --fs->lfs_seglock; 505 fs->lfs_lockpid = 0; 506 wakeup(&fs->lfs_seglock); 507 } 508 /* Reenable fragment size changes */ 509 lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); 510 } else if (fs->lfs_seglock == 0) { 511 panic ("Seglock not held"); 512 } else { 513 --fs->lfs_seglock; 514 } 515} 516