1/* $NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31/* 32 * Copyright (c) 1991, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94 60 */ 61 62#include <sys/cdefs.h> 63__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $"); 64 65#if defined(_KERNEL_OPT) 66#include "opt_quota.h" 67#endif 68 69#include <sys/param.h> 70#include <sys/systm.h> 71#include <sys/kernel.h> 72#include <sys/buf.h> 73#include <sys/lock.h> 74#include <sys/vnode.h> 75#include <sys/syslog.h> 76#include <sys/mount.h> 77#include <sys/malloc.h> 78#include <sys/pool.h> 79#include <sys/proc.h> 80#include <sys/kauth.h> 81 82#include <ufs/lfs/ulfs_quotacommon.h> 83#include <ufs/lfs/ulfs_inode.h> 84#include <ufs/lfs/ulfsmount.h> 85#include <ufs/lfs/ulfs_extern.h> 86 87#include <ufs/lfs/lfs.h> 88#include <ufs/lfs/lfs_accessors.h> 89#include <ufs/lfs/lfs_extern.h> 90#include <ufs/lfs/lfs_kernel.h> 91 92/* Constants for inode free bitmap */ 93#define BMSHIFT 5 /* 2 ** 5 = 32 */ 94#define BMMASK ((1 << BMSHIFT) - 1) 95#define SET_BITMAP_FREE(F, I) do { \ 96 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \ 97 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \ 98 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \ 99} while (0) 100#define CLR_BITMAP_FREE(F, I) do { \ 101 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \ 102 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \ 103 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \ 104} while(0) 105 106#define ISSET_BITMAP_FREE(F, I) \ 107 ((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK))) 108 109/* 110 * Add a new block to the Ifile, to accommodate future file creations. 111 * Called with the segment lock held. 112 */ 113int 114lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred) 115{ 116 struct vnode *vp; 117 struct inode *ip; 118 IFILE64 *ifp64; 119 IFILE32 *ifp32; 120 IFILE_V1 *ifp_v1; 121 struct buf *bp, *cbp; 122 int error; 123 daddr_t i, blkno, xmax; 124 ino_t oldlast, maxino; 125 CLEANERINFO *cip; 126 127 ASSERT_SEGLOCK(fs); 128 129 /* XXX should check or assert that we aren't readonly. */ 130 131 /* 132 * Get a block and extend the ifile inode. Leave the buffer for 133 * the block in bp. 134 */ 135 136 vp = fs->lfs_ivnode; 137 ip = VTOI(vp); 138 blkno = lfs_lblkno(fs, ip->i_size); 139 if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0, 140 &bp)) != 0) { 141 return (error); 142 } 143 ip->i_size += lfs_sb_getbsize(fs); 144 lfs_dino_setsize(fs, ip->i_din, ip->i_size); 145 uvm_vnp_setsize(vp, ip->i_size); 146 147 /* 148 * Compute the new number of inodes, and reallocate the in-memory 149 * inode freemap. 150 */ 151 152 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) - 153 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); 154 fs->lfs_ino_bitmap = (lfs_bm_t *) 155 realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) * 156 sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK); 157 KASSERT(fs->lfs_ino_bitmap != NULL); 158 159 /* first new inode number */ 160 i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) * 161 lfs_sb_getifpb(fs); 162 163 /* 164 * We insert the new inodes at the head of the free list. 165 * Under normal circumstances, the free list is empty here, 166 * so we are also incidentally placing them at the end (which 167 * we must do if we are to keep them in order). 168 */ 169 LFS_GET_HEADFREE(fs, cip, cbp, &oldlast); 170 LFS_PUT_HEADFREE(fs, cip, cbp, i); 171 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM), 172 "inode 0 allocated [2]"); 173 174 /* inode number to stop at (XXX: why *x*max?) */ 175 xmax = i + lfs_sb_getifpb(fs); 176 177 /* 178 * Initialize the ifile block. 179 * 180 * XXX: these loops should be restructured to use the accessor 181 * functions instead of using cutpaste polymorphism. 182 */ 183 184 if (fs->lfs_is64) { 185 for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) { 186 SET_BITMAP_FREE(fs, i); 187 ifp64->if_version = 1; 188 ifp64->if_daddr = LFS_UNUSED_DADDR; 189 ifp64->if_nextfree = ++i; 190 } 191 ifp64--; 192 ifp64->if_nextfree = oldlast; 193 } else if (lfs_sb_getversion(fs) > 1) { 194 for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) { 195 SET_BITMAP_FREE(fs, i); 196 ifp32->if_version = 1; 197 ifp32->if_daddr = LFS_UNUSED_DADDR; 198 ifp32->if_nextfree = ++i; 199 } 200 ifp32--; 201 ifp32->if_nextfree = oldlast; 202 } else { 203 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) { 204 SET_BITMAP_FREE(fs, i); 205 ifp_v1->if_version = 1; 206 ifp_v1->if_daddr = LFS_UNUSED_DADDR; 207 ifp_v1->if_nextfree = ++i; 208 } 209 ifp_v1--; 210 ifp_v1->if_nextfree = oldlast; 211 } 212 LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1); 213 214 /* 215 * Write out the new block. 216 */ 217 218 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 219 220 return 0; 221} 222 223/* 224 * Allocate an inode for a new file. 225 * 226 * Takes the segment lock. Also (while holding it) takes lfs_lock 227 * to frob fs->lfs_fmod. 228 * 229 * XXX: the mode argument is unused; should just get rid of it. 230 */ 231/* ARGSUSED */ 232/* VOP_BWRITE 2i times */ 233int 234lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, 235 ino_t *ino, int *gen) 236{ 237 struct lfs *fs; 238 struct buf *bp, *cbp; 239 IFILE *ifp; 240 int error; 241 CLEANERINFO *cip; 242 243 fs = VTOI(pvp)->i_lfs; 244 if (fs->lfs_ronly) 245 return EROFS; 246 247 ASSERT_NO_SEGLOCK(fs); 248 249 lfs_seglock(fs, SEGM_PROT); 250 251 /* Get the head of the freelist. */ 252 LFS_GET_HEADFREE(fs, cip, cbp, ino); 253 254 /* paranoia */ 255 KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM); 256 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n", 257 *ino)); 258 259 /* Update the in-memory inode freemap */ 260 CLR_BITMAP_FREE(fs, *ino); 261 262 /* 263 * Fetch the ifile entry and make sure the inode is really 264 * free. 265 */ 266 LFS_IENTRY(ifp, fs, *ino, bp); 267 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR) 268 panic("lfs_valloc: inuse inode %" PRId64 " on the free list", 269 *ino); 270 271 /* Update the inode freelist head in the superblock. */ 272 LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp)); 273 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n", 274 *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp))); 275 276 /* 277 * Retrieve the version number from the ifile entry. It was 278 * bumped by vfree, so don't bump it again. 279 */ 280 *gen = lfs_if_getversion(fs, ifp); 281 282 /* Done with ifile entry */ 283 brelse(bp, 0); 284 285 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { 286 /* 287 * No more inodes; extend the ifile so that the next 288 * lfs_valloc will succeed. 289 */ 290 if ((error = lfs_extend_ifile(fs, cred)) != 0) { 291 /* restore the freelist */ 292 LFS_PUT_HEADFREE(fs, cip, cbp, *ino); 293 294 /* unlock and return */ 295 lfs_segunlock(fs); 296 return error; 297 } 298 } 299 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM), 300 "inode 0 allocated [3]"); 301 302 /* Set superblock modified bit */ 303 mutex_enter(&lfs_lock); 304 fs->lfs_fmod = 1; 305 mutex_exit(&lfs_lock); 306 307 /* increment file count */ 308 lfs_sb_addnfiles(fs, 1); 309 310 /* done */ 311 lfs_segunlock(fs); 312 return 0; 313} 314 315/* 316 * Allocate an inode for a new file, with given inode number and 317 * version. 318 * 319 * Called in the same context as lfs_valloc and therefore shares the 320 * same locking assumptions. 321 */ 322int 323lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers) 324{ 325 IFILE *ifp; 326 struct buf *bp, *cbp; 327 ino_t headino, thisino, oldnext; 328 CLEANERINFO *cip; 329 330 if (fs->lfs_ronly) 331 return EROFS; 332 333 ASSERT_NO_SEGLOCK(fs); 334 335 lfs_seglock(fs, SEGM_PROT); 336 337 /* 338 * If the ifile is too short to contain this inum, extend it. 339 * 340 * XXX: lfs_extend_ifile should take a size instead of always 341 * doing just one block at time. 342 */ 343 while (VTOI(fs->lfs_ivnode)->i_size <= (ino / 344 lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs)) 345 << lfs_sb_getbshift(fs)) { 346 lfs_extend_ifile(fs, NOCRED); 347 } 348 349 /* 350 * fetch the ifile entry; get the inode freelist next pointer, 351 * and set the version as directed. 352 */ 353 LFS_IENTRY(ifp, fs, ino, bp); 354 oldnext = lfs_if_getnextfree(fs, ifp); 355 lfs_if_setversion(fs, ifp, vers); 356 brelse(bp, 0); 357 358 /* Get head of inode freelist */ 359 LFS_GET_HEADFREE(fs, cip, cbp, &headino); 360 if (headino == ino) { 361 /* Easy case: the inode we wanted was at the head */ 362 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext); 363 } else { 364 ino_t nextfree; 365 366 /* Have to find the desired inode in the freelist... */ 367 368 thisino = headino; 369 while (1) { 370 /* read this ifile entry */ 371 LFS_IENTRY(ifp, fs, thisino, bp); 372 nextfree = lfs_if_getnextfree(fs, ifp); 373 /* stop if we find it or we hit the end */ 374 if (nextfree == ino || 375 nextfree == LFS_UNUSED_INUM) 376 break; 377 /* nope, keep going... */ 378 thisino = nextfree; 379 brelse(bp, 0); 380 } 381 if (nextfree == LFS_UNUSED_INUM) { 382 /* hit the end -- this inode is not available */ 383 brelse(bp, 0); 384 lfs_segunlock(fs); 385 return ENOENT; 386 } 387 /* found it; update the next pointer */ 388 lfs_if_setnextfree(fs, ifp, oldnext); 389 /* write the ifile block */ 390 LFS_BWRITE_LOG(bp); 391 } 392 393 /* done */ 394 lfs_segunlock(fs); 395 return 0; 396} 397 398#if 0 399/* 400 * Find the highest-numbered allocated inode. 401 * This will be used to shrink the Ifile. 402 */ 403static inline ino_t 404lfs_last_alloc_ino(struct lfs *fs) 405{ 406 ino_t ino, maxino; 407 408 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) - 409 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * 410 lfs_sb_getifpb(fs); 411 for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) { 412 if (ISSET_BITMAP_FREE(fs, ino) == 0) 413 break; 414 } 415 return ino; 416} 417#endif 418 419/* 420 * Find the previous (next lowest numbered) free inode, if any. 421 * If there is none, return LFS_UNUSED_INUM. 422 * 423 * XXX: locking? 424 */ 425static inline ino_t 426lfs_freelist_prev(struct lfs *fs, ino_t ino) 427{ 428 ino_t tino, bound, bb, freehdbb; 429 430 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { 431 /* No free inodes at all */ 432 return LFS_UNUSED_INUM; 433 } 434 435 /* Search our own word first */ 436 bound = ino & ~BMMASK; 437 for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--) 438 if (ISSET_BITMAP_FREE(fs, tino)) 439 return tino; 440 /* If there are no lower words to search, just return */ 441 if (ino >> BMSHIFT == 0) 442 return LFS_UNUSED_INUM; 443 444 /* 445 * Find a word with a free inode in it. We have to be a bit 446 * careful here since ino_t is unsigned. 447 */ 448 freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT); 449 for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb) 450 if (fs->lfs_ino_bitmap[bb]) 451 break; 452 if (fs->lfs_ino_bitmap[bb] == 0) 453 return LFS_UNUSED_INUM; 454 455 /* Search the word we found */ 456 for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) && 457 tino > LFS_UNUSED_INUM; tino--) 458 if (ISSET_BITMAP_FREE(fs, tino)) 459 break; 460 461 /* Avoid returning reserved inode numbers */ 462 if (tino <= LFS_IFILE_INUM) 463 tino = LFS_UNUSED_INUM; 464 465 return tino; 466} 467 468/* 469 * Free an inode. 470 * 471 * Takes lfs_seglock. Also (independently) takes vp->v_interlock. 472 */ 473/* ARGUSED */ 474/* VOP_BWRITE 2i times */ 475int 476lfs_vfree(struct vnode *vp, ino_t ino, int mode) 477{ 478 SEGUSE *sup; 479 CLEANERINFO *cip; 480 struct buf *cbp, *bp; 481 IFILE *ifp; 482 struct inode *ip; 483 struct lfs *fs; 484 daddr_t old_iaddr; 485 ino_t otail; 486 487 /* Get the inode number and file system. */ 488 ip = VTOI(vp); 489 fs = ip->i_lfs; 490 ino = ip->i_number; 491 492 /* XXX: assert not readonly */ 493 494 ASSERT_NO_SEGLOCK(fs); 495 DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino)); 496 497 /* Drain of pending writes */ 498 mutex_enter(vp->v_interlock); 499 while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) { 500 cv_wait(&vp->v_cv, vp->v_interlock); 501 } 502 mutex_exit(vp->v_interlock); 503 504 lfs_seglock(fs, SEGM_PROT); 505 506 /* 507 * If the inode was in a dirop, it isn't now. 508 * 509 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP) 510 * not updated together in one function? (and why do both exist, 511 * anyway?) 512 */ 513 UNMARK_VNODE(vp); 514 515 mutex_enter(&lfs_lock); 516 if (vp->v_uflag & VU_DIROP) { 517 vp->v_uflag &= ~VU_DIROP; 518 --lfs_dirvcount; 519 --fs->lfs_dirvcount; 520 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 521 wakeup(&fs->lfs_dirvcount); 522 wakeup(&lfs_dirvcount); 523 mutex_exit(&lfs_lock); 524 vrele(vp); 525 526 /* 527 * If this inode is not going to be written any more, any 528 * segment accounting left over from its truncation needs 529 * to occur at the end of the next dirops flush. Attach 530 * them to the fs-wide list for that purpose. 531 */ 532 if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) { 533 struct segdelta *sd; 534 535 while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) { 536 LIST_REMOVE(sd, list); 537 LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list); 538 } 539 } 540 } else { 541 /* 542 * If it's not a dirop, we can finalize right away. 543 */ 544 mutex_exit(&lfs_lock); 545 lfs_finalize_ino_seguse(fs, ip); 546 } 547 548 /* it is no longer an unwritten inode, so update the counts */ 549 mutex_enter(&lfs_lock); 550 LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED); 551 mutex_exit(&lfs_lock); 552 553 /* Turn off all inode modification flags */ 554 ip->i_state &= ~IN_ALLMOD; 555 556 /* Mark it deleted */ 557 ip->i_lfs_iflags |= LFSI_DELETED; 558 559 /* Mark it free in the in-memory inode freemap */ 560 SET_BITMAP_FREE(fs, ino); 561 562 /* 563 * Set the ifile's inode entry to unused, increment its version number 564 * and link it onto the free chain. 565 */ 566 567 /* fetch the ifile entry */ 568 LFS_IENTRY(ifp, fs, ino, bp); 569 570 /* update the on-disk address (to "nowhere") */ 571 old_iaddr = lfs_if_getdaddr(fs, ifp); 572 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR); 573 574 /* bump the version */ 575 lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1); 576 577 if (lfs_sb_getversion(fs) == 1) { 578 ino_t nextfree; 579 580 /* insert on freelist */ 581 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree); 582 lfs_if_setnextfree(fs, ifp, nextfree); 583 LFS_PUT_HEADFREE(fs, cip, cbp, ino); 584 585 /* write the ifile block */ 586 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 587 } else { 588 ino_t tino, onf; 589 590 /* 591 * Clear the freelist next pointer and write the ifile 592 * block. XXX: why? I'm sure there must be a reason but 593 * it seems both silly and dangerous. 594 */ 595 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM); 596 (void) LFS_BWRITE_LOG(bp); /* Ifile */ 597 598 /* 599 * Insert on freelist in order. 600 */ 601 602 /* Find the next lower (by number) free inode */ 603 tino = lfs_freelist_prev(fs, ino); 604 605 if (tino == LFS_UNUSED_INUM) { 606 ino_t nextfree; 607 608 /* 609 * There isn't one; put us on the freelist head. 610 */ 611 612 /* reload the ifile block */ 613 LFS_IENTRY(ifp, fs, ino, bp); 614 /* update the list */ 615 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree); 616 lfs_if_setnextfree(fs, ifp, nextfree); 617 LFS_PUT_HEADFREE(fs, cip, cbp, ino); 618 DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n", 619 (long long)nextfree, (long long)ino)); 620 /* write the ifile block */ 621 LFS_BWRITE_LOG(bp); /* Ifile */ 622 623 /* If the list was empty, set tail too */ 624 LFS_GET_TAILFREE(fs, cip, cbp, &otail); 625 if (otail == LFS_UNUSED_INUM) { 626 LFS_PUT_TAILFREE(fs, cip, cbp, ino); 627 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld " 628 "-> %lld\n", (long long)otail, 629 (long long)ino)); 630 } 631 } else { 632 /* 633 * Insert this inode into the list after tino. 634 * We hold the segment lock so we don't have to 635 * worry about blocks being written out of order. 636 */ 637 638 DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld " 639 " after %lld\n", ino, tino)); 640 641 /* load the previous inode's ifile block */ 642 LFS_IENTRY(ifp, fs, tino, bp); 643 /* update the list pointer */ 644 onf = lfs_if_getnextfree(fs, ifp); 645 lfs_if_setnextfree(fs, ifp, ino); 646 /* write the block */ 647 LFS_BWRITE_LOG(bp); /* Ifile */ 648 649 /* load this inode's ifile block */ 650 LFS_IENTRY(ifp, fs, ino, bp); 651 /* update the list pointer */ 652 lfs_if_setnextfree(fs, ifp, onf); 653 /* write the block */ 654 LFS_BWRITE_LOG(bp); /* Ifile */ 655 656 /* If we're last, put us on the tail */ 657 if (onf == LFS_UNUSED_INUM) { 658 LFS_GET_TAILFREE(fs, cip, cbp, &otail); 659 LFS_PUT_TAILFREE(fs, cip, cbp, ino); 660 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld " 661 "-> %lld\n", (long long)otail, 662 (long long)ino)); 663 } 664 } 665 } 666 /* XXX: shouldn't this check be further up *before* we trash the fs? */ 667 KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed"); 668 669 /* 670 * Update the segment summary for the segment where the on-disk 671 * copy used to be. 672 */ 673 if (old_iaddr != LFS_UNUSED_DADDR) { 674 /* load it */ 675 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); 676 /* the number of bytes in the segment should not become < 0 */ 677 KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)), 678 "lfs_vfree: negative byte count" 679 " (segment %" PRIu32 " short by %d)\n", 680 lfs_dtosn(fs, old_iaddr), 681 (int)DINOSIZE(fs) - sup->su_nbytes); 682 /* update the number of bytes in the segment */ 683 sup->su_nbytes -= DINOSIZE(fs); 684 /* write the segment entry */ 685 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */ 686 } 687 688 /* Set superblock modified bit. */ 689 mutex_enter(&lfs_lock); 690 fs->lfs_fmod = 1; 691 mutex_exit(&lfs_lock); 692 693 /* Decrement file count. */ 694 lfs_sb_subnfiles(fs, 1); 695 696 lfs_segunlock(fs); 697 698 return (0); 699} 700 701/* 702 * Sort the freelist and set up the free-inode bitmap. 703 * To be called by lfs_mountfs(). 704 * 705 * Takes the segmenet lock. 706 */ 707void 708lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp) 709{ 710 CLEANERINFO *cip; 711 IFILE *ifp = NULL; 712 struct buf *bp; 713 ino_t ino, firstino, lastino, maxino; 714 ino_t *orphan = NULL; 715 size_t norphan = 0; 716 size_t norphan_alloc = 0; 717 718 ASSERT_NO_SEGLOCK(fs); 719 lfs_seglock(fs, SEGM_PROT); 720 721 /* largest inode on fs */ 722 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) - 723 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); 724 725 /* allocate the in-memory inode freemap */ 726 /* XXX: assert that fs->lfs_ino_bitmap is null here */ 727 fs->lfs_ino_bitmap = 728 malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t), 729 M_SEGMENT, M_WAITOK | M_ZERO); 730 KASSERT(fs->lfs_ino_bitmap != NULL); 731 732 /* 733 * Scan the ifile. 734 */ 735 736 firstino = lastino = LFS_UNUSED_INUM; 737 for (ino = 0; ino < maxino; ino++) { 738 /* Load this inode's ifile entry. */ 739 if (ino % lfs_sb_getifpb(fs) == 0) 740 LFS_IENTRY(ifp, fs, ino, bp); 741 else 742 LFS_IENTRY_NEXT(ifp, fs); 743 744 /* Don't put zero or ifile on the free list */ 745 if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM) 746 continue; 747 748 /* 749 * Address orphaned files. 750 * 751 * The idea of this is to free inodes belonging to 752 * files that were unlinked but not reclaimed, I guess 753 * because if we're going to scan the whole ifile 754 * anyway it costs very little to do this. I don't 755 * immediately see any reason this should be disabled, 756 * but presumably it doesn't work... not sure what 757 * happens to such files currently. -- dholland 20160806 758 */ 759 if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) { 760 if (orphan == NULL) { 761 norphan_alloc = 32; /* XXX pulled from arse */ 762 orphan = kmem_zalloc(sizeof(orphan[0]) * 763 norphan_alloc, KM_SLEEP); 764 } else if (norphan == norphan_alloc) { 765 ino_t *orphan_new; 766 if (norphan_alloc >= 4096) 767 norphan_alloc += 4096; 768 else 769 norphan_alloc *= 2; 770 orphan_new = kmem_zalloc(sizeof(orphan[0]) * 771 norphan_alloc, KM_SLEEP); 772 memcpy(orphan_new, orphan, sizeof(orphan[0]) * 773 norphan); 774 kmem_free(orphan, sizeof(orphan[0]) * norphan); 775 orphan = orphan_new; 776 } 777 orphan[norphan++] = ino; 778 } 779 780 if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) { 781 782 /* 783 * This inode is free. Put it on the free list. 784 */ 785 786 if (firstino == LFS_UNUSED_INUM) { 787 /* XXX: assert lastino == LFS_UNUSED_INUM? */ 788 /* remember the first free inode */ 789 firstino = ino; 790 } else { 791 /* release this inode's ifile entry */ 792 brelse(bp, 0); 793 794 /* XXX: assert lastino != LFS_UNUSED_INUM? */ 795 796 /* load lastino's ifile entry */ 797 LFS_IENTRY(ifp, fs, lastino, bp); 798 /* set the list pointer */ 799 lfs_if_setnextfree(fs, ifp, ino); 800 /* write the block */ 801 LFS_BWRITE_LOG(bp); 802 803 /* reload this inode's ifile entry */ 804 LFS_IENTRY(ifp, fs, ino, bp); 805 } 806 /* remember the last free inode seen so far */ 807 lastino = ino; 808 809 /* Mark this inode free in the in-memory freemap */ 810 SET_BITMAP_FREE(fs, ino); 811 } 812 813 /* If moving to the next ifile block, release the buffer. */ 814 if ((ino + 1) % lfs_sb_getifpb(fs) == 0) 815 brelse(bp, 0); 816 } 817 818 /* Write the freelist head and tail pointers */ 819 /* XXX: do we need to mark the superblock dirty? */ 820 LFS_PUT_HEADFREE(fs, cip, bp, firstino); 821 LFS_PUT_TAILFREE(fs, cip, bp, lastino); 822 823 /* done */ 824 lfs_segunlock(fs); 825 826 /* 827 * Shrink the array of orphans so we don't have to carry around 828 * the allocation size. 829 */ 830 if (norphan < norphan_alloc) { 831 ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan, 832 KM_SLEEP); 833 memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan); 834 kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc); 835 orphan = orphan_new; 836 norphan_alloc = norphan; 837 } 838 839 *orphanp = orphan; 840 *norphanp = norphan; 841} 842 843/* 844 * Mark a file orphaned (unlinked but not yet reclaimed) by inode 845 * number. Do this with a magic freelist next pointer. 846 * 847 * XXX: howzabout some locking? 848 */ 849void 850lfs_orphan(struct lfs *fs, ino_t ino) 851{ 852 IFILE *ifp; 853 struct buf *bp; 854 855 LFS_IENTRY(ifp, fs, ino, bp); 856 lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs)); 857 LFS_BWRITE_LOG(bp); 858} 859 860/* 861 * Free orphans discovered during mount. This is a separate stage 862 * because it requires fs->lfs_suflags to be set up, which is not done 863 * by the time we run lfs_order_freelist. It's possible that we could 864 * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner) 865 * but that requires more thought than I can put into this at the 866 * moment. 867 */ 868void 869lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan) 870{ 871 size_t i; 872 873 for (i = 0; i < norphan; i++) { 874 ino_t ino = orphan[i]; 875 unsigned segno; 876 struct vnode *vp; 877 struct inode *ip; 878 struct buf *bp; 879 IFILE *ifp; 880 SEGUSE *sup; 881 int error; 882 883 /* Get the segment the inode is in on disk. */ 884 LFS_IENTRY(ifp, fs, ino, bp); 885 segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp)); 886 brelse(bp, 0); 887 888 /* 889 * Try to get the vnode. If we can't, tough -- hope 890 * you have backups! 891 */ 892 error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, 893 &vp); 894 if (error) { 895 printf("orphan %jd vget error %d\n", (intmax_t)ino, 896 error); 897 continue; 898 } 899 900 /* 901 * Sanity-check the inode. 902 * 903 * XXX What to do if it is still referenced? 904 */ 905 ip = VTOI(vp); 906 if (ip->i_nlink != 0) 907 printf("orphan %jd nlink %d\n", (intmax_t)ino, 908 ip->i_nlink); 909 910 /* 911 * Truncate the inode, to free any blocks allocated for 912 * it, and release it, to free the inode number. 913 * 914 * XXX Isn't it redundant to truncate? Won't vput do 915 * that for us? 916 */ 917 error = lfs_truncate(vp, 0, 0, NOCRED); 918 if (error) 919 printf("orphan %jd truncate error %d", (intmax_t)ino, 920 error); 921 vput(vp); 922 923 /* Update the number of bytes in the segment summary. */ 924 LFS_SEGENTRY(sup, fs, segno, bp); 925 KASSERT(sup->su_nbytes >= DINOSIZE(fs)); 926 sup->su_nbytes -= DINOSIZE(fs); 927 LFS_WRITESEGENTRY(sup, fs, segno, bp); 928 929 /* Drop the on-disk address. */ 930 LFS_IENTRY(ifp, fs, ino, bp); 931 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR); 932 LFS_BWRITE_LOG(bp); 933 } 934 935 if (orphan) 936 kmem_free(orphan, sizeof(orphan[0]) * norphan); 937} 938