1/* $NetBSD: segwrite.c,v 1.48 2020/05/14 08:34:17 msaitoh Exp $ */ 2/*- 3 * Copyright (c) 2003 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30/* 31 * Copyright (c) 1991, 1993 32 * The Regents of the University of California. All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. Neither the name of the University nor the names of its contributors 43 * may be used to endorse or promote products derived from this software 44 * without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * SUCH DAMAGE. 57 * 58 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95 59 */ 60 61/* 62 * Partial segment writer, taken from the kernel and adapted for userland. 63 */ 64#include <sys/types.h> 65#include <sys/param.h> 66#include <sys/time.h> 67#include <sys/buf.h> 68#include <sys/mount.h> 69 70/* Override certain things to make <ufs/lfs/lfs.h> work */ 71#define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */ 72#define vnode uvnode 73#define buf ubuf 74#define panic call_panic 75 76#include <ufs/lfs/lfs.h> 77#include <ufs/lfs/lfs_accessors.h> 78#include <ufs/lfs/lfs_inode.h> 79 80#include <assert.h> 81#include <stdio.h> 82#include <stdlib.h> 83#include <string.h> 84#include <err.h> 85#include <errno.h> 86#include <util.h> 87 88#include "bufcache.h" 89#include "extern.h" 90#include "lfs_user.h" 91#include "segwrite.h" 92 93/* Compatibility definitions */ 94off_t written_bytes = 0; 95off_t written_data = 0; 96off_t written_indir = 0; 97off_t written_dev = 0; 98int written_inodes = 0; 99 100/* Global variables */ 101time_t write_time; 102 103static void lfs_shellsort(struct lfs *, 104 struct ubuf **, union lfs_blocks *, int, int); 105 106/* 107 * Logical block number match routines used when traversing the dirty block 108 * chain. 109 */ 110int 111lfs_match_data(struct lfs * fs, struct ubuf * bp) 112{ 113 return (bp->b_lblkno >= 0); 114} 115 116int 117lfs_match_indir(struct lfs * fs, struct ubuf * bp) 118{ 119 daddr_t lbn; 120 121 lbn = bp->b_lblkno; 122 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 0); 123} 124 125int 126lfs_match_dindir(struct lfs * fs, struct ubuf * bp) 127{ 128 daddr_t lbn; 129 130 lbn = bp->b_lblkno; 131 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 1); 132} 133 134int 135lfs_match_tindir(struct lfs * fs, struct ubuf * bp) 136{ 137 daddr_t lbn; 138 139 lbn = bp->b_lblkno; 140 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 2); 141} 142 143/* 144 * Do a checkpoint. 145 */ 146int 147lfs_segwrite(struct lfs * fs, int flags) 148{ 149 struct inode *ip; 150 struct segment *sp; 151 struct uvnode *vp; 152 SEGSUM *ssp; 153 int redo; 154 155 lfs_seglock(fs, flags | SEGM_CKP); 156 sp = fs->lfs_sp; 157 158 lfs_writevnodes(fs, sp, VN_REG); 159 lfs_writevnodes(fs, sp, VN_DIROP); 160 ssp = (SEGSUM *)sp->segsum; 161 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT)); 162 163 do { 164 vp = fs->lfs_ivnode; 165 fs->lfs_flags &= ~LFS_IFDIRTY; 166 ip = VTOI(vp); 167 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL || lfs_sb_getidaddr(fs) <= 0) 168 lfs_writefile(fs, sp, vp); 169 170 redo = lfs_writeinode(fs, sp, ip); 171 redo += lfs_writeseg(fs, sp); 172 redo += (fs->lfs_flags & LFS_IFDIRTY); 173 } while (redo); 174 175 lfs_segunlock(fs); 176#if 0 177 printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n", 178 written_bytes, (ulfs_daddr_t)lfs_btofsb(fs, written_bytes)); 179 printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n", 180 written_data, (ulfs_daddr_t)lfs_btofsb(fs, written_data)); 181 printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n", 182 written_indir, (ulfs_daddr_t)lfs_btofsb(fs, written_indir)); 183 printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n", 184 written_dev, (ulfs_daddr_t)lfs_btofsb(fs, written_dev)); 185 printf("wrote %d inodes (%" PRId32 " fsb)\n", 186 written_inodes, lfs_btofsb(fs, written_inodes * fs->lfs_ibsize)); 187#endif 188 return 0; 189} 190 191/* 192 * Write the dirty blocks associated with a vnode. 193 */ 194void 195lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp) 196{ 197 struct ubuf *bp; 198 FINFO *fip; 199 struct inode *ip; 200 IFILE *ifp; 201 SEGSUM *ssp; 202 203 ip = VTOI(vp); 204 205 if (sp->seg_bytes_left < lfs_sb_getbsize(fs) || 206 sp->sum_bytes_left < FINFOSIZE(fs) + LFS_BLKPTRSIZE(fs)) 207 (void) lfs_writeseg(fs, sp); 208 209 sp->sum_bytes_left -= FINFOSIZE(fs); 210 ssp = (SEGSUM *)sp->segsum; 211 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1); 212 213 if (vp->v_uflag & VU_DIROP) { 214 lfs_ss_setflags(fs, ssp, 215 lfs_ss_getflags(fs, ssp) | (SS_DIROP | SS_CONT)); 216 } 217 218 fip = sp->fip; 219 lfs_fi_setnblocks(fs, fip, 0); 220 lfs_fi_setino(fs, fip, ip->i_number); 221 LFS_IENTRY(ifp, fs, lfs_fi_getino(fs, fip), bp); 222 lfs_fi_setversion(fs, fip, lfs_if_getversion(fs, ifp)); 223 brelse(bp, 0); 224 225 lfs_gather(fs, sp, vp, lfs_match_data); 226 lfs_gather(fs, sp, vp, lfs_match_indir); 227 lfs_gather(fs, sp, vp, lfs_match_dindir); 228 lfs_gather(fs, sp, vp, lfs_match_tindir); 229 230 fip = sp->fip; 231 if (lfs_fi_getnblocks(fs, fip) != 0) { 232 sp->fip = NEXT_FINFO(fs, fip); 233 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip); 234 } else { 235 /* XXX shouldn't this update sp->fip? */ 236 sp->sum_bytes_left += FINFOSIZE(fs); 237 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) - 1); 238 } 239} 240 241int 242lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip) 243{ 244 struct ubuf *bp, *ibp; 245 union lfs_dinode *cdp; 246 IFILE *ifp; 247 SEGUSE *sup; 248 SEGSUM *ssp; 249 daddr_t daddr; 250 ino_t ino; 251 IINFO *iip; 252 int i, fsb = 0; 253 int redo_ifile = 0; 254 struct timespec ts; 255 int gotblk = 0; 256 257 /* Allocate a new inode block if necessary. */ 258 if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) && 259 sp->ibp == NULL) { 260 /* Allocate a new segment if necessary. */ 261 if (sp->seg_bytes_left < lfs_sb_getibsize(fs) || 262 sp->sum_bytes_left < LFS_BLKPTRSIZE(fs)) 263 (void) lfs_writeseg(fs, sp); 264 265 /* Get next inode block. */ 266 daddr = lfs_sb_getoffset(fs); 267 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs))); 268 sp->ibp = *sp->cbpp++ = 269 getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), 270 lfs_sb_getibsize(fs)); 271 sp->ibp->b_flags |= B_GATHERED; 272 gotblk++; 273 274 /* Zero out inode numbers */ 275 for (i = 0; i < LFS_INOPB(fs); ++i) { 276 union lfs_dinode *tmpdip; 277 278 tmpdip = DINO_IN_BLOCK(fs, sp->ibp->b_data, i); 279 lfs_dino_setinumber(fs, tmpdip, 0); 280 } 281 282 ++sp->start_bpp; 283 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs))); 284 /* Set remaining space counters. */ 285 sp->seg_bytes_left -= lfs_sb_getibsize(fs); 286 sp->sum_bytes_left -= LFS_BLKPTRSIZE(fs); 287 288 /* Store the address in the segment summary. */ 289 iip = NTH_IINFO(fs, sp->segsum, sp->ninodes / LFS_INOPB(fs)); 290 lfs_ii_setblock(fs, iip, daddr); 291 } 292 /* Update the inode times and copy the inode onto the inode page. */ 293 ts.tv_nsec = 0; 294 ts.tv_sec = write_time; 295 /* XXX kludge --- don't redirty the ifile just to put times on it */ 296 if (ip->i_number != LFS_IFILE_INUM) 297 LFS_ITIMES(ip, &ts, &ts, &ts); 298 299 /* 300 * If this is the Ifile, and we've already written the Ifile in this 301 * partial segment, just overwrite it (it's not on disk yet) and 302 * continue. 303 * 304 * XXX we know that the bp that we get the second time around has 305 * already been gathered. 306 */ 307 if (ip->i_number == LFS_IFILE_INUM && sp->idp) { 308 lfs_copy_dinode(fs, sp->idp, ip->i_din); 309 ip->i_lfs_osize = lfs_dino_getsize(fs, ip->i_din); 310 return 0; 311 } 312 bp = sp->ibp; 313 cdp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs)); 314 lfs_copy_dinode(fs, cdp, ip->i_din); 315 316 /* If all blocks are goig to disk, update the "size on disk" */ 317 ip->i_lfs_osize = lfs_dino_getsize(fs, ip->i_din); 318 319 if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */ 320 sp->idp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs)); 321 if (gotblk) { 322 LFS_LOCK_BUF(bp); 323 assert(!(bp->b_flags & B_INVAL)); 324 brelse(bp, 0); 325 } 326 /* Increment inode count in segment summary block. */ 327 ssp = (SEGSUM *)sp->segsum; 328 lfs_ss_setninos(fs, ssp, lfs_ss_getninos(fs, ssp) + 1); 329 330 /* If this page is full, set flag to allocate a new page. */ 331 if (++sp->ninodes % LFS_INOPB(fs) == 0) 332 sp->ibp = NULL; 333 334 /* 335 * If updating the ifile, update the super-block. Update the disk 336 * address for this inode in the ifile. 337 */ 338 ino = ip->i_number; 339 if (ino == LFS_IFILE_INUM) { 340 daddr = lfs_sb_getidaddr(fs); 341 lfs_sb_setidaddr(fs, LFS_DBTOFSB(fs, bp->b_blkno)); 342 sbdirty(); 343 } else { 344 LFS_IENTRY(ifp, fs, ino, ibp); 345 daddr = lfs_if_getdaddr(fs, ifp); 346 lfs_if_setdaddr(fs, ifp, LFS_DBTOFSB(fs, bp->b_blkno) + fsb); 347 (void)LFS_BWRITE_LOG(ibp); /* Ifile */ 348 } 349 350 /* 351 * Account the inode: it no longer belongs to its former segment, 352 * though it will not belong to the new segment until that segment 353 * is actually written. 354 */ 355 if (daddr != LFS_UNUSED_DADDR) { 356 u_int32_t oldsn = lfs_dtosn(fs, daddr); 357 LFS_SEGENTRY(sup, fs, oldsn, bp); 358 sup->su_nbytes -= DINOSIZE(fs); 359 redo_ifile = 360 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 361 if (redo_ifile) 362 fs->lfs_flags |= LFS_IFDIRTY; 363 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */ 364 } 365 return redo_ifile; 366} 367 368int 369lfs_gatherblock(struct segment * sp, struct ubuf * bp) 370{ 371 struct lfs *fs; 372 SEGSUM *ssp; 373 int version; 374 int j, blksinblk; 375 376 /* 377 * If full, finish this segment. We may be doing I/O, so 378 * release and reacquire the splbio(). 379 */ 380 fs = sp->fs; 381 blksinblk = howmany(bp->b_bcount, lfs_sb_getbsize(fs)); 382 if (sp->sum_bytes_left < LFS_BLKPTRSIZE(fs) * blksinblk || 383 sp->seg_bytes_left < bp->b_bcount) { 384 lfs_updatemeta(sp); 385 386 version = lfs_fi_getversion(fs, sp->fip); 387 (void) lfs_writeseg(fs, sp); 388 389 lfs_fi_setversion(fs, sp->fip, version); 390 lfs_fi_setino(fs, sp->fip, VTOI(sp->vp)->i_number); 391 /* Add the current file to the segment summary. */ 392 ssp = (SEGSUM *)sp->segsum; 393 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1); 394 sp->sum_bytes_left -= FINFOSIZE(fs); 395 396 return 1; 397 } 398 /* Insert into the buffer list, update the FINFO block. */ 399 bp->b_flags |= B_GATHERED; 400 /* bp->b_flags &= ~B_DONE; */ 401 402 *sp->cbpp++ = bp; 403 for (j = 0; j < blksinblk; j++) { 404 unsigned bn; 405 406 bn = lfs_fi_getnblocks(fs, sp->fip); 407 lfs_fi_setnblocks(fs, sp->fip, bn + 1); 408 lfs_fi_setblock(fs, sp->fip, bn, bp->b_lblkno + j); 409 } 410 411 sp->sum_bytes_left -= LFS_BLKPTRSIZE(fs) * blksinblk; 412 sp->seg_bytes_left -= bp->b_bcount; 413 return 0; 414} 415 416int 417lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *)) 418{ 419 struct ubuf *bp, *nbp; 420 int count = 0; 421 422 sp->vp = vp; 423loop: 424 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 425 nbp = LIST_NEXT(bp, b_vnbufs); 426 427 assert(bp->b_flags & B_DELWRI); 428 if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) { 429 continue; 430 } 431 if (lfs_gatherblock(sp, bp)) { 432 goto loop; 433 } 434 count++; 435 } 436 437 lfs_updatemeta(sp); 438 sp->vp = NULL; 439 return count; 440} 441 442 443/* 444 * Change the given block's address to ndaddr, finding its previous 445 * location using ulfs_bmaparray(). 446 * 447 * Account for this change in the segment table. 448 */ 449static void 450lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn, 451 daddr_t ndaddr, int size) 452{ 453 SEGUSE *sup; 454 struct ubuf *bp; 455 struct indir a[ULFS_NIADDR + 2], *ap; 456 struct inode *ip; 457 struct uvnode *vp; 458 daddr_t daddr, ooff; 459 int num, error; 460 int osize; 461 int frags, ofrags; 462 463 vp = sp->vp; 464 ip = VTOI(vp); 465 466 error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num); 467 if (error) 468 errx(EXIT_FAILURE, "%s: ulfs_bmaparray returned %d looking up lbn %" 469 PRId64 "", __func__, error, lbn); 470 if (daddr > 0) 471 daddr = LFS_DBTOFSB(fs, daddr); 472 473 frags = lfs_numfrags(fs, size); 474 switch (num) { 475 case 0: 476 ooff = lfs_dino_getdb(fs, ip->i_din, lbn); 477 if (ooff == UNWRITTEN) 478 lfs_dino_setblocks(fs, ip->i_din, 479 lfs_dino_getblocks(fs, ip->i_din) + frags); 480 else { 481 /* possible fragment truncation or extension */ 482 ofrags = lfs_btofsb(fs, ip->i_lfs_fragsize[lbn]); 483 lfs_dino_setblocks(fs, ip->i_din, 484 lfs_dino_getblocks(fs, ip->i_din) + (frags - ofrags)); 485 } 486 lfs_dino_setdb(fs, ip->i_din, lbn, ndaddr); 487 break; 488 case 1: 489 ooff = lfs_dino_getib(fs, ip->i_din, a[0].in_off); 490 if (ooff == UNWRITTEN) 491 lfs_dino_setblocks(fs, ip->i_din, 492 lfs_dino_getblocks(fs, ip->i_din) + frags); 493 lfs_dino_setib(fs, ip->i_din, a[0].in_off, ndaddr); 494 break; 495 default: 496 ap = &a[num - 1]; 497 if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp)) 498 errx(EXIT_FAILURE, "%s: bread bno %" PRId64, __func__, 499 ap->in_lbn); 500 501 ooff = lfs_iblock_get(fs, bp->b_data, ap->in_off); 502 if (ooff == UNWRITTEN) 503 lfs_dino_setblocks(fs, ip->i_din, 504 lfs_dino_getblocks(fs, ip->i_din) + frags); 505 lfs_iblock_set(fs, bp->b_data, ap->in_off, ndaddr); 506 (void) VOP_BWRITE(bp); 507 } 508 509 /* 510 * Update segment usage information, based on old size 511 * and location. 512 */ 513 if (daddr > 0) { 514 u_int32_t oldsn = lfs_dtosn(fs, daddr); 515 if (lbn >= 0 && lbn < ULFS_NDADDR) 516 osize = ip->i_lfs_fragsize[lbn]; 517 else 518 osize = lfs_sb_getbsize(fs); 519 LFS_SEGENTRY(sup, fs, oldsn, bp); 520 sup->su_nbytes -= osize; 521 if (!(bp->b_flags & B_GATHERED)) 522 fs->lfs_flags |= LFS_IFDIRTY; 523 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); 524 } 525 /* 526 * Now that this block has a new address, and its old 527 * segment no longer owns it, we can forget about its 528 * old size. 529 */ 530 if (lbn >= 0 && lbn < ULFS_NDADDR) 531 ip->i_lfs_fragsize[lbn] = size; 532} 533 534/* 535 * Update the metadata that points to the blocks listed in the FINFO 536 * array. 537 */ 538void 539lfs_updatemeta(struct segment * sp) 540{ 541 struct ubuf *sbp; 542 struct lfs *fs; 543 struct uvnode *vp; 544 daddr_t lbn; 545 int i, nblocks, num; 546 int frags; 547 int bytesleft, size; 548 union lfs_blocks tmpptr; 549 550 fs = sp->fs; 551 vp = sp->vp; 552 553 /* 554 * This code was cutpasted from the kernel. See the 555 * corresponding comment in lfs_segment.c. 556 */ 557#if 0 558 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 559#else 560 lfs_blocks_fromvoid(fs, &tmpptr, (void *)NEXT_FINFO(fs, sp->fip)); 561 nblocks = lfs_blocks_sub(fs, &tmpptr, &sp->start_lbp); 562 //nblocks_orig = nblocks; 563#endif 564 565 if (vp == NULL || nblocks == 0) 566 return; 567 568 /* 569 * This count may be high due to oversize blocks from lfs_gop_write. 570 * Correct for this. (XXX we should be able to keep track of these.) 571 */ 572 for (i = 0; i < nblocks; i++) { 573 if (sp->start_bpp[i] == NULL) { 574 printf("nblocks = %d, not %d\n", i, nblocks); 575 nblocks = i; 576 break; 577 } 578 num = howmany(sp->start_bpp[i]->b_bcount, lfs_sb_getbsize(fs)); 579 nblocks -= num - 1; 580 } 581 582 /* 583 * Sort the blocks. 584 */ 585 lfs_shellsort(fs, sp->start_bpp, &sp->start_lbp, nblocks, lfs_sb_getbsize(fs)); 586 587 /* 588 * Record the length of the last block in case it's a fragment. 589 * If there are indirect blocks present, they sort last. An 590 * indirect block will be lfs_bsize and its presence indicates 591 * that you cannot have fragments. 592 */ 593 lfs_fi_setlastlength(fs, sp->fip, ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & 594 lfs_sb_getbmask(fs)) + 1); 595 596 /* 597 * Assign disk addresses, and update references to the logical 598 * block and the segment usage information. 599 */ 600 for (i = nblocks; i--; ++sp->start_bpp) { 601 sbp = *sp->start_bpp; 602 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0); 603 604 sbp->b_blkno = LFS_FSBTODB(fs, lfs_sb_getoffset(fs)); 605 606 /* 607 * If we write a frag in the wrong place, the cleaner won't 608 * be able to correctly identify its size later, and the 609 * segment will be uncleanable. (Even worse, it will assume 610 * that the indirect block that actually ends the list 611 * is of a smaller size!) 612 */ 613 if ((sbp->b_bcount & lfs_sb_getbmask(fs)) && i != 0) 614 errx(EXIT_FAILURE, "%s: fragment is not last block", __func__); 615 616 /* 617 * For each subblock in this possibly oversized block, 618 * update its address on disk. 619 */ 620 for (bytesleft = sbp->b_bcount; bytesleft > 0; 621 bytesleft -= lfs_sb_getbsize(fs)) { 622 size = MIN(bytesleft, lfs_sb_getbsize(fs)); 623 frags = lfs_numfrags(fs, size); 624 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0); 625 lfs_blocks_inc(fs, &sp->start_lbp); 626 lfs_update_single(fs, sp, lbn, lfs_sb_getoffset(fs), size); 627 lfs_sb_addoffset(fs, frags); 628 } 629 630 } 631} 632 633/* 634 * Start a new segment. 635 */ 636int 637lfs_initseg(struct lfs * fs) 638{ 639 struct segment *sp; 640 SEGUSE *sup; 641 SEGSUM *ssp; 642 struct ubuf *bp, *sbp; 643 int repeat; 644 645 sp = fs->lfs_sp; 646 647 repeat = 0; 648 649 /* Advance to the next segment. */ 650 if (!LFS_PARTIAL_FITS(fs)) { 651 /* lfs_avail eats the remaining space */ 652 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) - 653 lfs_sb_getcurseg(fs))); 654 lfs_newseg(fs); 655 repeat = 1; 656 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs)); 657 658 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 659 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs)); 660 661 /* 662 * If the segment contains a superblock, update the offset 663 * and summary address to skip over it. 664 */ 665 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 666 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 667 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD)); 668 sp->seg_bytes_left -= LFS_SBPAD; 669 } 670 brelse(bp, 0); 671 /* Segment zero could also contain the labelpad */ 672 if (lfs_sb_getversion(fs) > 1 && sp->seg_number == 0 && 673 lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD)) { 674 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs)); 675 sp->seg_bytes_left -= LFS_LABELPAD - lfs_fsbtob(fs, lfs_sb_gets0addr(fs)); 676 } 677 } else { 678 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 679 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs) - 680 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs))); 681 } 682 lfs_sb_setlastpseg(fs, lfs_sb_getoffset(fs)); 683 684 sp->fs = fs; 685 sp->ibp = NULL; 686 sp->idp = NULL; 687 sp->ninodes = 0; 688 sp->ndupino = 0; 689 690 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 691 sp->cbpp = sp->bpp; 692 sbp = *sp->cbpp = getblk(fs->lfs_devvp, 693 LFS_FSBTODB(fs, lfs_sb_getoffset(fs)), lfs_sb_getsumsize(fs)); 694 sp->segsum = sbp->b_data; 695 memset(sp->segsum, 0, lfs_sb_getsumsize(fs)); 696 sp->start_bpp = ++sp->cbpp; 697 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 698 699 /* Set point to SEGSUM, initialize it. */ 700 ssp = sp->segsum; 701 lfs_ss_setnext(fs, ssp, lfs_sb_getnextseg(fs)); 702 lfs_ss_setnfinfo(fs, ssp, 0); 703 lfs_ss_setninos(fs, ssp, 0); 704 lfs_ss_setmagic(fs, ssp, SS_MAGIC); 705 706 /* Set pointer to first FINFO, initialize it. */ 707 sp->fip = SEGSUM_FINFOBASE(fs, ssp); 708 lfs_fi_setnblocks(fs, sp->fip, 0); 709 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip); 710 lfs_fi_setlastlength(fs, sp->fip, 0); 711 712 sp->seg_bytes_left -= lfs_sb_getsumsize(fs); 713 sp->sum_bytes_left = lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs); 714 715 LFS_LOCK_BUF(sbp); 716 brelse(sbp, 0); 717 return repeat; 718} 719 720/* 721 * Return the next segment to write. 722 */ 723void 724lfs_newseg(struct lfs * fs) 725{ 726 CLEANERINFO *cip; 727 SEGUSE *sup; 728 struct ubuf *bp; 729 int curseg, isdirty, sn; 730 731 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 732 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 733 sup->su_nbytes = 0; 734 sup->su_nsums = 0; 735 sup->su_ninos = 0; 736 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 737 738 LFS_CLEANERINFO(cip, fs, bp); 739 lfs_ci_shiftcleantodirty(fs, cip, 1); 740 lfs_sb_setnclean(fs, lfs_ci_getclean(fs, cip)); 741 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 742 743 lfs_sb_setlastseg(fs, lfs_sb_getcurseg(fs)); 744 lfs_sb_setcurseg(fs, lfs_sb_getnextseg(fs)); 745 for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs)) + lfs_sb_getinterleave(fs);;) { 746 sn = (sn + 1) % lfs_sb_getnseg(fs); 747 if (sn == curseg) 748 errx(EXIT_FAILURE, "%s: no clean segments", __func__); 749 LFS_SEGENTRY(sup, fs, sn, bp); 750 isdirty = sup->su_flags & SEGUSE_DIRTY; 751 brelse(bp, 0); 752 753 if (!isdirty) 754 break; 755 } 756 757 ++fs->lfs_nactive; 758 lfs_sb_setnextseg(fs, lfs_sntod(fs, sn)); 759} 760 761 762int 763lfs_writeseg(struct lfs * fs, struct segment * sp) 764{ 765 struct ubuf **bpp, *bp; 766 SEGUSE *sup; 767 SEGSUM *ssp; 768 char *datap, *dp; 769 int i; 770 int do_again, nblocks, byteoffset; 771 size_t el_size; 772 u_short ninos; 773 size_t sumstart; 774 struct uvnode *devvp; 775 776 /* 777 * If there are no buffers other than the segment summary to write 778 * and it is not a checkpoint, don't do anything. On a checkpoint, 779 * even if there aren't any buffers, you need to write the superblock. 780 */ 781 nblocks = sp->cbpp - sp->bpp; 782#if 0 783 printf("write %d blocks at 0x%x\n", 784 nblocks, (int)LFS_DBTOFSB(fs, (*sp->bpp)->b_blkno)); 785#endif 786 if (nblocks == 1) 787 return 0; 788 789 devvp = fs->lfs_devvp; 790 791 /* Update the segment usage information. */ 792 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 793 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 794 795 /* Loop through all blocks, except the segment summary. */ 796 for (bpp = sp->bpp; ++bpp < sp->cbpp;) { 797 if ((*bpp)->b_vp != devvp) { 798 sup->su_nbytes += (*bpp)->b_bcount; 799 } 800 assert(lfs_dtosn(fs, LFS_DBTOFSB(fs, (*bpp)->b_blkno)) == sp->seg_number); 801 } 802 803 ssp = (SEGSUM *) sp->segsum; 804 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) | SS_RFW); 805 806 ninos = (lfs_ss_getninos(fs, ssp) + LFS_INOPB(fs) - 1) / LFS_INOPB(fs); 807 sup->su_nbytes += lfs_ss_getninos(fs, ssp) * DINOSIZE(fs); 808 809 if (lfs_sb_getversion(fs) == 1) 810 sup->su_olastmod = write_time; 811 else 812 sup->su_lastmod = write_time; 813 sup->su_ninos += ninos; 814 ++sup->su_nsums; 815 lfs_sb_adddmeta(fs, (lfs_btofsb(fs, lfs_sb_getsumsize(fs)) + lfs_btofsb(fs, ninos * 816 lfs_sb_getibsize(fs)))); 817 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 818 819 do_again = !(bp->b_flags & B_GATHERED); 820 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ 821 822 /* 823 * Compute checksum across data and then across summary; the first 824 * block (the summary block) is skipped. Set the create time here 825 * so that it's guaranteed to be later than the inode mod times. 826 */ 827 if (lfs_sb_getversion(fs) == 1) 828 el_size = sizeof(u_long); 829 else 830 el_size = sizeof(u_int32_t); 831 datap = dp = emalloc(nblocks * el_size); 832 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 833 ++bpp; 834 /* Loop through gop_write cluster blocks */ 835 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; 836 byteoffset += lfs_sb_getbsize(fs)) { 837 memcpy(dp, (*bpp)->b_data + byteoffset, el_size); 838 dp += el_size; 839 } 840 bremfree(*bpp); 841 (*bpp)->b_flags |= B_BUSY; 842 } 843 if (lfs_sb_getversion(fs) == 1) 844 lfs_ss_setocreate(fs, ssp, write_time); 845 else { 846 lfs_ss_setcreate(fs, ssp, write_time); 847 lfs_sb_addserial(fs, 1); 848 lfs_ss_setserial(fs, ssp, lfs_sb_getserial(fs)); 849 lfs_ss_setident(fs, ssp, lfs_sb_getident(fs)); 850 } 851 /* Set the summary block busy too */ 852 bremfree(*(sp->bpp)); 853 (*(sp->bpp))->b_flags |= B_BUSY; 854 855 lfs_ss_setdatasum(fs, ssp, cksum(datap, (nblocks - 1) * el_size)); 856 sumstart = lfs_ss_getsumstart(fs); 857 lfs_ss_setsumsum(fs, ssp, 858 cksum((char *)ssp + sumstart, lfs_sb_getsumsize(fs) - sumstart)); 859 free(datap); 860 datap = dp = NULL; 861 lfs_sb_subbfree(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) + 862 lfs_btofsb(fs, lfs_sb_getsumsize(fs)))); 863 864 if (devvp == NULL) 865 errx(EXIT_FAILURE, "devvp is NULL"); 866 for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) { 867 bp = *bpp; 868#if 0 869 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n", 870 nblocks - i, bp, bp->b_flags, bp->b_blkno); 871 printf(" vp = %p\n", bp->b_vp); 872 if (bp->b_vp != fs->lfs_devvp) 873 printf(" ino = %d lbn = %" PRId64 "\n", 874 VTOI(bp->b_vp)->i_number, bp->b_lblkno); 875#endif 876 if (bp->b_vp == fs->lfs_devvp) 877 written_dev += bp->b_bcount; 878 else { 879 if (bp->b_lblkno >= 0) 880 written_data += bp->b_bcount; 881 else 882 written_indir += bp->b_bcount; 883 } 884 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR | 885 B_LOCKED); 886 bwrite(bp); 887 written_bytes += bp->b_bcount; 888 } 889 written_inodes += ninos; 890 891 return (lfs_initseg(fs) || do_again); 892} 893 894/* 895 * Our own copy of shellsort. XXX use qsort or heapsort. 896 */ 897static void 898lfs_shellsort(struct lfs *fs, 899 struct ubuf ** bp_array, union lfs_blocks *lb_array, int nmemb, int size) 900{ 901 static int __rsshell_increments[] = {4, 1, 0}; 902 int incr, *incrp, t1, t2; 903 struct ubuf *bp_temp; 904 905 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 906 for (t1 = incr; t1 < nmemb; ++t1) 907 for (t2 = t1 - incr; t2 >= 0;) 908 if ((u_int32_t) bp_array[t2]->b_lblkno > 909 (u_int32_t) bp_array[t2 + incr]->b_lblkno) { 910 bp_temp = bp_array[t2]; 911 bp_array[t2] = bp_array[t2 + incr]; 912 bp_array[t2 + incr] = bp_temp; 913 t2 -= incr; 914 } else 915 break; 916 917 /* Reform the list of logical blocks */ 918 incr = 0; 919 for (t1 = 0; t1 < nmemb; t1++) { 920 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 921 lfs_blocks_set(fs, lb_array, incr++, 922 bp_array[t1]->b_lblkno + t2); 923 } 924 } 925} 926 927 928/* 929 * lfs_seglock -- 930 * Single thread the segment writer. 931 */ 932int 933lfs_seglock(struct lfs * fs, unsigned long flags) 934{ 935 struct segment *sp; 936 size_t allocsize; 937 938 if (fs->lfs_seglock) { 939 ++fs->lfs_seglock; 940 fs->lfs_sp->seg_flags |= flags; 941 return 0; 942 } 943 fs->lfs_seglock = 1; 944 945 sp = fs->lfs_sp = emalloc(sizeof(*sp)); 946 allocsize = lfs_sb_getssize(fs) * sizeof(struct ubuf *); 947 sp->bpp = emalloc(allocsize); 948 if (!sp->bpp) 949 err(!preen, "Could not allocate %zu bytes", allocsize); 950 sp->seg_flags = flags; 951 sp->vp = NULL; 952 sp->seg_iocount = 0; 953 (void) lfs_initseg(fs); 954 955 return 0; 956} 957 958/* 959 * lfs_segunlock -- 960 * Single thread the segment writer. 961 */ 962void 963lfs_segunlock(struct lfs * fs) 964{ 965 struct segment *sp; 966 struct ubuf *bp; 967 968 sp = fs->lfs_sp; 969 970 if (fs->lfs_seglock == 1) { 971 if (sp->bpp != sp->cbpp) { 972 /* Free allocated segment summary */ 973 lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 974 bp = *sp->bpp; 975 bremfree(bp); 976 bp->b_flags |= B_DONE | B_INVAL; 977 bp->b_flags &= ~B_DELWRI; 978 reassignbuf(bp, bp->b_vp); 979 bp->b_flags |= B_BUSY; /* XXX */ 980 brelse(bp, 0); 981 } else 982 printf("unlock to 0 with no summary"); 983 984 free(sp->bpp); 985 sp->bpp = NULL; 986 free(sp); 987 fs->lfs_sp = NULL; 988 989 fs->lfs_nactive = 0; 990 991 /* Since we *know* everything's on disk, write both sbs */ 992 lfs_writesuper(fs, lfs_sb_getsboff(fs, 0)); 993 lfs_writesuper(fs, lfs_sb_getsboff(fs, 1)); 994 995 --fs->lfs_seglock; 996 fs->lfs_lockpid = 0; 997 } else if (fs->lfs_seglock == 0) { 998 errx(EXIT_FAILURE, "Seglock not held"); 999 } else { 1000 --fs->lfs_seglock; 1001 } 1002} 1003 1004int 1005lfs_writevnodes(struct lfs *fs, struct segment *sp, int op) 1006{ 1007 struct inode *ip; 1008 struct uvnode *vp; 1009 int inodes_written = 0; 1010 1011 LIST_FOREACH(vp, &vnodelist, v_mntvnodes) { 1012 if (vp->v_bmap_op != lfs_vop_bmap) 1013 continue; 1014 1015 ip = VTOI(vp); 1016 1017 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || 1018 (op != VN_DIROP && (vp->v_uflag & VU_DIROP))) { 1019 continue; 1020 } 1021 /* 1022 * Write the inode/file if dirty and it's not the IFILE. 1023 */ 1024 if (ip->i_state & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) { 1025 if (ip->i_number != LFS_IFILE_INUM) 1026 lfs_writefile(fs, sp, vp); 1027 (void) lfs_writeinode(fs, sp, ip); 1028 inodes_written++; 1029 } 1030 } 1031 return inodes_written; 1032} 1033 1034void 1035lfs_writesuper(struct lfs *fs, daddr_t daddr) 1036{ 1037 struct ubuf *bp; 1038 1039 /* Set timestamp of this version of the superblock */ 1040 if (lfs_sb_getversion(fs) == 1) 1041 lfs_sb_setotstamp(fs, write_time); 1042 lfs_sb_settstamp(fs, write_time); 1043 1044 __CTASSERT(sizeof(struct dlfs) == sizeof(struct dlfs64)); 1045 1046 /* Checksum the superblock and copy it into a buffer. */ 1047 lfs_sb_setcksum(fs, lfs_sb_cksum(fs)); 1048 assert(daddr > 0); 1049 bp = getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), LFS_SBPAD); 1050 memcpy(bp->b_data, &fs->lfs_dlfs_u, sizeof(struct dlfs)); 1051 memset(bp->b_data + sizeof(struct dlfs), 0, 1052 LFS_SBPAD - sizeof(struct dlfs)); 1053 1054 bwrite(bp); 1055} 1056