suj.c revision 207141
/*-
 * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25207141Sjeff */ 26207141Sjeff 27207141Sjeff#include <sys/cdefs.h> 28207141Sjeff__FBSDID("$FreeBSD: head/sbin/fsck_ffs/suj.c 207141 2010-04-24 07:05:35Z jeff $"); 29207141Sjeff 30207141Sjeff#include <sys/param.h> 31207141Sjeff#include <sys/disklabel.h> 32207141Sjeff#include <sys/mount.h> 33207141Sjeff#include <sys/stat.h> 34207141Sjeff 35207141Sjeff#include <ufs/ufs/ufsmount.h> 36207141Sjeff#include <ufs/ufs/dinode.h> 37207141Sjeff#include <ufs/ufs/dir.h> 38207141Sjeff#include <ufs/ffs/fs.h> 39207141Sjeff 40207141Sjeff#include <stdio.h> 41207141Sjeff#include <stdlib.h> 42207141Sjeff#include <stdint.h> 43207141Sjeff#include <libufs.h> 44207141Sjeff#include <string.h> 45207141Sjeff#include <strings.h> 46207141Sjeff#include <err.h> 47207141Sjeff#include <assert.h> 48207141Sjeff 49207141Sjeff#include "fsck.h" 50207141Sjeff 51207141Sjeff#define DOTDOT_OFFSET DIRECTSIZ(1) 52207141Sjeff#define SUJ_HASHSIZE 2048 53207141Sjeff#define SUJ_HASHMASK (SUJ_HASHSIZE - 1) 54207141Sjeff#define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) 55207141Sjeff 56207141Sjeffstruct suj_seg { 57207141Sjeff TAILQ_ENTRY(suj_seg) ss_next; 58207141Sjeff struct jsegrec ss_rec; 59207141Sjeff uint8_t *ss_blk; 60207141Sjeff}; 61207141Sjeff 62207141Sjeffstruct suj_rec { 63207141Sjeff TAILQ_ENTRY(suj_rec) sr_next; 64207141Sjeff union jrec *sr_rec; 65207141Sjeff}; 66207141SjeffTAILQ_HEAD(srechd, suj_rec); 67207141Sjeff 68207141Sjeffstruct suj_ino { 69207141Sjeff LIST_ENTRY(suj_ino) si_next; 70207141Sjeff struct srechd si_recs; 71207141Sjeff struct srechd si_newrecs; 72207141Sjeff struct srechd si_movs; 73207141Sjeff struct jtrncrec *si_trunc; 74207141Sjeff ino_t si_ino; 75207141Sjeff char si_skipparent; 76207141Sjeff char si_hasrecs; 77207141Sjeff char si_blkadj; 78207141Sjeff char si_linkadj; 79207141Sjeff int si_mode; 80207141Sjeff nlink_t si_nlinkadj; 81207141Sjeff nlink_t si_nlink; 82207141Sjeff nlink_t si_dotlinks; 83207141Sjeff}; 84207141SjeffLIST_HEAD(inohd, suj_ino); 85207141Sjeff 
/*
 * Journal records that refer to one block, hashed in the owning cylinder
 * group's sc_blkhash (see blk_lookup()).
 */
struct suj_blk {
	LIST_ENTRY(suj_blk) sb_next;	/* Hash chain linkage. */
	struct srechd sb_recs;		/* Records naming this block. */
	ufs2_daddr_t sb_blk;		/* Block address (the hash key). */
};
LIST_HEAD(blkhd, suj_blk);

/*
 * A cached data block, hashed in the global dbhash (see dblk_lookup()).
 */
struct data_blk {
	LIST_ENTRY(data_blk) db_next;	/* Hash chain linkage. */
	uint8_t *db_buf;		/* Contents; filled by dblk_read(). */
	ufs2_daddr_t db_blk;		/* Block address (the hash key). */
	int db_size;			/* Bytes held in db_buf, 0 if none. */
	int db_dirty;			/* Set by dblk_dirty(). */
};

/*
 * A cached block of on-disk inodes, hashed in the owning cylinder group's
 * sc_iblkhash (see ino_read()).
 */
struct ino_blk {
	LIST_ENTRY(ino_blk) ib_next;	/* Hash chain linkage. */
	uint8_t *ib_buf;		/* fs_bsize bytes of inodes. */
	int ib_dirty;			/* Set by ino_dirty(). */
	ufs2_daddr_t ib_blk;		/* Block address (the hash key). */
};
LIST_HEAD(iblkhd, ino_blk);

/*
 * Per cylinder group state, created on demand by cg_lookup().
 */
struct suj_cg {
	LIST_ENTRY(suj_cg) sc_next;	/* Linkage in cghash. */
	struct blkhd sc_blkhash[SUJ_HASHSIZE];
	struct inohd sc_inohash[SUJ_HASHSIZE];
	struct iblkhd sc_iblkhash[SUJ_HASHSIZE];
	struct ino_blk *sc_lastiblk;	/* Last inode block from ino_read(). */
	struct suj_ino *sc_lastino;	/* Checked first by ino_lookup(). */
	struct suj_blk *sc_lastblk;	/* Checked first by blk_lookup(). */
	uint8_t *sc_cgbuf;		/* Raw cg block read from disk. */
	struct cg *sc_cgp;		/* sc_cgbuf viewed as a struct cg. */
	int sc_dirty;			/* Set when the cg maps are modified. */
	int sc_cgx;			/* Cylinder group number. */
};

LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE];
LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE];
struct suj_cg *lastcg;			/* Last cg returned from the hash. */
struct data_blk *lastblk;		/* Checked first by dblk_lookup(). */

TAILQ_HEAD(seghd, suj_seg) allsegs;
uint64_t oldseq;
static struct uufsd *disk = NULL;	/* Set by opendisk(). */
static struct fs *fs = NULL;		/* Points at disk->d_fs once open. */
ino_t sujino;

/*
 * Summary statistics.
136207141Sjeff */ 137207141Sjeffuint64_t freefrags; 138207141Sjeffuint64_t freeblocks; 139207141Sjeffuint64_t freeinos; 140207141Sjeffuint64_t freedir; 141207141Sjeffuint64_t jbytes; 142207141Sjeffuint64_t jrecs; 143207141Sjeff 144207141Sjefftypedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); 145207141Sjeffstatic void ino_trunc(ino_t, off_t); 146207141Sjeffstatic void ino_decr(ino_t); 147207141Sjeffstatic void ino_adjust(struct suj_ino *); 148207141Sjeffstatic void ino_build(struct suj_ino *); 149207141Sjeffstatic int blk_isfree(ufs2_daddr_t); 150207141Sjeff 151207141Sjeffstatic void * 152207141Sjefferrmalloc(size_t n) 153207141Sjeff{ 154207141Sjeff void *a; 155207141Sjeff 156207141Sjeff a = malloc(n); 157207141Sjeff if (a == NULL) 158207141Sjeff errx(1, "malloc(%zu)", n); 159207141Sjeff return (a); 160207141Sjeff} 161207141Sjeff 162207141Sjeff/* 163207141Sjeff * Open the given provider, load superblock. 164207141Sjeff */ 165207141Sjeffstatic void 166207141Sjeffopendisk(const char *devnam) 167207141Sjeff{ 168207141Sjeff if (disk != NULL) 169207141Sjeff return; 170207141Sjeff disk = malloc(sizeof(*disk)); 171207141Sjeff if (disk == NULL) 172207141Sjeff errx(1, "malloc(%zu)", sizeof(*disk)); 173207141Sjeff if (ufs_disk_fillout(disk, devnam) == -1) { 174207141Sjeff err(1, "ufs_disk_fillout(%s) failed: %s", devnam, 175207141Sjeff disk->d_error); 176207141Sjeff } 177207141Sjeff fs = &disk->d_fs; 178207141Sjeff} 179207141Sjeff 180207141Sjeff/* 181207141Sjeff * Mark file system as clean, write the super-block back, close the disk. 182207141Sjeff */ 183207141Sjeffstatic void 184207141Sjeffclosedisk(const char *devnam) 185207141Sjeff{ 186207141Sjeff struct csum *cgsum; 187207141Sjeff int i; 188207141Sjeff 189207141Sjeff /* 190207141Sjeff * Recompute the fs summary info from correct cs summaries. 
191207141Sjeff */ 192207141Sjeff bzero(&fs->fs_cstotal, sizeof(struct csum_total)); 193207141Sjeff for (i = 0; i < fs->fs_ncg; i++) { 194207141Sjeff cgsum = &fs->fs_cs(fs, i); 195207141Sjeff fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; 196207141Sjeff fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; 197207141Sjeff fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; 198207141Sjeff fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; 199207141Sjeff } 200207141Sjeff fs->fs_pendinginodes = 0; 201207141Sjeff fs->fs_pendingblocks = 0; 202207141Sjeff fs->fs_clean = 1; 203207141Sjeff fs->fs_time = time(NULL); 204207141Sjeff fs->fs_mtime = time(NULL); 205207141Sjeff if (sbwrite(disk, 0) == -1) 206207141Sjeff err(1, "sbwrite(%s)", devnam); 207207141Sjeff if (ufs_disk_close(disk) == -1) 208207141Sjeff err(1, "ufs_disk_close(%s)", devnam); 209207141Sjeff free(disk); 210207141Sjeff disk = NULL; 211207141Sjeff fs = NULL; 212207141Sjeff} 213207141Sjeff 214207141Sjeff/* 215207141Sjeff * Lookup a cg by number in the hash so we can keep track of which cgs 216207141Sjeff * need stats rebuilt. 
217207141Sjeff */ 218207141Sjeffstatic struct suj_cg * 219207141Sjeffcg_lookup(int cgx) 220207141Sjeff{ 221207141Sjeff struct cghd *hd; 222207141Sjeff struct suj_cg *sc; 223207141Sjeff 224207141Sjeff if (cgx < 0 || cgx >= fs->fs_ncg) { 225207141Sjeff abort(); 226207141Sjeff errx(1, "Bad cg number %d", cgx); 227207141Sjeff } 228207141Sjeff if (lastcg && lastcg->sc_cgx == cgx) 229207141Sjeff return (lastcg); 230207141Sjeff hd = &cghash[SUJ_HASH(cgx)]; 231207141Sjeff LIST_FOREACH(sc, hd, sc_next) 232207141Sjeff if (sc->sc_cgx == cgx) { 233207141Sjeff lastcg = sc; 234207141Sjeff return (sc); 235207141Sjeff } 236207141Sjeff sc = errmalloc(sizeof(*sc)); 237207141Sjeff bzero(sc, sizeof(*sc)); 238207141Sjeff sc->sc_cgbuf = errmalloc(fs->fs_bsize); 239207141Sjeff sc->sc_cgp = (struct cg *)sc->sc_cgbuf; 240207141Sjeff sc->sc_cgx = cgx; 241207141Sjeff LIST_INSERT_HEAD(hd, sc, sc_next); 242207141Sjeff if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 243207141Sjeff fs->fs_bsize) == -1) 244207141Sjeff err(1, "Unable to read cylinder group %d", sc->sc_cgx); 245207141Sjeff 246207141Sjeff return (sc); 247207141Sjeff} 248207141Sjeff 249207141Sjeff/* 250207141Sjeff * Lookup an inode number in the hash and allocate a suj_ino if it does 251207141Sjeff * not exist. 
252207141Sjeff */ 253207141Sjeffstatic struct suj_ino * 254207141Sjeffino_lookup(ino_t ino, int creat) 255207141Sjeff{ 256207141Sjeff struct suj_ino *sino; 257207141Sjeff struct inohd *hd; 258207141Sjeff struct suj_cg *sc; 259207141Sjeff 260207141Sjeff sc = cg_lookup(ino_to_cg(fs, ino)); 261207141Sjeff if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) 262207141Sjeff return (sc->sc_lastino); 263207141Sjeff hd = &sc->sc_inohash[SUJ_HASH(ino)]; 264207141Sjeff LIST_FOREACH(sino, hd, si_next) 265207141Sjeff if (sino->si_ino == ino) 266207141Sjeff return (sino); 267207141Sjeff if (creat == 0) 268207141Sjeff return (NULL); 269207141Sjeff sino = errmalloc(sizeof(*sino)); 270207141Sjeff bzero(sino, sizeof(*sino)); 271207141Sjeff sino->si_ino = ino; 272207141Sjeff TAILQ_INIT(&sino->si_recs); 273207141Sjeff TAILQ_INIT(&sino->si_newrecs); 274207141Sjeff TAILQ_INIT(&sino->si_movs); 275207141Sjeff LIST_INSERT_HEAD(hd, sino, si_next); 276207141Sjeff 277207141Sjeff return (sino); 278207141Sjeff} 279207141Sjeff 280207141Sjeff/* 281207141Sjeff * Lookup a block number in the hash and allocate a suj_blk if it does 282207141Sjeff * not exist. 
283207141Sjeff */ 284207141Sjeffstatic struct suj_blk * 285207141Sjeffblk_lookup(ufs2_daddr_t blk, int creat) 286207141Sjeff{ 287207141Sjeff struct suj_blk *sblk; 288207141Sjeff struct suj_cg *sc; 289207141Sjeff struct blkhd *hd; 290207141Sjeff 291207141Sjeff sc = cg_lookup(dtog(fs, blk)); 292207141Sjeff if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) 293207141Sjeff return (sc->sc_lastblk); 294207141Sjeff hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))]; 295207141Sjeff LIST_FOREACH(sblk, hd, sb_next) 296207141Sjeff if (sblk->sb_blk == blk) 297207141Sjeff return (sblk); 298207141Sjeff if (creat == 0) 299207141Sjeff return (NULL); 300207141Sjeff sblk = errmalloc(sizeof(*sblk)); 301207141Sjeff bzero(sblk, sizeof(*sblk)); 302207141Sjeff sblk->sb_blk = blk; 303207141Sjeff TAILQ_INIT(&sblk->sb_recs); 304207141Sjeff LIST_INSERT_HEAD(hd, sblk, sb_next); 305207141Sjeff 306207141Sjeff return (sblk); 307207141Sjeff} 308207141Sjeff 309207141Sjeffstatic struct data_blk * 310207141Sjeffdblk_lookup(ufs2_daddr_t blk) 311207141Sjeff{ 312207141Sjeff struct data_blk *dblk; 313207141Sjeff struct dblkhd *hd; 314207141Sjeff 315207141Sjeff hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))]; 316207141Sjeff if (lastblk && lastblk->db_blk == blk) 317207141Sjeff return (lastblk); 318207141Sjeff LIST_FOREACH(dblk, hd, db_next) 319207141Sjeff if (dblk->db_blk == blk) 320207141Sjeff return (dblk); 321207141Sjeff /* 322207141Sjeff * The inode block wasn't located, allocate a new one. 
323207141Sjeff */ 324207141Sjeff dblk = errmalloc(sizeof(*dblk)); 325207141Sjeff bzero(dblk, sizeof(*dblk)); 326207141Sjeff LIST_INSERT_HEAD(hd, dblk, db_next); 327207141Sjeff dblk->db_blk = blk; 328207141Sjeff return (dblk); 329207141Sjeff} 330207141Sjeff 331207141Sjeffstatic uint8_t * 332207141Sjeffdblk_read(ufs2_daddr_t blk, int size) 333207141Sjeff{ 334207141Sjeff struct data_blk *dblk; 335207141Sjeff 336207141Sjeff dblk = dblk_lookup(blk); 337207141Sjeff /* 338207141Sjeff * I doubt size mismatches can happen in practice but it is trivial 339207141Sjeff * to handle. 340207141Sjeff */ 341207141Sjeff if (size != dblk->db_size) { 342207141Sjeff if (dblk->db_buf) 343207141Sjeff free(dblk->db_buf); 344207141Sjeff dblk->db_buf = errmalloc(size); 345207141Sjeff dblk->db_size = size; 346207141Sjeff if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1) 347207141Sjeff err(1, "Failed to read data block %jd", blk); 348207141Sjeff } 349207141Sjeff return (dblk->db_buf); 350207141Sjeff} 351207141Sjeff 352207141Sjeffstatic void 353207141Sjeffdblk_dirty(ufs2_daddr_t blk) 354207141Sjeff{ 355207141Sjeff struct data_blk *dblk; 356207141Sjeff 357207141Sjeff dblk = dblk_lookup(blk); 358207141Sjeff dblk->db_dirty = 1; 359207141Sjeff} 360207141Sjeff 361207141Sjeffstatic void 362207141Sjeffdblk_write(void) 363207141Sjeff{ 364207141Sjeff struct data_blk *dblk; 365207141Sjeff int i; 366207141Sjeff 367207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) { 368207141Sjeff LIST_FOREACH(dblk, &dbhash[i], db_next) { 369207141Sjeff if (dblk->db_dirty == 0 || dblk->db_size == 0) 370207141Sjeff continue; 371207141Sjeff if (bwrite(disk, fsbtodb(fs, dblk->db_blk), 372207141Sjeff dblk->db_buf, dblk->db_size) == -1) 373207141Sjeff err(1, "Unable to write block %jd", 374207141Sjeff dblk->db_blk); 375207141Sjeff } 376207141Sjeff } 377207141Sjeff} 378207141Sjeff 379207141Sjeffstatic union dinode * 380207141Sjeffino_read(ino_t ino) 381207141Sjeff{ 382207141Sjeff struct ino_blk *iblk; 383207141Sjeff 
struct iblkhd *hd; 384207141Sjeff struct suj_cg *sc; 385207141Sjeff ufs2_daddr_t blk; 386207141Sjeff int off; 387207141Sjeff 388207141Sjeff blk = ino_to_fsba(fs, ino); 389207141Sjeff sc = cg_lookup(ino_to_cg(fs, ino)); 390207141Sjeff iblk = sc->sc_lastiblk; 391207141Sjeff if (iblk && iblk->ib_blk == blk) 392207141Sjeff goto found; 393207141Sjeff hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; 394207141Sjeff LIST_FOREACH(iblk, hd, ib_next) 395207141Sjeff if (iblk->ib_blk == blk) 396207141Sjeff goto found; 397207141Sjeff /* 398207141Sjeff * The inode block wasn't located, allocate a new one. 399207141Sjeff */ 400207141Sjeff iblk = errmalloc(sizeof(*iblk)); 401207141Sjeff bzero(iblk, sizeof(*iblk)); 402207141Sjeff iblk->ib_buf = errmalloc(fs->fs_bsize); 403207141Sjeff iblk->ib_blk = blk; 404207141Sjeff LIST_INSERT_HEAD(hd, iblk, ib_next); 405207141Sjeff if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1) 406207141Sjeff err(1, "Failed to read inode block %jd", blk); 407207141Sjefffound: 408207141Sjeff sc->sc_lastiblk = iblk; 409207141Sjeff off = ino_to_fsbo(fs, ino); 410207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 411207141Sjeff return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off]; 412207141Sjeff else 413207141Sjeff return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off]; 414207141Sjeff} 415207141Sjeff 416207141Sjeffstatic void 417207141Sjeffino_dirty(ino_t ino) 418207141Sjeff{ 419207141Sjeff struct ino_blk *iblk; 420207141Sjeff struct iblkhd *hd; 421207141Sjeff struct suj_cg *sc; 422207141Sjeff ufs2_daddr_t blk; 423207141Sjeff 424207141Sjeff blk = ino_to_fsba(fs, ino); 425207141Sjeff sc = cg_lookup(ino_to_cg(fs, ino)); 426207141Sjeff iblk = sc->sc_lastiblk; 427207141Sjeff if (iblk && iblk->ib_blk == blk) { 428207141Sjeff iblk->ib_dirty = 1; 429207141Sjeff return; 430207141Sjeff } 431207141Sjeff hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; 432207141Sjeff LIST_FOREACH(iblk, hd, ib_next) { 433207141Sjeff if 
(iblk->ib_blk == blk) { 434207141Sjeff iblk->ib_dirty = 1; 435207141Sjeff return; 436207141Sjeff } 437207141Sjeff } 438207141Sjeff ino_read(ino); 439207141Sjeff ino_dirty(ino); 440207141Sjeff} 441207141Sjeff 442207141Sjeffstatic void 443207141Sjeffiblk_write(struct ino_blk *iblk) 444207141Sjeff{ 445207141Sjeff 446207141Sjeff if (iblk->ib_dirty == 0) 447207141Sjeff return; 448207141Sjeff if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf, 449207141Sjeff fs->fs_bsize) == -1) 450207141Sjeff err(1, "Failed to write inode block %jd", iblk->ib_blk); 451207141Sjeff} 452207141Sjeff 453207141Sjeffstatic int 454207141Sjeffblk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) 455207141Sjeff{ 456207141Sjeff ufs2_daddr_t bstart; 457207141Sjeff ufs2_daddr_t bend; 458207141Sjeff ufs2_daddr_t end; 459207141Sjeff 460207141Sjeff end = start + frags; 461207141Sjeff bstart = brec->jb_blkno + brec->jb_oldfrags; 462207141Sjeff bend = bstart + brec->jb_frags; 463207141Sjeff if (start < bend && end > bstart) 464207141Sjeff return (1); 465207141Sjeff return (0); 466207141Sjeff} 467207141Sjeff 468207141Sjeffstatic int 469207141Sjeffblk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, 470207141Sjeff int frags) 471207141Sjeff{ 472207141Sjeff 473207141Sjeff if (brec->jb_ino != ino || brec->jb_lbn != lbn) 474207141Sjeff return (0); 475207141Sjeff if (brec->jb_blkno + brec->jb_oldfrags != start) 476207141Sjeff return (0); 477207141Sjeff if (brec->jb_frags != frags) 478207141Sjeff return (0); 479207141Sjeff return (1); 480207141Sjeff} 481207141Sjeff 482207141Sjeffstatic void 483207141Sjeffblk_setmask(struct jblkrec *brec, int *mask) 484207141Sjeff{ 485207141Sjeff int i; 486207141Sjeff 487207141Sjeff for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) 488207141Sjeff *mask |= 1 << i; 489207141Sjeff} 490207141Sjeff 491207141Sjeff/* 492207141Sjeff * Determine whether a given block has been reallocated to a new location. 
493207141Sjeff * Returns a mask of overlapping bits if any frags have been reused or 494207141Sjeff * zero if the block has not been re-used and the contents can be trusted. 495207141Sjeff * 496207141Sjeff * This is used to ensure that an orphaned pointer due to truncate is safe 497207141Sjeff * to be freed. The mask value can be used to free partial blocks. 498207141Sjeff */ 499207141Sjeffstatic int 500207141Sjeffblk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) 501207141Sjeff{ 502207141Sjeff struct suj_blk *sblk; 503207141Sjeff struct suj_rec *srec; 504207141Sjeff struct jblkrec *brec; 505207141Sjeff int mask; 506207141Sjeff int off; 507207141Sjeff 508207141Sjeff /* 509207141Sjeff * To be certain we're not freeing a reallocated block we lookup 510207141Sjeff * this block in the blk hash and see if there is an allocation 511207141Sjeff * journal record that overlaps with any fragments in the block 512207141Sjeff * we're concerned with. If any fragments have ben reallocated 513207141Sjeff * the block has already been freed and re-used for another purpose. 514207141Sjeff */ 515207141Sjeff mask = 0; 516207141Sjeff sblk = blk_lookup(blknum(fs, blk), 0); 517207141Sjeff if (sblk == NULL) 518207141Sjeff return (0); 519207141Sjeff off = blk - sblk->sb_blk; 520207141Sjeff TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 521207141Sjeff brec = (struct jblkrec *)srec->sr_rec; 522207141Sjeff /* 523207141Sjeff * If the block overlaps but does not match 524207141Sjeff * exactly it's a new allocation. If it matches 525207141Sjeff * exactly this record refers to the current 526207141Sjeff * location. 
527207141Sjeff */ 528207141Sjeff if (blk_overlaps(brec, blk, frags) == 0) 529207141Sjeff continue; 530207141Sjeff if (blk_equals(brec, ino, lbn, blk, frags) == 1) 531207141Sjeff mask = 0; 532207141Sjeff else 533207141Sjeff blk_setmask(brec, &mask); 534207141Sjeff } 535207141Sjeff if (debug) 536207141Sjeff printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", 537207141Sjeff blk, sblk->sb_blk, off, mask); 538207141Sjeff return (mask >> off); 539207141Sjeff} 540207141Sjeff 541207141Sjeff/* 542207141Sjeff * Determine whether it is safe to follow an indirect. It is not safe 543207141Sjeff * if any part of the indirect has been reallocated or the last journal 544207141Sjeff * entry was an allocation. Just allocated indirects may not have valid 545207141Sjeff * pointers yet and all of their children will have their own records. 546207141Sjeff * It is also not safe to follow an indirect if the cg bitmap has been 547207141Sjeff * cleared as a new allocation may write to the block prior to the journal 548207141Sjeff * being written. 549207141Sjeff * 550207141Sjeff * Returns 1 if it's safe to follow the indirect and 0 otherwise. 551207141Sjeff */ 552207141Sjeffstatic int 553207141Sjeffblk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) 554207141Sjeff{ 555207141Sjeff struct suj_blk *sblk; 556207141Sjeff struct jblkrec *brec; 557207141Sjeff 558207141Sjeff sblk = blk_lookup(blk, 0); 559207141Sjeff if (sblk == NULL) 560207141Sjeff return (1); 561207141Sjeff if (TAILQ_EMPTY(&sblk->sb_recs)) 562207141Sjeff return (1); 563207141Sjeff brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; 564207141Sjeff if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) 565207141Sjeff if (brec->jb_op == JOP_FREEBLK) 566207141Sjeff return (!blk_isfree(blk)); 567207141Sjeff return (0); 568207141Sjeff} 569207141Sjeff 570207141Sjeff/* 571207141Sjeff * Clear an inode from the cg bitmap. 
If the inode was already clear return 572207141Sjeff * 0 so the caller knows it does not have to check the inode contents. 573207141Sjeff */ 574207141Sjeffstatic int 575207141Sjeffino_free(ino_t ino, int mode) 576207141Sjeff{ 577207141Sjeff struct suj_cg *sc; 578207141Sjeff uint8_t *inosused; 579207141Sjeff struct cg *cgp; 580207141Sjeff int cg; 581207141Sjeff 582207141Sjeff cg = ino_to_cg(fs, ino); 583207141Sjeff ino = ino % fs->fs_ipg; 584207141Sjeff sc = cg_lookup(cg); 585207141Sjeff cgp = sc->sc_cgp; 586207141Sjeff inosused = cg_inosused(cgp); 587207141Sjeff /* 588207141Sjeff * The bitmap may never have made it to the disk so we have to 589207141Sjeff * conditionally clear. We can avoid writing the cg in this case. 590207141Sjeff */ 591207141Sjeff if (isclr(inosused, ino)) 592207141Sjeff return (0); 593207141Sjeff freeinos++; 594207141Sjeff clrbit(inosused, ino); 595207141Sjeff if (ino < cgp->cg_irotor) 596207141Sjeff cgp->cg_irotor = ino; 597207141Sjeff cgp->cg_cs.cs_nifree++; 598207141Sjeff if ((mode & IFMT) == IFDIR) { 599207141Sjeff freedir++; 600207141Sjeff cgp->cg_cs.cs_ndir--; 601207141Sjeff } 602207141Sjeff sc->sc_dirty = 1; 603207141Sjeff 604207141Sjeff return (1); 605207141Sjeff} 606207141Sjeff 607207141Sjeff/* 608207141Sjeff * Free 'frags' frags starting at filesystem block 'bno' skipping any frags 609207141Sjeff * set in the mask. 
610207141Sjeff */ 611207141Sjeffstatic void 612207141Sjeffblk_free(ufs2_daddr_t bno, int mask, int frags) 613207141Sjeff{ 614207141Sjeff ufs1_daddr_t fragno, cgbno; 615207141Sjeff struct suj_cg *sc; 616207141Sjeff struct cg *cgp; 617207141Sjeff int i, cg; 618207141Sjeff uint8_t *blksfree; 619207141Sjeff 620207141Sjeff if (debug) 621207141Sjeff printf("Freeing %d frags at blk %jd\n", frags, bno); 622207141Sjeff cg = dtog(fs, bno); 623207141Sjeff sc = cg_lookup(cg); 624207141Sjeff cgp = sc->sc_cgp; 625207141Sjeff cgbno = dtogd(fs, bno); 626207141Sjeff blksfree = cg_blksfree(cgp); 627207141Sjeff 628207141Sjeff /* 629207141Sjeff * If it's not allocated we only wrote the journal entry 630207141Sjeff * and never the bitmaps. Here we unconditionally clear and 631207141Sjeff * resolve the cg summary later. 632207141Sjeff */ 633207141Sjeff if (frags == fs->fs_frag && mask == 0) { 634207141Sjeff fragno = fragstoblks(fs, cgbno); 635207141Sjeff ffs_setblock(fs, blksfree, fragno); 636207141Sjeff freeblocks++; 637207141Sjeff } else { 638207141Sjeff /* 639207141Sjeff * deallocate the fragment 640207141Sjeff */ 641207141Sjeff for (i = 0; i < frags; i++) 642207141Sjeff if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { 643207141Sjeff freefrags++; 644207141Sjeff setbit(blksfree, cgbno + i); 645207141Sjeff } 646207141Sjeff } 647207141Sjeff sc->sc_dirty = 1; 648207141Sjeff} 649207141Sjeff 650207141Sjeff/* 651207141Sjeff * Returns 1 if the whole block starting at 'bno' is marked free and 0 652207141Sjeff * otherwise. 653207141Sjeff */ 654207141Sjeffstatic int 655207141Sjeffblk_isfree(ufs2_daddr_t bno) 656207141Sjeff{ 657207141Sjeff struct suj_cg *sc; 658207141Sjeff 659207141Sjeff sc = cg_lookup(dtog(fs, bno)); 660207141Sjeff return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); 661207141Sjeff} 662207141Sjeff 663207141Sjeff/* 664207141Sjeff * Fetch an indirect block to find the block at a given lbn. 
The lbn 665207141Sjeff * may be negative to fetch a specific indirect block pointer or positive 666207141Sjeff * to fetch a specific block. 667207141Sjeff */ 668207141Sjeffstatic ufs2_daddr_t 669207141Sjeffindir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn) 670207141Sjeff{ 671207141Sjeff ufs2_daddr_t *bap2; 672207141Sjeff ufs2_daddr_t *bap1; 673207141Sjeff ufs_lbn_t lbnadd; 674207141Sjeff ufs_lbn_t base; 675207141Sjeff int level; 676207141Sjeff int i; 677207141Sjeff 678207141Sjeff if (blk == 0) 679207141Sjeff return (0); 680207141Sjeff level = lbn_level(cur); 681207141Sjeff if (level == -1) 682207141Sjeff errx(1, "Invalid indir lbn %jd", lbn); 683207141Sjeff if (level == 0 && lbn < 0) 684207141Sjeff errx(1, "Invalid lbn %jd", lbn); 685207141Sjeff bap2 = (void *)dblk_read(blk, fs->fs_bsize); 686207141Sjeff bap1 = (void *)bap2; 687207141Sjeff lbnadd = 1; 688207141Sjeff base = -(cur + level); 689207141Sjeff for (i = level; i > 0; i--) 690207141Sjeff lbnadd *= NINDIR(fs); 691207141Sjeff if (lbn > 0) 692207141Sjeff i = (lbn - base) / lbnadd; 693207141Sjeff else 694207141Sjeff i = (-lbn - base) / lbnadd; 695207141Sjeff if (i < 0 || i >= NINDIR(fs)) 696207141Sjeff errx(1, "Invalid indirect index %d produced by lbn %jd", 697207141Sjeff i, lbn); 698207141Sjeff if (level == 0) 699207141Sjeff cur = base + (i * lbnadd); 700207141Sjeff else 701207141Sjeff cur = -(base + (i * lbnadd)) - (level - 1); 702207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 703207141Sjeff blk = bap1[i]; 704207141Sjeff else 705207141Sjeff blk = bap2[i]; 706207141Sjeff if (cur == lbn) 707207141Sjeff return (blk); 708207141Sjeff if (level == 0) { 709207141Sjeff abort(); 710207141Sjeff errx(1, "Invalid lbn %jd at level 0", lbn); 711207141Sjeff } 712207141Sjeff return indir_blkatoff(blk, ino, cur, lbn); 713207141Sjeff} 714207141Sjeff 715207141Sjeff/* 716207141Sjeff * Finds the disk block address at the specified lbn within the inode 717207141Sjeff * specified by ip. 
This follows the whole tree and honors di_size and 718207141Sjeff * di_extsize so it is a true test of reachability. The lbn may be 719207141Sjeff * negative if an extattr or indirect block is requested. 720207141Sjeff */ 721207141Sjeffstatic ufs2_daddr_t 722207141Sjeffino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) 723207141Sjeff{ 724207141Sjeff ufs_lbn_t tmpval; 725207141Sjeff ufs_lbn_t cur; 726207141Sjeff ufs_lbn_t next; 727207141Sjeff int i; 728207141Sjeff 729207141Sjeff /* 730207141Sjeff * Handle extattr blocks first. 731207141Sjeff */ 732207141Sjeff if (lbn < 0 && lbn >= -NXADDR) { 733207141Sjeff lbn = -1 - lbn; 734207141Sjeff if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) 735207141Sjeff return (0); 736207141Sjeff *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); 737207141Sjeff return (ip->dp2.di_extb[lbn]); 738207141Sjeff } 739207141Sjeff /* 740207141Sjeff * Now direct and indirect. 741207141Sjeff */ 742207141Sjeff if (DIP(ip, di_mode) == IFLNK && 743207141Sjeff DIP(ip, di_size) < fs->fs_maxsymlinklen) 744207141Sjeff return (0); 745207141Sjeff if (lbn >= 0 && lbn < NDADDR) { 746207141Sjeff *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); 747207141Sjeff return (DIP(ip, di_db[lbn])); 748207141Sjeff } 749207141Sjeff *frags = fs->fs_frag; 750207141Sjeff 751207141Sjeff for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++, 752207141Sjeff tmpval *= NINDIR(fs), cur = next) { 753207141Sjeff next = cur + tmpval; 754207141Sjeff if (lbn == -cur - i) 755207141Sjeff return (DIP(ip, di_ib[i])); 756207141Sjeff /* 757207141Sjeff * Determine whether the lbn in question is within this tree. 
758207141Sjeff */ 759207141Sjeff if (lbn < 0 && -lbn >= next) 760207141Sjeff continue; 761207141Sjeff if (lbn > 0 && lbn >= next) 762207141Sjeff continue; 763207141Sjeff return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn); 764207141Sjeff } 765207141Sjeff errx(1, "lbn %jd not in ino", lbn); 766207141Sjeff} 767207141Sjeff 768207141Sjeff/* 769207141Sjeff * Determine whether a block exists at a particular lbn in an inode. 770207141Sjeff * Returns 1 if found, 0 if not. lbn may be negative for indirects 771207141Sjeff * or ext blocks. 772207141Sjeff */ 773207141Sjeffstatic int 774207141Sjeffblk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) 775207141Sjeff{ 776207141Sjeff union dinode *ip; 777207141Sjeff ufs2_daddr_t nblk; 778207141Sjeff 779207141Sjeff ip = ino_read(ino); 780207141Sjeff 781207141Sjeff if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) 782207141Sjeff return (0); 783207141Sjeff nblk = ino_blkatoff(ip, ino, lbn, frags); 784207141Sjeff 785207141Sjeff return (nblk == blk); 786207141Sjeff} 787207141Sjeff 788207141Sjeff/* 789207141Sjeff * Determines whether a pointer to an inode exists within a directory 790207141Sjeff * at a specified offset. Returns the mode of the found entry. 791207141Sjeff */ 792207141Sjeffstatic int 793207141Sjeffino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) 794207141Sjeff{ 795207141Sjeff union dinode *dip; 796207141Sjeff struct direct *dp; 797207141Sjeff ufs2_daddr_t blk; 798207141Sjeff uint8_t *block; 799207141Sjeff ufs_lbn_t lbn; 800207141Sjeff int blksize; 801207141Sjeff int frags; 802207141Sjeff int dpoff; 803207141Sjeff int doff; 804207141Sjeff 805207141Sjeff *isdot = 0; 806207141Sjeff dip = ino_read(parent); 807207141Sjeff *mode = DIP(dip, di_mode); 808207141Sjeff if ((*mode & IFMT) != IFDIR) { 809207141Sjeff if (debug) { 810207141Sjeff /* 811207141Sjeff * This can happen if the parent inode 812207141Sjeff * was reallocated. 
813207141Sjeff */ 814207141Sjeff if (*mode != 0) 815207141Sjeff printf("Directory %d has bad mode %o\n", 816207141Sjeff parent, *mode); 817207141Sjeff else 818207141Sjeff printf("Directory %d zero inode\n", parent); 819207141Sjeff } 820207141Sjeff return (0); 821207141Sjeff } 822207141Sjeff lbn = lblkno(fs, diroff); 823207141Sjeff doff = blkoff(fs, diroff); 824207141Sjeff blksize = sblksize(fs, DIP(dip, di_size), lbn); 825207141Sjeff if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { 826207141Sjeff if (debug) 827207141Sjeff printf("ino %d absent from %d due to offset %jd" 828207141Sjeff " exceeding size %jd\n", 829207141Sjeff child, parent, diroff, DIP(dip, di_size)); 830207141Sjeff return (0); 831207141Sjeff } 832207141Sjeff blk = ino_blkatoff(dip, parent, lbn, &frags); 833207141Sjeff if (blk <= 0) { 834207141Sjeff if (debug) 835207141Sjeff printf("Sparse directory %d", parent); 836207141Sjeff return (0); 837207141Sjeff } 838207141Sjeff block = dblk_read(blk, blksize); 839207141Sjeff /* 840207141Sjeff * Walk through the records from the start of the block to be 841207141Sjeff * certain we hit a valid record and not some junk in the middle 842207141Sjeff * of a file name. Stop when we reach or pass the expected offset. 843207141Sjeff */ 844207141Sjeff dpoff = (doff / DIRBLKSIZ) * DIRBLKSIZ; 845207141Sjeff do { 846207141Sjeff dp = (struct direct *)&block[dpoff]; 847207141Sjeff if (dpoff == doff) 848207141Sjeff break; 849207141Sjeff if (dp->d_reclen == 0) 850207141Sjeff break; 851207141Sjeff dpoff += dp->d_reclen; 852207141Sjeff } while (dpoff <= doff); 853207141Sjeff if (dpoff > fs->fs_bsize) 854207141Sjeff errx(1, "Corrupt directory block in dir ino %d", parent); 855207141Sjeff /* Not found. 
*/ 856207141Sjeff if (dpoff != doff) { 857207141Sjeff if (debug) 858207141Sjeff printf("ino %d not found in %d, lbn %jd, dpoff %d\n", 859207141Sjeff child, parent, lbn, dpoff); 860207141Sjeff return (0); 861207141Sjeff } 862207141Sjeff /* 863207141Sjeff * We found the item in question. Record the mode and whether it's 864207141Sjeff * a . or .. link for the caller. 865207141Sjeff */ 866207141Sjeff if (dp->d_ino == child) { 867207141Sjeff if (child == parent) 868207141Sjeff *isdot = 1; 869207141Sjeff else if (dp->d_namlen == 2 && 870207141Sjeff dp->d_name[0] == '.' && dp->d_name[1] == '.') 871207141Sjeff *isdot = 1; 872207141Sjeff *mode = DTTOIF(dp->d_type); 873207141Sjeff return (1); 874207141Sjeff } 875207141Sjeff if (debug) 876207141Sjeff printf("ino %d doesn't match dirent ino %d in parent %d\n", 877207141Sjeff child, dp->d_ino, parent); 878207141Sjeff return (0); 879207141Sjeff} 880207141Sjeff 881207141Sjeff#define VISIT_INDIR 0x0001 882207141Sjeff#define VISIT_EXT 0x0002 883207141Sjeff#define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */ 884207141Sjeff 885207141Sjeff/* 886207141Sjeff * Read an indirect level which may or may not be linked into an inode. 887207141Sjeff */ 888207141Sjeffstatic void 889207141Sjeffindir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, 890207141Sjeff ino_visitor visitor, int flags) 891207141Sjeff{ 892207141Sjeff ufs2_daddr_t *bap2; 893207141Sjeff ufs1_daddr_t *bap1; 894207141Sjeff ufs_lbn_t lbnadd; 895207141Sjeff ufs2_daddr_t nblk; 896207141Sjeff ufs_lbn_t nlbn; 897207141Sjeff int level; 898207141Sjeff int i; 899207141Sjeff 900207141Sjeff /* 901207141Sjeff * Don't visit indirect blocks with contents we can't trust. This 902207141Sjeff * should only happen when indir_visit() is called to complete a 903207141Sjeff * truncate that never finished and not when a pointer is found via 904207141Sjeff * an inode. 
905207141Sjeff */ 906207141Sjeff if (blk == 0) 907207141Sjeff return; 908207141Sjeff level = lbn_level(lbn); 909207141Sjeff if (level == -1) 910207141Sjeff errx(1, "Invalid level for lbn %jd", lbn); 911207141Sjeff if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) { 912207141Sjeff if (debug) 913207141Sjeff printf("blk %jd ino %d lbn %jd(%d) is not indir.\n", 914207141Sjeff blk, ino, lbn, level); 915207141Sjeff goto out; 916207141Sjeff } 917207141Sjeff lbnadd = 1; 918207141Sjeff for (i = level; i > 0; i--) 919207141Sjeff lbnadd *= NINDIR(fs); 920207141Sjeff bap1 = (void *)dblk_read(blk, fs->fs_bsize); 921207141Sjeff bap2 = (void *)bap1; 922207141Sjeff for (i = 0; i < NINDIR(fs); i++) { 923207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 924207141Sjeff nblk = *bap1++; 925207141Sjeff else 926207141Sjeff nblk = *bap2++; 927207141Sjeff if (nblk == 0) 928207141Sjeff continue; 929207141Sjeff if (level == 0) { 930207141Sjeff nlbn = -lbn + i * lbnadd; 931207141Sjeff (*frags) += fs->fs_frag; 932207141Sjeff visitor(ino, nlbn, nblk, fs->fs_frag); 933207141Sjeff } else { 934207141Sjeff nlbn = (lbn + 1) - (i * lbnadd); 935207141Sjeff indir_visit(ino, nlbn, nblk, frags, visitor, flags); 936207141Sjeff } 937207141Sjeff } 938207141Sjeffout: 939207141Sjeff if (flags & VISIT_INDIR) { 940207141Sjeff (*frags) += fs->fs_frag; 941207141Sjeff visitor(ino, lbn, blk, fs->fs_frag); 942207141Sjeff } 943207141Sjeff} 944207141Sjeff 945207141Sjeff/* 946207141Sjeff * Visit each block in an inode as specified by 'flags' and call a 947207141Sjeff * callback function. The callback may inspect or free blocks. The 948207141Sjeff * count of frags found according to the size in the file is returned. 949207141Sjeff * This is not valid for sparse files but may be used to determine 950207141Sjeff * the correct di_blocks for a file. 
951207141Sjeff */ 952207141Sjeffstatic uint64_t 953207141Sjeffino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) 954207141Sjeff{ 955207141Sjeff ufs_lbn_t nextlbn; 956207141Sjeff ufs_lbn_t tmpval; 957207141Sjeff ufs_lbn_t lbn; 958207141Sjeff uint64_t size; 959207141Sjeff uint64_t fragcnt; 960207141Sjeff int mode; 961207141Sjeff int frags; 962207141Sjeff int i; 963207141Sjeff 964207141Sjeff size = DIP(ip, di_size); 965207141Sjeff mode = DIP(ip, di_mode) & IFMT; 966207141Sjeff fragcnt = 0; 967207141Sjeff if ((flags & VISIT_EXT) && 968207141Sjeff fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) { 969207141Sjeff for (i = 0; i < NXADDR; i++) { 970207141Sjeff if (ip->dp2.di_extb[i] == 0) 971207141Sjeff continue; 972207141Sjeff frags = sblksize(fs, ip->dp2.di_extsize, i); 973207141Sjeff frags = numfrags(fs, frags); 974207141Sjeff fragcnt += frags; 975207141Sjeff visitor(ino, -1 - i, ip->dp2.di_extb[i], frags); 976207141Sjeff } 977207141Sjeff } 978207141Sjeff /* Skip datablocks for short links and devices. */ 979207141Sjeff if (mode == IFBLK || mode == IFCHR || 980207141Sjeff (mode == IFLNK && size < fs->fs_maxsymlinklen)) 981207141Sjeff return (fragcnt); 982207141Sjeff for (i = 0; i < NDADDR; i++) { 983207141Sjeff if (DIP(ip, di_db[i]) == 0) 984207141Sjeff continue; 985207141Sjeff frags = sblksize(fs, size, i); 986207141Sjeff frags = numfrags(fs, frags); 987207141Sjeff fragcnt += frags; 988207141Sjeff visitor(ino, i, DIP(ip, di_db[i]), frags); 989207141Sjeff } 990207141Sjeff /* 991207141Sjeff * We know the following indirects are real as we're following 992207141Sjeff * real pointers to them. 
993207141Sjeff */ 994207141Sjeff flags |= VISIT_ROOT; 995207141Sjeff for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, 996207141Sjeff lbn = nextlbn) { 997207141Sjeff nextlbn = lbn + tmpval; 998207141Sjeff tmpval *= NINDIR(fs); 999207141Sjeff if (DIP(ip, di_ib[i]) == 0) 1000207141Sjeff continue; 1001207141Sjeff indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, 1002207141Sjeff flags); 1003207141Sjeff } 1004207141Sjeff return (fragcnt); 1005207141Sjeff} 1006207141Sjeff 1007207141Sjeff/* 1008207141Sjeff * Null visitor function used when we just want to count blocks and 1009207141Sjeff * record the lbn. 1010207141Sjeff */ 1011207141Sjeffufs_lbn_t visitlbn; 1012207141Sjeffstatic void 1013207141Sjeffnull_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1014207141Sjeff{ 1015207141Sjeff if (lbn > 0) 1016207141Sjeff visitlbn = lbn; 1017207141Sjeff} 1018207141Sjeff 1019207141Sjeff/* 1020207141Sjeff * Recalculate di_blocks when we discover that a block allocation or 1021207141Sjeff * free was not successfully completed. The kernel does not roll this back 1022207141Sjeff * because it would be too expensive to compute which indirects were 1023207141Sjeff * reachable at the time the inode was written. 1024207141Sjeff */ 1025207141Sjeffstatic void 1026207141Sjeffino_adjblks(struct suj_ino *sino) 1027207141Sjeff{ 1028207141Sjeff union dinode *ip; 1029207141Sjeff uint64_t blocks; 1030207141Sjeff uint64_t frags; 1031207141Sjeff off_t isize; 1032207141Sjeff off_t size; 1033207141Sjeff ino_t ino; 1034207141Sjeff 1035207141Sjeff ino = sino->si_ino; 1036207141Sjeff ip = ino_read(ino); 1037207141Sjeff /* No need to adjust zero'd inodes. */ 1038207141Sjeff if (DIP(ip, di_mode) == 0) 1039207141Sjeff return; 1040207141Sjeff /* 1041207141Sjeff * Visit all blocks and count them as well as recording the last 1042207141Sjeff * valid lbn in the file. If the file size doesn't agree with the 1043207141Sjeff * last lbn we need to truncate to fix it. 
Otherwise just adjust 1044207141Sjeff * the blocks count. 1045207141Sjeff */ 1046207141Sjeff visitlbn = 0; 1047207141Sjeff frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 1048207141Sjeff blocks = fsbtodb(fs, frags); 1049207141Sjeff /* 1050207141Sjeff * We assume the size and direct block list is kept coherent by 1051207141Sjeff * softdep. For files that have extended into indirects we truncate 1052207141Sjeff * to the size in the inode or the maximum size permitted by 1053207141Sjeff * populated indirects. 1054207141Sjeff */ 1055207141Sjeff if (visitlbn >= NDADDR) { 1056207141Sjeff isize = DIP(ip, di_size); 1057207141Sjeff size = lblktosize(fs, visitlbn + 1); 1058207141Sjeff if (isize > size) 1059207141Sjeff isize = size; 1060207141Sjeff /* Always truncate to free any unpopulated indirects. */ 1061207141Sjeff ino_trunc(sino->si_ino, isize); 1062207141Sjeff return; 1063207141Sjeff } 1064207141Sjeff if (blocks == DIP(ip, di_blocks)) 1065207141Sjeff return; 1066207141Sjeff if (debug) 1067207141Sjeff printf("ino %d adjusting block count from %jd to %jd\n", 1068207141Sjeff ino, DIP(ip, di_blocks), blocks); 1069207141Sjeff DIP_SET(ip, di_blocks, blocks); 1070207141Sjeff ino_dirty(ino); 1071207141Sjeff} 1072207141Sjeff 1073207141Sjeffstatic void 1074207141Sjeffblk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1075207141Sjeff{ 1076207141Sjeff int mask; 1077207141Sjeff 1078207141Sjeff mask = blk_freemask(blk, ino, lbn, frags); 1079207141Sjeff if (debug) 1080207141Sjeff printf("blk %jd freemask 0x%X\n", blk, mask); 1081207141Sjeff blk_free(blk, mask, frags); 1082207141Sjeff} 1083207141Sjeff 1084207141Sjeff/* 1085207141Sjeff * Free a block or tree of blocks that was previously rooted in ino at 1086207141Sjeff * the given lbn. If the lbn is an indirect all children are freed 1087207141Sjeff * recursively. 
1088207141Sjeff */ 1089207141Sjeffstatic void 1090207141Sjeffblk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) 1091207141Sjeff{ 1092207141Sjeff uint64_t resid; 1093207141Sjeff int mask; 1094207141Sjeff 1095207141Sjeff mask = blk_freemask(blk, ino, lbn, frags); 1096207141Sjeff if (debug) 1097207141Sjeff printf("blk %jd freemask 0x%X\n", blk, mask); 1098207141Sjeff resid = 0; 1099207141Sjeff if (lbn <= -NDADDR && follow && mask == 0) 1100207141Sjeff indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); 1101207141Sjeff else 1102207141Sjeff blk_free(blk, mask, frags); 1103207141Sjeff} 1104207141Sjeff 1105207141Sjeffstatic void 1106207141Sjeffino_setskip(struct suj_ino *sino, ino_t parent) 1107207141Sjeff{ 1108207141Sjeff int isdot; 1109207141Sjeff int mode; 1110207141Sjeff 1111207141Sjeff if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) 1112207141Sjeff sino->si_skipparent = 1; 1113207141Sjeff} 1114207141Sjeff 1115207141Sjeff/* 1116207141Sjeff * Free the children of a directory when the directory is discarded. 
1117207141Sjeff */ 1118207141Sjeffstatic void 1119207141Sjeffino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1120207141Sjeff{ 1121207141Sjeff struct suj_ino *sino; 1122207141Sjeff struct suj_rec *srec; 1123207141Sjeff struct jrefrec *rrec; 1124207141Sjeff struct direct *dp; 1125207141Sjeff off_t diroff; 1126207141Sjeff uint8_t *block; 1127207141Sjeff int skipparent; 1128207141Sjeff int isparent; 1129207141Sjeff int dpoff; 1130207141Sjeff int size; 1131207141Sjeff 1132207141Sjeff sino = ino_lookup(ino, 0); 1133207141Sjeff if (sino) 1134207141Sjeff skipparent = sino->si_skipparent; 1135207141Sjeff else 1136207141Sjeff skipparent = 0; 1137207141Sjeff size = lfragtosize(fs, frags); 1138207141Sjeff block = dblk_read(blk, size); 1139207141Sjeff dp = (struct direct *)&block[0]; 1140207141Sjeff for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { 1141207141Sjeff dp = (struct direct *)&block[dpoff]; 1142207141Sjeff if (dp->d_ino == 0 || dp->d_ino == WINO) 1143207141Sjeff continue; 1144207141Sjeff if (dp->d_namlen == 1 && dp->d_name[0] == '.') 1145207141Sjeff continue; 1146207141Sjeff isparent = dp->d_namlen == 2 && dp->d_name[0] == '.' && 1147207141Sjeff dp->d_name[1] == '.'; 1148207141Sjeff if (isparent && skipparent == 1) 1149207141Sjeff continue; 1150207141Sjeff if (debug) 1151207141Sjeff printf("Directory %d removing ino %d name %s\n", 1152207141Sjeff ino, dp->d_ino, dp->d_name); 1153207141Sjeff /* 1154207141Sjeff * Lookup this inode to see if we have a record for it. 1155207141Sjeff * If not, we've already adjusted it assuming this path 1156207141Sjeff * was valid and we have to adjust once more. 
1157207141Sjeff */ 1158207141Sjeff sino = ino_lookup(dp->d_ino, 0); 1159207141Sjeff if (sino == NULL || sino->si_hasrecs == 0) { 1160207141Sjeff ino_decr(ino); 1161207141Sjeff continue; 1162207141Sjeff } 1163207141Sjeff /* 1164207141Sjeff * Use ino_adjust() so if we lose the last non-dot reference 1165207141Sjeff * to a directory it can be discarded. 1166207141Sjeff */ 1167207141Sjeff if (sino->si_linkadj) { 1168207141Sjeff sino->si_nlink--; 1169207141Sjeff if (isparent) 1170207141Sjeff sino->si_dotlinks--; 1171207141Sjeff ino_adjust(sino); 1172207141Sjeff } 1173207141Sjeff /* 1174207141Sjeff * Tell any child directories we've already removed their 1175207141Sjeff * parent. Don't try to adjust our link down again. 1176207141Sjeff */ 1177207141Sjeff if (isparent == 0) 1178207141Sjeff ino_setskip(sino, ino); 1179207141Sjeff /* 1180207141Sjeff * If we haven't yet processed this inode we need to make 1181207141Sjeff * sure we will successfully discover the lost path. If not 1182207141Sjeff * use nlinkadj to remember. 1183207141Sjeff */ 1184207141Sjeff diroff = lblktosize(fs, lbn) + dpoff; 1185207141Sjeff TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1186207141Sjeff rrec = (struct jrefrec *)srec->sr_rec; 1187207141Sjeff if (rrec->jr_parent == ino && 1188207141Sjeff rrec->jr_diroff == diroff) 1189207141Sjeff break; 1190207141Sjeff } 1191207141Sjeff if (srec == NULL) 1192207141Sjeff sino->si_nlinkadj++; 1193207141Sjeff } 1194207141Sjeff} 1195207141Sjeff 1196207141Sjeff/* 1197207141Sjeff * Reclaim an inode, freeing all blocks and decrementing all children's 1198207141Sjeff * link counts. Free the inode back to the cg. 
1199207141Sjeff */ 1200207141Sjeffstatic void 1201207141Sjeffino_reclaim(union dinode *ip, ino_t ino, int mode) 1202207141Sjeff{ 1203207141Sjeff uint32_t gen; 1204207141Sjeff 1205207141Sjeff if (ino == ROOTINO) 1206207141Sjeff errx(1, "Attempting to free ROOTINO"); 1207207141Sjeff if (debug) 1208207141Sjeff printf("Truncating and freeing ino %d, nlink %d, mode %o\n", 1209207141Sjeff ino, DIP(ip, di_nlink), DIP(ip, di_mode)); 1210207141Sjeff 1211207141Sjeff /* We are freeing an inode or directory. */ 1212207141Sjeff if ((DIP(ip, di_mode) & IFMT) == IFDIR) 1213207141Sjeff ino_visit(ip, ino, ino_free_children, 0); 1214207141Sjeff DIP_SET(ip, di_nlink, 0); 1215207141Sjeff ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); 1216207141Sjeff /* Here we have to clear the inode and release any blocks it holds. */ 1217207141Sjeff gen = DIP(ip, di_gen); 1218207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 1219207141Sjeff bzero(ip, sizeof(struct ufs1_dinode)); 1220207141Sjeff else 1221207141Sjeff bzero(ip, sizeof(struct ufs2_dinode)); 1222207141Sjeff DIP_SET(ip, di_gen, gen); 1223207141Sjeff ino_dirty(ino); 1224207141Sjeff ino_free(ino, mode); 1225207141Sjeff return; 1226207141Sjeff} 1227207141Sjeff 1228207141Sjeff/* 1229207141Sjeff * Adjust an inode's link count down by one when a directory goes away. 
1230207141Sjeff */ 1231207141Sjeffstatic void 1232207141Sjeffino_decr(ino_t ino) 1233207141Sjeff{ 1234207141Sjeff union dinode *ip; 1235207141Sjeff int reqlink; 1236207141Sjeff int nlink; 1237207141Sjeff int mode; 1238207141Sjeff 1239207141Sjeff ip = ino_read(ino); 1240207141Sjeff nlink = DIP(ip, di_nlink); 1241207141Sjeff mode = DIP(ip, di_mode); 1242207141Sjeff if (nlink < 1) 1243207141Sjeff errx(1, "Inode %d link count %d invalid", ino, nlink); 1244207141Sjeff if (mode == 0) 1245207141Sjeff errx(1, "Inode %d has a link of %d with 0 mode.", ino, nlink); 1246207141Sjeff nlink--; 1247207141Sjeff if ((mode & IFMT) == IFDIR) 1248207141Sjeff reqlink = 2; 1249207141Sjeff else 1250207141Sjeff reqlink = 1; 1251207141Sjeff if (nlink < reqlink) { 1252207141Sjeff if (debug) 1253207141Sjeff printf("ino %d not enough links to live %d < %d\n", 1254207141Sjeff ino, nlink, reqlink); 1255207141Sjeff ino_reclaim(ip, ino, mode); 1256207141Sjeff return; 1257207141Sjeff } 1258207141Sjeff DIP_SET(ip, di_nlink, nlink); 1259207141Sjeff ino_dirty(ino); 1260207141Sjeff} 1261207141Sjeff 1262207141Sjeff/* 1263207141Sjeff * Adjust the inode link count to 'nlink'. If the count reaches zero 1264207141Sjeff * free it. 1265207141Sjeff */ 1266207141Sjeffstatic void 1267207141Sjeffino_adjust(struct suj_ino *sino) 1268207141Sjeff{ 1269207141Sjeff struct jrefrec *rrec; 1270207141Sjeff struct suj_rec *srec; 1271207141Sjeff struct suj_ino *stmp; 1272207141Sjeff union dinode *ip; 1273207141Sjeff nlink_t nlink; 1274207141Sjeff int reqlink; 1275207141Sjeff int mode; 1276207141Sjeff ino_t ino; 1277207141Sjeff 1278207141Sjeff nlink = sino->si_nlink; 1279207141Sjeff ino = sino->si_ino; 1280207141Sjeff /* 1281207141Sjeff * If it's a directory with no real names pointing to it go ahead 1282207141Sjeff * and truncate it. This will free any children. 
1283207141Sjeff */ 1284207141Sjeff if ((sino->si_mode & IFMT) == IFDIR && 1285207141Sjeff nlink - sino->si_dotlinks == 0) { 1286207141Sjeff sino->si_nlink = nlink = 0; 1287207141Sjeff /* 1288207141Sjeff * Mark any .. links so they know not to free this inode 1289207141Sjeff * when they are removed. 1290207141Sjeff */ 1291207141Sjeff TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1292207141Sjeff rrec = (struct jrefrec *)srec->sr_rec; 1293207141Sjeff if (rrec->jr_diroff == DOTDOT_OFFSET) { 1294207141Sjeff stmp = ino_lookup(rrec->jr_parent, 0); 1295207141Sjeff if (stmp) 1296207141Sjeff ino_setskip(stmp, ino); 1297207141Sjeff } 1298207141Sjeff } 1299207141Sjeff } 1300207141Sjeff ip = ino_read(ino); 1301207141Sjeff mode = DIP(ip, di_mode) & IFMT; 1302207141Sjeff if (nlink > LINK_MAX) 1303207141Sjeff errx(1, 1304207141Sjeff "ino %d nlink manipulation error, new link %d, old link %d", 1305207141Sjeff ino, nlink, DIP(ip, di_nlink)); 1306207141Sjeff if (debug) 1307207141Sjeff printf("Adjusting ino %d, nlink %d, old link %d lastmode %o\n", 1308207141Sjeff ino, nlink, DIP(ip, di_nlink), sino->si_mode); 1309207141Sjeff if (mode == 0) { 1310207141Sjeff if (debug) 1311207141Sjeff printf("ino %d, zero inode freeing bitmap\n", ino); 1312207141Sjeff ino_free(ino, sino->si_mode); 1313207141Sjeff return; 1314207141Sjeff } 1315207141Sjeff /* XXX Should be an assert? */ 1316207141Sjeff if (mode != sino->si_mode && debug) 1317207141Sjeff printf("ino %d, mode %o != %o\n", ino, mode, sino->si_mode); 1318207141Sjeff if ((mode & IFMT) == IFDIR) 1319207141Sjeff reqlink = 2; 1320207141Sjeff else 1321207141Sjeff reqlink = 1; 1322207141Sjeff /* If the inode doesn't have enough links to live, free it. 
*/ 1323207141Sjeff if (nlink < reqlink) { 1324207141Sjeff if (debug) 1325207141Sjeff printf("ino %d not enough links to live %d < %d\n", 1326207141Sjeff ino, nlink, reqlink); 1327207141Sjeff ino_reclaim(ip, ino, mode); 1328207141Sjeff return; 1329207141Sjeff } 1330207141Sjeff /* If required write the updated link count. */ 1331207141Sjeff if (DIP(ip, di_nlink) == nlink) { 1332207141Sjeff if (debug) 1333207141Sjeff printf("ino %d, link matches, skipping.\n", ino); 1334207141Sjeff return; 1335207141Sjeff } 1336207141Sjeff DIP_SET(ip, di_nlink, nlink); 1337207141Sjeff ino_dirty(ino); 1338207141Sjeff} 1339207141Sjeff 1340207141Sjeff/* 1341207141Sjeff * Truncate some or all blocks in an indirect, freeing any that are required 1342207141Sjeff * and zeroing the indirect. 1343207141Sjeff */ 1344207141Sjeffstatic void 1345207141Sjeffindir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn) 1346207141Sjeff{ 1347207141Sjeff ufs2_daddr_t *bap2; 1348207141Sjeff ufs1_daddr_t *bap1; 1349207141Sjeff ufs_lbn_t lbnadd; 1350207141Sjeff ufs2_daddr_t nblk; 1351207141Sjeff ufs_lbn_t next; 1352207141Sjeff ufs_lbn_t nlbn; 1353207141Sjeff int dirty; 1354207141Sjeff int level; 1355207141Sjeff int i; 1356207141Sjeff 1357207141Sjeff if (blk == 0) 1358207141Sjeff return; 1359207141Sjeff dirty = 0; 1360207141Sjeff level = lbn_level(lbn); 1361207141Sjeff if (level == -1) 1362207141Sjeff errx(1, "Invalid level for lbn %jd", lbn); 1363207141Sjeff lbnadd = 1; 1364207141Sjeff for (i = level; i > 0; i--) 1365207141Sjeff lbnadd *= NINDIR(fs); 1366207141Sjeff bap1 = (void *)dblk_read(blk, fs->fs_bsize); 1367207141Sjeff bap2 = (void *)bap1; 1368207141Sjeff for (i = 0; i < NINDIR(fs); i++) { 1369207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 1370207141Sjeff nblk = *bap1++; 1371207141Sjeff else 1372207141Sjeff nblk = *bap2++; 1373207141Sjeff if (nblk == 0) 1374207141Sjeff continue; 1375207141Sjeff if (level != 0) { 1376207141Sjeff nlbn = (lbn + 1) - (i * lbnadd); 1377207141Sjeff /* 
1378207141Sjeff * Calculate the lbn of the next indirect to 1379207141Sjeff * determine if any of this indirect must be 1380207141Sjeff * reclaimed. 1381207141Sjeff */ 1382207141Sjeff next = -(lbn + level) + ((i+1) * lbnadd); 1383207141Sjeff if (next <= lastlbn) 1384207141Sjeff continue; 1385207141Sjeff indir_trunc(ino, nlbn, nblk, lastlbn); 1386207141Sjeff /* If all of this indirect was reclaimed, free it. */ 1387207141Sjeff nlbn = next - lbnadd; 1388207141Sjeff if (nlbn < lastlbn) 1389207141Sjeff continue; 1390207141Sjeff } else { 1391207141Sjeff nlbn = -lbn + i * lbnadd; 1392207141Sjeff if (nlbn < lastlbn) 1393207141Sjeff continue; 1394207141Sjeff } 1395207141Sjeff dirty = 1; 1396207141Sjeff blk_free(nblk, 0, fs->fs_frag); 1397207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 1398207141Sjeff *(bap1 - 1) = 0; 1399207141Sjeff else 1400207141Sjeff *(bap2 - 1) = 0; 1401207141Sjeff } 1402207141Sjeff if (dirty) 1403207141Sjeff dblk_dirty(blk); 1404207141Sjeff} 1405207141Sjeff 1406207141Sjeff/* 1407207141Sjeff * Truncate an inode to the minimum of the given size or the last populated 1408207141Sjeff * block after any over size have been discarded. The kernel would allocate 1409207141Sjeff * the last block in the file but fsck does not and neither do we. This 1410207141Sjeff * code never extends files, only shrinks them. 
1411207141Sjeff */ 1412207141Sjeffstatic void 1413207141Sjeffino_trunc(ino_t ino, off_t size) 1414207141Sjeff{ 1415207141Sjeff union dinode *ip; 1416207141Sjeff ufs2_daddr_t bn; 1417207141Sjeff uint64_t totalfrags; 1418207141Sjeff ufs_lbn_t nextlbn; 1419207141Sjeff ufs_lbn_t lastlbn; 1420207141Sjeff ufs_lbn_t tmpval; 1421207141Sjeff ufs_lbn_t lbn; 1422207141Sjeff ufs_lbn_t i; 1423207141Sjeff int frags; 1424207141Sjeff off_t cursize; 1425207141Sjeff off_t off; 1426207141Sjeff int mode; 1427207141Sjeff 1428207141Sjeff ip = ino_read(ino); 1429207141Sjeff mode = DIP(ip, di_mode) & IFMT; 1430207141Sjeff cursize = DIP(ip, di_size); 1431207141Sjeff if (debug) 1432207141Sjeff printf("Truncating ino %d, mode %o to size %jd from size %jd\n", 1433207141Sjeff ino, mode, size, cursize); 1434207141Sjeff 1435207141Sjeff /* Skip datablocks for short links and devices. */ 1436207141Sjeff if (mode == 0 || mode == IFBLK || mode == IFCHR || 1437207141Sjeff (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) 1438207141Sjeff return; 1439207141Sjeff /* Don't extend. */ 1440207141Sjeff if (size > cursize) 1441207141Sjeff size = cursize; 1442207141Sjeff lastlbn = lblkno(fs, blkroundup(fs, size)); 1443207141Sjeff for (i = lastlbn; i < NDADDR; i++) { 1444207141Sjeff if (DIP(ip, di_db[i]) == 0) 1445207141Sjeff continue; 1446207141Sjeff frags = sblksize(fs, cursize, i); 1447207141Sjeff frags = numfrags(fs, frags); 1448207141Sjeff blk_free(DIP(ip, di_db[i]), 0, frags); 1449207141Sjeff DIP_SET(ip, di_db[i], 0); 1450207141Sjeff } 1451207141Sjeff /* 1452207141Sjeff * Follow indirect blocks, freeing anything required. 1453207141Sjeff */ 1454207141Sjeff for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, 1455207141Sjeff lbn = nextlbn) { 1456207141Sjeff nextlbn = lbn + tmpval; 1457207141Sjeff tmpval *= NINDIR(fs); 1458207141Sjeff /* If we're not freeing any in this indirect range skip it. 
*/ 1459207141Sjeff if (lastlbn >= nextlbn) 1460207141Sjeff continue; 1461207141Sjeff if (DIP(ip, di_ib[i]) == 0) 1462207141Sjeff continue; 1463207141Sjeff indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn); 1464207141Sjeff /* If we freed everything in this indirect free the indir. */ 1465207141Sjeff if (lastlbn > lbn) 1466207141Sjeff continue; 1467207141Sjeff blk_free(DIP(ip, di_ib[i]), 0, frags); 1468207141Sjeff DIP_SET(ip, di_ib[i], 0); 1469207141Sjeff } 1470207141Sjeff ino_dirty(ino); 1471207141Sjeff /* 1472207141Sjeff * Now that we've freed any whole blocks that exceed the desired 1473207141Sjeff * truncation size, figure out how many blocks remain and what the 1474207141Sjeff * last populated lbn is. We will set the size to this last lbn 1475207141Sjeff * rather than worrying about allocating the final lbn as the kernel 1476207141Sjeff * would've done. This is consistent with normal fsck behavior. 1477207141Sjeff */ 1478207141Sjeff visitlbn = 0; 1479207141Sjeff totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 1480207141Sjeff if (size > lblktosize(fs, visitlbn + 1)) 1481207141Sjeff size = lblktosize(fs, visitlbn + 1); 1482207141Sjeff /* 1483207141Sjeff * If we're truncating direct blocks we have to adjust frags 1484207141Sjeff * accordingly. 
1485207141Sjeff */ 1486207141Sjeff if (visitlbn < NDADDR && totalfrags) { 1487207141Sjeff long oldspace, newspace; 1488207141Sjeff 1489207141Sjeff bn = DIP(ip, di_db[visitlbn]); 1490207141Sjeff if (bn == 0) 1491207141Sjeff errx(1, "Bad blk at ino %d lbn %jd\n", ino, visitlbn); 1492207141Sjeff oldspace = sblksize(fs, cursize, visitlbn); 1493207141Sjeff newspace = sblksize(fs, size, visitlbn); 1494207141Sjeff if (oldspace != newspace) { 1495207141Sjeff bn += numfrags(fs, newspace); 1496207141Sjeff frags = numfrags(fs, oldspace - newspace); 1497207141Sjeff blk_free(bn, 0, frags); 1498207141Sjeff totalfrags -= frags; 1499207141Sjeff } 1500207141Sjeff } 1501207141Sjeff DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags)); 1502207141Sjeff DIP_SET(ip, di_size, size); 1503207141Sjeff /* 1504207141Sjeff * If we've truncated into the middle of a block or frag we have 1505207141Sjeff * to zero it here. Otherwise the file could extend into 1506207141Sjeff * uninitialized space later. 1507207141Sjeff */ 1508207141Sjeff off = blkoff(fs, size); 1509207141Sjeff if (off) { 1510207141Sjeff uint8_t *buf; 1511207141Sjeff long clrsize; 1512207141Sjeff 1513207141Sjeff bn = ino_blkatoff(ip, ino, visitlbn, &frags); 1514207141Sjeff if (bn == 0) 1515207141Sjeff errx(1, "Block missing from ino %d at lbn %jd\n", 1516207141Sjeff ino, visitlbn); 1517207141Sjeff clrsize = frags * fs->fs_fsize; 1518207141Sjeff buf = dblk_read(bn, clrsize); 1519207141Sjeff clrsize -= off; 1520207141Sjeff buf += off; 1521207141Sjeff bzero(buf, clrsize); 1522207141Sjeff dblk_dirty(bn); 1523207141Sjeff } 1524207141Sjeff return; 1525207141Sjeff} 1526207141Sjeff 1527207141Sjeff/* 1528207141Sjeff * Process records available for one inode and determine whether the 1529207141Sjeff * link count is correct or needs adjusting. 
1530207141Sjeff */ 1531207141Sjeffstatic void 1532207141Sjeffino_check(struct suj_ino *sino) 1533207141Sjeff{ 1534207141Sjeff struct suj_rec *srec; 1535207141Sjeff struct jrefrec *rrec; 1536207141Sjeff nlink_t dotlinks; 1537207141Sjeff int newlinks; 1538207141Sjeff int removes; 1539207141Sjeff int nlink; 1540207141Sjeff ino_t ino; 1541207141Sjeff int isdot; 1542207141Sjeff int isat; 1543207141Sjeff int mode; 1544207141Sjeff 1545207141Sjeff if (sino->si_hasrecs == 0) 1546207141Sjeff return; 1547207141Sjeff ino = sino->si_ino; 1548207141Sjeff rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; 1549207141Sjeff nlink = rrec->jr_nlink; 1550207141Sjeff newlinks = 0; 1551207141Sjeff dotlinks = 0; 1552207141Sjeff removes = sino->si_nlinkadj; 1553207141Sjeff TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1554207141Sjeff rrec = (struct jrefrec *)srec->sr_rec; 1555207141Sjeff isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, 1556207141Sjeff rrec->jr_ino, &mode, &isdot); 1557207141Sjeff if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) 1558207141Sjeff errx(1, "Inode mode/directory type mismatch %o != %o", 1559207141Sjeff mode, rrec->jr_mode); 1560207141Sjeff if (debug) 1561207141Sjeff printf("jrefrec: op %d ino %d, nlink %d, parent %d, " 1562207141Sjeff "diroff %jd, mode %o, isat %d, isdot %d\n", 1563207141Sjeff rrec->jr_op, rrec->jr_ino, rrec->jr_nlink, 1564207141Sjeff rrec->jr_parent, rrec->jr_diroff, rrec->jr_mode, 1565207141Sjeff isat, isdot); 1566207141Sjeff mode = rrec->jr_mode & IFMT; 1567207141Sjeff if (rrec->jr_op == JOP_REMREF) 1568207141Sjeff removes++; 1569207141Sjeff newlinks += isat; 1570207141Sjeff if (isdot) 1571207141Sjeff dotlinks += isat; 1572207141Sjeff } 1573207141Sjeff /* 1574207141Sjeff * The number of links that remain are the starting link count 1575207141Sjeff * subtracted by the total number of removes with the total 1576207141Sjeff * links discovered back in. 
An incomplete remove thus 1577207141Sjeff * makes no change to the link count but an add increases 1578207141Sjeff * by one. 1579207141Sjeff */ 1580207141Sjeff if (debug) 1581207141Sjeff printf("ino %d nlink %d newlinks %d removes %d dotlinks %d\n", 1582207141Sjeff ino, nlink, newlinks, removes, dotlinks); 1583207141Sjeff nlink += newlinks; 1584207141Sjeff nlink -= removes; 1585207141Sjeff sino->si_linkadj = 1; 1586207141Sjeff sino->si_nlink = nlink; 1587207141Sjeff sino->si_dotlinks = dotlinks; 1588207141Sjeff sino->si_mode = mode; 1589207141Sjeff ino_adjust(sino); 1590207141Sjeff} 1591207141Sjeff 1592207141Sjeff/* 1593207141Sjeff * Process records available for one block and determine whether it is 1594207141Sjeff * still allocated and whether the owning inode needs to be updated or 1595207141Sjeff * a free completed. 1596207141Sjeff */ 1597207141Sjeffstatic void 1598207141Sjeffblk_check(struct suj_blk *sblk) 1599207141Sjeff{ 1600207141Sjeff struct suj_rec *srec; 1601207141Sjeff struct jblkrec *brec; 1602207141Sjeff struct suj_ino *sino; 1603207141Sjeff ufs2_daddr_t blk; 1604207141Sjeff int mask; 1605207141Sjeff int frags; 1606207141Sjeff int isat; 1607207141Sjeff 1608207141Sjeff /* 1609207141Sjeff * Each suj_blk actually contains records for any fragments in that 1610207141Sjeff * block. As a result we must evaluate each record individually. 
1611207141Sjeff */ 1612207141Sjeff sino = NULL; 1613207141Sjeff TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 1614207141Sjeff brec = (struct jblkrec *)srec->sr_rec; 1615207141Sjeff frags = brec->jb_frags; 1616207141Sjeff blk = brec->jb_blkno + brec->jb_oldfrags; 1617207141Sjeff isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); 1618207141Sjeff if (sino == NULL || sino->si_ino != brec->jb_ino) { 1619207141Sjeff sino = ino_lookup(brec->jb_ino, 1); 1620207141Sjeff sino->si_blkadj = 1; 1621207141Sjeff } 1622207141Sjeff if (debug) 1623207141Sjeff printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n", 1624207141Sjeff brec->jb_op, blk, brec->jb_ino, brec->jb_lbn, 1625207141Sjeff brec->jb_frags, isat, frags); 1626207141Sjeff /* 1627207141Sjeff * If we found the block at this address we still have to 1628207141Sjeff * determine if we need to free the tail end that was 1629207141Sjeff * added by adding contiguous fragments from the same block. 1630207141Sjeff */ 1631207141Sjeff if (isat == 1) { 1632207141Sjeff if (frags == brec->jb_frags) 1633207141Sjeff continue; 1634207141Sjeff mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, 1635207141Sjeff brec->jb_frags); 1636207141Sjeff mask >>= frags; 1637207141Sjeff blk += frags; 1638207141Sjeff frags = brec->jb_frags - frags; 1639207141Sjeff blk_free(blk, mask, frags); 1640207141Sjeff continue; 1641207141Sjeff } 1642207141Sjeff /* 1643207141Sjeff * The block wasn't found, attempt to free it. It won't be 1644207141Sjeff * freed if it was actually reallocated. If this was an 1645207141Sjeff * allocation we don't want to follow indirects as they 1646207141Sjeff * may not be written yet. Any children of the indirect will 1647207141Sjeff * have their own records. If it's a free we need to 1648207141Sjeff * recursively free children. 
1649207141Sjeff */ 1650207141Sjeff blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, 1651207141Sjeff brec->jb_op == JOP_FREEBLK); 1652207141Sjeff } 1653207141Sjeff} 1654207141Sjeff 1655207141Sjeff/* 1656207141Sjeff * Walk the list of inode records for this cg and resolve moved and duplicate 1657207141Sjeff * inode references now that we have a complete picture. 1658207141Sjeff */ 1659207141Sjeffstatic void 1660207141Sjeffcg_build(struct suj_cg *sc) 1661207141Sjeff{ 1662207141Sjeff struct suj_ino *sino; 1663207141Sjeff int i; 1664207141Sjeff 1665207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1666207141Sjeff LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1667207141Sjeff ino_build(sino); 1668207141Sjeff} 1669207141Sjeff 1670207141Sjeff/* 1671207141Sjeff * Handle inodes requiring truncation. This must be done prior to 1672207141Sjeff * looking up any inodes in directories. 1673207141Sjeff */ 1674207141Sjeffstatic void 1675207141Sjeffcg_trunc(struct suj_cg *sc) 1676207141Sjeff{ 1677207141Sjeff struct suj_ino *sino; 1678207141Sjeff int i; 1679207141Sjeff 1680207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1681207141Sjeff LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1682207141Sjeff if (sino->si_trunc) { 1683207141Sjeff ino_trunc(sino->si_ino, 1684207141Sjeff sino->si_trunc->jt_size); 1685207141Sjeff sino->si_trunc = NULL; 1686207141Sjeff } 1687207141Sjeff} 1688207141Sjeff 1689207141Sjeff/* 1690207141Sjeff * Free any partially allocated blocks and then resolve inode block 1691207141Sjeff * counts. 
1692207141Sjeff */ 1693207141Sjeffstatic void 1694207141Sjeffcg_check_blk(struct suj_cg *sc) 1695207141Sjeff{ 1696207141Sjeff struct suj_ino *sino; 1697207141Sjeff struct suj_blk *sblk; 1698207141Sjeff int i; 1699207141Sjeff 1700207141Sjeff 1701207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1702207141Sjeff LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) 1703207141Sjeff blk_check(sblk); 1704207141Sjeff /* 1705207141Sjeff * Now that we've freed blocks which are not referenced we 1706207141Sjeff * make a second pass over all inodes to adjust their block 1707207141Sjeff * counts. 1708207141Sjeff */ 1709207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1710207141Sjeff LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1711207141Sjeff if (sino->si_blkadj) 1712207141Sjeff ino_adjblks(sino); 1713207141Sjeff} 1714207141Sjeff 1715207141Sjeff/* 1716207141Sjeff * Walk the list of inode records for this cg, recovering any 1717207141Sjeff * changes which were not complete at the time of crash. 1718207141Sjeff */ 1719207141Sjeffstatic void 1720207141Sjeffcg_check_ino(struct suj_cg *sc) 1721207141Sjeff{ 1722207141Sjeff struct suj_ino *sino; 1723207141Sjeff int i; 1724207141Sjeff 1725207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1726207141Sjeff LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1727207141Sjeff ino_check(sino); 1728207141Sjeff} 1729207141Sjeff 1730207141Sjeff/* 1731207141Sjeff * Write a potentially dirty cg. Recalculate the summary information and 1732207141Sjeff * update the superblock summary. 1733207141Sjeff */ 1734207141Sjeffstatic void 1735207141Sjeffcg_write(struct suj_cg *sc) 1736207141Sjeff{ 1737207141Sjeff ufs1_daddr_t fragno, cgbno, maxbno; 1738207141Sjeff u_int8_t *blksfree; 1739207141Sjeff struct cg *cgp; 1740207141Sjeff int blk; 1741207141Sjeff int i; 1742207141Sjeff 1743207141Sjeff if (sc->sc_dirty == 0) 1744207141Sjeff return; 1745207141Sjeff /* 1746207141Sjeff * Fix the frag and cluster summary. 
1747207141Sjeff */ 1748207141Sjeff cgp = sc->sc_cgp; 1749207141Sjeff cgp->cg_cs.cs_nbfree = 0; 1750207141Sjeff cgp->cg_cs.cs_nffree = 0; 1751207141Sjeff bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 1752207141Sjeff maxbno = fragstoblks(fs, fs->fs_fpg); 1753207141Sjeff if (fs->fs_contigsumsize > 0) { 1754207141Sjeff for (i = 1; i <= fs->fs_contigsumsize; i++) 1755207141Sjeff cg_clustersum(cgp)[i] = 0; 1756207141Sjeff bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 1757207141Sjeff } 1758207141Sjeff blksfree = cg_blksfree(cgp); 1759207141Sjeff for (cgbno = 0; cgbno < maxbno; cgbno++) { 1760207141Sjeff if (ffs_isfreeblock(fs, blksfree, cgbno)) 1761207141Sjeff continue; 1762207141Sjeff if (ffs_isblock(fs, blksfree, cgbno)) { 1763207141Sjeff ffs_clusteracct(fs, cgp, cgbno, 1); 1764207141Sjeff cgp->cg_cs.cs_nbfree++; 1765207141Sjeff continue; 1766207141Sjeff } 1767207141Sjeff fragno = blkstofrags(fs, cgbno); 1768207141Sjeff blk = blkmap(fs, blksfree, fragno); 1769207141Sjeff ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1770207141Sjeff for (i = 0; i < fs->fs_frag; i++) 1771207141Sjeff if (isset(blksfree, fragno + i)) 1772207141Sjeff cgp->cg_cs.cs_nffree++; 1773207141Sjeff } 1774207141Sjeff /* 1775207141Sjeff * Update the superblock cg summary from our now correct values 1776207141Sjeff * before writing the block. 1777207141Sjeff */ 1778207141Sjeff fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; 1779207141Sjeff if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 1780207141Sjeff fs->fs_bsize) == -1) 1781207141Sjeff err(1, "Unable to write cylinder group %d", sc->sc_cgx); 1782207141Sjeff} 1783207141Sjeff 1784207141Sjeff/* 1785207141Sjeff * Write out any modified inodes. 
1786207141Sjeff */ 1787207141Sjeffstatic void 1788207141Sjeffcg_write_inos(struct suj_cg *sc) 1789207141Sjeff{ 1790207141Sjeff struct ino_blk *iblk; 1791207141Sjeff int i; 1792207141Sjeff 1793207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1794207141Sjeff LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) 1795207141Sjeff if (iblk->ib_dirty) 1796207141Sjeff iblk_write(iblk); 1797207141Sjeff} 1798207141Sjeff 1799207141Sjeffstatic void 1800207141Sjeffcg_apply(void (*apply)(struct suj_cg *)) 1801207141Sjeff{ 1802207141Sjeff struct suj_cg *scg; 1803207141Sjeff int i; 1804207141Sjeff 1805207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 1806207141Sjeff LIST_FOREACH(scg, &cghash[i], sc_next) 1807207141Sjeff apply(scg); 1808207141Sjeff} 1809207141Sjeff 1810207141Sjeff/* 1811207141Sjeff * Process the unlinked but referenced file list. Freeing all inodes. 1812207141Sjeff */ 1813207141Sjeffstatic void 1814207141Sjeffino_unlinked(void) 1815207141Sjeff{ 1816207141Sjeff union dinode *ip; 1817207141Sjeff uint16_t mode; 1818207141Sjeff ino_t inon; 1819207141Sjeff ino_t ino; 1820207141Sjeff 1821207141Sjeff ino = fs->fs_sujfree; 1822207141Sjeff fs->fs_sujfree = 0; 1823207141Sjeff while (ino != 0) { 1824207141Sjeff ip = ino_read(ino); 1825207141Sjeff mode = DIP(ip, di_mode) & IFMT; 1826207141Sjeff inon = DIP(ip, di_freelink); 1827207141Sjeff DIP_SET(ip, di_freelink, 0); 1828207141Sjeff /* 1829207141Sjeff * XXX Should this be an errx? 1830207141Sjeff */ 1831207141Sjeff if (DIP(ip, di_nlink) == 0) { 1832207141Sjeff if (debug) 1833207141Sjeff printf("Freeing unlinked ino %d mode %o\n", 1834207141Sjeff ino, mode); 1835207141Sjeff ino_reclaim(ip, ino, mode); 1836207141Sjeff } else if (debug) 1837207141Sjeff printf("Skipping ino %d mode %o with link %d\n", 1838207141Sjeff ino, mode, DIP(ip, di_nlink)); 1839207141Sjeff ino = inon; 1840207141Sjeff } 1841207141Sjeff} 1842207141Sjeff 1843207141Sjeff/* 1844207141Sjeff * Append a new record to the list of records requiring processing. 
1845207141Sjeff */ 1846207141Sjeffstatic void 1847207141Sjeffino_append(union jrec *rec) 1848207141Sjeff{ 1849207141Sjeff struct jrefrec *refrec; 1850207141Sjeff struct jmvrec *mvrec; 1851207141Sjeff struct suj_ino *sino; 1852207141Sjeff struct suj_rec *srec; 1853207141Sjeff 1854207141Sjeff mvrec = &rec->rec_jmvrec; 1855207141Sjeff refrec = &rec->rec_jrefrec; 1856207141Sjeff if (debug && mvrec->jm_op == JOP_MVREF) 1857207141Sjeff printf("ino move: ino %d, parent %d, diroff %jd, oldoff %jd\n", 1858207141Sjeff mvrec->jm_ino, mvrec->jm_parent, mvrec->jm_newoff, 1859207141Sjeff mvrec->jm_oldoff); 1860207141Sjeff else if (debug && 1861207141Sjeff (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF)) 1862207141Sjeff printf("ino ref: op %d, ino %d, nlink %d, " 1863207141Sjeff "parent %d, diroff %jd\n", 1864207141Sjeff refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, 1865207141Sjeff refrec->jr_parent, refrec->jr_diroff); 1866207141Sjeff /* 1867207141Sjeff * Lookup the ino and clear truncate if one is found. Partial 1868207141Sjeff * truncates are always done synchronously so if we discover 1869207141Sjeff * an operation that requires a lock the truncation has completed 1870207141Sjeff * and can be discarded. 1871207141Sjeff */ 1872207141Sjeff sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); 1873207141Sjeff sino->si_trunc = NULL; 1874207141Sjeff sino->si_hasrecs = 1; 1875207141Sjeff srec = errmalloc(sizeof(*srec)); 1876207141Sjeff srec->sr_rec = rec; 1877207141Sjeff TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); 1878207141Sjeff} 1879207141Sjeff 1880207141Sjeff/* 1881207141Sjeff * Add a reference adjustment to the sino list and eliminate dups. The 1882207141Sjeff * primary loop in ino_build_ref() checks for dups but new ones may be 1883207141Sjeff * created as a result of offset adjustments. 
1884207141Sjeff */ 1885207141Sjeffstatic void 1886207141Sjeffino_add_ref(struct suj_ino *sino, struct suj_rec *srec) 1887207141Sjeff{ 1888207141Sjeff struct jrefrec *refrec; 1889207141Sjeff struct suj_rec *srn; 1890207141Sjeff struct jrefrec *rrn; 1891207141Sjeff 1892207141Sjeff refrec = (struct jrefrec *)srec->sr_rec; 1893207141Sjeff /* 1894207141Sjeff * We walk backwards so that the oldest link count is preserved. If 1895207141Sjeff * an add record conflicts with a remove keep the remove. Redundant 1896207141Sjeff * removes are eliminated in ino_build_ref. Otherwise we keep the 1897207141Sjeff * oldest record at a given location. 1898207141Sjeff */ 1899207141Sjeff for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 1900207141Sjeff srn = TAILQ_PREV(srn, srechd, sr_next)) { 1901207141Sjeff rrn = (struct jrefrec *)srn->sr_rec; 1902207141Sjeff if (rrn->jr_parent != refrec->jr_parent || 1903207141Sjeff rrn->jr_diroff != refrec->jr_diroff) 1904207141Sjeff continue; 1905207141Sjeff if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) { 1906207141Sjeff rrn->jr_mode = refrec->jr_mode; 1907207141Sjeff return; 1908207141Sjeff } 1909207141Sjeff /* 1910207141Sjeff * Adding a remove. 1911207141Sjeff * 1912207141Sjeff * Replace the record in place with the old nlink in case 1913207141Sjeff * we replace the head of the list. Abandon srec as a dup. 1914207141Sjeff */ 1915207141Sjeff refrec->jr_nlink = rrn->jr_nlink; 1916207141Sjeff srn->sr_rec = srec->sr_rec; 1917207141Sjeff return; 1918207141Sjeff } 1919207141Sjeff TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); 1920207141Sjeff} 1921207141Sjeff 1922207141Sjeff/* 1923207141Sjeff * Create a duplicate of a reference at a previous location. 
1924207141Sjeff */ 1925207141Sjeffstatic void 1926207141Sjeffino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff) 1927207141Sjeff{ 1928207141Sjeff struct jrefrec *rrn; 1929207141Sjeff struct suj_rec *srn; 1930207141Sjeff 1931207141Sjeff rrn = errmalloc(sizeof(*refrec)); 1932207141Sjeff *rrn = *refrec; 1933207141Sjeff rrn->jr_op = JOP_ADDREF; 1934207141Sjeff rrn->jr_diroff = diroff; 1935207141Sjeff srn = errmalloc(sizeof(*srn)); 1936207141Sjeff srn->sr_rec = (union jrec *)rrn; 1937207141Sjeff ino_add_ref(sino, srn); 1938207141Sjeff} 1939207141Sjeff 1940207141Sjeff/* 1941207141Sjeff * Add a reference to the list at all known locations. We follow the offset 1942207141Sjeff * changes for a single instance and create duplicate add refs at each so 1943207141Sjeff * that we can tolerate any version of the directory block. Eliminate 1944207141Sjeff * removes which collide with adds that are seen in the journal. They should 1945207141Sjeff * not adjust the link count down. 1946207141Sjeff */ 1947207141Sjeffstatic void 1948207141Sjeffino_build_ref(struct suj_ino *sino, struct suj_rec *srec) 1949207141Sjeff{ 1950207141Sjeff struct jrefrec *refrec; 1951207141Sjeff struct jmvrec *mvrec; 1952207141Sjeff struct suj_rec *srp; 1953207141Sjeff struct suj_rec *srn; 1954207141Sjeff struct jrefrec *rrn; 1955207141Sjeff off_t diroff; 1956207141Sjeff 1957207141Sjeff refrec = (struct jrefrec *)srec->sr_rec; 1958207141Sjeff /* 1959207141Sjeff * Search for a mvrec that matches this offset. Whether it's an add 1960207141Sjeff * or a remove we can delete the mvref after creating a dup record in 1961207141Sjeff * the old location. 
1962207141Sjeff */ 1963207141Sjeff if (!TAILQ_EMPTY(&sino->si_movs)) { 1964207141Sjeff diroff = refrec->jr_diroff; 1965207141Sjeff for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) { 1966207141Sjeff srp = TAILQ_PREV(srn, srechd, sr_next); 1967207141Sjeff mvrec = (struct jmvrec *)srn->sr_rec; 1968207141Sjeff if (mvrec->jm_parent != refrec->jr_parent || 1969207141Sjeff mvrec->jm_newoff != diroff) 1970207141Sjeff continue; 1971207141Sjeff diroff = mvrec->jm_oldoff; 1972207141Sjeff TAILQ_REMOVE(&sino->si_movs, srn, sr_next); 1973207141Sjeff ino_dup_ref(sino, refrec, diroff); 1974207141Sjeff } 1975207141Sjeff } 1976207141Sjeff /* 1977207141Sjeff * If a remove wasn't eliminated by an earlier add just append it to 1978207141Sjeff * the list. 1979207141Sjeff */ 1980207141Sjeff if (refrec->jr_op == JOP_REMREF) { 1981207141Sjeff ino_add_ref(sino, srec); 1982207141Sjeff return; 1983207141Sjeff } 1984207141Sjeff /* 1985207141Sjeff * Walk the list of records waiting to be added to the list. We 1986207141Sjeff * must check for moves that apply to our current offset and remove 1987207141Sjeff * them from the list. Remove any duplicates to eliminate removes 1988207141Sjeff * with corresponding adds. 1989207141Sjeff */ 1990207141Sjeff TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) { 1991207141Sjeff switch (srn->sr_rec->rec_jrefrec.jr_op) { 1992207141Sjeff case JOP_ADDREF: 1993207141Sjeff /* 1994207141Sjeff * This should actually be an error we should 1995207141Sjeff * have a remove for every add journaled. 1996207141Sjeff */ 1997207141Sjeff rrn = (struct jrefrec *)srn->sr_rec; 1998207141Sjeff if (rrn->jr_parent != refrec->jr_parent || 1999207141Sjeff rrn->jr_diroff != refrec->jr_diroff) 2000207141Sjeff break; 2001207141Sjeff TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2002207141Sjeff break; 2003207141Sjeff case JOP_REMREF: 2004207141Sjeff /* 2005207141Sjeff * Once we remove the current iteration of the 2006207141Sjeff * record at this address we're done. 
2007207141Sjeff */ 2008207141Sjeff rrn = (struct jrefrec *)srn->sr_rec; 2009207141Sjeff if (rrn->jr_parent != refrec->jr_parent || 2010207141Sjeff rrn->jr_diroff != refrec->jr_diroff) 2011207141Sjeff break; 2012207141Sjeff TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2013207141Sjeff ino_add_ref(sino, srec); 2014207141Sjeff return; 2015207141Sjeff case JOP_MVREF: 2016207141Sjeff /* 2017207141Sjeff * Update our diroff based on any moves that match 2018207141Sjeff * and remove the move. 2019207141Sjeff */ 2020207141Sjeff mvrec = (struct jmvrec *)srn->sr_rec; 2021207141Sjeff if (mvrec->jm_parent != refrec->jr_parent || 2022207141Sjeff mvrec->jm_oldoff != refrec->jr_diroff) 2023207141Sjeff break; 2024207141Sjeff ino_dup_ref(sino, refrec, mvrec->jm_oldoff); 2025207141Sjeff refrec->jr_diroff = mvrec->jm_newoff; 2026207141Sjeff TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2027207141Sjeff break; 2028207141Sjeff default: 2029207141Sjeff errx(1, "ino_build_ref: Unknown op %d", 2030207141Sjeff srn->sr_rec->rec_jrefrec.jr_op); 2031207141Sjeff } 2032207141Sjeff } 2033207141Sjeff ino_add_ref(sino, srec); 2034207141Sjeff} 2035207141Sjeff 2036207141Sjeff/* 2037207141Sjeff * Walk the list of new records and add them in-order resolving any 2038207141Sjeff * dups and adjusted offsets. 2039207141Sjeff */ 2040207141Sjeffstatic void 2041207141Sjeffino_build(struct suj_ino *sino) 2042207141Sjeff{ 2043207141Sjeff struct suj_rec *srec; 2044207141Sjeff 2045207141Sjeff while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) { 2046207141Sjeff TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next); 2047207141Sjeff switch (srec->sr_rec->rec_jrefrec.jr_op) { 2048207141Sjeff case JOP_ADDREF: 2049207141Sjeff case JOP_REMREF: 2050207141Sjeff ino_build_ref(sino, srec); 2051207141Sjeff break; 2052207141Sjeff case JOP_MVREF: 2053207141Sjeff /* 2054207141Sjeff * Add this mvrec to the queue of pending mvs. 
2055207141Sjeff */ 2056207141Sjeff TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); 2057207141Sjeff break; 2058207141Sjeff default: 2059207141Sjeff errx(1, "ino_build: Unknown op %d", 2060207141Sjeff srec->sr_rec->rec_jrefrec.jr_op); 2061207141Sjeff } 2062207141Sjeff } 2063207141Sjeff if (TAILQ_EMPTY(&sino->si_recs)) 2064207141Sjeff sino->si_hasrecs = 0; 2065207141Sjeff} 2066207141Sjeff 2067207141Sjeff/* 2068207141Sjeff * Modify journal records so they refer to the base block number 2069207141Sjeff * and a start and end frag range. This is to facilitate the discovery 2070207141Sjeff * of overlapping fragment allocations. 2071207141Sjeff */ 2072207141Sjeffstatic void 2073207141Sjeffblk_build(struct jblkrec *blkrec) 2074207141Sjeff{ 2075207141Sjeff struct suj_rec *srec; 2076207141Sjeff struct suj_blk *sblk; 2077207141Sjeff struct jblkrec *blkrn; 2078207141Sjeff struct suj_ino *sino; 2079207141Sjeff ufs2_daddr_t blk; 2080207141Sjeff off_t foff; 2081207141Sjeff int frag; 2082207141Sjeff 2083207141Sjeff if (debug) 2084207141Sjeff printf("blk_build: op %d blkno %jd frags %d oldfrags %d " 2085207141Sjeff "ino %d lbn %jd\n", 2086207141Sjeff blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, 2087207141Sjeff blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); 2088207141Sjeff 2089207141Sjeff /* 2090207141Sjeff * Look up the inode and clear the truncate if any lbns after the 2091207141Sjeff * truncate lbn are freed or allocated. 
2092207141Sjeff */ 2093207141Sjeff sino = ino_lookup(blkrec->jb_ino, 0); 2094207141Sjeff if (sino && sino->si_trunc) { 2095207141Sjeff foff = lblktosize(fs, blkrec->jb_lbn); 2096207141Sjeff foff += lfragtosize(fs, blkrec->jb_frags); 2097207141Sjeff if (foff > sino->si_trunc->jt_size) 2098207141Sjeff sino->si_trunc = NULL; 2099207141Sjeff } 2100207141Sjeff blk = blknum(fs, blkrec->jb_blkno); 2101207141Sjeff frag = fragnum(fs, blkrec->jb_blkno); 2102207141Sjeff sblk = blk_lookup(blk, 1); 2103207141Sjeff /* 2104207141Sjeff * Rewrite the record using oldfrags to indicate the offset into 2105207141Sjeff * the block. Leave jb_frags as the actual allocated count. 2106207141Sjeff */ 2107207141Sjeff blkrec->jb_blkno -= frag; 2108207141Sjeff blkrec->jb_oldfrags = frag; 2109207141Sjeff if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) 2110207141Sjeff errx(1, "Invalid fragment count %d oldfrags %d", 2111207141Sjeff blkrec->jb_frags, frag); 2112207141Sjeff /* 2113207141Sjeff * Detect dups. If we detect a dup we always discard the oldest 2114207141Sjeff * record as it is superseded by the new record. This speeds up 2115207141Sjeff * later stages but also eliminates free records which are used 2116207141Sjeff * to indicate that the contents of indirects can be trusted. 2117207141Sjeff */ 2118207141Sjeff TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 2119207141Sjeff blkrn = (struct jblkrec *)srec->sr_rec; 2120207141Sjeff if (blkrn->jb_ino != blkrec->jb_ino || 2121207141Sjeff blkrn->jb_lbn != blkrec->jb_lbn || 2122207141Sjeff blkrn->jb_blkno != blkrec->jb_blkno || 2123207141Sjeff blkrn->jb_frags != blkrec->jb_frags || 2124207141Sjeff blkrn->jb_oldfrags != blkrec->jb_oldfrags) 2125207141Sjeff continue; 2126207141Sjeff if (debug) 2127207141Sjeff printf("Removed dup.\n"); 2128207141Sjeff /* Discard the free which is a dup with an alloc. 
*/ 2129207141Sjeff if (blkrec->jb_op == JOP_FREEBLK) 2130207141Sjeff return; 2131207141Sjeff TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); 2132207141Sjeff free(srec); 2133207141Sjeff break; 2134207141Sjeff } 2135207141Sjeff srec = errmalloc(sizeof(*srec)); 2136207141Sjeff srec->sr_rec = (union jrec *)blkrec; 2137207141Sjeff TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); 2138207141Sjeff} 2139207141Sjeff 2140207141Sjeffstatic void 2141207141Sjeffino_build_trunc(struct jtrncrec *rec) 2142207141Sjeff{ 2143207141Sjeff struct suj_ino *sino; 2144207141Sjeff 2145207141Sjeff if (debug) 2146207141Sjeff printf("ino_build_trunc: ino %d, size %jd\n", 2147207141Sjeff rec->jt_ino, rec->jt_size); 2148207141Sjeff sino = ino_lookup(rec->jt_ino, 1); 2149207141Sjeff sino->si_trunc = rec; 2150207141Sjeff} 2151207141Sjeff 2152207141Sjeff/* 2153207141Sjeff * Build up tables of the operations we need to recover. 2154207141Sjeff */ 2155207141Sjeffstatic void 2156207141Sjeffsuj_build(void) 2157207141Sjeff{ 2158207141Sjeff struct suj_seg *seg; 2159207141Sjeff union jrec *rec; 2160207141Sjeff int off; 2161207141Sjeff int i; 2162207141Sjeff 2163207141Sjeff TAILQ_FOREACH(seg, &allsegs, ss_next) { 2164207141Sjeff if (debug) 2165207141Sjeff printf("seg %jd has %d records, oldseq %jd.\n", 2166207141Sjeff seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, 2167207141Sjeff seg->ss_rec.jsr_oldest); 2168207141Sjeff off = 0; 2169207141Sjeff rec = (union jrec *)seg->ss_blk; 2170207141Sjeff for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) { 2171207141Sjeff /* skip the segrec. 
*/ 2172207141Sjeff if ((off % DEV_BSIZE) == 0) 2173207141Sjeff continue; 2174207141Sjeff switch (rec->rec_jrefrec.jr_op) { 2175207141Sjeff case JOP_ADDREF: 2176207141Sjeff case JOP_REMREF: 2177207141Sjeff case JOP_MVREF: 2178207141Sjeff ino_append(rec); 2179207141Sjeff break; 2180207141Sjeff case JOP_NEWBLK: 2181207141Sjeff case JOP_FREEBLK: 2182207141Sjeff blk_build((struct jblkrec *)rec); 2183207141Sjeff break; 2184207141Sjeff case JOP_TRUNC: 2185207141Sjeff ino_build_trunc((struct jtrncrec *)rec); 2186207141Sjeff break; 2187207141Sjeff default: 2188207141Sjeff errx(1, "Unknown journal operation %d (%d)", 2189207141Sjeff rec->rec_jrefrec.jr_op, off); 2190207141Sjeff } 2191207141Sjeff i++; 2192207141Sjeff } 2193207141Sjeff } 2194207141Sjeff} 2195207141Sjeff 2196207141Sjeff/* 2197207141Sjeff * Prune the journal segments to those we care about based on the 2198207141Sjeff * oldest sequence in the newest segment. Order the segment list 2199207141Sjeff * based on sequence number. 2200207141Sjeff */ 2201207141Sjeffstatic void 2202207141Sjeffsuj_prune(void) 2203207141Sjeff{ 2204207141Sjeff struct suj_seg *seg; 2205207141Sjeff struct suj_seg *segn; 2206207141Sjeff uint64_t newseq; 2207207141Sjeff int discard; 2208207141Sjeff 2209207141Sjeff if (debug) 2210207141Sjeff printf("Pruning up to %jd\n", oldseq); 2211207141Sjeff /* First free the expired segments. */ 2212207141Sjeff TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2213207141Sjeff if (seg->ss_rec.jsr_seq >= oldseq) 2214207141Sjeff continue; 2215207141Sjeff TAILQ_REMOVE(&allsegs, seg, ss_next); 2216207141Sjeff free(seg->ss_blk); 2217207141Sjeff free(seg); 2218207141Sjeff } 2219207141Sjeff /* Next ensure that segments are ordered properly. 
*/ 2220207141Sjeff seg = TAILQ_FIRST(&allsegs); 2221207141Sjeff if (seg == NULL) { 2222207141Sjeff if (debug) 2223207141Sjeff printf("Empty journal\n"); 2224207141Sjeff return; 2225207141Sjeff } 2226207141Sjeff newseq = seg->ss_rec.jsr_seq; 2227207141Sjeff for (;;) { 2228207141Sjeff seg = TAILQ_LAST(&allsegs, seghd); 2229207141Sjeff if (seg->ss_rec.jsr_seq >= newseq) 2230207141Sjeff break; 2231207141Sjeff TAILQ_REMOVE(&allsegs, seg, ss_next); 2232207141Sjeff TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); 2233207141Sjeff newseq = seg->ss_rec.jsr_seq; 2234207141Sjeff 2235207141Sjeff } 2236207141Sjeff if (newseq != oldseq) 2237207141Sjeff errx(1, "Journal file sequence mismatch %jd != %jd", 2238207141Sjeff newseq, oldseq); 2239207141Sjeff /* 2240207141Sjeff * The kernel may asynchronously write segments which can create 2241207141Sjeff * gaps in the sequence space. Throw away any segments after the 2242207141Sjeff * gap as the kernel guarantees only those that are contiguously 2243207141Sjeff * reachable are marked as completed. 2244207141Sjeff */ 2245207141Sjeff discard = 0; 2246207141Sjeff TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2247207141Sjeff if (!discard && newseq++ == seg->ss_rec.jsr_seq) { 2248207141Sjeff jrecs += seg->ss_rec.jsr_cnt; 2249207141Sjeff jbytes += seg->ss_rec.jsr_blocks * DEV_BSIZE; 2250207141Sjeff continue; 2251207141Sjeff } 2252207141Sjeff discard = 1; 2253207141Sjeff if (debug) 2254207141Sjeff printf("Journal order mismatch %jd != %jd pruning\n", 2255207141Sjeff newseq-1, seg->ss_rec.jsr_seq); 2256207141Sjeff TAILQ_REMOVE(&allsegs, seg, ss_next); 2257207141Sjeff free(seg->ss_blk); 2258207141Sjeff free(seg); 2259207141Sjeff } 2260207141Sjeff if (debug) 2261207141Sjeff printf("Processing journal segments from %jd to %jd\n", 2262207141Sjeff oldseq, newseq-1); 2263207141Sjeff} 2264207141Sjeff 2265207141Sjeff/* 2266207141Sjeff * Verify the journal inode before attempting to read records. 
2267207141Sjeff */ 2268207141Sjeffstatic int 2269207141Sjeffsuj_verifyino(union dinode *ip) 2270207141Sjeff{ 2271207141Sjeff 2272207141Sjeff if (DIP(ip, di_nlink) != 1) { 2273207141Sjeff printf("Invalid link count %d for journal inode %d\n", 2274207141Sjeff DIP(ip, di_nlink), sujino); 2275207141Sjeff return (-1); 2276207141Sjeff } 2277207141Sjeff 2278207141Sjeff if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != 2279207141Sjeff (SF_IMMUTABLE | SF_NOUNLINK)) { 2280207141Sjeff printf("Invalid flags 0x%X for journal inode %d\n", 2281207141Sjeff DIP(ip, di_flags), sujino); 2282207141Sjeff return (-1); 2283207141Sjeff } 2284207141Sjeff 2285207141Sjeff if (DIP(ip, di_mode) != (IFREG | IREAD)) { 2286207141Sjeff printf("Invalid mode %o for journal inode %d\n", 2287207141Sjeff DIP(ip, di_mode), sujino); 2288207141Sjeff return (-1); 2289207141Sjeff } 2290207141Sjeff 2291207141Sjeff if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) { 2292207141Sjeff printf("Invalid size %jd for journal inode %d\n", 2293207141Sjeff DIP(ip, di_size), sujino); 2294207141Sjeff return (-1); 2295207141Sjeff } 2296207141Sjeff 2297207141Sjeff if (DIP(ip, di_modrev) != fs->fs_mtime) { 2298207141Sjeff printf("Journal timestamp does not match fs mount time\n"); 2299207141Sjeff return (-1); 2300207141Sjeff } 2301207141Sjeff 2302207141Sjeff return (0); 2303207141Sjeff} 2304207141Sjeff 2305207141Sjeffstruct jblocks { 2306207141Sjeff struct jextent *jb_extent; /* Extent array. */ 2307207141Sjeff int jb_avail; /* Available extents. */ 2308207141Sjeff int jb_used; /* Last used extent. */ 2309207141Sjeff int jb_head; /* Allocator head. */ 2310207141Sjeff int jb_off; /* Allocator extent offset. */ 2311207141Sjeff}; 2312207141Sjeffstruct jextent { 2313207141Sjeff ufs2_daddr_t je_daddr; /* Disk block address. */ 2314207141Sjeff int je_blocks; /* Disk block count. 
*/ 2315207141Sjeff}; 2316207141Sjeff 2317207141Sjeffstruct jblocks *suj_jblocks; 2318207141Sjeff 2319207141Sjeffstatic struct jblocks * 2320207141Sjeffjblocks_create(void) 2321207141Sjeff{ 2322207141Sjeff struct jblocks *jblocks; 2323207141Sjeff int size; 2324207141Sjeff 2325207141Sjeff jblocks = errmalloc(sizeof(*jblocks)); 2326207141Sjeff jblocks->jb_avail = 10; 2327207141Sjeff jblocks->jb_used = 0; 2328207141Sjeff jblocks->jb_head = 0; 2329207141Sjeff jblocks->jb_off = 0; 2330207141Sjeff size = sizeof(struct jextent) * jblocks->jb_avail; 2331207141Sjeff jblocks->jb_extent = errmalloc(size); 2332207141Sjeff bzero(jblocks->jb_extent, size); 2333207141Sjeff 2334207141Sjeff return (jblocks); 2335207141Sjeff} 2336207141Sjeff 2337207141Sjeff/* 2338207141Sjeff * Return the next available disk block and the amount of contiguous 2339207141Sjeff * free space it contains. 2340207141Sjeff */ 2341207141Sjeffstatic ufs2_daddr_t 2342207141Sjeffjblocks_next(struct jblocks *jblocks, int bytes, int *actual) 2343207141Sjeff{ 2344207141Sjeff struct jextent *jext; 2345207141Sjeff ufs2_daddr_t daddr; 2346207141Sjeff int freecnt; 2347207141Sjeff int blocks; 2348207141Sjeff 2349207141Sjeff blocks = bytes / DEV_BSIZE; 2350207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_head]; 2351207141Sjeff freecnt = jext->je_blocks - jblocks->jb_off; 2352207141Sjeff if (freecnt == 0) { 2353207141Sjeff jblocks->jb_off = 0; 2354207141Sjeff if (++jblocks->jb_head > jblocks->jb_used) 2355207141Sjeff return (0); 2356207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_head]; 2357207141Sjeff freecnt = jext->je_blocks; 2358207141Sjeff } 2359207141Sjeff if (freecnt > blocks) 2360207141Sjeff freecnt = blocks; 2361207141Sjeff *actual = freecnt * DEV_BSIZE; 2362207141Sjeff daddr = jext->je_daddr + jblocks->jb_off; 2363207141Sjeff 2364207141Sjeff return (daddr); 2365207141Sjeff} 2366207141Sjeff 2367207141Sjeff/* 2368207141Sjeff * Advance the allocation head by a specified number of bytes, consuming 
2369207141Sjeff * one journal segment. 2370207141Sjeff */ 2371207141Sjeffstatic void 2372207141Sjeffjblocks_advance(struct jblocks *jblocks, int bytes) 2373207141Sjeff{ 2374207141Sjeff 2375207141Sjeff jblocks->jb_off += bytes / DEV_BSIZE; 2376207141Sjeff} 2377207141Sjeff 2378207141Sjeffstatic void 2379207141Sjeffjblocks_destroy(struct jblocks *jblocks) 2380207141Sjeff{ 2381207141Sjeff 2382207141Sjeff free(jblocks->jb_extent); 2383207141Sjeff free(jblocks); 2384207141Sjeff} 2385207141Sjeff 2386207141Sjeffstatic void 2387207141Sjeffjblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) 2388207141Sjeff{ 2389207141Sjeff struct jextent *jext; 2390207141Sjeff int size; 2391207141Sjeff 2392207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_used]; 2393207141Sjeff /* Adding the first block. */ 2394207141Sjeff if (jext->je_daddr == 0) { 2395207141Sjeff jext->je_daddr = daddr; 2396207141Sjeff jext->je_blocks = blocks; 2397207141Sjeff return; 2398207141Sjeff } 2399207141Sjeff /* Extending the last extent. */ 2400207141Sjeff if (jext->je_daddr + jext->je_blocks == daddr) { 2401207141Sjeff jext->je_blocks += blocks; 2402207141Sjeff return; 2403207141Sjeff } 2404207141Sjeff /* Adding a new extent. */ 2405207141Sjeff if (++jblocks->jb_used == jblocks->jb_avail) { 2406207141Sjeff jblocks->jb_avail *= 2; 2407207141Sjeff size = sizeof(struct jextent) * jblocks->jb_avail; 2408207141Sjeff jext = errmalloc(size); 2409207141Sjeff bzero(jext, size); 2410207141Sjeff bcopy(jblocks->jb_extent, jext, 2411207141Sjeff sizeof(struct jextent) * jblocks->jb_used); 2412207141Sjeff free(jblocks->jb_extent); 2413207141Sjeff jblocks->jb_extent = jext; 2414207141Sjeff } 2415207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_used]; 2416207141Sjeff jext->je_daddr = daddr; 2417207141Sjeff jext->je_blocks = blocks; 2418207141Sjeff 2419207141Sjeff return; 2420207141Sjeff} 2421207141Sjeff 2422207141Sjeff/* 2423207141Sjeff * Add a file block from the journal to the extent map. 
We can't read 2424207141Sjeff * each file block individually because the kernel treats it as a circular 2425207141Sjeff * buffer and segments may span mutliple contiguous blocks. 2426207141Sjeff */ 2427207141Sjeffstatic void 2428207141Sjeffsuj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 2429207141Sjeff{ 2430207141Sjeff 2431207141Sjeff jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); 2432207141Sjeff} 2433207141Sjeff 2434207141Sjeffstatic void 2435207141Sjeffsuj_read(void) 2436207141Sjeff{ 2437207141Sjeff uint8_t block[1 * 1024 * 1024]; 2438207141Sjeff struct suj_seg *seg; 2439207141Sjeff struct jsegrec *recn; 2440207141Sjeff struct jsegrec *rec; 2441207141Sjeff ufs2_daddr_t blk; 2442207141Sjeff int readsize; 2443207141Sjeff int blocks; 2444207141Sjeff int recsize; 2445207141Sjeff int size; 2446207141Sjeff int i; 2447207141Sjeff 2448207141Sjeff /* 2449207141Sjeff * Read records until we exhaust the journal space. If we find 2450207141Sjeff * an invalid record we start searching for a valid segment header 2451207141Sjeff * at the next block. This is because we don't have a head/tail 2452207141Sjeff * pointer and must recover the information indirectly. At the gap 2453207141Sjeff * between the head and tail we won't necessarily have a valid 2454207141Sjeff * segment. 2455207141Sjeff */ 2456207141Sjeffrestart: 2457207141Sjeff for (;;) { 2458207141Sjeff size = sizeof(block); 2459207141Sjeff blk = jblocks_next(suj_jblocks, size, &readsize); 2460207141Sjeff if (blk == 0) 2461207141Sjeff return; 2462207141Sjeff size = readsize; 2463207141Sjeff /* 2464207141Sjeff * Read 1MB at a time and scan for records within this block. 
2465207141Sjeff */ 2466207141Sjeff if (bread(disk, blk, &block, size) == -1) 2467207141Sjeff err(1, "Error reading journal block %jd", 2468207141Sjeff (intmax_t)blk); 2469207141Sjeff for (rec = (void *)block; size; size -= recsize, 2470207141Sjeff rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { 2471207141Sjeff recsize = DEV_BSIZE; 2472207141Sjeff if (rec->jsr_time != fs->fs_mtime) { 2473207141Sjeff if (debug) 2474207141Sjeff printf("Rec time %jd != fs mtime %jd\n", 2475207141Sjeff rec->jsr_time, fs->fs_mtime); 2476207141Sjeff jblocks_advance(suj_jblocks, recsize); 2477207141Sjeff continue; 2478207141Sjeff } 2479207141Sjeff if (rec->jsr_cnt == 0) { 2480207141Sjeff if (debug) 2481207141Sjeff printf("Found illegal count %d\n", 2482207141Sjeff rec->jsr_cnt); 2483207141Sjeff jblocks_advance(suj_jblocks, recsize); 2484207141Sjeff continue; 2485207141Sjeff } 2486207141Sjeff blocks = rec->jsr_blocks; 2487207141Sjeff recsize = blocks * DEV_BSIZE; 2488207141Sjeff if (recsize > size) { 2489207141Sjeff /* 2490207141Sjeff * We may just have run out of buffer, restart 2491207141Sjeff * the loop to re-read from this spot. 2492207141Sjeff */ 2493207141Sjeff if (size < fs->fs_bsize && 2494207141Sjeff size != readsize && 2495207141Sjeff recsize <= fs->fs_bsize) 2496207141Sjeff goto restart; 2497207141Sjeff if (debug) 2498207141Sjeff printf("Found invalid segsize %d > %d\n", 2499207141Sjeff recsize, size); 2500207141Sjeff recsize = DEV_BSIZE; 2501207141Sjeff jblocks_advance(suj_jblocks, recsize); 2502207141Sjeff continue; 2503207141Sjeff } 2504207141Sjeff /* 2505207141Sjeff * Verify that all blocks in the segment are present. 
2506207141Sjeff */ 2507207141Sjeff for (i = 1; i < blocks; i++) { 2508207141Sjeff recn = (void *) 2509207141Sjeff ((uintptr_t)rec) + i * DEV_BSIZE; 2510207141Sjeff if (recn->jsr_seq == rec->jsr_seq && 2511207141Sjeff recn->jsr_time == rec->jsr_time) 2512207141Sjeff continue; 2513207141Sjeff if (debug) 2514207141Sjeff printf("Incomplete record %jd (%d)\n", 2515207141Sjeff rec->jsr_seq, i); 2516207141Sjeff recsize = i * DEV_BSIZE; 2517207141Sjeff jblocks_advance(suj_jblocks, recsize); 2518207141Sjeff goto restart; 2519207141Sjeff } 2520207141Sjeff seg = errmalloc(sizeof(*seg)); 2521207141Sjeff seg->ss_blk = errmalloc(recsize); 2522207141Sjeff seg->ss_rec = *rec; 2523207141Sjeff bcopy((void *)rec, seg->ss_blk, recsize); 2524207141Sjeff if (rec->jsr_oldest > oldseq) 2525207141Sjeff oldseq = rec->jsr_oldest; 2526207141Sjeff TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); 2527207141Sjeff jblocks_advance(suj_jblocks, recsize); 2528207141Sjeff } 2529207141Sjeff } 2530207141Sjeff} 2531207141Sjeff 2532207141Sjeff/* 2533207141Sjeff * Search a directory block for the SUJ_FILE. 
2534207141Sjeff */ 2535207141Sjeffstatic void 2536207141Sjeffsuj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 2537207141Sjeff{ 2538207141Sjeff char block[MAXBSIZE]; 2539207141Sjeff struct direct *dp; 2540207141Sjeff int bytes; 2541207141Sjeff int off; 2542207141Sjeff 2543207141Sjeff if (sujino) 2544207141Sjeff return; 2545207141Sjeff bytes = lfragtosize(fs, frags); 2546207141Sjeff if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0) 2547207141Sjeff err(1, "Failed to read ROOTINO directory block %jd", blk); 2548207141Sjeff for (off = 0; off < bytes; off += dp->d_reclen) { 2549207141Sjeff dp = (struct direct *)&block[off]; 2550207141Sjeff if (dp->d_reclen == 0) 2551207141Sjeff break; 2552207141Sjeff if (dp->d_ino == 0) 2553207141Sjeff continue; 2554207141Sjeff if (dp->d_namlen != strlen(SUJ_FILE)) 2555207141Sjeff continue; 2556207141Sjeff if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) 2557207141Sjeff continue; 2558207141Sjeff sujino = dp->d_ino; 2559207141Sjeff return; 2560207141Sjeff } 2561207141Sjeff} 2562207141Sjeff 2563207141Sjeff/* 2564207141Sjeff * Orchestrate the verification of a filesystem via the softupdates journal. 2565207141Sjeff */ 2566207141Sjeffint 2567207141Sjeffsuj_check(const char *filesys) 2568207141Sjeff{ 2569207141Sjeff union dinode *jip; 2570207141Sjeff union dinode *ip; 2571207141Sjeff uint64_t blocks; 2572207141Sjeff 2573207141Sjeff opendisk(filesys); 2574207141Sjeff TAILQ_INIT(&allsegs); 2575207141Sjeff /* 2576207141Sjeff * Find the journal inode. 2577207141Sjeff */ 2578207141Sjeff ip = ino_read(ROOTINO); 2579207141Sjeff sujino = 0; 2580207141Sjeff ino_visit(ip, ROOTINO, suj_find, 0); 2581207141Sjeff if (sujino == 0) 2582207141Sjeff errx(1, "Journal inode removed. Use tunefs to re-create."); 2583207141Sjeff /* 2584207141Sjeff * Fetch the journal inode and verify it. 
2585207141Sjeff */ 2586207141Sjeff jip = ino_read(sujino); 2587207141Sjeff printf("** SU+J Recovering %s\n", filesys); 2588207141Sjeff if (suj_verifyino(jip) != 0) 2589207141Sjeff return (-1); 2590207141Sjeff /* 2591207141Sjeff * Build a list of journal blocks in jblocks before parsing the 2592207141Sjeff * available journal blocks in with suj_read(). 2593207141Sjeff */ 2594207141Sjeff printf("** Reading %jd byte journal from inode %d.\n", 2595207141Sjeff DIP(jip, di_size), sujino); 2596207141Sjeff suj_jblocks = jblocks_create(); 2597207141Sjeff blocks = ino_visit(jip, sujino, suj_add_block, 0); 2598207141Sjeff if (blocks != numfrags(fs, DIP(jip, di_size))) 2599207141Sjeff errx(1, "Sparse journal inode %d.\n", sujino); 2600207141Sjeff suj_read(); 2601207141Sjeff jblocks_destroy(suj_jblocks); 2602207141Sjeff suj_jblocks = NULL; 2603207141Sjeff if (preen || reply("RECOVER")) { 2604207141Sjeff printf("** Building recovery table.\n"); 2605207141Sjeff suj_prune(); 2606207141Sjeff suj_build(); 2607207141Sjeff cg_apply(cg_build); 2608207141Sjeff printf("** Resolving unreferenced inode list.\n"); 2609207141Sjeff ino_unlinked(); 2610207141Sjeff printf("** Processing journal entries.\n"); 2611207141Sjeff cg_apply(cg_trunc); 2612207141Sjeff cg_apply(cg_check_blk); 2613207141Sjeff cg_apply(cg_check_ino); 2614207141Sjeff } 2615207141Sjeff if (preen == 0 && reply("WRITE CHANGES") == 0) 2616207141Sjeff return (0); 2617207141Sjeff /* 2618207141Sjeff * To remain idempotent with partial truncations the free bitmaps 2619207141Sjeff * must be written followed by indirect blocks and lastly inode 2620207141Sjeff * blocks. This preserves access to the modified pointers until 2621207141Sjeff * they are freed. 2622207141Sjeff */ 2623207141Sjeff cg_apply(cg_write); 2624207141Sjeff dblk_write(); 2625207141Sjeff cg_apply(cg_write_inos); 2626207141Sjeff /* Write back superblock. 
*/ 2627207141Sjeff closedisk(filesys); 2628207141Sjeff printf("** %jd journal records in %jd bytes for %.2f%% utilization\n", 2629207141Sjeff jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); 2630207141Sjeff printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n", 2631207141Sjeff freeinos, freedir, freeblocks, freefrags); 2632207141Sjeff 2633207141Sjeff return (0); 2634207141Sjeff} 2635207141Sjeff/*- 2636207141Sjeff * Copyright (c) 2009 Jeffrey W. Roberson <jeff@FreeBSD.org> 2637207141Sjeff * All rights reserved. 2638207141Sjeff * 2639207141Sjeff * Redistribution and use in source and binary forms, with or without 2640207141Sjeff * modification, are permitted provided that the following conditions 2641207141Sjeff * are met: 2642207141Sjeff * 1. Redistributions of source code must retain the above copyright 2643207141Sjeff * notice, this list of conditions and the following disclaimer. 2644207141Sjeff * 2. Redistributions in binary form must reproduce the above copyright 2645207141Sjeff * notice, this list of conditions and the following disclaimer in the 2646207141Sjeff * documentation and/or other materials provided with the distribution. 2647207141Sjeff * 2648207141Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 2649207141Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2650207141Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2651207141Sjeff * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 2652207141Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2653207141Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2654207141Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2655207141Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2656207141Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2657207141Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2658207141Sjeff * SUCH DAMAGE. 2659207141Sjeff */ 2660207141Sjeff 2661207141Sjeff#include <sys/cdefs.h> 2662207141Sjeff__FBSDID("$FreeBSD: head/sbin/fsck_ffs/suj.c 207141 2010-04-24 07:05:35Z jeff $"); 2663207141Sjeff 2664207141Sjeff#include <sys/param.h> 2665207141Sjeff#include <sys/disklabel.h> 2666207141Sjeff#include <sys/mount.h> 2667207141Sjeff#include <sys/stat.h> 2668207141Sjeff 2669207141Sjeff#include <ufs/ufs/ufsmount.h> 2670207141Sjeff#include <ufs/ufs/dinode.h> 2671207141Sjeff#include <ufs/ufs/dir.h> 2672207141Sjeff#include <ufs/ffs/fs.h> 2673207141Sjeff 2674207141Sjeff#include <stdio.h> 2675207141Sjeff#include <stdlib.h> 2676207141Sjeff#include <stdint.h> 2677207141Sjeff#include <libufs.h> 2678207141Sjeff#include <strings.h> 2679207141Sjeff#include <err.h> 2680207141Sjeff#include <assert.h> 2681207141Sjeff 2682207141Sjeff#include "fsck.h" 2683207141Sjeff 2684207141Sjeffstatic void ino_decr(ino_t); 2685207141Sjeff 2686207141Sjeff#define SUJ_HASHSIZE 128 2687207141Sjeff#define SUJ_HASHMASK (SUJ_HASHSIZE - 1) 2688207141Sjeff#define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) 2689207141Sjeff 2690207141Sjeffstruct suj_seg { 2691207141Sjeff TAILQ_ENTRY(suj_seg) ss_next; 2692207141Sjeff struct jsegrec ss_rec; 2693207141Sjeff uint8_t *ss_blk; 2694207141Sjeff}; 2695207141Sjeff 2696207141Sjeffstruct suj_rec { 2697207141Sjeff TAILQ_ENTRY(suj_rec) sr_next; 2698207141Sjeff 
union jrec *sr_rec; 2699207141Sjeff}; 2700207141SjeffTAILQ_HEAD(srechd, suj_rec); 2701207141Sjeff 2702207141Sjeffstruct suj_ino { 2703207141Sjeff LIST_ENTRY(suj_ino) si_next; 2704207141Sjeff struct srechd si_recs; 2705207141Sjeff struct srechd si_movs; 2706207141Sjeff ino_t si_ino; 2707207141Sjeff int si_nlinkadj; 2708207141Sjeff int si_skipparent; 2709207141Sjeff int si_linkadj; 2710207141Sjeff int si_hasrecs; 2711207141Sjeff int si_blkadj; 2712207141Sjeff}; 2713207141SjeffLIST_HEAD(inohd, suj_ino); 2714207141Sjeff 2715207141Sjeffstruct suj_blk { 2716207141Sjeff LIST_ENTRY(suj_blk) sb_next; 2717207141Sjeff struct srechd sb_recs; 2718207141Sjeff ufs2_daddr_t sb_blk; 2719207141Sjeff}; 2720207141SjeffLIST_HEAD(blkhd, suj_blk); 2721207141Sjeff 2722207141Sjeffstruct data_blk { 2723207141Sjeff LIST_ENTRY(data_blk) db_next; 2724207141Sjeff uint8_t *db_buf; 2725207141Sjeff ufs2_daddr_t db_blk; 2726207141Sjeff int db_size; 2727207141Sjeff}; 2728207141Sjeff 2729207141Sjeffstruct ino_blk { 2730207141Sjeff LIST_ENTRY(ino_blk) ib_next; 2731207141Sjeff uint8_t *ib_buf; 2732207141Sjeff int ib_dirty; 2733207141Sjeff ufs2_daddr_t ib_blk; 2734207141Sjeff}; 2735207141SjeffLIST_HEAD(iblkhd, ino_blk); 2736207141Sjeff 2737207141Sjeffstruct suj_cg { 2738207141Sjeff LIST_ENTRY(suj_cg) sc_next; 2739207141Sjeff struct blkhd sc_blkhash[SUJ_HASHSIZE]; 2740207141Sjeff struct inohd sc_inohash[SUJ_HASHSIZE]; 2741207141Sjeff struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; 2742207141Sjeff struct ino_blk *sc_lastiblk; 2743207141Sjeff uint8_t *sc_cgbuf; 2744207141Sjeff struct cg *sc_cgp; 2745207141Sjeff int sc_dirty; 2746207141Sjeff int sc_cgx; 2747207141Sjeff}; 2748207141Sjeff 2749207141SjeffLIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; 2750207141SjeffLIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; 2751207141Sjeff 2752207141SjeffTAILQ_HEAD(seghd, suj_seg) allsegs; 2753207141Sjeffuint64_t oldseq; 2754207141Sjeffstatic struct uufsd *disk = NULL; 2755207141Sjeffstatic struct fs *fs = NULL; 2756207141Sjeff 
2757207141Sjeff/* 2758207141Sjeff * Summary statistics. 2759207141Sjeff */ 2760207141Sjeffuint64_t freefrags; 2761207141Sjeffuint64_t freeblocks; 2762207141Sjeffuint64_t freeinos; 2763207141Sjeffuint64_t freedir; 2764207141Sjeffuint64_t jbytes; 2765207141Sjeffuint64_t jrecs; 2766207141Sjeff 2767207141Sjefftypedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); 2768207141Sjeff 2769207141Sjeffstatic void * 2770207141Sjefferrmalloc(size_t n) 2771207141Sjeff{ 2772207141Sjeff void *a; 2773207141Sjeff 2774207141Sjeff a = malloc(n); 2775207141Sjeff if (a == NULL) 2776207141Sjeff errx(1, "malloc(%zu)", n); 2777207141Sjeff return (a); 2778207141Sjeff} 2779207141Sjeff 2780207141Sjeff/* 2781207141Sjeff * Open the given provider, load superblock. 2782207141Sjeff */ 2783207141Sjeffstatic void 2784207141Sjeffopendisk(const char *devnam) 2785207141Sjeff{ 2786207141Sjeff if (disk != NULL) 2787207141Sjeff return; 2788207141Sjeff disk = malloc(sizeof(*disk)); 2789207141Sjeff if (disk == NULL) 2790207141Sjeff errx(1, "malloc(%zu)", sizeof(*disk)); 2791207141Sjeff if (ufs_disk_fillout(disk, devnam) == -1) { 2792207141Sjeff err(1, "ufs_disk_fillout(%s) failed: %s", devnam, 2793207141Sjeff disk->d_error); 2794207141Sjeff } 2795207141Sjeff fs = &disk->d_fs; 2796207141Sjeff /* 2797207141Sjeff * Setup a few things so reply() can work. 2798207141Sjeff */ 2799207141Sjeff bcopy(fs, &sblock, sizeof(sblock)); 2800207141Sjeff fsreadfd = disk->d_fd; 2801207141Sjeff fswritefd = disk->d_fd; 2802207141Sjeff} 2803207141Sjeff 2804207141Sjeff/* 2805207141Sjeff * Mark file system as clean, write the super-block back, close the disk. 2806207141Sjeff */ 2807207141Sjeffstatic void 2808207141Sjeffclosedisk(const char *devnam) 2809207141Sjeff{ 2810207141Sjeff struct csum *cgsum; 2811207141Sjeff int i; 2812207141Sjeff 2813207141Sjeff /* 2814207141Sjeff * Recompute the fs summary info from correct cs summaries. 
2815207141Sjeff */ 2816207141Sjeff bzero(&fs->fs_cstotal, sizeof(struct csum_total)); 2817207141Sjeff for (i = 0; i < fs->fs_ncg; i++) { 2818207141Sjeff cgsum = &fs->fs_cs(fs, i); 2819207141Sjeff fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; 2820207141Sjeff fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; 2821207141Sjeff fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; 2822207141Sjeff fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; 2823207141Sjeff } 2824207141Sjeff /* XXX Don't set clean for now, we don't trust the journal. */ 2825207141Sjeff /* fs->fs_clean = 1; */ 2826207141Sjeff fs->fs_time = time(NULL); 2827207141Sjeff fs->fs_mtime = time(NULL); 2828207141Sjeff if (sbwrite(disk, 0) == -1) 2829207141Sjeff err(1, "sbwrite(%s)", devnam); 2830207141Sjeff if (ufs_disk_close(disk) == -1) 2831207141Sjeff err(1, "ufs_disk_close(%s)", devnam); 2832207141Sjeff free(disk); 2833207141Sjeff disk = NULL; 2834207141Sjeff fs = NULL; 2835207141Sjeff fsreadfd = -1; 2836207141Sjeff fswritefd = -1; 2837207141Sjeff} 2838207141Sjeff 2839207141Sjeff/* 2840207141Sjeff * Lookup a cg by number in the hash so we can keep track of which cgs 2841207141Sjeff * need stats rebuilt. 
2842207141Sjeff */ 2843207141Sjeffstatic struct suj_cg * 2844207141Sjeffcg_lookup(int cgx) 2845207141Sjeff{ 2846207141Sjeff struct cghd *hd; 2847207141Sjeff struct suj_cg *sc; 2848207141Sjeff 2849207141Sjeff if (cgx < 0 || cgx >= fs->fs_ncg) { 2850207141Sjeff abort(); 2851207141Sjeff errx(1, "Bad cg number %d", cgx); 2852207141Sjeff } 2853207141Sjeff hd = &cghash[SUJ_HASH(cgx)]; 2854207141Sjeff LIST_FOREACH(sc, hd, sc_next) 2855207141Sjeff if (sc->sc_cgx == cgx) 2856207141Sjeff return (sc); 2857207141Sjeff sc = errmalloc(sizeof(*sc)); 2858207141Sjeff bzero(sc, sizeof(*sc)); 2859207141Sjeff sc->sc_cgbuf = errmalloc(fs->fs_bsize); 2860207141Sjeff sc->sc_cgp = (struct cg *)sc->sc_cgbuf; 2861207141Sjeff sc->sc_cgx = cgx; 2862207141Sjeff LIST_INSERT_HEAD(hd, sc, sc_next); 2863207141Sjeff if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 2864207141Sjeff fs->fs_bsize) == -1) 2865207141Sjeff err(1, "Unable to read cylinder group %d", sc->sc_cgx); 2866207141Sjeff 2867207141Sjeff return (sc); 2868207141Sjeff} 2869207141Sjeff 2870207141Sjeff/* 2871207141Sjeff * Lookup an inode number in the hash and allocate a suj_ino if it does 2872207141Sjeff * not exist. 
2873207141Sjeff */ 2874207141Sjeffstatic struct suj_ino * 2875207141Sjeffino_lookup(ino_t ino, int creat) 2876207141Sjeff{ 2877207141Sjeff struct suj_ino *sino; 2878207141Sjeff struct inohd *hd; 2879207141Sjeff struct suj_cg *sc; 2880207141Sjeff 2881207141Sjeff sc = cg_lookup(ino_to_cg(fs, ino)); 2882207141Sjeff hd = &sc->sc_inohash[SUJ_HASH(ino)]; 2883207141Sjeff LIST_FOREACH(sino, hd, si_next) 2884207141Sjeff if (sino->si_ino == ino) 2885207141Sjeff return (sino); 2886207141Sjeff if (creat == 0) 2887207141Sjeff return (NULL); 2888207141Sjeff sino = errmalloc(sizeof(*sino)); 2889207141Sjeff bzero(sino, sizeof(*sino)); 2890207141Sjeff sino->si_ino = ino; 2891207141Sjeff sino->si_nlinkadj = 0; 2892207141Sjeff TAILQ_INIT(&sino->si_recs); 2893207141Sjeff TAILQ_INIT(&sino->si_movs); 2894207141Sjeff LIST_INSERT_HEAD(hd, sino, si_next); 2895207141Sjeff 2896207141Sjeff return (sino); 2897207141Sjeff} 2898207141Sjeff 2899207141Sjeff/* 2900207141Sjeff * Lookup a block number in the hash and allocate a suj_blk if it does 2901207141Sjeff * not exist. 
2902207141Sjeff */ 2903207141Sjeffstatic struct suj_blk * 2904207141Sjeffblk_lookup(ufs2_daddr_t blk, int creat) 2905207141Sjeff{ 2906207141Sjeff struct suj_blk *sblk; 2907207141Sjeff struct suj_cg *sc; 2908207141Sjeff struct blkhd *hd; 2909207141Sjeff 2910207141Sjeff sc = cg_lookup(dtog(fs, blk)); 2911207141Sjeff hd = &sc->sc_blkhash[SUJ_HASH(blk)]; 2912207141Sjeff LIST_FOREACH(sblk, hd, sb_next) 2913207141Sjeff if (sblk->sb_blk == blk) 2914207141Sjeff return (sblk); 2915207141Sjeff if (creat == 0) 2916207141Sjeff return (NULL); 2917207141Sjeff sblk = errmalloc(sizeof(*sblk)); 2918207141Sjeff bzero(sblk, sizeof(*sblk)); 2919207141Sjeff sblk->sb_blk = blk; 2920207141Sjeff TAILQ_INIT(&sblk->sb_recs); 2921207141Sjeff LIST_INSERT_HEAD(hd, sblk, sb_next); 2922207141Sjeff 2923207141Sjeff return (sblk); 2924207141Sjeff} 2925207141Sjeff 2926207141Sjeffstatic uint8_t * 2927207141Sjeffdblk_read(ufs2_daddr_t blk, int size) 2928207141Sjeff{ 2929207141Sjeff struct data_blk *dblk; 2930207141Sjeff struct dblkhd *hd; 2931207141Sjeff 2932207141Sjeff hd = &dbhash[SUJ_HASH(blk)]; 2933207141Sjeff LIST_FOREACH(dblk, hd, db_next) 2934207141Sjeff if (dblk->db_blk == blk) 2935207141Sjeff goto found; 2936207141Sjeff /* 2937207141Sjeff * The inode block wasn't located, allocate a new one. 2938207141Sjeff */ 2939207141Sjeff dblk = errmalloc(sizeof(*dblk)); 2940207141Sjeff bzero(dblk, sizeof(*dblk)); 2941207141Sjeff LIST_INSERT_HEAD(hd, dblk, db_next); 2942207141Sjeff dblk->db_blk = blk; 2943207141Sjefffound: 2944207141Sjeff /* 2945207141Sjeff * I doubt size mismatches can happen in practice but it is trivial 2946207141Sjeff * to handle. 
2947207141Sjeff */ 2948207141Sjeff if (size != dblk->db_size) { 2949207141Sjeff if (dblk->db_buf) 2950207141Sjeff free(dblk->db_buf); 2951207141Sjeff dblk->db_buf = errmalloc(size); 2952207141Sjeff dblk->db_size = size; 2953207141Sjeff if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1) 2954207141Sjeff err(1, "Failed to read data block %jd", blk); 2955207141Sjeff } 2956207141Sjeff return (dblk->db_buf); 2957207141Sjeff} 2958207141Sjeff 2959207141Sjeffstatic union dinode * 2960207141Sjeffino_read(ino_t ino) 2961207141Sjeff{ 2962207141Sjeff struct ino_blk *iblk; 2963207141Sjeff struct iblkhd *hd; 2964207141Sjeff struct suj_cg *sc; 2965207141Sjeff ufs2_daddr_t blk; 2966207141Sjeff int off; 2967207141Sjeff 2968207141Sjeff blk = ino_to_fsba(fs, ino); 2969207141Sjeff sc = cg_lookup(ino_to_cg(fs, ino)); 2970207141Sjeff hd = &sc->sc_iblkhash[SUJ_HASH(blk)]; 2971207141Sjeff LIST_FOREACH(iblk, hd, ib_next) 2972207141Sjeff if (iblk->ib_blk == blk) 2973207141Sjeff goto found; 2974207141Sjeff /* 2975207141Sjeff * The inode block wasn't located, allocate a new one. 
2976207141Sjeff */ 2977207141Sjeff iblk = errmalloc(sizeof(*iblk)); 2978207141Sjeff bzero(iblk, sizeof(*iblk)); 2979207141Sjeff iblk->ib_buf = errmalloc(fs->fs_bsize); 2980207141Sjeff iblk->ib_blk = blk; 2981207141Sjeff LIST_INSERT_HEAD(hd, iblk, ib_next); 2982207141Sjeff if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1) 2983207141Sjeff err(1, "Failed to read inode block %jd", blk); 2984207141Sjefffound: 2985207141Sjeff sc->sc_lastiblk = iblk; 2986207141Sjeff off = ino_to_fsbo(fs, ino); 2987207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 2988207141Sjeff return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off]; 2989207141Sjeff else 2990207141Sjeff return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off]; 2991207141Sjeff} 2992207141Sjeff 2993207141Sjeffstatic void 2994207141Sjeffino_dirty(ino_t ino) 2995207141Sjeff{ 2996207141Sjeff struct ino_blk *iblk; 2997207141Sjeff struct iblkhd *hd; 2998207141Sjeff struct suj_cg *sc; 2999207141Sjeff ufs2_daddr_t blk; 3000207141Sjeff 3001207141Sjeff blk = ino_to_fsba(fs, ino); 3002207141Sjeff sc = cg_lookup(ino_to_cg(fs, ino)); 3003207141Sjeff iblk = sc->sc_lastiblk; 3004207141Sjeff if (iblk && iblk->ib_blk == blk) { 3005207141Sjeff iblk->ib_dirty = 1; 3006207141Sjeff return; 3007207141Sjeff } 3008207141Sjeff hd = &sc->sc_iblkhash[SUJ_HASH(blk)]; 3009207141Sjeff LIST_FOREACH(iblk, hd, ib_next) { 3010207141Sjeff if (iblk->ib_blk == blk) { 3011207141Sjeff iblk->ib_dirty = 1; 3012207141Sjeff return; 3013207141Sjeff } 3014207141Sjeff } 3015207141Sjeff ino_read(ino); 3016207141Sjeff ino_dirty(ino); 3017207141Sjeff} 3018207141Sjeff 3019207141Sjeffstatic void 3020207141Sjeffiblk_write(struct ino_blk *iblk) 3021207141Sjeff{ 3022207141Sjeff 3023207141Sjeff if (iblk->ib_dirty == 0) 3024207141Sjeff return; 3025207141Sjeff if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf, 3026207141Sjeff fs->fs_bsize) == -1) 3027207141Sjeff err(1, "Failed to write inode block %jd", iblk->ib_blk); 3028207141Sjeff} 
3029207141Sjeff 3030207141Sjeff/* 3031207141Sjeff * Return 1 if the inode was free and 0 if it is allocated. 3032207141Sjeff */ 3033207141Sjeffstatic int 3034207141Sjeffino_isfree(ino_t ino) 3035207141Sjeff{ 3036207141Sjeff struct suj_cg *sc; 3037207141Sjeff uint8_t *inosused; 3038207141Sjeff struct cg *cgp; 3039207141Sjeff int cg; 3040207141Sjeff 3041207141Sjeff cg = ino_to_cg(fs, ino); 3042207141Sjeff ino = ino % fs->fs_ipg; 3043207141Sjeff sc = cg_lookup(cg); 3044207141Sjeff cgp = sc->sc_cgp; 3045207141Sjeff inosused = cg_inosused(cgp); 3046207141Sjeff return isclr(inosused, ino); 3047207141Sjeff} 3048207141Sjeff 3049207141Sjeffstatic int 3050207141Sjeffblk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) 3051207141Sjeff{ 3052207141Sjeff ufs2_daddr_t bstart; 3053207141Sjeff ufs2_daddr_t bend; 3054207141Sjeff ufs2_daddr_t end; 3055207141Sjeff 3056207141Sjeff end = start + frags; 3057207141Sjeff bstart = brec->jb_blkno + brec->jb_oldfrags; 3058207141Sjeff bend = bstart + brec->jb_frags; 3059207141Sjeff if (start < bend && end > bstart) 3060207141Sjeff return (1); 3061207141Sjeff return (0); 3062207141Sjeff} 3063207141Sjeff 3064207141Sjeffstatic int 3065207141Sjeffblk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, 3066207141Sjeff int frags) 3067207141Sjeff{ 3068207141Sjeff 3069207141Sjeff if (brec->jb_ino != ino || brec->jb_lbn != lbn) 3070207141Sjeff return (0); 3071207141Sjeff if (brec->jb_blkno + brec->jb_oldfrags != start) 3072207141Sjeff return (0); 3073207141Sjeff if (brec->jb_frags != frags) 3074207141Sjeff return (0); 3075207141Sjeff return (1); 3076207141Sjeff} 3077207141Sjeff 3078207141Sjeffstatic void 3079207141Sjeffblk_setmask(struct jblkrec *brec, int *mask) 3080207141Sjeff{ 3081207141Sjeff int i; 3082207141Sjeff 3083207141Sjeff for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) 3084207141Sjeff *mask |= 1 << i; 3085207141Sjeff} 3086207141Sjeff 3087207141Sjeff/* 3088207141Sjeff * Determine 
whether a given block has been reallocated to a new location. 3089207141Sjeff * Returns a mask of overlapping bits if any frags have been reused or 3090207141Sjeff * zero if the block has not been re-used and the contents can be trusted. 3091207141Sjeff * 3092207141Sjeff * This is used to ensure that an orphaned pointer due to truncate is safe 3093207141Sjeff * to be freed. The mask value can be used to free partial blocks. 3094207141Sjeff */ 3095207141Sjeffstatic int 3096207141Sjeffblk_isfree(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) 3097207141Sjeff{ 3098207141Sjeff struct suj_blk *sblk; 3099207141Sjeff struct suj_rec *srec; 3100207141Sjeff struct jblkrec *brec; 3101207141Sjeff int mask; 3102207141Sjeff int off; 3103207141Sjeff 3104207141Sjeff /* 3105207141Sjeff * To be certain we're not freeing a reallocated block we lookup 3106207141Sjeff * this block in the blk hash and see if there is an allocation 3107207141Sjeff * journal record that overlaps with any fragments in the block 3108207141Sjeff * we're concerned with. If any fragments have ben reallocated 3109207141Sjeff * the block has already been freed and re-used for another purpose. 3110207141Sjeff */ 3111207141Sjeff mask = 0; 3112207141Sjeff sblk = blk_lookup(blknum(fs, blk), 0); 3113207141Sjeff if (sblk == NULL) 3114207141Sjeff return (0); 3115207141Sjeff off = blk - sblk->sb_blk; 3116207141Sjeff TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 3117207141Sjeff brec = (struct jblkrec *)srec->sr_rec; 3118207141Sjeff /* 3119207141Sjeff * If the block overlaps but does not match 3120207141Sjeff * exactly it's a new allocation. If it matches 3121207141Sjeff * exactly this record refers to the current 3122207141Sjeff * location. 
3123207141Sjeff */ 3124207141Sjeff if (blk_overlaps(brec, blk, frags) == 0) 3125207141Sjeff continue; 3126207141Sjeff if (blk_equals(brec, ino, lbn, blk, frags) == 1) 3127207141Sjeff mask = 0; 3128207141Sjeff else 3129207141Sjeff blk_setmask(brec, &mask); 3130207141Sjeff } 3131207141Sjeff if (debug) 3132207141Sjeff printf("blk_isfree: blk %jd sblk %jd off %d mask 0x%X\n", 3133207141Sjeff blk, sblk->sb_blk, off, mask); 3134207141Sjeff return (mask >> off); 3135207141Sjeff} 3136207141Sjeff 3137207141Sjeff/* 3138207141Sjeff * Determine whether it is safe to follow an indirect. It is not safe 3139207141Sjeff * if any part of the indirect has been reallocated or the last journal 3140207141Sjeff * entry was an allocation. Just allocated indirects may not have valid 3141207141Sjeff * pointers yet and all of their children will have their own records. 3142207141Sjeff * 3143207141Sjeff * Returns 1 if it's safe to follow the indirect and 0 otherwise. 3144207141Sjeff */ 3145207141Sjeffstatic int 3146207141Sjeffblk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) 3147207141Sjeff{ 3148207141Sjeff struct suj_blk *sblk; 3149207141Sjeff struct jblkrec *brec; 3150207141Sjeff 3151207141Sjeff sblk = blk_lookup(blk, 0); 3152207141Sjeff if (sblk == NULL) 3153207141Sjeff return (1); 3154207141Sjeff if (TAILQ_EMPTY(&sblk->sb_recs)) 3155207141Sjeff return (1); 3156207141Sjeff brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; 3157207141Sjeff if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) 3158207141Sjeff if (brec->jb_op == JOP_FREEBLK) 3159207141Sjeff return (1); 3160207141Sjeff return (0); 3161207141Sjeff} 3162207141Sjeff 3163207141Sjeff/* 3164207141Sjeff * Clear an inode from the cg bitmap. If the inode was already clear return 3165207141Sjeff * 0 so the caller knows it does not have to check the inode contents. 
3166207141Sjeff */ 3167207141Sjeffstatic int 3168207141Sjeffino_free(ino_t ino, int mode) 3169207141Sjeff{ 3170207141Sjeff struct suj_cg *sc; 3171207141Sjeff uint8_t *inosused; 3172207141Sjeff struct cg *cgp; 3173207141Sjeff int cg; 3174207141Sjeff 3175207141Sjeff cg = ino_to_cg(fs, ino); 3176207141Sjeff ino = ino % fs->fs_ipg; 3177207141Sjeff sc = cg_lookup(cg); 3178207141Sjeff cgp = sc->sc_cgp; 3179207141Sjeff inosused = cg_inosused(cgp); 3180207141Sjeff /* 3181207141Sjeff * The bitmap may never have made it to the disk so we have to 3182207141Sjeff * conditionally clear. We can avoid writing the cg in this case. 3183207141Sjeff */ 3184207141Sjeff if (isclr(inosused, ino)) 3185207141Sjeff return (0); 3186207141Sjeff freeinos++; 3187207141Sjeff clrbit(inosused, ino); 3188207141Sjeff if (ino < cgp->cg_irotor) 3189207141Sjeff cgp->cg_irotor = ino; 3190207141Sjeff cgp->cg_cs.cs_nifree++; 3191207141Sjeff if ((mode & IFMT) == IFDIR) { 3192207141Sjeff freedir++; 3193207141Sjeff cgp->cg_cs.cs_ndir--; 3194207141Sjeff } 3195207141Sjeff sc->sc_dirty = 1; 3196207141Sjeff 3197207141Sjeff return (1); 3198207141Sjeff} 3199207141Sjeff 3200207141Sjeff/* 3201207141Sjeff * Free 'frags' frags starting at filesystem block 'bno' skipping any frags 3202207141Sjeff * set in the mask. 
3203207141Sjeff */ 3204207141Sjeffstatic void 3205207141Sjeffblk_free(ufs2_daddr_t bno, int mask, int frags) 3206207141Sjeff{ 3207207141Sjeff ufs1_daddr_t fragno, cgbno; 3208207141Sjeff struct suj_cg *sc; 3209207141Sjeff struct cg *cgp; 3210207141Sjeff int i, cg; 3211207141Sjeff uint8_t *blksfree; 3212207141Sjeff 3213207141Sjeff if (debug) 3214207141Sjeff printf("Freeing %d frags at blk %jd\n", frags, bno); 3215207141Sjeff cg = dtog(fs, bno); 3216207141Sjeff sc = cg_lookup(cg); 3217207141Sjeff cgp = sc->sc_cgp; 3218207141Sjeff cgbno = dtogd(fs, bno); 3219207141Sjeff blksfree = cg_blksfree(cgp); 3220207141Sjeff 3221207141Sjeff /* 3222207141Sjeff * If it's not allocated we only wrote the journal entry 3223207141Sjeff * and never the bitmaps. Here we unconditionally clear and 3224207141Sjeff * resolve the cg summary later. 3225207141Sjeff */ 3226207141Sjeff if (frags == fs->fs_frag && mask == 0) { 3227207141Sjeff fragno = fragstoblks(fs, cgbno); 3228207141Sjeff ffs_setblock(fs, blksfree, fragno); 3229207141Sjeff freeblocks++; 3230207141Sjeff } else { 3231207141Sjeff /* 3232207141Sjeff * deallocate the fragment 3233207141Sjeff */ 3234207141Sjeff for (i = 0; i < frags; i++) 3235207141Sjeff if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { 3236207141Sjeff freefrags++; 3237207141Sjeff setbit(blksfree, cgbno + i); 3238207141Sjeff } 3239207141Sjeff } 3240207141Sjeff sc->sc_dirty = 1; 3241207141Sjeff} 3242207141Sjeff 3243207141Sjeff/* 3244207141Sjeff * Fetch an indirect block to find the block at a given lbn. The lbn 3245207141Sjeff * may be negative to fetch a specific indirect block pointer or positive 3246207141Sjeff * to fetch a specific block. 
3247207141Sjeff */ 3248207141Sjeffstatic ufs2_daddr_t 3249207141Sjeffindir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, int level) 3250207141Sjeff{ 3251207141Sjeff ufs2_daddr_t *bap2; 3252207141Sjeff ufs2_daddr_t *bap1; 3253207141Sjeff ufs_lbn_t lbnadd; 3254207141Sjeff ufs_lbn_t base; 3255207141Sjeff int i; 3256207141Sjeff 3257207141Sjeff if (blk == 0) 3258207141Sjeff return (0); 3259207141Sjeff if (cur == lbn) 3260207141Sjeff return (blk); 3261207141Sjeff if (level == 0 && lbn < 0) { 3262207141Sjeff abort(); 3263207141Sjeff errx(1, "Invalid lbn %jd", lbn); 3264207141Sjeff } 3265207141Sjeff bap2 = (void *)dblk_read(blk, fs->fs_bsize); 3266207141Sjeff bap1 = (void *)bap2; 3267207141Sjeff lbnadd = 1; 3268207141Sjeff base = -(cur + level); 3269207141Sjeff for (i = level; i > 0; i--) 3270207141Sjeff lbnadd *= NINDIR(fs); 3271207141Sjeff if (lbn > 0) 3272207141Sjeff i = (lbn - base) / lbnadd; 3273207141Sjeff else 3274207141Sjeff i = (-lbn - base) / lbnadd; 3275207141Sjeff if (i < 0 || i >= NINDIR(fs)) { 3276207141Sjeff abort(); 3277207141Sjeff errx(1, "Invalid indirect index %d produced by lbn %jd", 3278207141Sjeff i, lbn); 3279207141Sjeff } 3280207141Sjeff if (level == 0) 3281207141Sjeff cur = base + (i * lbnadd); 3282207141Sjeff else 3283207141Sjeff cur = -(base + (i * lbnadd)) - (level - 1); 3284207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 3285207141Sjeff blk = bap1[i]; 3286207141Sjeff else 3287207141Sjeff blk = bap2[i]; 3288207141Sjeff if (cur == lbn) 3289207141Sjeff return (blk); 3290207141Sjeff if (level == 0) { 3291207141Sjeff abort(); 3292207141Sjeff errx(1, "Invalid lbn %jd at level 0", lbn); 3293207141Sjeff } 3294207141Sjeff return indir_blkatoff(blk, ino, cur, lbn, level - 1); 3295207141Sjeff} 3296207141Sjeff 3297207141Sjeff/* 3298207141Sjeff * Finds the disk block address at the specified lbn within the inode 3299207141Sjeff * specified by ip. 
This follows the whole tree and honors di_size and 3300207141Sjeff * di_extsize so it is a true test of reachability. The lbn may be 3301207141Sjeff * negative if an extattr or indirect block is requested. 3302207141Sjeff */ 3303207141Sjeffstatic ufs2_daddr_t 3304207141Sjeffino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) 3305207141Sjeff{ 3306207141Sjeff ufs_lbn_t tmpval; 3307207141Sjeff ufs_lbn_t cur; 3308207141Sjeff ufs_lbn_t next; 3309207141Sjeff int i; 3310207141Sjeff 3311207141Sjeff /* 3312207141Sjeff * Handle extattr blocks first. 3313207141Sjeff */ 3314207141Sjeff if (lbn < 0 && lbn >= -NXADDR) { 3315207141Sjeff lbn = -1 - lbn; 3316207141Sjeff if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) 3317207141Sjeff return (0); 3318207141Sjeff *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); 3319207141Sjeff return (ip->dp2.di_extb[lbn]); 3320207141Sjeff } 3321207141Sjeff /* 3322207141Sjeff * And now direct and indirect. Verify that the lbn does not 3323207141Sjeff * exceed the size required to store the file by asking for 3324207141Sjeff * the lbn of the last byte. These blocks should be 0 anyway 3325207141Sjeff * so this simply saves the traversal. 3326207141Sjeff */ 3327207141Sjeff if (lbn > 0 && lbn > lblkno(fs, DIP(ip, di_size) - 1)) 3328207141Sjeff return (0); 3329207141Sjeff if (lbn < 0 && -lbn > lblkno(fs, DIP(ip, di_size) - 1)) 3330207141Sjeff return (0); 3331207141Sjeff if (lbn >= 0 && lbn < NDADDR) { 3332207141Sjeff *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); 3333207141Sjeff return (DIP(ip, di_db[lbn])); 3334207141Sjeff } 3335207141Sjeff *frags = fs->fs_frag; 3336207141Sjeff 3337207141Sjeff for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++, 3338207141Sjeff tmpval *= NINDIR(fs), cur = next) { 3339207141Sjeff next = cur + tmpval; 3340207141Sjeff if (lbn == -cur) 3341207141Sjeff return (DIP(ip, di_ib[i])); 3342207141Sjeff /* 3343207141Sjeff * Determine whether the lbn in question is within this tree. 
3344207141Sjeff */ 3345207141Sjeff if (lbn < 0 && -lbn >= next) 3346207141Sjeff continue; 3347207141Sjeff if (lbn > 0 && lbn >= next) 3348207141Sjeff continue; 3349207141Sjeff 3350207141Sjeff return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn, i); 3351207141Sjeff } 3352207141Sjeff errx(1, "lbn %jd not in ino", lbn); 3353207141Sjeff} 3354207141Sjeff 3355207141Sjeff/* 3356207141Sjeff * Determine whether a block exists at a particular lbn in an inode. 3357207141Sjeff * Returns 1 if found, 0 if not. lbn may be negative for indirects 3358207141Sjeff * or ext blocks. 3359207141Sjeff */ 3360207141Sjeffstatic int 3361207141Sjeffblk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) 3362207141Sjeff{ 3363207141Sjeff union dinode *ip; 3364207141Sjeff ufs2_daddr_t nblk; 3365207141Sjeff 3366207141Sjeff ip = ino_read(ino); 3367207141Sjeff 3368207141Sjeff if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) 3369207141Sjeff return (0); 3370207141Sjeff nblk = ino_blkatoff(ip, ino, lbn, frags); 3371207141Sjeff 3372207141Sjeff return (nblk == blk); 3373207141Sjeff} 3374207141Sjeff 3375207141Sjeff/* 3376207141Sjeff * Determines whether a pointer to an inode exists within a directory 3377207141Sjeff * at a specified offset. Returns the mode of the found entry. 3378207141Sjeff */ 3379207141Sjeffstatic int 3380207141Sjeffino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) 3381207141Sjeff{ 3382207141Sjeff union dinode *dip; 3383207141Sjeff struct direct *dp; 3384207141Sjeff ufs2_daddr_t blk; 3385207141Sjeff uint8_t *block; 3386207141Sjeff ufs_lbn_t lbn; 3387207141Sjeff int blksize; 3388207141Sjeff int frags; 3389207141Sjeff int dpoff; 3390207141Sjeff int doff; 3391207141Sjeff 3392207141Sjeff *isdot = 0; 3393207141Sjeff dip = ino_read(parent); 3394207141Sjeff *mode = DIP(dip, di_mode); 3395207141Sjeff if ((*mode & IFMT) != IFDIR) { 3396207141Sjeff if (debug) { 3397207141Sjeff /* This can happen if the parent inode was reallocated. 
*/ 3398207141Sjeff if (*mode != 0) 3399207141Sjeff printf("Directory %d has bad mode %o\n", 3400207141Sjeff parent, *mode); 3401207141Sjeff else 3402207141Sjeff printf("Directory %d zero inode\n", parent); 3403207141Sjeff } 3404207141Sjeff return (0); 3405207141Sjeff } 3406207141Sjeff lbn = lblkno(fs, diroff); 3407207141Sjeff doff = blkoff(fs, diroff); 3408207141Sjeff blksize = sblksize(fs, DIP(dip, di_size), lbn); 3409207141Sjeff if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { 3410207141Sjeff if (debug) 3411207141Sjeff printf("ino %d absent from %d due to offset %jd" 3412207141Sjeff " exceeding size %jd\n", 3413207141Sjeff child, parent, diroff, DIP(dip, di_size)); 3414207141Sjeff return (0); 3415207141Sjeff } 3416207141Sjeff blk = ino_blkatoff(dip, parent, lbn, &frags); 3417207141Sjeff if (blk <= 0) { 3418207141Sjeff if (debug) 3419207141Sjeff printf("Sparse directory %d", parent); 3420207141Sjeff return (0); 3421207141Sjeff } 3422207141Sjeff block = dblk_read(blk, blksize); 3423207141Sjeff /* 3424207141Sjeff * Walk through the records from the start of the block to be 3425207141Sjeff * certain we hit a valid record and not some junk in the middle 3426207141Sjeff * of a file name. Stop when we reach or pass the expected offset. 3427207141Sjeff */ 3428207141Sjeff dpoff = 0; 3429207141Sjeff do { 3430207141Sjeff dp = (struct direct *)&block[dpoff]; 3431207141Sjeff if (dpoff == doff) 3432207141Sjeff break; 3433207141Sjeff if (dp->d_reclen == 0) 3434207141Sjeff break; 3435207141Sjeff dpoff += dp->d_reclen; 3436207141Sjeff } while (dpoff <= doff); 3437207141Sjeff if (dpoff > fs->fs_bsize) 3438207141Sjeff errx(1, "Corrupt directory block in dir inode %d", parent); 3439207141Sjeff /* Not found. 
*/ 3440207141Sjeff if (dpoff != doff) { 3441207141Sjeff if (debug) 3442207141Sjeff printf("ino %d not found in %d, lbn %jd, dpoff %d\n", 3443207141Sjeff child, parent, lbn, dpoff); 3444207141Sjeff return (0); 3445207141Sjeff } 3446207141Sjeff /* 3447207141Sjeff * We found the item in question. Record the mode and whether it's 3448207141Sjeff * a . or .. link for the caller. 3449207141Sjeff */ 3450207141Sjeff if (dp->d_ino == child) { 3451207141Sjeff if (child == parent) 3452207141Sjeff *isdot = 1; 3453207141Sjeff else if (dp->d_namlen == 2 && 3454207141Sjeff dp->d_name[0] == '.' && dp->d_name[1] == '.') 3455207141Sjeff *isdot = 1; 3456207141Sjeff *mode = DTTOIF(dp->d_type); 3457207141Sjeff return (1); 3458207141Sjeff } 3459207141Sjeff if (debug) 3460207141Sjeff printf("ino %d doesn't match dirent ino %d in parent %d\n", 3461207141Sjeff child, dp->d_ino, parent); 3462207141Sjeff return (0); 3463207141Sjeff} 3464207141Sjeff 3465207141Sjeff#define VISIT_INDIR 0x0001 3466207141Sjeff#define VISIT_EXT 0x0002 3467207141Sjeff 3468207141Sjeff/* 3469207141Sjeff * Read an indirect level which may or may not be linked into an inode. 3470207141Sjeff */ 3471207141Sjeffstatic void 3472207141Sjeffindir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, 3473207141Sjeff ino_visitor visitor, int flags) 3474207141Sjeff{ 3475207141Sjeff ufs2_daddr_t *bap2; 3476207141Sjeff ufs1_daddr_t *bap1; 3477207141Sjeff ufs_lbn_t lbnadd; 3478207141Sjeff ufs2_daddr_t nblk; 3479207141Sjeff ufs_lbn_t nlbn; 3480207141Sjeff int level; 3481207141Sjeff int i; 3482207141Sjeff 3483207141Sjeff /* 3484207141Sjeff * Don't visit indirect blocks with contents we can't trust. This 3485207141Sjeff * should only happen when indir_visit() is called to complete a 3486207141Sjeff * truncate that never finished and not when a pointer is found via 3487207141Sjeff * an inode. 
3488207141Sjeff */ 3489207141Sjeff if (blk == 0) 3490207141Sjeff return; 3491207141Sjeff if (blk_isindir(blk, ino, lbn) == 0) { 3492207141Sjeff if (debug) 3493207141Sjeff printf("blk %jd ino %d lbn %jd is not indir.\n", 3494207141Sjeff blk, ino, lbn); 3495207141Sjeff goto out; 3496207141Sjeff } 3497207141Sjeff level = lbn_level(lbn); 3498207141Sjeff if (level == -1) { 3499207141Sjeff abort(); 3500207141Sjeff errx(1, "Invalid level for lbn %jd", lbn); 3501207141Sjeff } 3502207141Sjeff lbnadd = 1; 3503207141Sjeff for (i = level; i > 0; i--) 3504207141Sjeff lbnadd *= NINDIR(fs); 3505207141Sjeff bap1 = (void *)dblk_read(blk, fs->fs_bsize); 3506207141Sjeff bap2 = (void *)bap1; 3507207141Sjeff for (i = 0; i < NINDIR(fs); i++) { 3508207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 3509207141Sjeff nblk = *bap1++; 3510207141Sjeff else 3511207141Sjeff nblk = *bap2++; 3512207141Sjeff if (nblk == 0) 3513207141Sjeff continue; 3514207141Sjeff if (level == 0) { 3515207141Sjeff nlbn = -lbn + i * lbnadd; 3516207141Sjeff (*frags) += fs->fs_frag; 3517207141Sjeff visitor(ino, nlbn, nblk, fs->fs_frag); 3518207141Sjeff } else { 3519207141Sjeff nlbn = (lbn + 1) - (i * lbnadd); 3520207141Sjeff indir_visit(ino, nlbn, nblk, frags, visitor, flags); 3521207141Sjeff } 3522207141Sjeff } 3523207141Sjeffout: 3524207141Sjeff if (flags & VISIT_INDIR) { 3525207141Sjeff (*frags) += fs->fs_frag; 3526207141Sjeff visitor(ino, lbn, blk, fs->fs_frag); 3527207141Sjeff } 3528207141Sjeff} 3529207141Sjeff 3530207141Sjeff/* 3531207141Sjeff * Visit each block in an inode as specified by 'flags' and call a 3532207141Sjeff * callback function. The callback may inspect or free blocks. The 3533207141Sjeff * count of frags found according to the size in the file is returned. 3534207141Sjeff * This is not valid for sparse files but may be used to determine 3535207141Sjeff * the correct di_blocks for a file. 
3536207141Sjeff */ 3537207141Sjeffstatic uint64_t 3538207141Sjeffino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) 3539207141Sjeff{ 3540207141Sjeff ufs_lbn_t tmpval; 3541207141Sjeff ufs_lbn_t lbn; 3542207141Sjeff uint64_t size; 3543207141Sjeff uint64_t fragcnt; 3544207141Sjeff int mode; 3545207141Sjeff int frags; 3546207141Sjeff int i; 3547207141Sjeff 3548207141Sjeff size = DIP(ip, di_size); 3549207141Sjeff mode = DIP(ip, di_mode) & IFMT; 3550207141Sjeff fragcnt = 0; 3551207141Sjeff if ((flags & VISIT_EXT) && 3552207141Sjeff fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) { 3553207141Sjeff for (i = 0; i < NXADDR; i++) { 3554207141Sjeff if (ip->dp2.di_extb[i] == 0) 3555207141Sjeff continue; 3556207141Sjeff frags = sblksize(fs, ip->dp2.di_extsize, i); 3557207141Sjeff frags = numfrags(fs, frags); 3558207141Sjeff fragcnt += frags; 3559207141Sjeff visitor(ino, -1 - i, ip->dp2.di_extb[i], frags); 3560207141Sjeff } 3561207141Sjeff } 3562207141Sjeff /* Skip datablocks for short links and devices. */ 3563207141Sjeff if (mode == IFBLK || mode == IFCHR || 3564207141Sjeff (mode == IFLNK && size < fs->fs_maxsymlinklen)) 3565207141Sjeff return (fragcnt); 3566207141Sjeff for (i = 0; i < NDADDR; i++) { 3567207141Sjeff if (DIP(ip, di_db[i]) == 0) 3568207141Sjeff continue; 3569207141Sjeff frags = sblksize(fs, size, i); 3570207141Sjeff frags = numfrags(fs, frags); 3571207141Sjeff fragcnt += frags; 3572207141Sjeff visitor(ino, i, DIP(ip, di_db[i]), frags); 3573207141Sjeff } 3574207141Sjeff for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, 3575207141Sjeff tmpval *= NINDIR(fs), lbn += tmpval) { 3576207141Sjeff if (DIP(ip, di_ib[i]) == 0) 3577207141Sjeff continue; 3578207141Sjeff indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, 3579207141Sjeff flags); 3580207141Sjeff } 3581207141Sjeff return (fragcnt); 3582207141Sjeff} 3583207141Sjeff 3584207141Sjeff/* 3585207141Sjeff * Null visitor function used when we just want to count blocks. 
3586207141Sjeff */ 3587207141Sjeffstatic void 3588207141Sjeffnull_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 3589207141Sjeff{ 3590207141Sjeff} 3591207141Sjeff 3592207141Sjeff/* 3593207141Sjeff * Recalculate di_blocks when we discover that a block allocation or 3594207141Sjeff * free was not successfully completed. The kernel does not roll this back 3595207141Sjeff * because it would be too expensive to compute which indirects were 3596207141Sjeff * reachable at the time the inode was written. 3597207141Sjeff */ 3598207141Sjeffstatic void 3599207141Sjeffino_adjblks(ino_t ino) 3600207141Sjeff{ 3601207141Sjeff struct suj_ino *sino; 3602207141Sjeff union dinode *ip; 3603207141Sjeff uint64_t blocks; 3604207141Sjeff uint64_t frags; 3605207141Sjeff 3606207141Sjeff sino = ino_lookup(ino, 1); 3607207141Sjeff if (sino->si_blkadj) 3608207141Sjeff return; 3609207141Sjeff sino->si_blkadj = 1; 3610207141Sjeff ip = ino_read(ino); 3611207141Sjeff /* No need to adjust zero'd inodes. */ 3612207141Sjeff if (DIP(ip, di_mode) == 0) 3613207141Sjeff return; 3614207141Sjeff frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 3615207141Sjeff blocks = fsbtodb(fs, frags); 3616207141Sjeff if (blocks == DIP(ip, di_blocks)) 3617207141Sjeff return; 3618207141Sjeff if (debug) 3619207141Sjeff printf("ino %d adjusting block count from %jd to %jd\n", 3620207141Sjeff ino, DIP(ip, di_blocks), blocks); 3621207141Sjeff DIP_SET(ip, di_blocks, blocks); 3622207141Sjeff ino_dirty(ino); 3623207141Sjeff} 3624207141Sjeff 3625207141Sjeffstatic void 3626207141Sjeffblk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 3627207141Sjeff{ 3628207141Sjeff int mask; 3629207141Sjeff 3630207141Sjeff mask = blk_isfree(blk, ino, lbn, frags); 3631207141Sjeff if (debug) 3632207141Sjeff printf("blk %jd freemask 0x%X\n", blk, mask); 3633207141Sjeff blk_free(blk, mask, frags); 3634207141Sjeff} 3635207141Sjeff 3636207141Sjeff/* 3637207141Sjeff * Free a block or tree of blocks that 
was previously rooted in ino at 3638207141Sjeff * the given lbn. If the lbn is an indirect all children are freed 3639207141Sjeff * recursively. 3640207141Sjeff */ 3641207141Sjeffstatic void 3642207141Sjeffblk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) 3643207141Sjeff{ 3644207141Sjeff uint64_t resid; 3645207141Sjeff int mask; 3646207141Sjeff 3647207141Sjeff mask = blk_isfree(blk, ino, lbn, frags); 3648207141Sjeff if (debug) 3649207141Sjeff printf("blk %jd freemask 0x%X\n", blk, mask); 3650207141Sjeff resid = 0; 3651207141Sjeff if (lbn <= -NDADDR && follow && mask == 0) 3652207141Sjeff indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); 3653207141Sjeff else 3654207141Sjeff blk_free(blk, mask, frags); 3655207141Sjeff} 3656207141Sjeff 3657207141Sjeffstatic void 3658207141Sjeffino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 3659207141Sjeff{ 3660207141Sjeff struct suj_ino *sino; 3661207141Sjeff struct suj_rec *srec; 3662207141Sjeff struct jrefrec *rrec; 3663207141Sjeff struct direct *dp; 3664207141Sjeff off_t diroff; 3665207141Sjeff uint8_t *block; 3666207141Sjeff int skipparent; 3667207141Sjeff int isparent; 3668207141Sjeff int dpoff; 3669207141Sjeff int size; 3670207141Sjeff 3671207141Sjeff sino = ino_lookup(ino, 0); 3672207141Sjeff if (sino) 3673207141Sjeff skipparent = sino->si_skipparent; 3674207141Sjeff else 3675207141Sjeff skipparent = 0; 3676207141Sjeff size = lfragtosize(fs, frags); 3677207141Sjeff block = dblk_read(blk, size); 3678207141Sjeff dp = (struct direct *)&block[0]; 3679207141Sjeff for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { 3680207141Sjeff dp = (struct direct *)&block[dpoff]; 3681207141Sjeff if (dp->d_ino == 0 || dp->d_ino == WINO) 3682207141Sjeff continue; 3683207141Sjeff if (dp->d_namlen == 1 && dp->d_name[0] == '.') 3684207141Sjeff continue; 3685207141Sjeff isparent = dp->d_namlen == 2 && dp->d_name[0] == '.' 
&& 3686207141Sjeff dp->d_name[1] == '.'; 3687207141Sjeff if (isparent && skipparent == 1) 3688207141Sjeff continue; 3689207141Sjeff if (debug) 3690207141Sjeff printf("Directory %d removing inode %d name %s\n", 3691207141Sjeff ino, dp->d_ino, dp->d_name); 3692207141Sjeff /* 3693207141Sjeff * Lookup this inode to see if we have a record for it. 3694207141Sjeff * If not, we've already adjusted it assuming this path 3695207141Sjeff * was valid and we have to adjust once more. 3696207141Sjeff */ 3697207141Sjeff sino = ino_lookup(dp->d_ino, 0); 3698207141Sjeff if (sino == NULL || sino->si_linkadj || sino->si_hasrecs == 0) { 3699207141Sjeff ino_decr(dp->d_ino); 3700207141Sjeff continue; 3701207141Sjeff } 3702207141Sjeff /* 3703207141Sjeff * Tell any child directories we've already removed their 3704207141Sjeff * parent. Don't try to adjust our link down again. 3705207141Sjeff */ 3706207141Sjeff if (isparent == 0) 3707207141Sjeff sino->si_skipparent = 1; 3708207141Sjeff /* 3709207141Sjeff * If we haven't yet processed this inode we need to make 3710207141Sjeff * sure we will successfully discover the lost path. If not 3711207141Sjeff * use nlinkadj to remember. 3712207141Sjeff */ 3713207141Sjeff diroff = lblktosize(fs, lbn) + dpoff; 3714207141Sjeff TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 3715207141Sjeff rrec = (struct jrefrec *)srec->sr_rec; 3716207141Sjeff if (rrec->jr_parent == ino && 3717207141Sjeff rrec->jr_diroff == diroff) 3718207141Sjeff break; 3719207141Sjeff } 3720207141Sjeff if (srec == NULL) 3721207141Sjeff sino->si_nlinkadj--; 3722207141Sjeff } 3723207141Sjeff} 3724207141Sjeff 3725207141Sjeff/* 3726207141Sjeff * Truncate an inode, freeing all blocks and decrementing all children's 3727207141Sjeff * link counts. Free the inode back to the cg. 
3728207141Sjeff */ 3729207141Sjeffstatic void 3730207141Sjeffino_truncate(union dinode *ip, ino_t ino, int mode) 3731207141Sjeff{ 3732207141Sjeff uint32_t gen; 3733207141Sjeff 3734207141Sjeff if (ino == ROOTINO) 3735207141Sjeff errx(1, "Attempting to free ROOTINO"); 3736207141Sjeff if (debug) 3737207141Sjeff printf("Truncating and freeing ino %d, nlink %d, mode %o\n", 3738207141Sjeff ino, DIP(ip, di_nlink), DIP(ip, di_mode)); 3739207141Sjeff 3740207141Sjeff /* We are freeing an inode or directory. */ 3741207141Sjeff if ((DIP(ip, di_mode) & IFMT) == IFDIR) 3742207141Sjeff ino_visit(ip, ino, ino_free_children, 0); 3743207141Sjeff DIP_SET(ip, di_nlink, 0); 3744207141Sjeff ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); 3745207141Sjeff /* Here we have to clear the inode and release any blocks it holds. */ 3746207141Sjeff gen = DIP(ip, di_gen); 3747207141Sjeff if (fs->fs_magic == FS_UFS1_MAGIC) 3748207141Sjeff bzero(ip, sizeof(struct ufs1_dinode)); 3749207141Sjeff else 3750207141Sjeff bzero(ip, sizeof(struct ufs2_dinode)); 3751207141Sjeff DIP_SET(ip, di_gen, gen); 3752207141Sjeff ino_dirty(ino); 3753207141Sjeff ino_free(ino, mode); 3754207141Sjeff return; 3755207141Sjeff} 3756207141Sjeff 3757207141Sjeff/* 3758207141Sjeff * Adjust an inode's link count down by one when a directory goes away. 
3759207141Sjeff */ 3760207141Sjeffstatic void 3761207141Sjeffino_decr(ino_t ino) 3762207141Sjeff{ 3763207141Sjeff union dinode *ip; 3764207141Sjeff int reqlink; 3765207141Sjeff int nlink; 3766207141Sjeff int mode; 3767207141Sjeff 3768207141Sjeff ip = ino_read(ino); 3769207141Sjeff nlink = DIP(ip, di_nlink); 3770207141Sjeff mode = DIP(ip, di_mode); 3771207141Sjeff if (nlink < 1) 3772207141Sjeff errx(1, "Inode %d link count %d invalid", ino, nlink); 3773207141Sjeff if (mode == 0) 3774207141Sjeff errx(1, "Inode %d has a link of %d with 0 mode.", ino, nlink); 3775207141Sjeff nlink--; 3776207141Sjeff if ((mode & IFMT) == IFDIR) 3777207141Sjeff reqlink = 2; 3778207141Sjeff else 3779207141Sjeff reqlink = 1; 3780207141Sjeff if (nlink < reqlink) { 3781207141Sjeff if (debug) 3782207141Sjeff printf("ino %d not enough links to live %d < %d\n", 3783207141Sjeff ino, nlink, reqlink); 3784207141Sjeff ino_truncate(ip, ino, mode); 3785207141Sjeff return; 3786207141Sjeff } 3787207141Sjeff DIP_SET(ip, di_nlink, nlink); 3788207141Sjeff ino_dirty(ino); 3789207141Sjeff} 3790207141Sjeff 3791207141Sjeff/* 3792207141Sjeff * Adjust the inode link count to 'nlink'. If the count reaches zero 3793207141Sjeff * free it. 
3794207141Sjeff */ 3795207141Sjeffstatic void 3796207141Sjeffino_adjust(ino_t ino, int lastmode, nlink_t nlink) 3797207141Sjeff{ 3798207141Sjeff union dinode *ip; 3799207141Sjeff int reqlink; 3800207141Sjeff int mode; 3801207141Sjeff 3802207141Sjeff ip = ino_read(ino); 3803207141Sjeff mode = DIP(ip, di_mode) & IFMT; 3804207141Sjeff if (nlink > LINK_MAX) 3805207141Sjeff errx(1, 3806207141Sjeff "ino %d nlink manipulation error, new link %d, old link %d", 3807207141Sjeff ino, nlink, DIP(ip, di_nlink)); 3808207141Sjeff if (debug) 3809207141Sjeff printf("Adjusting ino %d, nlink %d, old link %d lastmode %o\n", 3810207141Sjeff ino, nlink, DIP(ip, di_nlink), lastmode); 3811207141Sjeff if (mode == 0) { 3812207141Sjeff if (debug) 3813207141Sjeff printf("ino %d, zero inode freeing bitmap\n", ino); 3814207141Sjeff ino_free(ino, lastmode); 3815207141Sjeff return; 3816207141Sjeff } 3817207141Sjeff /* XXX Should be an assert? */ 3818207141Sjeff if (mode != lastmode && debug) 3819207141Sjeff printf("ino %d, mode %o != %o\n", ino, mode, lastmode); 3820207141Sjeff if ((mode & IFMT) == IFDIR) 3821207141Sjeff reqlink = 2; 3822207141Sjeff else 3823207141Sjeff reqlink = 1; 3824207141Sjeff /* If the inode doesn't have enough links to live, free it. */ 3825207141Sjeff if (nlink < reqlink) { 3826207141Sjeff if (debug) 3827207141Sjeff printf("ino %d not enough links to live %d < %d\n", 3828207141Sjeff ino, nlink, reqlink); 3829207141Sjeff ino_truncate(ip, ino, mode); 3830207141Sjeff return; 3831207141Sjeff } 3832207141Sjeff /* If required write the updated link count. 
*/ 3833207141Sjeff if (DIP(ip, di_nlink) == nlink) { 3834207141Sjeff if (debug) 3835207141Sjeff printf("ino %d, link matches, skipping.\n", ino); 3836207141Sjeff return; 3837207141Sjeff } 3838207141Sjeff DIP_SET(ip, di_nlink, nlink); 3839207141Sjeff ino_dirty(ino); 3840207141Sjeff} 3841207141Sjeff 3842207141Sjeff#define DOTDOT_OFFSET DIRECTSIZ(1) 3843207141Sjeff 3844207141Sjeff/* 3845207141Sjeff * Process records available for one inode and determine whether the 3846207141Sjeff * link count is correct or needs adjusting. 3847207141Sjeff * 3848207141Sjeff * XXX Failed to fix zero length directory. Shouldn't .. have been mising? 3849207141Sjeff */ 3850207141Sjeffstatic void 3851207141Sjeffino_check(struct suj_ino *sino) 3852207141Sjeff{ 3853207141Sjeff struct suj_rec *srec; 3854207141Sjeff struct jrefrec *rrec; 3855207141Sjeff struct suj_ino *stmp; 3856207141Sjeff nlink_t dotlinks; 3857207141Sjeff int newlinks; 3858207141Sjeff int removes; 3859207141Sjeff int nlink; 3860207141Sjeff ino_t ino; 3861207141Sjeff int isdot; 3862207141Sjeff int isat; 3863207141Sjeff int mode; 3864207141Sjeff 3865207141Sjeff if (sino->si_hasrecs == 0) 3866207141Sjeff return; 3867207141Sjeff ino = sino->si_ino; 3868207141Sjeff /* 3869207141Sjeff * XXX ino_isfree currently is skipping initialized inodes 3870207141Sjeff * that are unreferenced. 
3871207141Sjeff */ 3872207141Sjeff if (0 && ino_isfree(ino)) 3873207141Sjeff return; 3874207141Sjeff rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; 3875207141Sjeff nlink = rrec->jr_nlink; 3876207141Sjeff newlinks = sino->si_nlinkadj; 3877207141Sjeff dotlinks = 0; 3878207141Sjeff removes = 0; 3879207141Sjeff TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 3880207141Sjeff rrec = (struct jrefrec *)srec->sr_rec; 3881207141Sjeff isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, 3882207141Sjeff rrec->jr_ino, &mode, &isdot); 3883207141Sjeff if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) 3884207141Sjeff errx(1, "Inode mode/directory type mismatch %o != %o", 3885207141Sjeff mode, rrec->jr_mode); 3886207141Sjeff if (debug) 3887207141Sjeff printf("jrefrec: op %d ino %d, nlink %d, parent %d, " 3888207141Sjeff "diroff %jd, mode %o, isat %d, isdot %d\n", 3889207141Sjeff rrec->jr_op, rrec->jr_ino, rrec->jr_nlink, 3890207141Sjeff rrec->jr_parent, rrec->jr_diroff, rrec->jr_mode, 3891207141Sjeff isat, isdot); 3892207141Sjeff mode = rrec->jr_mode & IFMT; 3893207141Sjeff if (rrec->jr_op == JOP_REMREF) 3894207141Sjeff removes++; 3895207141Sjeff newlinks += isat; 3896207141Sjeff if (isdot) 3897207141Sjeff dotlinks += isat; 3898207141Sjeff } 3899207141Sjeff /* 3900207141Sjeff * The number of links that remain are the starting link count 3901207141Sjeff * subtracted by the total number of removes with the total 3902207141Sjeff * links discovered back in. An incomplete remove thus 3903207141Sjeff * makes no change to the link count but an add increases 3904207141Sjeff * by one. 3905207141Sjeff */ 3906207141Sjeff nlink += newlinks; 3907207141Sjeff nlink -= removes; 3908207141Sjeff /* 3909207141Sjeff * If it's a directory with no real names pointing to it go ahead 3910207141Sjeff * and truncate it. This will free any children. 
3911207141Sjeff */ 3912207141Sjeff if ((mode & IFMT) == IFDIR && nlink - dotlinks == 0) { 3913207141Sjeff nlink = 0; 3914207141Sjeff /* 3915207141Sjeff * Mark any .. links so they know not to free this inode 3916207141Sjeff * when they are removed. 3917207141Sjeff */ 3918207141Sjeff TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 3919207141Sjeff rrec = (struct jrefrec *)srec->sr_rec; 3920207141Sjeff if (rrec->jr_diroff == DOTDOT_OFFSET) { 3921207141Sjeff stmp = ino_lookup(rrec->jr_parent, 0); 3922207141Sjeff if (stmp) 3923207141Sjeff stmp->si_skipparent = 1; 3924207141Sjeff } 3925207141Sjeff } 3926207141Sjeff } 3927207141Sjeff sino->si_linkadj = 1; 3928207141Sjeff ino_adjust(ino, mode, nlink); 3929207141Sjeff} 3930207141Sjeff 3931207141Sjeff/* 3932207141Sjeff * Process records available for one block and determine whether it is 3933207141Sjeff * still allocated and whether the owning inode needs to be updated or 3934207141Sjeff * a free completed. 3935207141Sjeff */ 3936207141Sjeffstatic void 3937207141Sjeffblk_check(struct suj_blk *sblk) 3938207141Sjeff{ 3939207141Sjeff struct suj_rec *srec; 3940207141Sjeff struct jblkrec *brec; 3941207141Sjeff ufs2_daddr_t blk; 3942207141Sjeff int mask; 3943207141Sjeff int frags; 3944207141Sjeff int isat; 3945207141Sjeff 3946207141Sjeff /* 3947207141Sjeff * Each suj_blk actually contains records for any fragments in that 3948207141Sjeff * block. As a result we must evaluate each record individually. 
3949207141Sjeff */ 3950207141Sjeff TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 3951207141Sjeff brec = (struct jblkrec *)srec->sr_rec; 3952207141Sjeff frags = brec->jb_frags; 3953207141Sjeff blk = brec->jb_blkno + brec->jb_oldfrags; 3954207141Sjeff isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); 3955207141Sjeff if (debug) 3956207141Sjeff printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n", 3957207141Sjeff brec->jb_op, blk, brec->jb_ino, brec->jb_lbn, 3958207141Sjeff brec->jb_frags, isat, frags); 3959207141Sjeff /* 3960207141Sjeff * If we found the block at this address we still have to 3961207141Sjeff * determine if we need to free the tail end that was 3962207141Sjeff * added by adding contiguous fragments from the same block. 3963207141Sjeff */ 3964207141Sjeff if (isat == 1) { 3965207141Sjeff if (frags == brec->jb_frags) 3966207141Sjeff continue; 3967207141Sjeff mask = blk_isfree(blk, brec->jb_ino, brec->jb_lbn, 3968207141Sjeff brec->jb_frags); 3969207141Sjeff mask >>= frags; 3970207141Sjeff blk += frags; 3971207141Sjeff frags = brec->jb_frags - frags; 3972207141Sjeff blk_free(blk, mask, frags); 3973207141Sjeff ino_adjblks(brec->jb_ino); 3974207141Sjeff continue; 3975207141Sjeff } 3976207141Sjeff /* 3977207141Sjeff * The block wasn't found, attempt to free it. It won't be 3978207141Sjeff * freed if it was actually reallocated. If this was an 3979207141Sjeff * allocation we don't want to follow indirects as they 3980207141Sjeff * may not be written yet. Any children of the indirect will 3981207141Sjeff * have their own records. If it's a free we need to 3982207141Sjeff * recursively free children. 
3983207141Sjeff */ 3984207141Sjeff blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, 3985207141Sjeff brec->jb_op == JOP_FREEBLK); 3986207141Sjeff ino_adjblks(brec->jb_ino); 3987207141Sjeff } 3988207141Sjeff} 3989207141Sjeff 3990207141Sjeff/* 3991207141Sjeff * Walk the list of inode and block records for this cg, recovering any 3992207141Sjeff * changes which were not complete at the time of crash. 3993207141Sjeff */ 3994207141Sjeffstatic void 3995207141Sjeffcg_check(struct suj_cg *sc) 3996207141Sjeff{ 3997207141Sjeff struct suj_blk *nextb; 3998207141Sjeff struct suj_ino *nexti; 3999207141Sjeff struct suj_ino *sino; 4000207141Sjeff struct suj_blk *sblk; 4001207141Sjeff int i; 4002207141Sjeff 4003207141Sjeff if (debug) 4004207141Sjeff printf("Recovering cg %d\n", sc->sc_cgx); 4005207141Sjeff 4006207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 4007207141Sjeff LIST_FOREACH_SAFE(sino, &sc->sc_inohash[i], si_next, nexti) 4008207141Sjeff ino_check(sino); 4009207141Sjeff 4010207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 4011207141Sjeff LIST_FOREACH_SAFE(sblk, &sc->sc_blkhash[i], sb_next, nextb) 4012207141Sjeff blk_check(sblk); 4013207141Sjeff} 4014207141Sjeff 4015207141Sjeff/* 4016207141Sjeff * Write a potentially dirty cg. All inodes must be written before the 4017207141Sjeff * cg maps are so that an allocated inode is never marked free, even if 4018207141Sjeff * we crash during fsck. 
4019207141Sjeff */ 4020207141Sjeffstatic void 4021207141Sjeffcg_write(struct suj_cg *sc) 4022207141Sjeff{ 4023207141Sjeff struct ino_blk *iblk; 4024207141Sjeff ufs1_daddr_t fragno, cgbno, maxbno; 4025207141Sjeff u_int8_t *blksfree; 4026207141Sjeff struct cg *cgp; 4027207141Sjeff int blk; 4028207141Sjeff int i; 4029207141Sjeff 4030207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 4031207141Sjeff LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) 4032207141Sjeff iblk_write(iblk); 4033207141Sjeff if (sc->sc_dirty == 0) 4034207141Sjeff return; 4035207141Sjeff /* 4036207141Sjeff * Fix the frag and cluster summary. 4037207141Sjeff */ 4038207141Sjeff cgp = sc->sc_cgp; 4039207141Sjeff cgp->cg_cs.cs_nbfree = 0; 4040207141Sjeff cgp->cg_cs.cs_nffree = 0; 4041207141Sjeff bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 4042207141Sjeff maxbno = fragstoblks(fs, fs->fs_fpg); 4043207141Sjeff if (fs->fs_contigsumsize > 0) { 4044207141Sjeff for (i = 1; i <= fs->fs_contigsumsize; i++) 4045207141Sjeff cg_clustersum(cgp)[i] = 0; 4046207141Sjeff bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 4047207141Sjeff } 4048207141Sjeff blksfree = cg_blksfree(cgp); 4049207141Sjeff for (cgbno = 0; cgbno < maxbno; cgbno++) { 4050207141Sjeff if (ffs_isfreeblock(fs, blksfree, cgbno)) 4051207141Sjeff continue; 4052207141Sjeff if (ffs_isblock(fs, blksfree, cgbno)) { 4053207141Sjeff ffs_clusteracct(fs, cgp, cgbno, 1); 4054207141Sjeff cgp->cg_cs.cs_nbfree++; 4055207141Sjeff continue; 4056207141Sjeff } 4057207141Sjeff fragno = blkstofrags(fs, cgbno); 4058207141Sjeff blk = blkmap(fs, blksfree, fragno); 4059207141Sjeff ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 4060207141Sjeff for (i = 0; i < fs->fs_frag; i++) 4061207141Sjeff if (isset(blksfree, fragno + i)) 4062207141Sjeff cgp->cg_cs.cs_nffree++; 4063207141Sjeff } 4064207141Sjeff /* 4065207141Sjeff * Update the superblock cg summary from our now correct values 4066207141Sjeff * before writing the block. 
4067207141Sjeff */ 4068207141Sjeff fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; 4069207141Sjeff if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 4070207141Sjeff fs->fs_bsize) == -1) 4071207141Sjeff err(1, "Unable to write cylinder group %d", sc->sc_cgx); 4072207141Sjeff} 4073207141Sjeff 4074207141Sjeffstatic void 4075207141Sjeffcg_apply(void (*apply)(struct suj_cg *)) 4076207141Sjeff{ 4077207141Sjeff struct suj_cg *scg; 4078207141Sjeff int i; 4079207141Sjeff 4080207141Sjeff for (i = 0; i < SUJ_HASHSIZE; i++) 4081207141Sjeff LIST_FOREACH(scg, &cghash[i], sc_next) 4082207141Sjeff apply(scg); 4083207141Sjeff} 4084207141Sjeff 4085207141Sjeff/* 4086207141Sjeff * Process the unlinked but referenced file list. Freeing all inodes. 4087207141Sjeff */ 4088207141Sjeffstatic void 4089207141Sjeffino_unlinked(void) 4090207141Sjeff{ 4091207141Sjeff union dinode *ip; 4092207141Sjeff uint16_t mode; 4093207141Sjeff ino_t inon; 4094207141Sjeff ino_t ino; 4095207141Sjeff 4096207141Sjeff ino = fs->fs_sujfree; 4097207141Sjeff fs->fs_sujfree = 0; 4098207141Sjeff while (ino != 0) { 4099207141Sjeff ip = ino_read(ino); 4100207141Sjeff mode = DIP(ip, di_mode) & IFMT; 4101207141Sjeff inon = DIP(ip, di_freelink); 4102207141Sjeff DIP_SET(ip, di_freelink, 0); 4103207141Sjeff /* 4104207141Sjeff * XXX Should this be an errx? 4105207141Sjeff */ 4106207141Sjeff if (DIP(ip, di_nlink) == 0) { 4107207141Sjeff if (debug) 4108207141Sjeff printf("Freeing unlinked ino %d mode %o\n", 4109207141Sjeff ino, mode); 4110207141Sjeff ino_truncate(ip, ino, mode); 4111207141Sjeff } else if (debug) 4112207141Sjeff printf("Skipping ino %d mode %o with link %d\n", 4113207141Sjeff ino, mode, DIP(ip, di_nlink)); 4114207141Sjeff ino = inon; 4115207141Sjeff } 4116207141Sjeff} 4117207141Sjeff 4118207141Sjeff/* 4119207141Sjeff * If we see two ops for the same inode to the same parent at the same 4120207141Sjeff * offset we could miscount the link with ino_isat() returning twice. 
4121207141Sjeff * Keep only the first record because it has the valid link count but keep 4122207141Sjeff * the mode from the final op as that should be the correct mode in case 4123207141Sjeff * it changed. 4124207141Sjeff */ 4125207141Sjeffstatic void 4126207141Sjeffsuj_build_ino(struct jrefrec *refrec) 4127207141Sjeff{ 4128207141Sjeff struct jmvrec *mvrec; 4129207141Sjeff struct suj_rec *srec; 4130207141Sjeff struct suj_ino *sino; 4131207141Sjeff struct suj_rec *srn; 4132207141Sjeff struct jrefrec *rrn; 4133207141Sjeff 4134207141Sjeff if (debug) 4135207141Sjeff printf("suj_build_ino: op %d, ino %d, nlink %d, parent %d, diroff %jd\n", 4136207141Sjeff refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, 4137207141Sjeff refrec->jr_diroff); 4138207141Sjeff sino = ino_lookup(refrec->jr_ino, 1); 4139207141Sjeff /* 4140207141Sjeff * Search for a mvrec that matches this offset. Whether it's an add 4141207141Sjeff * or a remove we can delete the mvref. It no longer applies to this 4142207141Sjeff * location. 4143207141Sjeff * 4144207141Sjeff * For removes, we have to find the original offset so we can create 4145207141Sjeff * a remove that matches the earlier add so it can be abandoned 4146207141Sjeff * if necessary. We create an add in the new location so we can 4147207141Sjeff * tolerate the directory block as it existed before or after 4148207141Sjeff * the move. 
4149207141Sjeff */ 4150207141Sjeff if (!TAILQ_EMPTY(&sino->si_movs)) { 4151207141Sjeff for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; 4152207141Sjeff srn = TAILQ_PREV(srn, srechd, sr_next)) { 4153207141Sjeff mvrec = (struct jmvrec *)srn->sr_rec; 4154207141Sjeff if (mvrec->jm_parent != refrec->jr_parent || 4155207141Sjeff mvrec->jm_newoff != refrec->jr_diroff) 4156207141Sjeff continue; 4157207141Sjeff TAILQ_REMOVE(&sino->si_movs, srn, sr_next); 4158207141Sjeff if (refrec->jr_op == JOP_REMREF) { 4159207141Sjeff rrn = errmalloc(sizeof(*refrec)); 4160207141Sjeff *rrn = *refrec; 4161207141Sjeff rrn->jr_op = JOP_ADDREF; 4162207141Sjeff suj_build_ino(rrn); 4163207141Sjeff refrec->jr_diroff = mvrec->jm_oldoff; 4164207141Sjeff } 4165207141Sjeff } 4166207141Sjeff } 4167207141Sjeff /* 4168207141Sjeff * We walk backwards so that adds and removes are evaluated in the 4169207141Sjeff * correct order. 4170207141Sjeff */ 4171207141Sjeff for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 4172207141Sjeff srn = TAILQ_PREV(srn, srechd, sr_next)) { 4173207141Sjeff rrn = (struct jrefrec *)srn->sr_rec; 4174207141Sjeff if (rrn->jr_parent != refrec->jr_parent || 4175207141Sjeff rrn->jr_diroff != refrec->jr_diroff) 4176207141Sjeff continue; 4177207141Sjeff if (debug) 4178207141Sjeff printf("Discarding dup.\n"); 4179207141Sjeff rrn->jr_mode = refrec->jr_mode; 4180207141Sjeff return; 4181207141Sjeff } 4182207141Sjeff sino->si_hasrecs = 1; 4183207141Sjeff srec = errmalloc(sizeof(*srec)); 4184207141Sjeff srec->sr_rec = (union jrec *)refrec; 4185207141Sjeff TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); 4186207141Sjeff} 4187207141Sjeff 4188207141Sjeff/* 4189207141Sjeff * Apply a move record to an inode. We must search for adds that preceed us 4190207141Sjeff * and add duplicates because we won't know which location to search first. 4191207141Sjeff * Then we add movs to a queue that is maintained until the moved location 4192207141Sjeff * is removed. 
If a single record is moved multiple times we only maintain 4193207141Sjeff * one copy that contains the original and final diroffs. 4194207141Sjeff */ 4195207141Sjeffstatic void 4196207141Sjeffsuj_move_ino(struct jmvrec *mvrec) 4197207141Sjeff{ 4198207141Sjeff struct jrefrec *refrec; 4199207141Sjeff struct suj_ino *sino; 4200207141Sjeff struct suj_rec *srec; 4201207141Sjeff struct jmvrec *mvrn; 4202207141Sjeff struct suj_rec *srn; 4203207141Sjeff struct jrefrec *rrn; 4204207141Sjeff 4205207141Sjeff if (debug) 4206207141Sjeff printf("suj_move_ino: ino %d, parent %d, diroff %jd, oldoff %jd\n", 4207207141Sjeff mvrec->jm_ino, mvrec->jm_parent, mvrec->jm_newoff, 4208207141Sjeff mvrec->jm_oldoff); 4209207141Sjeff sino = ino_lookup(mvrec->jm_ino, 0); 4210207141Sjeff if (sino == NULL) 4211207141Sjeff return; 4212207141Sjeff /* 4213207141Sjeff * We walk backwards so we only evaluate the most recent record at 4214207141Sjeff * this offset. 4215207141Sjeff */ 4216207141Sjeff for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 4217207141Sjeff srn = TAILQ_PREV(srn, srechd, sr_next)) { 4218207141Sjeff rrn = (struct jrefrec *)srn->sr_rec; 4219207141Sjeff if (rrn->jr_op != JOP_ADDREF) 4220207141Sjeff continue; 4221207141Sjeff if (rrn->jr_parent != mvrec->jm_parent || 4222207141Sjeff rrn->jr_diroff != mvrec->jm_oldoff) 4223207141Sjeff continue; 4224207141Sjeff /* 4225207141Sjeff * When an entry is moved we don't know whether the write 4226207141Sjeff * to move has completed yet. To resolve this we create 4227207141Sjeff * a new add dependency in the new location as if it were added 4228207141Sjeff * twice. Only one will succeed. 
4229207141Sjeff */ 4230207141Sjeff refrec = errmalloc(sizeof(*refrec)); 4231207141Sjeff refrec->jr_op = JOP_ADDREF; 4232207141Sjeff refrec->jr_ino = mvrec->jm_ino; 4233207141Sjeff refrec->jr_parent = mvrec->jm_parent; 4234207141Sjeff refrec->jr_diroff = mvrec->jm_newoff; 4235207141Sjeff refrec->jr_mode = rrn->jr_mode; 4236207141Sjeff refrec->jr_nlink = rrn->jr_nlink; 4237207141Sjeff suj_build_ino(refrec); 4238207141Sjeff break; 4239207141Sjeff } 4240207141Sjeff /* 4241207141Sjeff * Add this mvrec to the queue of pending mvs. 4242207141Sjeff */ 4243207141Sjeff for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; 4244207141Sjeff srn = TAILQ_PREV(srn, srechd, sr_next)) { 4245207141Sjeff mvrn = (struct jmvrec *)srn->sr_rec; 4246207141Sjeff if (mvrn->jm_parent != mvrec->jm_parent || 4247207141Sjeff mvrn->jm_newoff != mvrec->jm_oldoff) 4248207141Sjeff continue; 4249207141Sjeff mvrn->jm_newoff = mvrec->jm_newoff; 4250207141Sjeff return; 4251207141Sjeff } 4252207141Sjeff srec = errmalloc(sizeof(*srec)); 4253207141Sjeff srec->sr_rec = (union jrec *)mvrec; 4254207141Sjeff TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); 4255207141Sjeff} 4256207141Sjeff 4257207141Sjeff/* 4258207141Sjeff * Modify journal records so they refer to the base block number 4259207141Sjeff * and a start and end frag range. This is to facilitate the discovery 4260207141Sjeff * of overlapping fragment allocations. 
4261207141Sjeff */ 4262207141Sjeffstatic void 4263207141Sjeffsuj_build_blk(struct jblkrec *blkrec) 4264207141Sjeff{ 4265207141Sjeff struct suj_rec *srec; 4266207141Sjeff struct suj_blk *sblk; 4267207141Sjeff struct jblkrec *blkrn; 4268207141Sjeff ufs2_daddr_t blk; 4269207141Sjeff int frag; 4270207141Sjeff 4271207141Sjeff if (debug) 4272207141Sjeff printf("suj_build_blk: op %d blkno %jd frags %d oldfrags %d " 4273207141Sjeff "ino %d lbn %jd\n", 4274207141Sjeff blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, 4275207141Sjeff blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); 4276207141Sjeff blk = blknum(fs, blkrec->jb_blkno); 4277207141Sjeff frag = fragnum(fs, blkrec->jb_blkno); 4278207141Sjeff sblk = blk_lookup(blk, 1); 4279207141Sjeff /* 4280207141Sjeff * Rewrite the record using oldfrags to indicate the offset into 4281207141Sjeff * the block. Leave jb_frags as the actual allocated count. 4282207141Sjeff */ 4283207141Sjeff blkrec->jb_blkno -= frag; 4284207141Sjeff blkrec->jb_oldfrags = frag; 4285207141Sjeff if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) 4286207141Sjeff errx(1, "Invalid fragment count %d oldfrags %d", 4287207141Sjeff blkrec->jb_frags, frag); 4288207141Sjeff /* 4289207141Sjeff * Detect dups. If we detect a dup we always discard the oldest 4290207141Sjeff * record as it is superseded by the new record. This speeds up 4291207141Sjeff * later stages but also eliminates free records which are used 4292207141Sjeff * to indicate that the contents of indirects can be trusted. 
4293207141Sjeff */ 4294207141Sjeff TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 4295207141Sjeff blkrn = (struct jblkrec *)srec->sr_rec; 4296207141Sjeff if (blkrn->jb_ino != blkrec->jb_ino || 4297207141Sjeff blkrn->jb_lbn != blkrec->jb_lbn || 4298207141Sjeff blkrn->jb_blkno != blkrec->jb_blkno || 4299207141Sjeff blkrn->jb_frags != blkrec->jb_frags || 4300207141Sjeff blkrn->jb_oldfrags != blkrec->jb_oldfrags) 4301207141Sjeff continue; 4302207141Sjeff if (debug) 4303207141Sjeff printf("Removed dup.\n"); 4304207141Sjeff /* Discard the free which is a dup with an alloc. */ 4305207141Sjeff if (blkrec->jb_op == JOP_FREEBLK) 4306207141Sjeff return; 4307207141Sjeff TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); 4308207141Sjeff free(srec); 4309207141Sjeff break; 4310207141Sjeff } 4311207141Sjeff srec = errmalloc(sizeof(*srec)); 4312207141Sjeff srec->sr_rec = (union jrec *)blkrec; 4313207141Sjeff TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); 4314207141Sjeff} 4315207141Sjeff 4316207141Sjeff/* 4317207141Sjeff * Build up tables of the operations we need to recover. 4318207141Sjeff */ 4319207141Sjeffstatic void 4320207141Sjeffsuj_build(void) 4321207141Sjeff{ 4322207141Sjeff struct suj_seg *seg; 4323207141Sjeff union jrec *rec; 4324207141Sjeff int i; 4325207141Sjeff 4326207141Sjeff TAILQ_FOREACH(seg, &allsegs, ss_next) { 4327207141Sjeff rec = (union jrec *)seg->ss_blk; 4328207141Sjeff rec++; /* skip the segrec. 
*/ 4329207141Sjeff if (debug) 4330207141Sjeff printf("seg %jd has %d records, oldseq %jd.\n", 4331207141Sjeff seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, 4332207141Sjeff seg->ss_rec.jsr_oldest); 4333207141Sjeff for (i = 0; i < seg->ss_rec.jsr_cnt; i++, rec++) { 4334207141Sjeff switch (rec->rec_jrefrec.jr_op) { 4335207141Sjeff case JOP_ADDREF: 4336207141Sjeff case JOP_REMREF: 4337207141Sjeff suj_build_ino((struct jrefrec *)rec); 4338207141Sjeff break; 4339207141Sjeff case JOP_MVREF: 4340207141Sjeff suj_move_ino((struct jmvrec *)rec); 4341207141Sjeff break; 4342207141Sjeff case JOP_NEWBLK: 4343207141Sjeff case JOP_FREEBLK: 4344207141Sjeff suj_build_blk((struct jblkrec *)rec); 4345207141Sjeff break; 4346207141Sjeff default: 4347207141Sjeff errx(1, "Unknown journal operation %d (%d)", 4348207141Sjeff rec->rec_jrefrec.jr_op, i); 4349207141Sjeff } 4350207141Sjeff } 4351207141Sjeff } 4352207141Sjeff} 4353207141Sjeff 4354207141Sjeff/* 4355207141Sjeff * Prune the journal segments to those we care about based on the 4356207141Sjeff * oldest sequence in the newest segment. Order the segment list 4357207141Sjeff * based on sequence number. 4358207141Sjeff */ 4359207141Sjeffstatic void 4360207141Sjeffsuj_prune(void) 4361207141Sjeff{ 4362207141Sjeff struct suj_seg *seg; 4363207141Sjeff struct suj_seg *segn; 4364207141Sjeff uint64_t newseq; 4365207141Sjeff int discard; 4366207141Sjeff 4367207141Sjeff if (debug) 4368207141Sjeff printf("Pruning up to %jd\n", oldseq); 4369207141Sjeff /* First free the expired segments. */ 4370207141Sjeff TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 4371207141Sjeff if (seg->ss_rec.jsr_seq >= oldseq) 4372207141Sjeff continue; 4373207141Sjeff TAILQ_REMOVE(&allsegs, seg, ss_next); 4374207141Sjeff free(seg->ss_blk); 4375207141Sjeff free(seg); 4376207141Sjeff } 4377207141Sjeff /* Next ensure that segments are ordered properly. 
*/ 4378207141Sjeff seg = TAILQ_FIRST(&allsegs); 4379207141Sjeff if (seg == NULL) { 4380207141Sjeff if (debug) 4381207141Sjeff printf("Empty journal\n"); 4382207141Sjeff return; 4383207141Sjeff } 4384207141Sjeff newseq = seg->ss_rec.jsr_seq; 4385207141Sjeff for (;;) { 4386207141Sjeff seg = TAILQ_LAST(&allsegs, seghd); 4387207141Sjeff if (seg->ss_rec.jsr_seq >= newseq) 4388207141Sjeff break; 4389207141Sjeff TAILQ_REMOVE(&allsegs, seg, ss_next); 4390207141Sjeff TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); 4391207141Sjeff newseq = seg->ss_rec.jsr_seq; 4392207141Sjeff 4393207141Sjeff } 4394207141Sjeff if (newseq != oldseq) 4395207141Sjeff errx(1, "Journal file sequence mismatch %jd != %jd", 4396207141Sjeff newseq, oldseq); 4397207141Sjeff /* 4398207141Sjeff * The kernel may asynchronously write segments which can create 4399207141Sjeff * gaps in the sequence space. Throw away any segments after the 4400207141Sjeff * gap as the kernel guarantees only those that are contiguously 4401207141Sjeff * reachable are marked as completed. 4402207141Sjeff */ 4403207141Sjeff discard = 0; 4404207141Sjeff TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 4405207141Sjeff if (!discard && newseq++ == seg->ss_rec.jsr_seq) 4406207141Sjeff continue; 4407207141Sjeff discard = 1; 4408207141Sjeff if (debug) 4409207141Sjeff printf("Journal order mismatch %jd != %jd pruning\n", 4410207141Sjeff newseq-1, seg->ss_rec.jsr_seq); 4411207141Sjeff TAILQ_REMOVE(&allsegs, seg, ss_next); 4412207141Sjeff free(seg->ss_blk); 4413207141Sjeff free(seg); 4414207141Sjeff } 4415207141Sjeff if (debug) 4416207141Sjeff printf("Processing journal segments from %jd to %jd\n", 4417207141Sjeff oldseq, newseq-1); 4418207141Sjeff} 4419207141Sjeff 4420207141Sjeff/* 4421207141Sjeff * Verify the journal inode before attempting to read records. 
4422207141Sjeff */ 4423207141Sjeffstatic void 4424207141Sjeffsuj_verifyino(union dinode *ip) 4425207141Sjeff{ 4426207141Sjeff 4427207141Sjeff if (DIP(ip, di_nlink) != 1) 4428207141Sjeff errx(1, "Invalid link count %d for journal inode %d", 4429207141Sjeff DIP(ip, di_nlink), fs->fs_sujournal); 4430207141Sjeff 4431207141Sjeff if (DIP(ip, di_mode) != IFREG) 4432207141Sjeff errx(1, "Invalid mode %d for journal inode %d", 4433207141Sjeff DIP(ip, di_mode), fs->fs_sujournal); 4434207141Sjeff 4435207141Sjeff if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) 4436207141Sjeff errx(1, "Invalid size %jd for journal inode %d", 4437207141Sjeff DIP(ip, di_size), fs->fs_sujournal); 4438207141Sjeff 4439207141Sjeff if (DIP(ip, di_modrev) != fs->fs_mtime) 4440207141Sjeff errx(1, "Journal timestamp does not match fs mount time"); 4441207141Sjeff /* XXX Add further checks. */ 4442207141Sjeff} 4443207141Sjeff 4444207141Sjeffstruct jblocks { 4445207141Sjeff struct jextent *jb_extent; /* Extent array. */ 4446207141Sjeff int jb_avail; /* Available extents. */ 4447207141Sjeff int jb_used; /* Last used extent. */ 4448207141Sjeff int jb_head; /* Allocator head. */ 4449207141Sjeff int jb_off; /* Allocator extent offset. */ 4450207141Sjeff}; 4451207141Sjeffstruct jextent { 4452207141Sjeff ufs2_daddr_t je_daddr; /* Disk block address. */ 4453207141Sjeff int je_blocks; /* Disk block count. 
*/ 4454207141Sjeff}; 4455207141Sjeff 4456207141Sjeffstruct jblocks *suj_jblocks; 4457207141Sjeff 4458207141Sjeffstatic struct jblocks * 4459207141Sjeffjblocks_create(void) 4460207141Sjeff{ 4461207141Sjeff struct jblocks *jblocks; 4462207141Sjeff int size; 4463207141Sjeff 4464207141Sjeff jblocks = errmalloc(sizeof(*jblocks)); 4465207141Sjeff jblocks->jb_avail = 10; 4466207141Sjeff jblocks->jb_used = 0; 4467207141Sjeff jblocks->jb_head = 0; 4468207141Sjeff jblocks->jb_off = 0; 4469207141Sjeff size = sizeof(struct jextent) * jblocks->jb_avail; 4470207141Sjeff jblocks->jb_extent = errmalloc(size); 4471207141Sjeff bzero(jblocks->jb_extent, size); 4472207141Sjeff 4473207141Sjeff return (jblocks); 4474207141Sjeff} 4475207141Sjeff 4476207141Sjeff/* 4477207141Sjeff * Return the next available disk block and the amount of contiguous 4478207141Sjeff * free space it contains. 4479207141Sjeff */ 4480207141Sjeffstatic ufs2_daddr_t 4481207141Sjeffjblocks_next(struct jblocks *jblocks, int bytes, int *actual) 4482207141Sjeff{ 4483207141Sjeff struct jextent *jext; 4484207141Sjeff ufs2_daddr_t daddr; 4485207141Sjeff int freecnt; 4486207141Sjeff int blocks; 4487207141Sjeff 4488207141Sjeff blocks = bytes / DEV_BSIZE; 4489207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_head]; 4490207141Sjeff freecnt = jext->je_blocks - jblocks->jb_off; 4491207141Sjeff if (freecnt == 0) { 4492207141Sjeff jblocks->jb_off = 0; 4493207141Sjeff if (++jblocks->jb_head > jblocks->jb_used) 4494207141Sjeff return (0); 4495207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_head]; 4496207141Sjeff freecnt = jext->je_blocks; 4497207141Sjeff } 4498207141Sjeff if (freecnt > blocks) 4499207141Sjeff freecnt = blocks; 4500207141Sjeff *actual = freecnt * DEV_BSIZE; 4501207141Sjeff daddr = jext->je_daddr + jblocks->jb_off; 4502207141Sjeff 4503207141Sjeff return (daddr); 4504207141Sjeff} 4505207141Sjeff 4506207141Sjeff/* 4507207141Sjeff * Advance the allocation head by a specified number of bytes, consuming 
4508207141Sjeff * one journal segment. 4509207141Sjeff */ 4510207141Sjeffstatic void 4511207141Sjeffjblocks_advance(struct jblocks *jblocks, int bytes) 4512207141Sjeff{ 4513207141Sjeff 4514207141Sjeff jblocks->jb_off += bytes / DEV_BSIZE; 4515207141Sjeff} 4516207141Sjeff 4517207141Sjeffstatic void 4518207141Sjeffjblocks_destroy(struct jblocks *jblocks) 4519207141Sjeff{ 4520207141Sjeff 4521207141Sjeff free(jblocks->jb_extent); 4522207141Sjeff free(jblocks); 4523207141Sjeff} 4524207141Sjeff 4525207141Sjeffstatic void 4526207141Sjeffjblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) 4527207141Sjeff{ 4528207141Sjeff struct jextent *jext; 4529207141Sjeff int size; 4530207141Sjeff 4531207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_used]; 4532207141Sjeff /* Adding the first block. */ 4533207141Sjeff if (jext->je_daddr == 0) { 4534207141Sjeff jext->je_daddr = daddr; 4535207141Sjeff jext->je_blocks = blocks; 4536207141Sjeff return; 4537207141Sjeff } 4538207141Sjeff /* Extending the last extent. */ 4539207141Sjeff if (jext->je_daddr + jext->je_blocks == daddr) { 4540207141Sjeff jext->je_blocks += blocks; 4541207141Sjeff return; 4542207141Sjeff } 4543207141Sjeff /* Adding a new extent. */ 4544207141Sjeff if (++jblocks->jb_used == jblocks->jb_avail) { 4545207141Sjeff jblocks->jb_avail *= 2; 4546207141Sjeff size = sizeof(struct jextent) * jblocks->jb_avail; 4547207141Sjeff jext = errmalloc(size); 4548207141Sjeff bzero(jext, size); 4549207141Sjeff bcopy(jblocks->jb_extent, jext, 4550207141Sjeff sizeof(struct jextent) * jblocks->jb_used); 4551207141Sjeff free(jblocks->jb_extent); 4552207141Sjeff jblocks->jb_extent = jext; 4553207141Sjeff } 4554207141Sjeff jext = &jblocks->jb_extent[jblocks->jb_used]; 4555207141Sjeff jext->je_daddr = daddr; 4556207141Sjeff jext->je_blocks = blocks; 4557207141Sjeff 4558207141Sjeff return; 4559207141Sjeff} 4560207141Sjeff 4561207141Sjeff/* 4562207141Sjeff * Add a file block from the journal to the extent map. 
We can't read 4563207141Sjeff * each file block individually because the kernel treats it as a circular 4564207141Sjeff * buffer and segments may span mutliple contiguous blocks. 4565207141Sjeff */ 4566207141Sjeffstatic void 4567207141Sjeffsuj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 4568207141Sjeff{ 4569207141Sjeff 4570207141Sjeff jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); 4571207141Sjeff} 4572207141Sjeff 4573207141Sjeffstatic void 4574207141Sjeffsuj_read(void) 4575207141Sjeff{ 4576207141Sjeff uint8_t block[1 * 1024 * 1024]; 4577207141Sjeff struct suj_seg *seg; 4578207141Sjeff struct jsegrec *rec; 4579207141Sjeff ufs2_daddr_t blk; 4580207141Sjeff int recsize; 4581207141Sjeff int size; 4582207141Sjeff 4583207141Sjeff /* 4584207141Sjeff * Read records until we exhaust the journal space. If we find 4585207141Sjeff * an invalid record we start searching for a valid segment header 4586207141Sjeff * at the next block. This is because we don't have a head/tail 4587207141Sjeff * pointer and must recover the information indirectly. At the gap 4588207141Sjeff * between the head and tail we won't necessarily have a valid 4589207141Sjeff * segment. 4590207141Sjeff */ 4591207141Sjeff for (;;) { 4592207141Sjeff size = sizeof(block); 4593207141Sjeff blk = jblocks_next(suj_jblocks, size, &size); 4594207141Sjeff if (blk == 0) 4595207141Sjeff return; 4596207141Sjeff /* 4597207141Sjeff * Read 1MB at a time and scan for records within this block. 
4598207141Sjeff */ 4599207141Sjeff if (bread(disk, blk, &block, size) == -1) 4600207141Sjeff err(1, "Error reading journal block %jd", 4601207141Sjeff (intmax_t)blk); 4602207141Sjeff for (rec = (void *)block; size; size -= recsize, 4603207141Sjeff rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { 4604207141Sjeff recsize = DEV_BSIZE; 4605207141Sjeff if (rec->jsr_time != fs->fs_mtime) { 4606207141Sjeff if (debug) 4607207141Sjeff printf("Rec time %jd != fs mtime %jd\n", 4608207141Sjeff rec->jsr_time, fs->fs_mtime); 4609207141Sjeff jblocks_advance(suj_jblocks, recsize); 4610207141Sjeff continue; 4611207141Sjeff } 4612207141Sjeff if (rec->jsr_cnt == 0) { 4613207141Sjeff if (debug) 4614207141Sjeff printf("Found illegal count %d\n", 4615207141Sjeff rec->jsr_cnt); 4616207141Sjeff jblocks_advance(suj_jblocks, recsize); 4617207141Sjeff continue; 4618207141Sjeff } 4619207141Sjeff recsize = roundup2((rec->jsr_cnt + 1) * JREC_SIZE, 4620207141Sjeff DEV_BSIZE); 4621207141Sjeff if (recsize > size) { 4622207141Sjeff /* 4623207141Sjeff * We may just have run out of buffer, restart 4624207141Sjeff * the loop to re-read from this spot. 
4625207141Sjeff */ 4626207141Sjeff if (size < fs->fs_bsize && 4627207141Sjeff recsize <= fs->fs_bsize) { 4628207141Sjeff recsize = size; 4629207141Sjeff continue; 4630207141Sjeff } 4631207141Sjeff if (debug) 4632207141Sjeff printf("Found invalid segsize %d > %d\n", 4633207141Sjeff recsize, size); 4634207141Sjeff recsize = DEV_BSIZE; 4635207141Sjeff jblocks_advance(suj_jblocks, recsize); 4636207141Sjeff continue; 4637207141Sjeff } 4638207141Sjeff seg = errmalloc(sizeof(*seg)); 4639207141Sjeff seg->ss_blk = errmalloc(recsize); 4640207141Sjeff seg->ss_rec = *rec; 4641207141Sjeff bcopy((void *)rec, seg->ss_blk, recsize); 4642207141Sjeff if (rec->jsr_oldest > oldseq) 4643207141Sjeff oldseq = rec->jsr_oldest; 4644207141Sjeff TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); 4645207141Sjeff jrecs += rec->jsr_cnt; 4646207141Sjeff jbytes += recsize; 4647207141Sjeff jblocks_advance(suj_jblocks, recsize); 4648207141Sjeff } 4649207141Sjeff } 4650207141Sjeff} 4651207141Sjeff 4652207141Sjeff/* 4653207141Sjeff * Orchestrate the verification of a filesystem via the softupdates journal. 4654207141Sjeff */ 4655207141Sjeffvoid 4656207141Sjeffsuj_check(const char *filesys) 4657207141Sjeff{ 4658207141Sjeff union dinode *jip; 4659207141Sjeff uint64_t blocks; 4660207141Sjeff 4661207141Sjeff opendisk(filesys); 4662207141Sjeff TAILQ_INIT(&allsegs); 4663207141Sjeff /* 4664207141Sjeff * Fetch the journal inode and verify it. 4665207141Sjeff */ 4666207141Sjeff jip = ino_read(fs->fs_sujournal); 4667207141Sjeff printf("SU+J Checking %s\n", filesys); 4668207141Sjeff suj_verifyino(jip); 4669207141Sjeff /* 4670207141Sjeff * Build a list of journal blocks in jblocks before parsing the 4671207141Sjeff * available journal blocks in with suj_read(). 
4672207141Sjeff */ 4673207141Sjeff printf("Reading %jd byte journal from inode %d.\n", 4674207141Sjeff DIP(jip, di_size), fs->fs_sujournal); 4675207141Sjeff suj_jblocks = jblocks_create(); 4676207141Sjeff blocks = ino_visit(jip, fs->fs_sujournal, suj_add_block, 0); 4677207141Sjeff if (blocks != numfrags(fs, DIP(jip, di_size))) 4678207141Sjeff errx(1, "Sparse journal inode %d.\n", fs->fs_sujournal); 4679207141Sjeff suj_read(); 4680207141Sjeff jblocks_destroy(suj_jblocks); 4681207141Sjeff suj_jblocks = NULL; 4682207141Sjeff if (reply("RECOVER")) { 4683207141Sjeff printf("Building recovery table.\n"); 4684207141Sjeff suj_prune(); 4685207141Sjeff suj_build(); 4686207141Sjeff printf("Resolving unreferenced inode list.\n"); 4687207141Sjeff ino_unlinked(); 4688207141Sjeff printf("Processing journal entries.\n"); 4689207141Sjeff cg_apply(cg_check); 4690207141Sjeff } 4691207141Sjeff if (reply("WRITE CHANGES")) 4692207141Sjeff cg_apply(cg_write); 4693207141Sjeff printf("%jd journal records in %jd bytes for %.2f%% utilization\n", 4694207141Sjeff jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); 4695207141Sjeff printf("Freed %jd inodes (%jd directories) %jd blocks and %jd frags.\n", 4696207141Sjeff freeinos, freedir, freeblocks, freefrags); 4697207141Sjeff /* Write back superblock. */ 4698207141Sjeff closedisk(filesys); 4699207141Sjeff} 4700