ufs_inode.h revision 9915:bc9126487a5f
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27/* All Rights Reserved */ 28 29/* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 
37 */ 38 39#ifndef _SYS_FS_UFS_INODE_H 40#define _SYS_FS_UFS_INODE_H 41 42#include <sys/isa_defs.h> 43#include <sys/fbuf.h> 44#include <sys/fdbuffer.h> 45#include <sys/fcntl.h> 46#include <sys/uio.h> 47#include <sys/t_lock.h> 48#include <sys/thread.h> 49#include <sys/cred.h> 50#include <sys/time.h> 51#include <sys/types32.h> 52#include <sys/fs/ufs_fs.h> 53#include <sys/fs/ufs_lockfs.h> 54#include <sys/fs/ufs_trans.h> 55#include <sys/kstat.h> 56#include <sys/fs/ufs_acl.h> 57#include <sys/fs/ufs_panic.h> 58#include <sys/dnlc.h> 59 60#ifdef _KERNEL 61#include <sys/vfs_opreg.h> 62#endif 63 64#ifdef __cplusplus 65extern "C" { 66#endif 67 68/* 69 * The I node is the focus of all local file activity in UNIX. 70 * There is a unique inode allocated for each active file, 71 * each current directory, each mounted-on file, each mapping, 72 * and the root. An inode is `named' by its dev/inumber pair. 73 * Data in icommon is read in from permanent inode on volume. 74 * 75 * Each inode has 5 locks associated with it: 76 * i_rwlock: Serializes ufs_write and ufs_setattr request 77 * and allows ufs_read requests to proceed in parallel. 78 * Serializes reads/updates to directories. 79 * vfs_dqrwlock: Manages quota sub-system quiescence. See below. 80 * i_contents: Protects almost all of the fields in the inode 81 * except for those listed below. When held 82 * in writer mode also protects those fields 83 * listed under i_tlock. 84 * i_tlock: When i_tlock is held with the i_contents reader 85 * lock the i_atime, i_mtime, i_ctime, 86 * i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag 87 * i_seq, i_writer & i_mapcnt fields are protected. 88 * For more i_flag locking info see below. 
 *	ih_lock:	Protects inode hash chain buckets
 *	ifree_lock:	Protects inode freelist
 *
 * Lock ordering:
 *	i_rwlock > i_contents > i_tlock
 *	i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock
 *	i_contents > i_tlock
 *	vfs_dqrwlock > i_contents(writer) > i_tlock
 *	ih_lock > i_contents > i_tlock
 *
 * Making major changes to quota sub-system state, while the file
 * system is mounted, required the addition of another lock.  The
 * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs
 * structure.  This lock is used to manage quota sub-system quiescence
 * for a particular file system.  Major changes to quota sub-system
 * state (disabling quotas, enabling quotas, and setting new quota
 * limits) all require the file system to be quiescent and grabbing
 * vfs_dqrwlock as writer accomplishes this.  On the other hand,
 * grabbing vfs_dqrwlock as reader makes the quota sub-system
 * non-quiescent and lets the quota sub-system know that now is not a
 * good time to change major quota sub-system state.  Typically
 * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for
 * writing.  However, there are cases where vfs_dqrwlock is grabbed for
 * reading without a corresponding i_contents write grab because there
 * is no relevant inode.  There are also cases where i_contents is
 * grabbed for writing when a vfs_dqrwlock read grab is not needed
 * because the inode changes do not affect quotas.
 *
 * Unfortunately, performance considerations have required that we be more
 * intelligent about using i_tlock when updating i_flag.  Ideally, we would
 * have simply separated out several of the bits in i_flag into their own
 * ints to avoid problems.  But, instead, we have implemented the following
 * rules:
 *
 *	o You can update any i_flag field while holding the writer-contents,
 *	  or by holding the reader-contents AND holding i_tlock.
 *	  You can only call ITIMES_NOLOCK while holding the writer-contents,
 *	  or by holding the reader-contents AND holding i_tlock.
 *
 *	o For a directory, holding the reader-rw_lock is sufficient for setting
 *	  IACC.
 *
 *	o Races with IREF are avoided by holding the reader contents lock
 *	  and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage.
 *	  And by holding the writer-contents in ufs_iinactive.
 *
 *	o The callers are no longer required to handle the calls to ITIMES
 *	  and ITIMES_NOLOCK.  The functions that set the i_flag bits are
 *	  responsible for managing those calls.  The exceptions are the
 *	  bmap routines.
 *
 * SVR4 Extended Fundamental Type (EFT) support:
 *	The inode structure has been enhanced to support
 *	32-bit user-id, 32-bit group-id, and 32-bit device number.
 *	Standard SVR4 ufs also supports 32-bit mode field.  For the reason
 *	of backward compatibility with the previous ufs disk format,
 *	32-bit mode field is not supported.
 *
 *	The current inode structure is 100% backward compatible with
 *	the previous inode structure if no user-id or group-id exceeds
 *	USHRT_MAX, and no major or minor number of a device number
 *	stored in an inode exceeds 255.
 *
 * Rules for managing i_seq:
 *	o i_seq is locked under the same rules as i_flag
 *	o The i_ctime or i_mtime MUST never change without increasing
 *	  the value of i_seq.
 *	o You may increase the value of i_seq without the timestamps
 *	  changing; this may decrease the caller's performance but will
 *	  be functionally correct.
 *	o The common case is when IUPD or ICHG is set, increase i_seq
 *	  and immediately call ITIMES* or ufs_iupdat to create a new timestamp.
 *	o A less common case is the setting of IUPD or ICHG and while still
 *	  holding the correct lock defer the timestamp and i_seq update
 *	  until later, but it must still be done before the lock is released.
164 * bmap_write is an example of this, where the caller does the update. 165 * o If multiple changes are being made with the timestamps being 166 * updated only at the end, a single increase of i_seq is allowed. 167 * o If changes are made with IUPD or ICHG being set, but 168 * the controlling lock is being dropped before the timestamp is 169 * updated, there is a risk that another thread will also change 170 * the file, update i_flag, and push just one timestamp update. 171 * There is also the risk that another thread calls ITIMES or 172 * ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq, 173 * this will cause ufs_imark to change the timestamps without changing 174 * i_seq. If the controlling lock is dropped, ISEQ must be set to 175 * force i_seq to be increased on next ufs_imark, but i_seq MUST still 176 * be increased by the original setting thread before its deferred 177 * call to ITIMES to insure it is increased the correct number of times. 178 */ 179 180#define UID_LONG (o_uid_t)65535 181 /* flag value to indicate uid is 32-bit long */ 182#define GID_LONG (o_uid_t)65535 183 /* flag value to indicate gid is 32-bit long */ 184 185#define NDADDR 12 /* direct addresses in inode */ 186#define NIADDR 3 /* indirect addresses in inode */ 187#define FSL_SIZE (NDADDR + NIADDR - 1) * sizeof (daddr32_t) 188 /* max fast symbolic name length is 56 */ 189 190#define i_fs i_ufsvfs->vfs_bufp->b_un.b_fs 191#define i_vfs i_vnode->v_vfsp 192 193struct icommon { 194 o_mode_t ic_smode; /* 0: mode and type of file */ 195 short ic_nlink; /* 2: number of links to file */ 196 o_uid_t ic_suid; /* 4: owner's user id */ 197 o_gid_t ic_sgid; /* 6: owner's group id */ 198 u_offset_t ic_lsize; /* 8: number of bytes in file */ 199#ifdef _KERNEL 200 struct timeval32 ic_atime; /* 16: time last accessed */ 201 struct timeval32 ic_mtime; /* 24: time last modified */ 202 struct timeval32 ic_ctime; /* 32: last time inode changed */ 203#else 204 time32_t ic_atime; /* 16: time last 
accessed */ 205 int32_t ic_atspare; 206 time32_t ic_mtime; /* 24: time last modified */ 207 int32_t ic_mtspare; 208 time32_t ic_ctime; /* 32: last time inode changed */ 209 int32_t ic_ctspare; 210#endif 211 daddr32_t ic_db[NDADDR]; /* 40: disk block addresses */ 212 daddr32_t ic_ib[NIADDR]; /* 88: indirect blocks */ 213 int32_t ic_flags; /* 100: cflags */ 214 int32_t ic_blocks; /* 104: 512 byte blocks actually held */ 215 int32_t ic_gen; /* 108: generation number */ 216 int32_t ic_shadow; /* 112: shadow inode */ 217 uid_t ic_uid; /* 116: long EFT version of uid */ 218 gid_t ic_gid; /* 120: long EFT version of gid */ 219 uint32_t ic_oeftflag; /* 124: extended attr directory ino, 0 = none */ 220}; 221 222/* 223 * Large directories can be cached. Directory caching can take the following 224 * states: 225 */ 226typedef enum { 227 CD_DISABLED_NOMEM = -2, 228 CD_DISABLED_TOOBIG, 229 CD_DISABLED, 230 CD_ENABLED 231} cachedir_t; 232 233/* 234 * Large Files: Note we use the inline functions load_double, store_double 235 * to load and store the long long values of i_size. Therefore the 236 * address of i_size must be eight byte aligned. Kmem_alloc of incore 237 * inode structure makes sure that the structure is 8-byte aligned. 238 * XX64 - reorder this structure? 
239 */ 240typedef struct inode { 241 struct inode *i_chain[2]; /* must be first */ 242 struct inode *i_freef; /* free list forward - must be before i_ic */ 243 struct inode *i_freeb; /* free list back - must be before i_ic */ 244 struct icommon i_ic; /* Must be here */ 245 struct vnode *i_vnode; /* vnode associated with this inode */ 246 struct vnode *i_devvp; /* vnode for block I/O */ 247 dev_t i_dev; /* device where inode resides */ 248 ino_t i_number; /* i number, 1-to-1 with device address */ 249 off_t i_diroff; /* offset in dir, where we found last entry */ 250 /* just a hint - no locking needed */ 251 struct ufsvfs *i_ufsvfs; /* incore fs associated with inode */ 252 struct dquot *i_dquot; /* quota structure controlling this file */ 253 krwlock_t i_rwlock; /* serializes write/setattr requests */ 254 krwlock_t i_contents; /* protects (most of) inode contents */ 255 kmutex_t i_tlock; /* protects time fields, i_flag */ 256 offset_t i_nextr; /* */ 257 /* next byte read offset (read-ahead) */ 258 /* No lock required */ 259 /* */ 260 uint_t i_flag; /* inode flags */ 261 uint_t i_seq; /* modification sequence number */ 262 cachedir_t i_cachedir; /* Cache this directory on next lookup */ 263 /* - no locking needed */ 264 long i_mapcnt; /* mappings to file pages */ 265 int *i_map; /* block list for the corresponding file */ 266 dev_t i_rdev; /* INCORE rdev from i_oldrdev by ufs_iget */ 267 size_t i_delaylen; /* delayed writes, units=bytes */ 268 offset_t i_delayoff; /* where we started delaying */ 269 offset_t i_nextrio; /* where to start the next clust */ 270 long i_writes; /* number of outstanding bytes in write q */ 271 kcondvar_t i_wrcv; /* sleep/wakeup for write throttle */ 272 offset_t i_doff; /* dinode byte offset in file system */ 273 si_t *i_ufs_acl; /* pointer to acl entry */ 274 dcanchor_t i_danchor; /* directory cache anchor */ 275 kthread_t *i_writer; /* thread which is in window in wrip() */ 276} inode_t; 277 278struct dinode { 279 union { 280 struct 
icommon di_icom; 281 char di_size[128]; 282 } di_un; 283}; 284 285#define i_mode i_ic.ic_smode 286#define i_nlink i_ic.ic_nlink 287#define i_uid i_ic.ic_uid 288#define i_gid i_ic.ic_gid 289#define i_smode i_ic.ic_smode 290#define i_suid i_ic.ic_suid 291#define i_sgid i_ic.ic_sgid 292 293#define i_size i_ic.ic_lsize 294#define i_db i_ic.ic_db 295#define i_ib i_ic.ic_ib 296 297#define i_atime i_ic.ic_atime 298#define i_mtime i_ic.ic_mtime 299#define i_ctime i_ic.ic_ctime 300 301#define i_shadow i_ic.ic_shadow 302#define i_oeftflag i_ic.ic_oeftflag 303#define i_blocks i_ic.ic_blocks 304#define i_cflags i_ic.ic_flags 305#ifdef _LITTLE_ENDIAN 306/* 307 * Originally done on x86, but carried on to all other little 308 * architectures, which provides for file system compatibility. 309 */ 310#define i_ordev i_ic.ic_db[1] /* USL SVR4 compatibility */ 311#else 312#define i_ordev i_ic.ic_db[0] /* was i_oldrdev */ 313#endif 314#define i_gen i_ic.ic_gen 315#define i_forw i_chain[0] 316#define i_back i_chain[1] 317 318/* EFT transition aids - obsolete */ 319#define oEFT_MAGIC 0x90909090 320#define di_oeftflag di_ic.ic_oeftflag 321 322#define di_ic di_un.di_icom 323#define di_mode di_ic.ic_smode 324#define di_nlink di_ic.ic_nlink 325#define di_uid di_ic.ic_uid 326#define di_gid di_ic.ic_gid 327#define di_smode di_ic.ic_smode 328#define di_suid di_ic.ic_suid 329#define di_sgid di_ic.ic_sgid 330 331#define di_size di_ic.ic_lsize 332#define di_db di_ic.ic_db 333#define di_ib di_ic.ic_ib 334 335#define di_atime di_ic.ic_atime 336#define di_mtime di_ic.ic_mtime 337#define di_ctime di_ic.ic_ctime 338#define di_cflags di_ic.ic_flags 339 340#ifdef _LITTLE_ENDIAN 341#define di_ordev di_ic.ic_db[1] 342#else 343#define di_ordev di_ic.ic_db[0] 344#endif 345#define di_shadow di_ic.ic_shadow 346#define di_blocks di_ic.ic_blocks 347#define di_gen di_ic.ic_gen 348 349/* flags */ 350#define IUPD 0x0001 /* file has been modified */ 351#define IACC 0x0002 /* inode access time to be updated */ 
352#define IMOD 0x0004 /* inode has been modified */ 353#define ICHG 0x0008 /* inode has been changed */ 354#define INOACC 0x0010 /* no access time update in getpage */ 355#define IMODTIME 0x0020 /* mod time already set */ 356#define IREF 0x0040 /* inode is being referenced */ 357#define ISYNC 0x0080 /* do all allocation synchronously */ 358#define IFASTSYMLNK 0x0100 /* fast symbolic link */ 359#define IMODACC 0x0200 /* only access time changed; */ 360 /* filesystem won't become active */ 361#define IATTCHG 0x0400 /* only size/blocks have changed */ 362#define IBDWRITE 0x0800 /* the inode has been scheduled for */ 363 /* write operation asynchronously */ 364#define ISTALE 0x1000 /* inode couldn't be read from disk */ 365#define IDEL 0x2000 /* inode is being deleted */ 366#define IDIRECTIO 0x4000 /* attempt directio */ 367#define ISEQ 0x8000 /* deferred i_seq increase */ 368#define IJUNKIQ 0x10000 /* on junk idle queue */ 369#define IQUIET 0x20000 /* No file system full messages */ 370 371/* cflags */ 372#define IXATTR 0x0001 /* extended attribute */ 373#define IFALLOCATE 0x0002 /* fallocate'd file */ 374#define ICOMPRESS 0x0004 /* compressed for dcfs - see */ 375 /* `ufs_ioctl()`_FIO_COMPRESSED */ 376 377/* modes */ 378#define IFMT 0170000 /* type of file */ 379#define IFIFO 0010000 /* named pipe (fifo) */ 380#define IFCHR 0020000 /* character special */ 381#define IFDIR 0040000 /* directory */ 382#define IFBLK 0060000 /* block special */ 383#define IFREG 0100000 /* regular */ 384#define IFLNK 0120000 /* symbolic link */ 385#define IFSHAD 0130000 /* shadow indode */ 386#define IFSOCK 0140000 /* socket */ 387#define IFATTRDIR 0160000 /* Attribute directory */ 388 389#define ISUID 04000 /* set user id on execution */ 390#define ISGID 02000 /* set group id on execution */ 391#define ISVTX 01000 /* save swapped text even after use */ 392#define IREAD 0400 /* read, write, execute permissions */ 393#define IWRITE 0200 394#define IEXEC 0100 395 396/* specify how the inode 
info is written in ufs_syncip() */ 397#define I_SYNC 1 /* wait for the inode written to disk */ 398#define I_DSYNC 2 /* wait for the inode written to disk */ 399 /* only if IATTCHG is set */ 400#define I_ASYNC 0 /* don't wait for the inode written */ 401 402/* flags passed to ufs_itrunc(), indirtrunc(), and free() */ 403#define I_FREE 0x00000001 /* inode is being freed */ 404#define I_DIR 0x00000002 /* inode is a directory */ 405#define I_IBLK 0x00000004 /* indirect block */ 406#define I_CHEAP 0x00000008 /* cheap free */ 407#define I_SHAD 0x00000010 /* inode is a shadow inode */ 408#define I_QUOTA 0x00000020 /* quota file */ 409#define I_NOCANCEL 0x40 /* Don't cancel these fragments */ 410#define I_ACCT 0x00000080 /* Update ufsvfs' unreclaimed_blocks */ 411 412/* 413 * If ufs_dircheckforname() fails to find an entry with the given name, 414 * this "slot" structure holds state for ufs_direnter_*() as to where 415 * there is space to put an entry with that name. 416 * If ufs_dircheckforname() finds an entry with the given name, this structure 417 * holds state for ufs_dirrename() and ufs_dirremove() as to where the 418 * entry is. "status" indicates what ufs_dircheckforname() found: 419 * NONE name not found, large enough free slot not found, 420 * FOUND name not found, large enough free slot found 421 * EXIST name found 422 * If ufs_dircheckforname() fails due to an error, this structure is not 423 * filled in. 424 * 425 * After ufs_dircheckforname() succeeds the values are: 426 * status offset size fbp, ep 427 * ------ ------ ---- ------- 428 * NONE end of dir needed not valid 429 * FOUND start of entry of ent both valid if fbp != NULL 430 * EXIST start of entry of prev ent valid 431 * 432 * "endoff" is set to 0 if the an entry with the given name is found, or if no 433 * free slot could be found or made; this means that the directory should not 434 * be truncated. 
If the entry was found, the search terminates so 435 * ufs_dircheckforname() didn't find out where the last valid entry in the 436 * directory was, so it doesn't know where to cut the directory off; if no free 437 * slot could be found or made, the directory has to be extended to make room 438 * for the new entry, so there's nothing to cut off. 439 * Otherwise, "endoff" is set to the larger of the offset of the last 440 * non-empty entry in the directory, or the offset at which the new entry will 441 * be placed, whichever is larger. This is used by ufs_diraddentry(); if a new 442 * entry is to be added to the directory, any complete directory blocks at the 443 * end of the directory that contain no non-empty entries are lopped off the 444 * end, thus shrinking the directory dynamically. 445 */ 446typedef enum {NONE, FOUND, EXIST} slotstat_t; 447struct ufs_slot { 448 struct direct *ep; /* pointer to slot */ 449 struct fbuf *fbp; /* dir buf where slot is */ 450 off_t offset; /* offset of area with free space */ 451 off_t endoff; /* last useful location found in search */ 452 slotstat_t status; /* status of slot */ 453 int size; /* size of area at slotoffset */ 454 int cached; /* cached directory */ 455}; 456 457/* 458 * Statistics on inodes 459 * Not protected by locks 460 */ 461struct instats { 462 kstat_named_t in_size; /* current cache size */ 463 kstat_named_t in_maxsize; /* maximum cache size */ 464 kstat_named_t in_hits; /* cache hits */ 465 kstat_named_t in_misses; /* cache misses */ 466 kstat_named_t in_malloc; /* kmem_alloce'd */ 467 kstat_named_t in_mfree; /* kmem_free'd */ 468 kstat_named_t in_maxreached; /* Largest size reached by cache */ 469 kstat_named_t in_frfront; /* # put at front of freelist */ 470 kstat_named_t in_frback; /* # put at back of freelist */ 471 kstat_named_t in_qfree; /* q's to delete thread */ 472 kstat_named_t in_scan; /* # inodes scanned */ 473 kstat_named_t in_tidles; /* # inodes idled by idle thread */ 474 kstat_named_t 
in_lidles; /* # inodes idled by ufs_lookup */ 475 kstat_named_t in_vidles; /* # inodes idled by ufs_vget */ 476 kstat_named_t in_kcalloc; /* # inodes kmem_cache_alloced */ 477 kstat_named_t in_kcfree; /* # inodes kmem_cache_freed */ 478 kstat_named_t in_poc; /* # push-on-close's */ 479}; 480 481#ifdef _KERNEL 482 483/* 484 * Extended attributes 485 */ 486 487#define XATTR_DIR_NAME "/@/" 488extern int ufs_ninode; /* high-water mark for inode cache */ 489 490extern struct vnodeops *ufs_vnodeops; /* vnode operations for ufs */ 491extern const struct fs_operation_def ufs_vnodeops_template[]; 492 493/* 494 * Convert between inode pointers and vnode pointers 495 */ 496#define VTOI(VP) ((struct inode *)(VP)->v_data) 497#define ITOV(IP) ((struct vnode *)(IP)->i_vnode) 498 499/* 500 * convert to fs 501 */ 502#define ITOF(IP) ((struct fs *)(IP)->i_fs) 503 504/* 505 * Convert between vnode types and inode formats 506 */ 507extern enum vtype iftovt_tab[]; 508 509#ifdef notneeded 510 511/* Look at sys/mode.h and os/vnode.c */ 512 513extern int vttoif_tab[]; 514 515#endif 516 517/* 518 * Mark an inode with the current (unique) timestamp. 519 * (Note that UFS's concept of time only keeps 32 bits of seconds 520 * in the on-disk format). 521 */ 522struct timeval32 iuniqtime; 523extern kmutex_t ufs_iuniqtime_lock; 524 525#define ITIMES_NOLOCK(ip) ufs_itimes_nolock(ip) 526 527#define ITIMES(ip) { \ 528 mutex_enter(&(ip)->i_tlock); \ 529 ITIMES_NOLOCK(ip); \ 530 mutex_exit(&(ip)->i_tlock); \ 531} 532 533/* 534 * The following interfaces are used to do atomic loads and stores 535 * of an inode's i_size, which is a long long data type. 536 * 537 * For LP64, we just to a load or a store - atomicity and alignment 538 * are 8-byte guaranteed. For x86 there are no such instructions, 539 * so we grab i_contents as reader to get the size; we already hold 540 * it as writer when we're setting the size. 
541 */ 542 543#ifdef _LP64 544 545#define UFS_GET_ISIZE(resultp, ip) *(resultp) = (ip)->i_size 546#define UFS_SET_ISIZE(value, ip) (ip)->i_size = (value) 547 548#else /* _LP64 */ 549 550#define UFS_GET_ISIZE(resultp, ip) \ 551 { \ 552 rw_enter(&(ip)->i_contents, RW_READER); \ 553 *(resultp) = (ip)->i_size; \ 554 rw_exit(&(ip)->i_contents); \ 555 } 556#define UFS_SET_ISIZE(value, ip) \ 557 { \ 558 ASSERT(RW_WRITE_HELD(&(ip)->i_contents)); \ 559 (ip)->i_size = (value); \ 560 } 561 562#endif /* _LP64 */ 563 564/* 565 * Allocate the specified block in the inode 566 * and make sure any in-core pages are initialized. 567 */ 568#define BMAPALLOC(ip, off, size, cr) \ 569 bmap_write((ip), (u_offset_t)(off), (size), BI_NORMAL, NULL, cr) 570 571#define ESAME (-1) /* trying to rename linked files (special) */ 572 573#define UFS_HOLE (daddr32_t)-1 /* value used when no block allocated */ 574 575/* 576 * enums 577 */ 578 579/* direnter ops */ 580enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME, DE_SYMLINK, DE_ATTRDIR}; 581 582/* dirremove ops */ 583enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; 584 585/* 586 * block initialization type for bmap_write 587 * 588 * BI_NORMAL - allocate and zero fill pages in memory 589 * BI_ALLOC_ONLY - only allocate the block, do not zero out pages in mem 590 * BI_FALLOCATE - allocate only, do not zero out pages, and store as negative 591 * block number in inode block list 592 */ 593enum bi_type { BI_NORMAL, BI_ALLOC_ONLY, BI_FALLOCATE }; 594 595/* 596 * This overlays the fid structure (see vfs.h) 597 * 598 * LP64 note: we use int32_t instead of ino_t since UFS does not use 599 * inode numbers larger than 32-bits and ufid's are passed to NFS 600 * which expects them to not grow in size beyond 10 bytes (12 including 601 * the length). 
602 */ 603struct ufid { 604 ushort_t ufid_len; 605 ushort_t ufid_flags; 606 int32_t ufid_ino; 607 int32_t ufid_gen; 608}; 609 610/* 611 * each ufs thread (see ufs_thread.c) is managed by this struct 612 */ 613struct ufs_q { 614 union uq_head { 615 void *_uq_generic; /* first entry on q */ 616 struct inode *_uq_i; 617 ufs_failure_t *_uq_uf; 618 } _uq_head; 619 int uq_ne; /* # of entries/failures found */ 620 int uq_lowat; /* thread runs when ne == lowat */ 621 int uq_hiwat; /* synchronous idle if ne >= hiwat */ 622 ushort_t uq_flags; /* flags (see below) */ 623 kcondvar_t uq_cv; /* for sleep/wakeup */ 624 kthread_id_t uq_threadp; /* thread managing this q */ 625 kmutex_t uq_mutex; /* protects this struct */ 626}; 627 628#define uq_head _uq_head._uq_generic 629#define uq_ihead _uq_head._uq_i 630#define uq_ufhead _uq_head._uq_uf 631 632/* 633 * uq_flags 634 */ 635#define UQ_EXIT (0x0001) /* q server exits at its convenience */ 636#define UQ_WAIT (0x0002) /* thread is waiting on q server */ 637#define UQ_SUSPEND (0x0004) /* request for suspension */ 638#define UQ_SUSPENDED (0x0008) /* thread has suspended itself */ 639 640/* 641 * When logging is enabled, statvfs must account for blocks and files that 642 * may be on the delete queue. Protected by ufsvfsp->vfs_delete.uq_mutex 643 */ 644struct ufs_delq_info { 645 u_offset_t delq_unreclaimed_blocks; 646 ulong_t delq_unreclaimed_files; 647}; 648 649 650/* 651 * global idle queues 652 * The queues are sized dynamically in proportion to ufs_ninode 653 * which, unless overridden, scales with the amount of memory. 654 * The idle queue is halved whenever it hits the low water mark 655 * (1/4 of ufs_ninode), but can burst to sizes much larger. The number 656 * of hash queues is currently maintained to give on average IQHASHQLEN 657 * entries when the idle queue is at the low water mark. 
658 * Note, we do not need to search along the hash queues, but use them 659 * in order to batch together geographically local inodes to allow 660 * their updates (via the log or buffer cache) to require less disk seeks. 661 * This gives an incredible performance boost for logging and a boost for 662 * non logging file systems. 663 */ 664typedef struct { 665 inode_t *i_chain[2]; /* must match inode_t, but unused */ 666 inode_t *i_freef; /* must match inode_t, idle list forward */ 667 inode_t *i_freeb; /* must match inode_t, idle list back */ 668} iqhead_t; 669 670extern struct ufs_q ufs_idle_q; /* used by global ufs idle thread */ 671extern iqhead_t *ufs_junk_iq; /* junk idle queues */ 672extern iqhead_t *ufs_useful_iq; /* useful idle queues */ 673extern int ufs_njunk_iq; /* number of entries in junk iq */ 674extern int ufs_nuseful_iq; /* number of entries in useful iq */ 675extern int ufs_niqhash; /* number of iq hash qs - power of 2 */ 676extern int ufs_iqhashmask; /* iq hash mask = ufs_niqhash - 1 */ 677 678#define IQHASHQLEN 32 /* see comments above */ 679#define INOCGSHIFT 7 /* 128 inodes per cylinder group */ 680#define IQHASH(ip) (((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask) 681#define IQNEXT(i) ((i) + 1) & ufs_iqhashmask /* next idle queue */ 682 683extern struct ufs_q ufs_hlock; /* used by global ufs hlock thread */ 684 685/* 686 * vfs_lfflags flags 687 */ 688#define UFS_LARGEFILES ((ushort_t)0x1) /* set if mount allows largefiles */ 689 690/* 691 * vfs_dfritime flags 692 */ 693#define UFS_DFRATIME 0x1 /* deferred access time */ 694 695/* 696 * UFS VFS private data. 697 * 698 * UFS file system instances may be linked on several lists. 699 * 700 * - The vfs_next field chains together every extant ufs instance; this 701 * list is rooted at ufs_instances and should be used in preference to 702 * the overall vfs list (which is properly the province of the generic 703 * file system code, not of file system implementations). 
This same list 704 * link is used during forcible unmounts to chain together instances that 705 * can't yet be completely dismantled, 706 * 707 * - The vfs_wnext field is used within ufs_update to form a work list of 708 * UFS instances to be synced out. 709 */ 710typedef struct ufsvfs { 711 struct vfs *vfs_vfs; /* back link */ 712 struct ufsvfs *vfs_next; /* instance list link */ 713 struct ufsvfs *vfs_wnext; /* work list link */ 714 struct vnode *vfs_root; /* root vnode */ 715 struct buf *vfs_bufp; /* buffer containing superblock */ 716 struct vnode *vfs_devvp; /* block device vnode */ 717 ushort_t vfs_lfflags; /* Large files (set by mount) */ 718 ushort_t vfs_qflags; /* QUOTA: filesystem flags */ 719 struct inode *vfs_qinod; /* QUOTA: pointer to quota file */ 720 uint_t vfs_btimelimit; /* QUOTA: block time limit */ 721 uint_t vfs_ftimelimit; /* QUOTA: file time limit */ 722 krwlock_t vfs_dqrwlock; /* QUOTA: protects quota fields */ 723 /* 724 * some fs local threads 725 */ 726 struct ufs_q vfs_delete; /* delayed inode delete */ 727 struct ufs_q vfs_reclaim; /* reclaim open, deleted files */ 728 729 /* 730 * This is copied from the super block at mount time. 731 */ 732 int vfs_nrpos; /* # rotational positions */ 733 /* 734 * This lock protects cg's and super block pointed at by 735 * vfs_bufp->b_fs. Locks contents of fs and cg's and contents 736 * of vfs_dio. 
737 */ 738 kmutex_t vfs_lock; 739 struct ulockfs vfs_ulockfs; /* ufs lockfs support */ 740 uint_t vfs_dio; /* delayed io (_FIODIO) */ 741 uint_t vfs_nointr; /* disallow lockfs interrupts */ 742 uint_t vfs_nosetsec; /* disallow ufs_setsecattr */ 743 uint_t vfs_syncdir; /* synchronous local directory ops */ 744 uint_t vfs_dontblock; /* don't block on forced umount */ 745 746 /* 747 * trans (logging ufs) stuff 748 */ 749 uint_t vfs_domatamap; /* set if matamap enabled */ 750 ulong_t vfs_maxacl; /* transaction stuff - max acl size */ 751 ulong_t vfs_dirsize; /* logspace for directory creation */ 752 ulong_t vfs_avgbfree; /* average free blks in cg (blkpref) */ 753 /* 754 * Some useful constants 755 */ 756 int vfs_nindirshift; /* calc. from fs_nindir */ 757 int vfs_nindiroffset; /* calc. from fs_ninidr */ 758 int vfs_ioclustsz; /* bytes in read/write cluster */ 759 int vfs_iotransz; /* max device i/o transfer size */ 760 761 vfs_ufsfx_t vfs_fsfx; /* lock/fix-on-panic support */ 762 /* 763 * More useful constants 764 */ 765 int vfs_minfrags; /* calc. 
from fs_minfree */ 766 /* 767 * Force DirectIO on all files 768 */ 769 uint_t vfs_forcedirectio; 770 /* 771 * Deferred inode time related fields 772 */ 773 clock_t vfs_iotstamp; /* last I/O timestamp */ 774 uint_t vfs_dfritime; /* deferred inode time flags */ 775 /* 776 * Some more useful info 777 */ 778 dev_t vfs_dev; /* device mounted from */ 779 struct ml_unit *vfs_log; /* pointer to embedded log struct */ 780 uint_t vfs_noatime; /* disable inode atime updates */ 781 /* 782 * snapshot stuff 783 */ 784 void *vfs_snapshot; /* snapshot handle */ 785 /* 786 * Controls logging "file system full" messages to messages file 787 */ 788 clock_t vfs_lastwhinetime; 789 790 int vfs_nolog_si; /* not logging summary info */ 791 int vfs_validfs; /* indicates mounted fs */ 792 793 /* 794 * Additional information about vfs_delete above 795 */ 796 struct ufs_delq_info vfs_delete_info; /* what's on the delete queue */ 797} ufsvfs_t; 798 799#define vfs_fs vfs_bufp->b_un.b_fs 800 801/* 802 * values for vfs_validfs 803 */ 804#define UT_UNMOUNTED 0 805#define UT_MOUNTED 1 806#define UT_HLOCKING 2 807 808/* inohsz is guaranteed to be a power of 2 */ 809#define INOHASH(ino) (((int)ino) & (inohsz - 1)) 810 811#define ISFALLOCBLK(ip, bn) \ 812 (((bn) < 0) && ((bn) % ip->i_fs->fs_frag == 0) && \ 813 ((ip)->i_cflags & IFALLOCATE && (bn) != UFS_HOLE)) 814 815union ihead { 816 union ihead *ih_head[2]; 817 struct inode *ih_chain[2]; 818}; 819 820extern union ihead *ihead; 821extern kmutex_t *ih_lock; 822extern int *ih_ne; 823extern int inohsz; 824 825extern clock_t ufs_iowait; 826 827#endif /* _KERNEL */ 828 829/* 830 * ufs function prototypes 831 */ 832#if defined(_KERNEL) && !defined(_BOOT) 833 834extern void ufs_iinit(void); 835extern int ufs_iget(struct vfs *, ino_t, struct inode **, cred_t *); 836extern int ufs_iget_alloced(struct vfs *, ino_t, struct inode **, 837 cred_t *); 838extern void ufs_reset_vnode(vnode_t *); 839extern void ufs_iinactive(struct inode *); 840extern void 
ufs_iupdat(struct inode *, int); 841extern int ufs_rmidle(struct inode *); 842extern int ufs_itrunc(struct inode *, u_offset_t, int, cred_t *); 843extern int ufs_iaccess(struct inode *, int, cred_t *, int); 844extern int rdip(struct inode *, struct uio *, int, struct cred *); 845extern int wrip(struct inode *, struct uio *, int, struct cred *); 846 847extern void ufs_imark(struct inode *); 848extern void ufs_itimes_nolock(struct inode *); 849 850extern int ufs_diraccess(struct inode *, int, struct cred *); 851extern int ufs_dirlook(struct inode *, char *, struct inode **, 852 cred_t *, int); 853extern int ufs_direnter_cm(struct inode *, char *, enum de_op, 854 struct vattr *, struct inode **, cred_t *, int); 855extern int ufs_direnter_lr(struct inode *, char *, enum de_op, 856 struct inode *, struct inode *, cred_t *, vnode_t **); 857extern int ufs_dircheckpath(ino_t, struct inode *, struct inode *, 858 struct cred *); 859extern int ufs_dirmakeinode(struct inode *, struct inode **, 860 struct vattr *, enum de_op, cred_t *); 861extern int ufs_dirremove(struct inode *, char *, struct inode *, 862 vnode_t *, enum dr_op, cred_t *, vnode_t **); 863extern int ufs_dircheckforname(struct inode *, char *, int, 864 struct ufs_slot *, struct inode **, struct cred *, int); 865extern int ufs_xattrdirempty(struct inode *, ino_t, cred_t *); 866extern int blkatoff(struct inode *, off_t, char **, struct fbuf **); 867 868extern void sbupdate(struct vfs *); 869 870extern int ufs_ialloc(struct inode *, ino_t, mode_t, struct inode **, 871 cred_t *); 872extern void ufs_ifree(struct inode *, ino_t, mode_t); 873extern void free(struct inode *, daddr_t, off_t, int); 874extern int alloc(struct inode *, daddr_t, int, daddr_t *, cred_t *); 875extern int realloccg(struct inode *, daddr_t, daddr_t, int, int, 876 daddr_t *, cred_t *); 877extern int ufs_allocsp(struct vnode *, struct flock64 *, cred_t *); 878extern int ufs_freesp(struct vnode *, struct flock64 *, int, cred_t *); 879extern ino_t 
dirpref(inode_t *); 880extern daddr_t blkpref(struct inode *, daddr_t, int, daddr32_t *); 881extern daddr_t contigpref(ufsvfs_t *, size_t, size_t); 882 883extern int ufs_rdwri(enum uio_rw, int, struct inode *, caddr_t, ssize_t, 884 offset_t, enum uio_seg, int *, cred_t *); 885 886extern int bmap_read(struct inode *, u_offset_t, daddr_t *, int *); 887extern int bmap_write(struct inode *, u_offset_t, int, enum bi_type, 888 daddr_t *, struct cred *); 889extern int bmap_has_holes(struct inode *); 890extern int bmap_find(struct inode *, boolean_t, u_offset_t *); 891extern int bmap_set_bn(struct vnode *, u_offset_t, daddr32_t); 892 893extern void ufs_vfs_add(struct ufsvfs *); 894extern void ufs_vfs_remove(struct ufsvfs *); 895 896extern void ufs_sbwrite(struct ufsvfs *); 897extern void ufs_update(int); 898extern int ufs_getsummaryinfo(dev_t, struct ufsvfs *, struct fs *); 899extern int ufs_putsummaryinfo(dev_t, struct ufsvfs *, struct fs *); 900extern int ufs_syncip(struct inode *, int, int, top_t); 901extern int ufs_sync_indir(struct inode *); 902extern int ufs_indirblk_sync(struct inode *, offset_t); 903extern int ufs_badblock(struct inode *, daddr_t); 904extern int ufs_indir_badblock(struct inode *, daddr32_t *); 905extern void ufs_notclean(struct ufsvfs *); 906extern void ufs_checkclean(struct vfs *); 907extern int isblock(struct fs *, uchar_t *, daddr_t); 908extern void setblock(struct fs *, uchar_t *, daddr_t); 909extern void clrblock(struct fs *, uchar_t *, daddr_t); 910extern int isclrblock(struct fs *, uchar_t *, daddr_t); 911extern void fragacct(struct fs *, int, int32_t *, int); 912extern int skpc(char, uint_t, char *); 913extern int ufs_fbwrite(struct fbuf *, struct inode *); 914extern int ufs_fbiwrite(struct fbuf *, struct inode *, daddr_t, long); 915extern int ufs_putapage(struct vnode *, struct page *, u_offset_t *, 916 size_t *, int, struct cred *); 917extern inode_t *ufs_alloc_inode(ufsvfs_t *, ino_t); 918extern void ufs_free_inode(inode_t *); 919 920/* 
921 * special stuff 922 */ 923extern void ufs_setreclaim(struct inode *); 924extern int ufs_scan_inodes(int, int (*)(struct inode *, void *), void *, 925 struct ufsvfs *); 926extern int ufs_sync_inode(struct inode *, void *); 927extern int ufs_sticky_remove_access(struct inode *, struct inode *, 928 struct cred *); 929/* 930 * quota 931 */ 932extern int chkiq(struct ufsvfs *, int, struct inode *, uid_t, int, 933 struct cred *, char **errp, size_t *lenp); 934 935/* 936 * ufs thread stuff 937 */ 938extern void ufs_thread_delete(struct vfs *); 939extern void ufs_delete_drain(struct vfs *, int, int); 940extern void ufs_delete(struct ufsvfs *, struct inode *, int); 941extern void ufs_inode_cache_reclaim(void *); 942extern void ufs_idle_drain(struct vfs *); 943extern void ufs_idle_some(int); 944extern void ufs_thread_idle(void); 945extern void ufs_thread_reclaim(struct vfs *); 946extern void ufs_thread_init(struct ufs_q *, int); 947extern void ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *); 948extern void ufs_thread_exit(struct ufs_q *); 949extern void ufs_thread_suspend(struct ufs_q *); 950extern void ufs_thread_continue(struct ufs_q *); 951extern void ufs_thread_hlock(void *); 952extern void ufs_delete_init(struct ufsvfs *, int); 953extern void ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *); 954extern void ufs_delete_drain_wait(struct ufsvfs *, int); 955 956/* 957 * ufs lockfs stuff 958 */ 959struct seg; 960extern int ufs_reconcile_fs(struct vfs *, struct ufsvfs *, int); 961extern int ufs_quiesce(struct ulockfs *); 962extern int ufs_flush(struct vfs *); 963extern int ufs_fiolfs(struct vnode *, struct lockfs *, int); 964extern int ufs__fiolfs(struct vnode *, struct lockfs *, int, int); 965extern int ufs_fiolfss(struct vnode *, struct lockfs *); 966extern int ufs_fioffs(struct vnode *, char *, struct cred *); 967extern int ufs_check_lockfs(struct ufsvfs *, struct ulockfs *, ulong_t); 968extern int ufs_lockfs_begin(struct ufsvfs *, struct ulockfs 
**, ulong_t); 969extern int ufs_lockfs_trybegin(struct ufsvfs *, struct ulockfs **, ulong_t); 970extern int ufs_lockfs_begin_getpage(struct ufsvfs *, struct ulockfs **, 971 struct seg *, int, uint_t *); 972extern void ufs_lockfs_end(struct ulockfs *); 973/* 974 * ufs acl stuff 975 */ 976extern int ufs_si_inherit(struct inode *, struct inode *, o_mode_t, cred_t *); 977extern void si_cache_init(void); 978extern int ufs_si_load(struct inode *, cred_t *); 979extern void ufs_si_del(struct inode *); 980extern int ufs_acl_access(struct inode *, int, cred_t *); 981extern void ufs_si_cache_flush(dev_t); 982extern int ufs_si_free(si_t *, struct vfs *, cred_t *); 983extern int ufs_acl_setattr(struct inode *, struct vattr *, cred_t *); 984extern int ufs_acl_get(struct inode *, vsecattr_t *, int, cred_t *); 985extern int ufs_acl_set(struct inode *, vsecattr_t *, int, cred_t *); 986/* 987 * ufs directio stuff 988 */ 989extern void ufs_directio_init(); 990extern int ufs_directio_write(struct inode *, uio_t *, int, int, cred_t *, 991 int *); 992extern int ufs_directio_read(struct inode *, uio_t *, cred_t *, int *); 993#define DIRECTIO_FAILURE (0) 994#define DIRECTIO_SUCCESS (1) 995 996/* 997 * ufs extensions for PXFS 998 */ 999 1000int ufs_rdwr_data(vnode_t *vp, u_offset_t offset, size_t len, fdbuffer_t *fdb, 1001 int flags, cred_t *cr); 1002int ufs_alloc_data(vnode_t *vp, u_offset_t offset, size_t *len, fdbuffer_t *fdb, 1003 int flags, cred_t *cr); 1004 1005/* 1006 * prototypes to support the forced unmount 1007 */ 1008 1009void ufs_freeze(struct ulockfs *, struct lockfs *); 1010int ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *); 1011 1012/* 1013 * extended attributes 1014 */ 1015 1016int ufs_xattrmkdir(inode_t *, inode_t **, int, struct cred *); 1017int ufs_xattr_getattrdir(vnode_t *, inode_t **, int, struct cred *); 1018void ufs_unhook_shadow(inode_t *, inode_t *); 1019 1020#endif /* defined(_KERNEL) && !defined(_BOOT) */ 1021 1022#ifdef __cplusplus 1023} 1024#endif 
1025 1026#endif /* _SYS_FS_UFS_INODE_H */ 1027