1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. All advertising materials mentioning features or use of this software 42 * must display the following acknowledgement: 43 * This product includes software developed by the University of 44 * California, Berkeley and its contributors. 45 * 4. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)mount.h 8.21 (Berkeley) 5/20/95 62 */ 63/* 64 * NOTICE: This file was modified by McAfee Research in 2004 to introduce 65 * support for mandatory and extensible security protections. This notice 66 * is included in support of clause 2.2 (b) of the Apple Public License, 67 * Version 2.0. 68 */ 69 70#ifndef _SYS_MOUNT_INTERNAL_H_ 71#define _SYS_MOUNT_INTERNAL_H_ 72 73#include <sys/appleapiopts.h> 74#ifndef KERNEL 75#include <sys/ucred.h> 76#else 77#include <sys/kernel_types.h> 78#include <sys/namei.h> 79#endif 80#include <sys/queue.h> 81#include <sys/lock.h> 82#include <net/radix.h> 83#include <sys/socket.h> /* XXX for AF_MAX */ 84#include <sys/vfs_context.h> /* XXX for AF_MAX */ 85#include <sys/mount.h> 86#include <sys/cdefs.h> 87 88struct label; 89 90#if defined(__i386__) || defined(__x86_64__) 91typedef uint64_t pending_io_t; 92#define INCR_PENDING_IO(a, b) OSAddAtomic64((int64_t)(a), (int64_t *)&(b)); 93#else 94typedef uint32_t pending_io_t; 95#define INCR_PENDING_IO(a, b) OSAddAtomic((int32_t)(a), (int32_t *)&(b)); 96#endif 97 98 99/* 100 * Structure per mounted file system. Each mounted file system has an 101 * array of operations and an instance record. The file systems are 102 * put on a doubly linked list. 103 */ 104TAILQ_HEAD(vnodelst, vnode); 105 106struct mount { 107 TAILQ_ENTRY(mount) mnt_list; /* mount list */ 108 int32_t mnt_count; /* reference on the mount */ 109 lck_mtx_t mnt_mlock; /* mutex that protects mount point */ 110 struct vfsops *mnt_op; /* operations on fs */ 111 struct vfstable *mnt_vtable; /* configuration info */ 112 struct vnode *mnt_vnodecovered; /* vnode we mounted on */ 113 struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ 114 struct vnodelst mnt_workerqueue; /* list of vnodes this mount */ 115 struct vnodelst mnt_newvnodes; /* list of vnodes this mount */ 116 uint32_t mnt_flag; /* flags */ 117 uint32_t mnt_kern_flag; /* kernel only flags */ 118 uint32_t mnt_compound_ops; /* Available compound operations */ 119 uint32_t mnt_lflag; /* mount life cycle flags */ 120 uint32_t mnt_maxsymlinklen; /* max size of short symlink */ 121 struct vfsstatfs mnt_vfsstat; /* cache of filesystem stats */ 122 qaddr_t mnt_data; /* private data */ 123 /* Cached values of the IO constraints for the device */ 124 uint32_t mnt_maxreadcnt; /* Max. byte count for read */ 125 uint32_t mnt_maxwritecnt; /* Max. byte count for write */ 126 uint32_t mnt_segreadcnt; /* Max. segment count for read */ 127 uint32_t mnt_segwritecnt; /* Max. segment count for write */ 128 uint32_t mnt_maxsegreadsize; /* Max. segment read size */ 129 uint32_t mnt_maxsegwritesize; /* Max. segment write size */ 130 uint32_t mnt_alignmentmask; /* Mask of bits that aren't addressable via DMA */ 131 uint32_t mnt_devblocksize; /* the underlying device block size */ 132 uint32_t mnt_ioqueue_depth; /* the maxiumum number of commands a device can accept */ 133 uint32_t mnt_ioscale; /* scale the various throttles/limits imposed on the amount of I/O in flight */ 134 uint32_t mnt_ioflags; /* flags for underlying device */ 135 pending_io_t mnt_pending_write_size __attribute__((aligned(sizeof(pending_io_t)))); /* byte count of pending writes */ 136 pending_io_t mnt_pending_read_size __attribute__((aligned(sizeof(pending_io_t)))); /* byte count of pending reads */ 137 struct timeval mnt_last_write_issued_timestamp; 138 struct timeval mnt_last_write_completed_timestamp; 139 140 lck_rw_t mnt_rwlock; /* mutex readwrite lock */ 141 lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */ 142 vnode_t mnt_devvp; /* the device mounted on for local file systems */ 143 uint32_t mnt_devbsdunit; /* the BSD unit number of the device */ 144 uint64_t mnt_throttle_mask; /* the throttle mask of what devices will be affected by I/O from this mnt */ 145 void *mnt_throttle_info; /* used by the throttle code */ 146 int32_t mnt_crossref; /* refernces to cover lookups crossing into mp */ 147 int32_t mnt_iterref; /* refernces to cover iterations; drained makes it -ve */ 148#if CONFIG_TRIGGERS 149 int32_t mnt_numtriggers; /* num of trigger vnodes for this mount */ 150 vfs_trigger_callback_t *mnt_triggercallback; 151 void *mnt_triggerdata; 152#endif 153 /* XXX 3762912 hack to support HFS filesystem 'owner' */ 154 uid_t mnt_fsowner; 155 gid_t mnt_fsgroup; 156 157 struct label *mnt_mntlabel; /* MAC mount label */ 158 struct label *mnt_fslabel; /* MAC default fs label */ 159 160 /* 161 * cache the rootvp of the last mount point 162 * in the chain in the mount struct pointed 163 * to by the vnode sitting in '/' 164 * this cache is used to shortcircuit the 165 * mount chain traversal and allows us 166 * to traverse to the true underlying rootvp 167 * in 1 easy step inside of 'cache_lookup_path' 168 * 169 * make sure to validate against the cached vid 170 * in case the rootvp gets stolen away since 171 * we don't take an explicit long term reference 172 * on it when we mount it 173 */ 174 vnode_t mnt_realrootvp; 175 uint32_t mnt_realrootvp_vid; 176 /* 177 * bumped each time a mount or unmount 178 * occurs... its used to invalidate 179 * 'mnt_realrootvp' from the cache 180 */ 181 uint32_t mnt_generation; 182 /* 183 * if 'MNTK_AUTH_CACHE_TIMEOUT' is 184 * set, then 'mnt_authcache_ttl' is 185 * the time-to-live for the per-vnode authentication cache 186 * on this mount... if zero, no cache is maintained... 187 * if 'MNTK_AUTH_CACHE_TIMEOUT' isn't set, its the 188 * time-to-live for the cached lookup right for 189 * volumes marked 'MNTK_AUTH_OPAQUE'. 190 */ 191 int mnt_authcache_ttl; 192 char fstypename_override[MFSTYPENAMELEN]; 193}; 194 195/* 196 * default number of seconds to keep cached lookup 197 * rights valid on mounts marked MNTK_AUTH_OPAQUE 198 */ 199#define CACHED_LOOKUP_RIGHT_TTL 2 200 201/* 202 * ioflags 203 */ 204#define MNT_IOFLAGS_FUA_SUPPORTED 0x00000001 205#define MNT_IOFLAGS_UNMAP_SUPPORTED 0x00000002 206 207/* 208 * ioqueue depth for devices that don't report one 209 */ 210#define MNT_DEFAULT_IOQUEUE_DEPTH 32 211 212 213/* XXX 3762912 hack to support HFS filesystem 'owner' */ 214#define vfs_setowner(_mp, _uid, _gid) do {(_mp)->mnt_fsowner = (_uid); (_mp)->mnt_fsgroup = (_gid); } while (0) 215 216 217/* mount point to which dead vps point to */ 218extern struct mount * dead_mountp; 219 220/* 221 * Internal filesystem control flags stored in mnt_kern_flag. 222 * 223 * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed 224 * past the mount point. This keeps the subtree stable during mounts 225 * and unmounts. 226 * 227 * Note: We are counting down on new bit assignments. This is 228 * because the bits here were broken out from the high bits 229 * of the mount flags. 230 */ 231#define MNTK_DENY_READDIREXT 0x00000200 /* Deny Extended-style readdir's for this volume */ 232#define MNTK_PERMIT_UNMOUNT 0x00000400 /* Allow (non-forced) unmounts by UIDs other than the one that mounted the volume */ 233#ifdef NFSCLIENT 234#define MNTK_TYPENAME_OVERRIDE 0x00000800 /* override the fstypename for statfs() */ 235#endif /* NFSCLIENT */ 236#define MNTK_KERNEL_MOUNT 0x00001000 /* mount came from kernel side */ 237#ifdef CONFIG_IMGSRC_ACCESS 238#define MNTK_HAS_MOVED 0x00002000 239#define MNTK_BACKS_ROOT 0x00004000 240#endif /* CONFIG_IMGSRC_ACCESS */ 241#define MNTK_AUTH_CACHE_TTL 0x00008000 /* rights cache has TTL - TTL of 0 disables cache */ 242#define MNTK_PATH_FROM_ID 0x00010000 /* mounted file system supports id-to-path lookups */ 243#define MNTK_UNMOUNT_PREFLIGHT 0x00020000 /* mounted file system wants preflight check during unmount */ 244#define MNTK_NAMED_STREAMS 0x00040000 /* mounted file system supports Named Streams VNOPs */ 245#define MNTK_EXTENDED_ATTRS 0x00080000 /* mounted file system supports Extended Attributes VNOPs */ 246#define MNTK_LOCK_LOCAL 0x00100000 /* advisory locking is done above the VFS itself */ 247#define MNTK_VIRTUALDEV 0x00200000 /* mounted on a virtual device i.e. a disk image */ 248#define MNTK_ROOTDEV 0x00400000 /* this filesystem resides on the same device as the root */ 249#define MNTK_SSD 0x00800000 /* underlying device is of the solid state variety */ 250#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ 251#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ 252#define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ 253#if REV_ENDIAN_FS 254#define MNT_REVEND 0x08000000 /* Reverse endian FS */ 255#endif /* REV_ENDIAN_FS */ 256#define MNTK_FRCUNMOUNT 0x10000000 /* Forced unmount wanted. */ 257#define MNTK_AUTH_OPAQUE 0x20000000 /* authorisation decisions are not made locally */ 258#define MNTK_AUTH_OPAQUE_ACCESS 0x40000000 /* VNOP_ACCESS is reliable for remote auth */ 259#define MNTK_EXTENDED_SECURITY 0x80000000 /* extended security supported */ 260 261#define MNT_LNOTRESP 0x00000001 /* mount not responding */ 262#define MNT_LUNMOUNT 0x00000002 /* mount in unmount */ 263#define MNT_LFORCE 0x00000004 /* mount in forced unmount */ 264#define MNT_LDRAIN 0x00000008 /* mount in drain */ 265#define MNT_LITER 0x00000010 /* mount in iteration */ 266#define MNT_LNEWVN 0x00000020 /* mount has new vnodes created */ 267#define MNT_LWAIT 0x00000040 /* wait for unmount op */ 268#define MNT_LITERWAIT 0x00000080 /* mount in iteration */ 269#define MNT_LDEAD 0x00000100 /* mount already unmounted*/ 270 271 272/* 273 * Generic file handle 274 */ 275#define NFS_MAX_FH_SIZE NFSV4_MAX_FH_SIZE 276#define NFSV4_MAX_FH_SIZE 128 277#define NFSV3_MAX_FH_SIZE 64 278#define NFSV2_MAX_FH_SIZE 32 279struct fhandle { 280 int fh_len; /* length of file handle */ 281 unsigned char fh_data[NFS_MAX_FH_SIZE]; /* file handle value */ 282}; 283typedef struct fhandle fhandle_t; 284 285 286 287/* 288 * Filesystem configuration information. One of these exists for each 289 * type of filesystem supported by the kernel. These are searched at 290 * mount time to identify the requested filesystem. 291 */ 292struct vfstable { 293 struct vfsops *vfc_vfsops; /* filesystem operations vector */ 294 char vfc_name[MFSNAMELEN]; /* filesystem type name */ 295 int vfc_typenum; /* historic filesystem type number */ 296 int vfc_refcount; /* number mounted of this type */ 297 int vfc_flags; /* permanent flags */ 298 int (*vfc_mountroot)(mount_t, vnode_t, vfs_context_t); /* if != NULL, routine to mount root */ 299 struct vfstable *vfc_next; /* next in list */ 300 int32_t vfc_reserved1; 301 int32_t vfc_reserved2; 302 int vfc_vfsflags; /* for optional types */ 303 void * vfc_descptr; /* desc table allocated address */ 304 int vfc_descsize; /* size allocated for desc table */ 305}; 306 307/* vfc_vfsflags: */ 308#define VFC_VFSLOCALARGS 0x002 309#define VFC_VFSGENERICARGS 0x004 310#define VFC_VFSNATIVEXATTR 0x010 311#define VFC_VFSDIRLINKS 0x020 312#define VFC_VFSPREFLIGHT 0x040 313#define VFC_VFSREADDIR_EXTENDED 0x080 314#define VFC_VFS64BITREADY 0x100 315#define VFC_VFSNOMACLABEL 0x1000 316#define VFC_VFSVNOP_PAGEINV2 0x2000 317#define VFC_VFSVNOP_PAGEOUTV2 0x4000 318#define VFC_VFSVNOP_NOUPDATEID_RENAME 0x8000 319 320 321extern int maxvfsconf; /* highest defined filesystem type */ 322extern struct vfstable *vfsconf; /* head of list of filesystem types */ 323extern int maxvfsslots; /* Maximum slots available to be used */ 324extern int numused_vfsslots; /* number of slots already used */ 325 326/* the following two are xnu private */ 327struct vfstable * vfstable_add(struct vfstable *); 328int vfstable_del(struct vfstable *); 329 330 331struct vfsmount_args { 332 union { 333 struct { 334 char * mnt_fspec; 335 void * mnt_fsdata; 336 } mnt_localfs_args; 337 struct { 338 void * mnt_fsdata; /* FS specific */ 339 } mnt_remotefs_args; 340 } mountfs_args; 341}; 342 343 344/* 345 * LP64 *user* version of statfs structure. 346 * NOTE - must be kept in sync with struct statfs in mount.h 347 */ 348struct user64_statfs { 349 short f_otype; /* TEMPORARY SHADOW COPY OF f_type */ 350 short f_oflags; /* TEMPORARY SHADOW COPY OF f_flags */ 351 user64_long_t f_bsize; /* fundamental file system block size */ 352 user64_long_t f_iosize; /* optimal transfer block size */ 353 user64_long_t f_blocks; /* total data blocks in file system */ 354 user64_long_t f_bfree; /* free blocks in fs */ 355 user64_long_t f_bavail; /* free blocks avail to non-superuser */ 356 user64_long_t f_files; /* total file nodes in file system */ 357 user64_long_t f_ffree; /* free file nodes in fs */ 358 fsid_t f_fsid; /* file system id */ 359 uid_t f_owner; /* user that mounted the filesystem */ 360 short f_reserved1; /* spare for later */ 361 short f_type; /* type of filesystem */ 362 user64_long_t f_flags; /* copy of mount exported flags */ 363 user64_long_t f_reserved2[2]; /* reserved for future use */ 364 char f_fstypename[MFSNAMELEN]; /* fs type name */ 365 char f_mntonname[MNAMELEN]; /* directory on which mounted */ 366 char f_mntfromname[MNAMELEN];/* mounted filesystem */ 367 char f_reserved3; /* For alignment */ 368 user64_long_t f_reserved4[4]; /* For future use */ 369}; 370 371/* 372 * ILP32 *user* version of statfs structure. 373 * NOTE - must be kept in sync with struct statfs in mount.h 374 */ 375struct user32_statfs { 376 short f_otype; /* TEMPORARY SHADOW COPY OF f_type */ 377 short f_oflags; /* TEMPORARY SHADOW COPY OF f_flags */ 378 user32_long_t f_bsize; /* fundamental file system block size */ 379 user32_long_t f_iosize; /* optimal transfer block size */ 380 user32_long_t f_blocks; /* total data blocks in file system */ 381 user32_long_t f_bfree; /* free blocks in fs */ 382 user32_long_t f_bavail; /* free blocks avail to non-superuser */ 383 user32_long_t f_files; /* total file nodes in file system */ 384 user32_long_t f_ffree; /* free file nodes in fs */ 385 fsid_t f_fsid; /* file system id */ 386 uid_t f_owner; /* user that mounted the filesystem */ 387 short f_reserved1; /* spare for later */ 388 short f_type; /* type of filesystem */ 389 user32_long_t f_flags; /* copy of mount exported flags */ 390 user32_long_t f_reserved2[2]; /* reserved for future use */ 391 char f_fstypename[MFSNAMELEN]; /* fs type name */ 392 char f_mntonname[MNAMELEN]; /* directory on which mounted */ 393 char f_mntfromname[MNAMELEN];/* mounted filesystem */ 394 char f_reserved3; /* For alignment */ 395 user32_long_t f_reserved4[4]; /* For future use */ 396}; 397 398/* 399 * throttle I/Os are affected only by normal I/Os happening on the same spindle. Currently we use a 64-bit integer to 400 * represent what devices are affected, so we can handle at most 64 different spindles. Since 401 * throttled I/O is usually useful in non-server environment only, this number is enough in most cases. 402 */ 403#define LOWPRI_MAX_NUM_DEV 64 404 405__BEGIN_DECLS 406 407extern uint32_t mount_generation; 408extern TAILQ_HEAD(mntlist, mount) mountlist; 409void mount_list_lock(void); 410void mount_list_unlock(void); 411void mount_lock_init(mount_t); 412void mount_lock_destroy(mount_t); 413void mount_lock(mount_t); 414void mount_lock_spin(mount_t); 415void mount_unlock(mount_t); 416void mount_lock_renames(mount_t); 417void mount_unlock_renames(mount_t); 418void mount_ref(mount_t, int); 419void mount_drop(mount_t, int); 420int mount_refdrain(mount_t); 421 422/* vfs_rootmountalloc should be kept as a private api */ 423errno_t vfs_rootmountalloc(const char *, const char *, mount_t *mpp); 424 425int vfs_mountroot(void); 426void vfs_unmountall(void); 427int safedounmount(struct mount *, int, vfs_context_t); 428int dounmount(struct mount *, int, int, vfs_context_t); 429 430/* xnu internal api */ 431void mount_dropcrossref(mount_t, vnode_t, int); 432mount_t mount_lookupby_volfsid(int, int); 433mount_t mount_list_lookupby_fsid(fsid_t *, int, int); 434int mount_list_add(mount_t); 435void mount_list_remove(mount_t); 436int mount_iterref(mount_t, int); 437int mount_isdrained(mount_t, int); 438void mount_iterdrop(mount_t); 439void mount_iterdrain(mount_t); 440void mount_iterreset(mount_t); 441 442/* tags a volume as not supporting extended readdir for NFS exports */ 443#ifdef BSD_KERNEL_PRIVATE 444void mount_set_noreaddirext (mount_t); 445#endif 446 447/* Private NFS spi */ 448#define KERNEL_MOUNT_NOAUTH 0x01 /* Don't check the UID of the directory we are mounting on */ 449#define KERNEL_MOUNT_PERMIT_UNMOUNT 0x02 /* Allow (non-forced) unmounts by users other the one who mounted the volume */ 450#if NFSCLIENT 451/* 452 * NOTE: kernel_mount() does not force MNT_NOSUID, MNT_NOEXEC, or MNT_NODEC for non-privileged 453 * mounting credentials, as the mount(2) system call does. 454 */ 455int kernel_mount(char *, vnode_t, vnode_t, const char *, void *, size_t, int, uint32_t, vfs_context_t); 456boolean_t vfs_iskernelmount(mount_t); 457#endif 458 459/* throttled I/O api */ 460 461/* returned by throttle_io_will_be_throttled */ 462#define THROTTLE_DISENGAGED 0 463#define THROTTLE_ENGAGED 1 464#define THROTTLE_NOW 2 465 466int throttle_get_io_policy(struct uthread **ut); 467int throttle_get_passive_io_policy(struct uthread **ut); 468int throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp); 469void *throttle_info_update_by_mount(mount_t mp); 470void rethrottle_thread(uthread_t ut); 471 472/* throttled I/O helper function */ 473/* convert the lowest bit to a device index */ 474extern int num_trailing_0(uint64_t n); 475 476__END_DECLS 477 478#endif /* !_SYS_MOUNT_INTERNAL_H_ */ 479