/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)mount.h	8.21 (Berkeley) 5/20/95
 */
/*
 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#ifndef _SYS_MOUNT_INTERNAL_H_
#define	_SYS_MOUNT_INTERNAL_H_

#include <sys/appleapiopts.h>
#ifndef KERNEL
#include <sys/ucred.h>
#else
#include <sys/kernel_types.h>
#include <sys/namei.h>
#endif
#include <sys/queue.h>
#include <sys/lock.h>
#include <net/radix.h>
#include <sys/socket.h>		/* XXX for AF_MAX */
#include <sys/vfs_context.h>		/* presumably for vfs_context_t -- confirm */
#include <sys/mount.h>
#include <sys/cdefs.h>

/* Forward declaration; this header only refers to struct label by pointer. */
struct label;

/*
 * pending_io_t is the counter type used for the pending read/write byte
 * counts in struct mount; INCR_PENDING_IO(delta, counter) adds 'delta' to
 * 'counter' atomically.  x86 builds use a 64-bit counter updated with
 * OSAddAtomic64; every other architecture uses a 32-bit counter updated
 * with OSAddAtomic.
 *
 * NOTE(review): both expansions end in ';', so "INCR_PENDING_IO(a, b);"
 * produces an extra empty statement and the macro cannot be the sole body
 * of an unbraced if/else.  The semicolon is kept because callers outside
 * this header may rely on it.
 */
#if defined(__i386__) || defined(__x86_64__)
typedef uint64_t  pending_io_t;
#define INCR_PENDING_IO(a, b) OSAddAtomic64((int64_t)(a), (int64_t *)&(b));
#else
typedef uint32_t  pending_io_t;
#define INCR_PENDING_IO(a, b) OSAddAtomic((int32_t)(a), (int32_t *)&(b));
#endif


99/*
100 * Structure per mounted file system.  Each mounted file system has an
101 * array of operations and an instance record.  The file systems are
102 * put on a doubly linked list.
103 */
104TAILQ_HEAD(vnodelst, vnode);
105
106struct mount {
107	TAILQ_ENTRY(mount) mnt_list;		/* mount list */
108	int32_t		mnt_count;		/* reference on the mount */
109	lck_mtx_t	mnt_mlock;		/* mutex that protects mount point */
110	struct vfsops	*mnt_op;		/* operations on fs */
111	struct vfstable	*mnt_vtable;		/* configuration info */
112	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
113	struct vnodelst	mnt_vnodelist;		/* list of vnodes this mount */
114	struct vnodelst	mnt_workerqueue;		/* list of vnodes this mount */
115	struct vnodelst	mnt_newvnodes;		/* list of vnodes this mount */
116	uint32_t		mnt_flag;		/* flags */
117	uint32_t		mnt_kern_flag;		/* kernel only flags */
118	uint32_t		mnt_compound_ops;	/* Available compound operations */
119	uint32_t		mnt_lflag;			/* mount life cycle flags */
120	uint32_t		mnt_maxsymlinklen;	/* max size of short symlink */
121	struct vfsstatfs	mnt_vfsstat;		/* cache of filesystem stats */
122	qaddr_t		mnt_data;		/* private data */
123	/* Cached values of the IO constraints for the device */
124	uint32_t	mnt_maxreadcnt;		/* Max. byte count for read */
125	uint32_t	mnt_maxwritecnt;	/* Max. byte count for write */
126	uint32_t	mnt_segreadcnt;		/* Max. segment count for read */
127	uint32_t	mnt_segwritecnt;	/* Max. segment count for write */
128	uint32_t	mnt_maxsegreadsize;	/* Max. segment read size  */
129	uint32_t	mnt_maxsegwritesize;	/* Max. segment write size */
130	uint32_t	mnt_alignmentmask;	/* Mask of bits that aren't addressable via DMA */
131	uint32_t	mnt_devblocksize;	/* the underlying device block size */
132	uint32_t	mnt_ioqueue_depth;	/* the maxiumum number of commands a device can accept */
133        uint32_t	mnt_ioscale;		/* scale the various throttles/limits imposed on the amount of I/O in flight */
134	uint32_t	mnt_ioflags;		/* flags for  underlying device */
135	pending_io_t	mnt_pending_write_size __attribute__((aligned(sizeof(pending_io_t))));	/* byte count of pending writes */
136	pending_io_t	mnt_pending_read_size  __attribute__((aligned(sizeof(pending_io_t))));	/* byte count of pending reads */
137	struct timeval	mnt_last_write_issued_timestamp;
138	struct timeval	mnt_last_write_completed_timestamp;
139
140	lck_rw_t	mnt_rwlock;		/* mutex readwrite lock */
141	lck_mtx_t	mnt_renamelock;		/* mutex that serializes renames that change shape of tree */
142	vnode_t		mnt_devvp;		/* the device mounted on for local file systems */
143	uint32_t	mnt_devbsdunit;		/* the BSD unit number of the device */
144	uint64_t	mnt_throttle_mask;	/* the throttle mask of what devices will be affected by I/O from this mnt */
145	void		*mnt_throttle_info;	/* used by the throttle code */
146	int32_t		mnt_crossref;		/* refernces to cover lookups  crossing into mp */
147	int32_t		mnt_iterref;		/* refernces to cover iterations; drained makes it -ve  */
148#if CONFIG_TRIGGERS
149	int32_t		mnt_numtriggers; 	/* num of trigger vnodes for this mount */
150	vfs_trigger_callback_t *mnt_triggercallback;
151	void		*mnt_triggerdata;
152#endif
153 	/* XXX 3762912 hack to support HFS filesystem 'owner' */
154 	uid_t		mnt_fsowner;
155 	gid_t		mnt_fsgroup;
156
157	struct label	*mnt_mntlabel;		/* MAC mount label */
158	struct label	*mnt_fslabel;		/* MAC default fs label */
159
160	/*
161	 * cache the rootvp of the last mount point
162	 * in the chain in the mount struct pointed
163	 * to by the vnode sitting in '/'
164	 * this cache is used to shortcircuit the
165	 * mount chain traversal and allows us
166	 * to traverse to the true underlying rootvp
167	 * in 1 easy step inside of 'cache_lookup_path'
168	 *
169	 * make sure to validate against the cached vid
170	 * in case the rootvp gets stolen away since
171	 * we don't take an explicit long term reference
172	 * on it when we mount it
173	 */
174	vnode_t		mnt_realrootvp;
175	uint32_t	mnt_realrootvp_vid;
176	/*
177	 * bumped each time a mount or unmount
178	 * occurs... its used to invalidate
179	 * 'mnt_realrootvp' from the cache
180	 */
181	uint32_t             mnt_generation;
182        /*
183	 * if 'MNTK_AUTH_CACHE_TIMEOUT' is
184	 * set, then 'mnt_authcache_ttl' is
185	 * the time-to-live for the per-vnode authentication cache
186	 * on this mount... if zero, no cache is maintained...
187	 * if 'MNTK_AUTH_CACHE_TIMEOUT' isn't set, its the
188	 * time-to-live for the cached lookup right for
189	 * volumes marked 'MNTK_AUTH_OPAQUE'.
190	 */
191	int		mnt_authcache_ttl;
192	char		fstypename_override[MFSTYPENAMELEN];
193};
194
/*
 * default number of seconds to keep cached lookup
 * rights valid on mounts marked MNTK_AUTH_OPAQUE
 */
#define CACHED_LOOKUP_RIGHT_TTL		2

/*
 * ioflags (bits for mnt_ioflags)
 */
#define MNT_IOFLAGS_FUA_SUPPORTED	0x00000001
#define MNT_IOFLAGS_UNMAP_SUPPORTED	0x00000002

/*
 * ioqueue depth for devices that don't report one
 */
#define MNT_DEFAULT_IOQUEUE_DEPTH	32


/*
 * XXX 3762912 hack to support HFS filesystem 'owner'
 *
 * Stores _uid and _gid into the mnt_fsowner and mnt_fsgroup fields of the
 * mount pointed to by _mp.  Wrapped in do { } while (0) so it acts as a
 * single statement; each argument is evaluated exactly once.
 */
#define vfs_setowner(_mp, _uid, _gid)	do {(_mp)->mnt_fsowner = (_uid); (_mp)->mnt_fsgroup = (_gid); } while (0)


/* mount point to which dead vps point */
extern struct mount * dead_mountp;

/*
 * Internal filesystem control flags stored in mnt_kern_flag.
 *
 * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed
 * past the mount point.  This keeps the subtree stable during mounts
 * and unmounts.
 *
 * Note:	We are counting down on new bit assignments.  This is
 *		because the bits here were broken out from the high bits
 *		of the mount flags.
 *
 * NOTE(review): MNTK_TYPENAME_OVERRIDE is guarded with "#ifdef NFSCLIENT"
 * here while the kernel_mount() prototypes in this header use
 * "#if NFSCLIENT"; the two differ when NFSCLIENT is defined as 0.
 */
#define MNTK_DENY_READDIREXT	0x00000200	/* Deny Extended-style readdir's for this volume */
#define MNTK_PERMIT_UNMOUNT	0x00000400	/* Allow (non-forced) unmounts by UIDs other than the one that mounted the volume */
#ifdef NFSCLIENT
#define MNTK_TYPENAME_OVERRIDE	0x00000800	/* override the fstypename for statfs() */
#endif /* NFSCLIENT */
#define MNTK_KERNEL_MOUNT	0x00001000	/* mount came from kernel side */
#ifdef CONFIG_IMGSRC_ACCESS
#define MNTK_HAS_MOVED		0x00002000
#define MNTK_BACKS_ROOT		0x00004000
#endif /* CONFIG_IMGSRC_ACCESS */
#define MNTK_AUTH_CACHE_TTL	0x00008000	/* rights cache has TTL - TTL of 0 disables cache */
#define MNTK_PATH_FROM_ID	0x00010000	/* mounted file system supports id-to-path lookups */
#define MNTK_UNMOUNT_PREFLIGHT	0x00020000	/* mounted file system wants preflight check during unmount */
#define MNTK_NAMED_STREAMS	0x00040000	/* mounted file system supports Named Streams VNOPs */
#define MNTK_EXTENDED_ATTRS	0x00080000	/* mounted file system supports Extended Attributes VNOPs */
#define MNTK_LOCK_LOCAL		0x00100000	/* advisory locking is done above the VFS itself */
#define MNTK_VIRTUALDEV		0x00200000	/* mounted on a virtual device i.e. a disk image */
#define MNTK_ROOTDEV		0x00400000	/* this filesystem resides on the same device as the root */
#define MNTK_SSD		0x00800000	/* underlying device is of the solid state variety */
#define MNTK_UNMOUNT		0x01000000	/* unmount in progress */
#define MNTK_MWAIT		0x02000000	/* waiting for unmount to finish */
#define MNTK_WANTRDWR		0x04000000	/* upgrade to read/write requested */
#if REV_ENDIAN_FS
#define MNT_REVEND		0x08000000	/* Reverse endian FS */
#endif /* REV_ENDIAN_FS */
#define MNTK_FRCUNMOUNT		0x10000000	/* Forced unmount wanted. */
#define MNTK_AUTH_OPAQUE	0x20000000	/* authorisation decisions are not made locally */
#define MNTK_AUTH_OPAQUE_ACCESS	0x40000000	/* VNOP_ACCESS is reliable for remote auth */
#define MNTK_EXTENDED_SECURITY	0x80000000	/* extended security supported */

/*
 * Mount life cycle flags (presumably the bits kept in mnt_lflag -- confirm).
 */
#define MNT_LNOTRESP		0x00000001	/* mount not responding */
#define MNT_LUNMOUNT		0x00000002	/* mount in unmount */
#define MNT_LFORCE		0x00000004	/* mount in forced unmount */
#define MNT_LDRAIN		0x00000008	/* mount in drain */
#define MNT_LITER		0x00000010	/* mount in iteration */
#define MNT_LNEWVN		0x00000020	/* mount has new vnodes created */
#define MNT_LWAIT		0x00000040	/* wait for unmount op */
#define MNT_LITERWAIT		0x00000080	/* mount in iteration; NOTE(review): same text as MNT_LITER, presumably marks a waiter -- confirm */
#define MNT_LDEAD		0x00000100	/* mount already unmounted */


/*
 * Generic file handle
 *
 * The per-version maxima are defined first; NFS_MAX_FH_SIZE, which sizes
 * the fh_data buffer, is the NFSv4 value.
 */
#define	NFSV2_MAX_FH_SIZE	32
#define	NFSV3_MAX_FH_SIZE	64
#define	NFSV4_MAX_FH_SIZE	128
#define	NFS_MAX_FH_SIZE		NFSV4_MAX_FH_SIZE
struct fhandle {
	int		fh_len;				/* length of file handle */
	unsigned char	fh_data[NFS_MAX_FH_SIZE];	/* file handle value */
};
typedef struct fhandle	fhandle_t;



287/*
288 * Filesystem configuration information. One of these exists for each
289 * type of filesystem supported by the kernel. These are searched at
290 * mount time to identify the requested filesystem.
291 */
292struct vfstable {
293	struct	vfsops *vfc_vfsops;	/* filesystem operations vector */
294	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
295	int	vfc_typenum;		/* historic filesystem type number */
296	int	vfc_refcount;		/* number mounted of this type */
297	int	vfc_flags;		/* permanent flags */
298	int	(*vfc_mountroot)(mount_t, vnode_t, vfs_context_t);	/* if != NULL, routine to mount root */
299	struct	vfstable *vfc_next;	/* next in list */
300	int32_t	vfc_reserved1;
301	int32_t vfc_reserved2;
302	int 		vfc_vfsflags;	/* for optional types */
303	void *		vfc_descptr;	/* desc table allocated address */
304	int			vfc_descsize;	/* size allocated for desc table */
305};
306
/* vfc_vfsflags: bits for the vfc_vfsflags field of struct vfstable */
#define	VFC_VFSLOCALARGS	0x002
#define	VFC_VFSGENERICARGS	0x004
#define	VFC_VFSNATIVEXATTR	0x010
#define	VFC_VFSDIRLINKS		0x020
#define	VFC_VFSPREFLIGHT	0x040
#define	VFC_VFSREADDIR_EXTENDED	0x080
#define	VFC_VFS64BITREADY	0x100
#define	VFC_VFSNOMACLABEL	0x1000
#define	VFC_VFSVNOP_PAGEINV2	0x2000
#define	VFC_VFSVNOP_PAGEOUTV2	0x4000
#define	VFC_VFSVNOP_NOUPDATEID_RENAME	0x8000


extern int maxvfsconf;			/* highest defined filesystem type */
extern struct vfstable *vfsconf;	/* head of list of filesystem types */
extern int maxvfsslots;			/* Maximum slots available to be used */
extern int numused_vfsslots;		/* number of slots already used */

/* the following two are xnu private */
struct vfstable *vfstable_add(struct vfstable *);
int	vfstable_del(struct vfstable *);


/*
 * Mount arguments: a union of a local-filesystem form (mnt_fspec plus
 * filesystem-specific data) and a remote-filesystem form
 * (filesystem-specific data only).
 */
struct vfsmount_args {
	union {
		struct {
			char * mnt_fspec;
			void * mnt_fsdata;
		} mnt_localfs_args;
		struct {
			void *  mnt_fsdata;		/* FS specific */
		} mnt_remotefs_args;
	} mountfs_args;
};


344/*
345 * LP64 *user* version of statfs structure.
346 * NOTE - must be kept in sync with struct statfs in mount.h
347 */
348struct user64_statfs {
349	short		f_otype;		/* TEMPORARY SHADOW COPY OF f_type */
350	short		f_oflags;		/* TEMPORARY SHADOW COPY OF f_flags */
351	user64_long_t	f_bsize;		/* fundamental file system block size */
352	user64_long_t	f_iosize;		/* optimal transfer block size */
353	user64_long_t	f_blocks;		/* total data blocks in file system */
354	user64_long_t	f_bfree;		/* free blocks in fs */
355	user64_long_t	f_bavail;		/* free blocks avail to non-superuser */
356	user64_long_t	f_files;		/* total file nodes in file system */
357	user64_long_t	f_ffree;		/* free file nodes in fs */
358	fsid_t		f_fsid;			/* file system id */
359	uid_t		f_owner;		/* user that mounted the filesystem */
360	short		f_reserved1;	/* spare for later */
361	short		f_type;			/* type of filesystem */
362    user64_long_t	f_flags;		/* copy of mount exported flags */
363	user64_long_t f_reserved2[2];	/* reserved for future use */
364	char		f_fstypename[MFSNAMELEN]; /* fs type name */
365	char		f_mntonname[MNAMELEN];	/* directory on which mounted */
366	char		f_mntfromname[MNAMELEN];/* mounted filesystem */
367	char		f_reserved3;	/* For alignment */
368	user64_long_t	f_reserved4[4];	/* For future use */
369};
370
371/*
372 * ILP32 *user* version of statfs structure.
373 * NOTE - must be kept in sync with struct statfs in mount.h
374 */
375struct user32_statfs {
376	short		f_otype;		/* TEMPORARY SHADOW COPY OF f_type */
377	short		f_oflags;		/* TEMPORARY SHADOW COPY OF f_flags */
378	user32_long_t	f_bsize;		/* fundamental file system block size */
379	user32_long_t	f_iosize;		/* optimal transfer block size */
380	user32_long_t	f_blocks;		/* total data blocks in file system */
381	user32_long_t	f_bfree;		/* free blocks in fs */
382	user32_long_t	f_bavail;		/* free blocks avail to non-superuser */
383	user32_long_t	f_files;		/* total file nodes in file system */
384	user32_long_t	f_ffree;		/* free file nodes in fs */
385	fsid_t		f_fsid;			/* file system id */
386	uid_t		f_owner;		/* user that mounted the filesystem */
387	short		f_reserved1;	/* spare for later */
388	short		f_type;			/* type of filesystem */
389    user32_long_t	f_flags;		/* copy of mount exported flags */
390	user32_long_t f_reserved2[2];	/* reserved for future use */
391	char		f_fstypename[MFSNAMELEN]; /* fs type name */
392	char		f_mntonname[MNAMELEN];	/* directory on which mounted */
393	char		f_mntfromname[MNAMELEN];/* mounted filesystem */
394	char		f_reserved3;	/* For alignment */
395	user32_long_t	f_reserved4[4];	/* For future use */
396};
397
/*
 * Throttled I/Os are affected only by normal I/Os happening on the same spindle.  Currently we use a
 * 64-bit integer to represent which devices are affected, so we can handle at most 64 different
 * spindles.  Since throttled I/O is usually useful only in non-server environments, this number is
 * enough in most cases.
 */
#define LOWPRI_MAX_NUM_DEV 64

405__BEGIN_DECLS
406
407extern uint32_t mount_generation;
408extern TAILQ_HEAD(mntlist, mount) mountlist;
409void mount_list_lock(void);
410void mount_list_unlock(void);
411void mount_lock_init(mount_t);
412void mount_lock_destroy(mount_t);
413void mount_lock(mount_t);
414void mount_lock_spin(mount_t);
415void mount_unlock(mount_t);
416void mount_lock_renames(mount_t);
417void mount_unlock_renames(mount_t);
418void mount_ref(mount_t, int);
419void mount_drop(mount_t, int);
420int  mount_refdrain(mount_t);
421
422/* vfs_rootmountalloc should be kept as a private api */
423errno_t vfs_rootmountalloc(const char *, const char *, mount_t *mpp);
424
425int	vfs_mountroot(void);
426void	vfs_unmountall(void);
427int	safedounmount(struct mount *, int, vfs_context_t);
428int	dounmount(struct mount *, int, int, vfs_context_t);
429
430/* xnu internal api */
431void  mount_dropcrossref(mount_t, vnode_t, int);
432mount_t mount_lookupby_volfsid(int, int);
433mount_t mount_list_lookupby_fsid(fsid_t *, int, int);
434int  mount_list_add(mount_t);
435void mount_list_remove(mount_t);
436int  mount_iterref(mount_t, int);
437int  mount_isdrained(mount_t, int);
438void mount_iterdrop(mount_t);
439void mount_iterdrain(mount_t);
440void mount_iterreset(mount_t);
441
442/* tags a volume as not supporting extended readdir for NFS exports */
443#ifdef BSD_KERNEL_PRIVATE
444void mount_set_noreaddirext (mount_t);
445#endif
446
/* Private NFS spi */
#define KERNEL_MOUNT_NOAUTH		0x01 /* Don't check the UID of the directory we are mounting on */
#define KERNEL_MOUNT_PERMIT_UNMOUNT	0x02 /* Allow (non-forced) unmounts by users other than the one who mounted the volume */
#if NFSCLIENT
/*
 * NOTE: kernel_mount() does not force MNT_NOSUID, MNT_NOEXEC, or MNT_NODEV for non-privileged
 * mounting credentials, as the mount(2) system call does.
 */
int kernel_mount(char *, vnode_t, vnode_t, const char *, void *, size_t, int, uint32_t, vfs_context_t);
boolean_t vfs_iskernelmount(mount_t);
#endif

459/* throttled I/O api */
460
461/* returned by throttle_io_will_be_throttled */
462#define THROTTLE_DISENGAGED	0
463#define THROTTLE_ENGAGED	1
464#define THROTTLE_NOW		2
465
466int  throttle_get_io_policy(struct uthread **ut);
467int  throttle_get_passive_io_policy(struct uthread **ut);
468int  throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp);
469void *throttle_info_update_by_mount(mount_t mp);
470void rethrottle_thread(uthread_t ut);
471
472/* throttled I/O helper function */
473/* convert the lowest bit to a device index */
474extern int num_trailing_0(uint64_t n);
475
476__END_DECLS
477
#endif /* !_SYS_MOUNT_INTERNAL_H_ */
