1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1994, 1995 The Regents of the University of California.
31 * Copyright (c) 1994, 1995 Jan-Simon Pendry.
32 * All rights reserved.
33 *
34 * This code is derived from software donated to Berkeley by
35 * Jan-Simon Pendry.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)union_vfsops.c	8.20 (Berkeley) 5/20/95
66 */
67
68/*
69 * Union Layer
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/time.h>
75#include <sys/types.h>
76#include <sys/proc_internal.h>
77#include <sys/kauth.h>
78#include <sys/vnode_internal.h>
79#include <sys/mount_internal.h>
80#include <sys/namei.h>
81#include <sys/malloc.h>
82#include <sys/filedesc.h>
83#include <sys/queue.h>
84#include <miscfs/union/union.h>
85
/*
 * Forward declarations for file-local helpers.  union_root() is called
 * from union_unmount(), which appears before union_root()'s definition.
 */
static	int union_itercallback(vnode_t, void *);
static int union_root(mount_t, vnode_t *, vfs_context_t);
88
89/*
90 * Mount union filesystem
91 */
92static int
93union_mount(mount_t mp, __unused vnode_t devvp, user_addr_t data, vfs_context_t context)
94{
95	proc_t p = vfs_context_proc(context);
96	int error = 0;
97	struct user_union_args args;
98	struct vnode *lowerrootvp = NULLVP;
99	struct vnode *upperrootvp = NULLVP;
100	struct union_mount *um = NULL;
101	kauth_cred_t cred = NOCRED;
102	const char *cp = NULL;
103	char *vcp;
104	int len;
105	u_int size;
106	struct nameidata nd;
107
108#ifdef UNION_DIAGNOSTIC
109	printf("union_mount(mp = %x)\n", mp);
110#endif
111
112	/*
113	 * Update is a no-op
114	 */
115	if (mp->mnt_flag & MNT_UPDATE) {
116		/*
117		 * Need to provide.
118		 * 1. a way to convert between rdonly and rdwr mounts.
119		 * 2. support for nfs exports.
120		 */
121		error = ENOTSUP;
122		goto bad;
123	}
124
125	/*
126	 * Get argument
127	 */
128	if (vfs_context_is64bit(context)) {
129		error = copyin(data, (caddr_t)&args, sizeof(args));
130	}
131	else {
132		struct union_args temp;
133		error = copyin(data, (caddr_t)&temp, sizeof (temp));
134		args.target = CAST_USER_ADDR_T(temp.target);
135		args.mntflags = temp.mntflags;
136	}
137	if (error)
138		goto bad;
139
140	lowerrootvp = mp->mnt_vnodecovered;
141	vnode_get(lowerrootvp);
142
143	/*
144	 * Find upper node.
145	 */
146	NDINIT(&nd, LOOKUP, FOLLOW|WANTPARENT,
147	       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
148	       args.target, context);
149
150	if ((error = namei(&nd)))
151		goto bad;
152
153	nameidone(&nd);
154	upperrootvp = nd.ni_vp;
155	vnode_put(nd.ni_dvp);
156	nd.ni_dvp = NULL;
157
158	if (upperrootvp->v_type != VDIR) {
159		error = EINVAL;
160		goto bad;
161	}
162
163	MALLOC(um, struct union_mount *, sizeof(struct union_mount),
164				M_UFSMNT, M_WAITOK);
165
166	/*
167	 * Keep a held reference to the target vnodes.
168	 * They are vnode_put'd in union_unmount.
169	 *
170	 * Depending on the _BELOW flag, the filesystems are
171	 * viewed in a different order.  In effect, this is the
172	 * same as providing a mount under option to the mount syscall.
173	 */
174
175	um->um_op = args.mntflags & UNMNT_OPMASK;
176	switch (um->um_op) {
177	case UNMNT_ABOVE:
178		um->um_lowervp = lowerrootvp;
179		um->um_uppervp = upperrootvp;
180		break;
181
182	case UNMNT_BELOW:
183		um->um_lowervp = upperrootvp;
184		um->um_uppervp = lowerrootvp;
185		break;
186
187	case UNMNT_REPLACE:
188		vnode_put(lowerrootvp);
189		lowerrootvp = NULLVP;
190		um->um_uppervp = upperrootvp;
191		um->um_lowervp = lowerrootvp;
192		break;
193
194#ifdef FAULTFS
195	case UNMNT_FAULTIN:
196		um->um_lowervp = upperrootvp;
197		um->um_uppervp = lowerrootvp;
198		break;
199#endif
200
201	default:
202		error = EINVAL;
203		goto bad;
204	}
205
206	if (um->um_lowervp != NULLVP)
207		um->um_lowervid = vnode_vid(um->um_lowervp);
208	if (um->um_uppervp != NULLVP)
209		um->um_uppervid = vnode_vid(um->um_uppervp);
210	/*
211	 * Unless the mount is readonly, ensure that the top layer
212	 * supports whiteout operations
213	 */
214#ifdef FAULTFS
215	if ((um->um_op != UNMNT_FAULTIN) && (mp->mnt_flag & MNT_RDONLY) == 0)
216#else
217	if ((mp->mnt_flag & MNT_RDONLY) == 0)
218#endif
219	{
220		error = VNOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0,
221		                      LOOKUP, context);
222		if (error)
223			goto bad;
224	}
225
226	um->um_cred = kauth_cred_get_with_ref();
227	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
228
229	/*
230	 * Depending on what you think the MNT_LOCAL flag might mean,
231	 * you may want the && to be || on the conditional below.
232	 * At the moment it has been defined that the filesystem is
233	 * only local if it is all local, ie the MNT_LOCAL flag implies
234	 * that the entire namespace is local.  If you think the MNT_LOCAL
235	 * flag implies that some of the files might be stored locally
236	 * then you will want to change the conditional.
237	 */
238	if (um->um_op == UNMNT_ABOVE) {
239		if (((um->um_lowervp == NULLVP) ||
240		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
241		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
242			mp->mnt_flag |= MNT_LOCAL;
243	}
244
245	/*
246	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
247	 * of lookup() which explicitly checks the flag, rather than asking
248	 * the filesystem for it's own opinion.  This means, that an update
249	 * mount of the underlying filesystem to go from rdonly to rdwr
250	 * will leave the unioned view as read-only.
251	 */
252	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
253
254	mp->mnt_data = (qaddr_t) um;
255	vfs_getnewfsid(mp);
256
257
258	switch (um->um_op) {
259	case UNMNT_ABOVE:
260		cp = "<above>:";
261		break;
262	case UNMNT_BELOW:
263		cp = "<below>:";
264		break;
265	case UNMNT_REPLACE:
266		cp = "";
267		break;
268#ifdef FAULTFS
269	case UNMNT_FAULTIN:
270		cp = "/FaultingFS/";
271		break;
272#endif
273	}
274	len = strlen(cp);
275	bcopy(cp, mp->mnt_vfsstat.f_mntfromname, len);
276
277	vcp = mp->mnt_vfsstat.f_mntfromname + len;
278	len = MNAMELEN - len;
279
280	(void) copyinstr(args.target, vcp, len - 1, (size_t *)&size);
281	bzero(vcp + size, len - size);
282
283	/* mark the filesystem thred safe */
284	 mp->mnt_vtable->vfc_threadsafe = TRUE;
285
286#ifdef UNION_DIAGNOSTIC
287	printf("union_mount: from %s, on %s\n",
288		mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
289#endif
290	return (0);
291
292bad:
293	if (um)
294		_FREE(um, M_UFSMNT);
295	if (IS_VALID_CRED(cred))
296		kauth_cred_unref(&cred);
297	if (upperrootvp)
298		vnode_put(upperrootvp);
299	if (lowerrootvp)
300		vnode_put(lowerrootvp);
301	return (error);
302}
303
304/*
305 * VFS start.  Nothing needed here - the start routine
306 * on the underlying filesystem(s) will have been called
307 * when that filesystem was mounted.
308 */
309static int
310union_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
311{
312
313	return (0);
314}
315
316static int
317union_itercallback(__unused vnode_t vp, void *args)
318{
319	int  num = *(int *)args;
320
321	*(int *)args = num + 1;
322	return(VNODE_RETURNED);
323}
324
325
326
327/*
328 * Free reference to union layer
329 */
330static int
331union_unmount(mount_t mp, int mntflags, vfs_context_t context)
332{
333	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
334	struct vnode *um_rootvp;
335	int error;
336	int freeing;
337	int flags = 0;
338
339#ifdef UNION_DIAGNOSTIC
340	printf("union_unmount(mp = %x)\n", mp);
341#endif
342
343	if (mntflags & MNT_FORCE)
344		flags |= FORCECLOSE;
345
346	if ((error = union_root(mp, &um_rootvp, context)))
347		return (error);
348
349	/*
350	 * Keep flushing vnodes from the mount list.
351	 * This is needed because of the un_pvp held
352	 * reference to the parent vnode.
353	 * If more vnodes have been freed on a given pass,
354	 * the try again.  The loop will iterate at most
355	 * (d) times, where (d) is the maximum tree depth
356	 * in the filesystem.
357	 */
358	for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
359		int n = 0;
360
361		vnode_iterate(mp, VNODE_NOLOCK_INTERNAL, union_itercallback, &n);
362
363		/* if this is unchanged then stop */
364		if (n == freeing)
365			break;
366
367		/* otherwise try once more time */
368		freeing = n;
369	}
370
371	/* At this point the root vnode should have a single reference */
372	if (vnode_isinuse(um_rootvp, 0)) {
373		vnode_put(um_rootvp);
374		return (EBUSY);
375	}
376
377#ifdef UNION_DIAGNOSTIC
378	vprint("union root", um_rootvp);
379#endif
380	/*
381	 * Discard references to upper and lower target vnodes.
382	 */
383	if (um->um_lowervp)
384		vnode_put(um->um_lowervp);
385	vnode_put(um->um_uppervp);
386	if (IS_VALID_CRED(um->um_cred)) {
387		kauth_cred_unref(&um->um_cred);
388	}
389	/*
390	 * Release reference on underlying root vnode
391	 */
392	vnode_put(um_rootvp);
393	/*
394	 * And blow it away for future re-use
395	 */
396	vnode_reclaim(um_rootvp);
397	/*
398	 * Finally, throw away the union_mount structure
399	 */
400	_FREE(mp->mnt_data, M_UFSMNT);	/* XXX */
401	mp->mnt_data = NULL;
402	return (0);
403}
404
405static int
406union_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t context)
407{
408	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
409	int error;
410
411	/*
412	 * Return locked reference to root.
413	 */
414	vnode_get(um->um_uppervp);
415	if (um->um_lowervp)
416		vnode_get(um->um_lowervp);
417
418	union_lock();
419	error = union_allocvp(vpp, mp,
420			      (struct vnode *) 0,
421			      (struct vnode *) 0,
422			      (struct componentname *) 0,
423			      um->um_uppervp,
424			      um->um_lowervp,
425			      1);
426	union_unlock();
427
428	if (error) {
429	        vnode_put(um->um_uppervp);
430		if (um->um_lowervp)
431			vnode_put(um->um_lowervp);
432	}
433
434	return (error);
435}
436
437static int
438union_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context)
439{
440	int error;
441	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
442	struct vfs_attr attr;
443	uint32_t lbsize = 0;
444
445#ifdef UNION_DIAGNOSTIC
446	printf("union_vfs_getattr(mp = %x, lvp = %x, uvp = %x)\n", mp,
447			um->um_lowervp,
448	       		um->um_uppervp);
449#endif
450
451	/* Get values from lower file system (if any) */
452	if (um->um_lowervp) {
453		VFSATTR_INIT(&attr);
454		VFSATTR_WANTED(&attr, f_bsize);
455		VFSATTR_WANTED(&attr, f_blocks);
456		VFSATTR_WANTED(&attr, f_bused);
457		VFSATTR_WANTED(&attr, f_files);
458		error = vfs_getattr(um->um_lowervp->v_mount, &attr, context);
459		if (error)
460			return (error);
461
462		/* now copy across the "interesting" information and fake the rest */
463		if (VFSATTR_IS_SUPPORTED(&attr, f_bsize))
464			lbsize = attr.f_bsize;
465		else
466			lbsize = um->um_lowervp->v_mount->mnt_devblocksize;
467		fsap->f_blocks = VFSATTR_IS_SUPPORTED(&attr, f_blocks) ? attr.f_blocks : 0;
468		fsap->f_bused  = VFSATTR_IS_SUPPORTED(&attr, f_bused)  ? attr.f_bused  : 0;
469		fsap->f_files  = VFSATTR_IS_SUPPORTED(&attr, f_files)  ? attr.f_files  : 0;
470	} else {
471		fsap->f_blocks = 0;
472		fsap->f_bused = 0;
473		fsap->f_files = 0;
474	}
475
476	VFSATTR_INIT(&attr);
477	VFSATTR_WANTED(&attr, f_bsize);
478	VFSATTR_WANTED(&attr, f_blocks);
479	VFSATTR_WANTED(&attr, f_bfree);
480	VFSATTR_WANTED(&attr, f_bavail);
481	VFSATTR_WANTED(&attr, f_files);
482	VFSATTR_WANTED(&attr, f_ffree);
483	error = vfs_getattr(um->um_uppervp->v_mount, &attr, context);
484	if (error)
485		return (error);
486
487	if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) {
488		fsap->f_bsize = attr.f_bsize;
489		VFSATTR_SET_SUPPORTED(fsap, f_bsize);
490	}
491	if (VFSATTR_IS_SUPPORTED(&attr, f_iosize)) {
492		fsap->f_iosize = attr.f_iosize;
493		VFSATTR_SET_SUPPORTED(fsap, f_iosize);
494	}
495
496	/*
497	 * if the lower and upper blocksizes differ, then frig the
498	 * block counts so that the sizes reported by df make some
499	 * kind of sense.  none of this makes sense though.
500	 */
501	if (VFSATTR_IS_SUPPORTED(&attr, f_bsize))
502		fsap->f_bsize = attr.f_bsize;
503	else
504		fsap->f_bsize =  um->um_uppervp->v_mount->mnt_devblocksize;
505	VFSATTR_RETURN(fsap, f_bsize, attr.f_bsize);
506	if (fsap->f_bsize != lbsize)
507		fsap->f_blocks = fsap->f_blocks * lbsize / attr.f_bsize;
508
509	/*
510	 * The "total" fields count total resources in all layers,
511	 * the "free" fields count only those resources which are
512	 * free in the upper layer (since only the upper layer
513	 * is writeable).
514	 */
515	if (VFSATTR_IS_SUPPORTED(&attr, f_blocks))
516		fsap->f_blocks += attr.f_blocks;
517	if (VFSATTR_IS_SUPPORTED(&attr, f_bfree))
518		fsap->f_bfree = attr.f_bfree;
519	if (VFSATTR_IS_SUPPORTED(&attr, f_bavail))
520		fsap->f_bavail = attr.f_bavail;
521	if (VFSATTR_IS_SUPPORTED(&attr, f_bused))
522		fsap->f_bused += attr.f_bused;
523	if (VFSATTR_IS_SUPPORTED(&attr, f_files))
524		fsap->f_files += attr.f_files;
525	if (VFSATTR_IS_SUPPORTED(&attr, f_ffree))
526		fsap->f_ffree = attr.f_ffree;
527
528	VFSATTR_SET_SUPPORTED(fsap, f_bsize);
529	VFSATTR_SET_SUPPORTED(fsap, f_blocks);
530	VFSATTR_SET_SUPPORTED(fsap, f_bfree);
531	VFSATTR_SET_SUPPORTED(fsap, f_bavail);
532	VFSATTR_SET_SUPPORTED(fsap, f_bused);
533	VFSATTR_SET_SUPPORTED(fsap, f_files);
534	VFSATTR_SET_SUPPORTED(fsap, f_ffree);
535
536	return (0);
537}
538
/*
 * VFS entry points that the union layer does not implement itself.
 * Each macro casts a generic helper (nullop or eopnotsupp, both defined
 * outside this file) to the function-pointer type of the slot it fills
 * in union_vfsops.
 *
 * union_sync uses nullop.
 * XXX - Assumes no data cached at union layer.
 */
#define union_sync (int (*) (mount_t, int, vfs_context_t))nullop

/*
 * The remaining operations use eopnotsupp (by its name, presumably
 * failing with EOPNOTSUPP -- confirm against its definition).
 */
#define union_fhtovp (int (*) (mount_t, int, unsigned char *, vnode_t *, vfs_context_t))eopnotsupp
#define union_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t))eopnotsupp
#define union_vget (int (*) (mount_t, ino64_t, vnode_t *, vfs_context_t))eopnotsupp
#define union_vptofh (int (*) (vnode_t, int *, unsigned char *, vfs_context_t))eopnotsupp
548
/*
 * VFS operations vector for the union filesystem.
 *
 * The initializer is positional, so the entries must stay in the order
 * in which struct vfsops declares its members.  union_sync, union_vget,
 * union_fhtovp, union_vptofh and union_sysctl are the stub macros
 * defined just above; union_init is not defined in this file.
 */
struct vfsops union_vfsops = {
	union_mount,
	union_start,
	union_unmount,
	union_root,
	NULL,			/* quotactl */
	union_vfs_getattr,
	union_sync,
	union_vget,
	union_fhtovp,
	union_vptofh,
	union_init,
	union_sysctl,
	NULL,			/* NOTE(review): presumably the setattr slot -- confirm against struct vfsops */
	{NULL}			/* NOTE(review): presumably the trailing reserved array -- confirm */
};
565
566
567