ufs_extattr.c revision 74273
1/*-
2 * Copyright (c) 1999, 2000, 2001 Robert N. M. Watson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/ufs/ufs/ufs_extattr.c 74273 2001-03-15 02:54:29Z rwatson $
27 */
28/*
29 * TrustedBSD Project - extended attribute support for UFS-like file systems
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/namei.h>
36#include <sys/malloc.h>
37#include <sys/fcntl.h>
38#include <sys/proc.h>
39#include <sys/vnode.h>
40#include <sys/mount.h>
41#include <sys/lock.h>
42#include <sys/dirent.h>
43#include <sys/extattr.h>
44
45#include <vm/vm_zone.h>
46
47#include <ufs/ufs/dir.h>
48#include <ufs/ufs/extattr.h>
49#include <ufs/ufs/quota.h>
50#include <ufs/ufs/ufsmount.h>
51#include <ufs/ufs/inode.h>
52#include <ufs/ufs/ufs_extern.h>
53
54#include "opt_ffs.h"
55
56#ifdef FFS_EXTATTR
57
/* Yields the smaller of a and b; either argument may be evaluated twice. */
#define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
59
60static MALLOC_DEFINE(M_UFS_EXTATTR, "ufs_extattr", "ufs extended attribute");
61
62static int	ufs_extattr_valid_attrname(const char *attrname);
63static int	ufs_extattr_credcheck(struct vnode *vp,
64    struct ufs_extattr_list_entry *uele, struct ucred *cred, struct proc *p,
65    int access);
66static int	ufs_extattr_enable_with_open(struct ufsmount *ump,
67    struct vnode *vp, int namespace, const char *attrname, struct proc *p);
68static int	ufs_extattr_enable(struct ufsmount *ump, int namespace,
69    const char *attrname, struct vnode *backing_vnode, struct proc *p);
70static int	ufs_extattr_disable(struct ufsmount *ump, int namespace,
71    const char *attrname, struct proc *p);
72static int	ufs_extattr_get(struct vnode *vp, int namespace,
73    const char *name, struct uio *uio, struct ucred *cred, struct proc *p);
74static int	ufs_extattr_set(struct vnode *vp, int namespace,
75    const char *name, struct uio *uio, struct ucred *cred, struct proc *p);
76static int	ufs_extattr_rm(struct vnode *vp, int namespace,
77    const char *name, struct ucred *cred, struct proc *p);
78
79/*
80 * Per-FS attribute lock protecting attribute operations.
81 * XXX Right now there is a lot of lock contention due to having a single
82 * lock per-FS; really, this should be far more fine-grained.
83 */
84static void
85ufs_extattr_uepm_lock(struct ufsmount *ump, struct proc *p)
86{
87
88	/* Ideally, LK_CANRECURSE would not be used, here. */
89	lockmgr(&ump->um_extattr.uepm_lock, LK_EXCLUSIVE | LK_RETRY |
90	    LK_CANRECURSE, 0, p);
91}
92
93static void
94ufs_extattr_uepm_unlock(struct ufsmount *ump, struct proc *p)
95{
96
97	lockmgr(&ump->um_extattr.uepm_lock, LK_RELEASE, 0, p);
98}
99
/*
 * Decide whether attrname may name an actual attribute.  Returns 1 when
 * it may, 0 when it may not.
 *
 * The following are rejected:
 *	 a NULL attrname pointer
 *	 the empty string (used to retrieve the application attribute list)
 *	 the string "$" (used to retrieve the system attribute list)
 */
static int
ufs_extattr_valid_attrname(const char *attrname)
{

	/* No name at all, or a name with no characters in it. */
	if (attrname == NULL || attrname[0] == '\0')
		return (0);

	/* Exactly the one-character name "$". */
	if (attrname[0] == '$' && attrname[1] == '\0')
		return (0);

	return (1);
}
121
122/*
123 * Locate an attribute given a name and mountpoint.
124 * Must be holding uepm lock for the mount point.
125 */
126static struct ufs_extattr_list_entry *
127ufs_extattr_find_attr(struct ufsmount *ump, int namespace,
128    const char *attrname)
129{
130	struct ufs_extattr_list_entry	*search_attribute;
131
132	for (search_attribute = LIST_FIRST(&ump->um_extattr.uepm_list);
133	    search_attribute;
134	    search_attribute = LIST_NEXT(search_attribute, uele_entries)) {
135		if (!(strncmp(attrname, search_attribute->uele_attrname,
136		    UFS_EXTATTR_MAXEXTATTRNAME)) &&
137		    (namespace == search_attribute->uele_namespace)) {
138			return (search_attribute);
139		}
140	}
141
142	return (0);
143}
144
145/*
146 * Initialize per-FS structures supporting extended attributes.  Do not
147 * start extended attributes yet.
148 */
149void
150ufs_extattr_uepm_init(struct ufs_extattr_per_mount *uepm)
151{
152
153	uepm->uepm_flags = 0;
154
155	LIST_INIT(&uepm->uepm_list);
156	/* XXX is PVFS right, here? */
157	lockinit(&uepm->uepm_lock, PVFS, "extattr", 0, 0);
158	uepm->uepm_flags |= UFS_EXTATTR_UEPM_INITIALIZED;
159}
160
161/*
162 * Destroy per-FS structures supporting extended attributes.  Assumes
163 * that EAs have already been stopped, and will panic if not.
164 */
165void
166ufs_extattr_uepm_destroy(struct ufs_extattr_per_mount *uepm)
167{
168
169	if (!(uepm->uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED))
170		panic("ufs_extattr_uepm_destroy: not initialized");
171
172	if ((uepm->uepm_flags & UFS_EXTATTR_UEPM_STARTED))
173		panic("ufs_extattr_uepm_destroy: called while still started");
174
175	/*
176	 * XXX: It's not clear that either order for the next two lines is
177	 * ideal, and it should never be a problem if this is only called
178	 * during unmount, and with vfs_busy().
179	 */
180	uepm->uepm_flags &= ~UFS_EXTATTR_UEPM_INITIALIZED;
181	lockdestroy(&uepm->uepm_lock);
182}
183
184/*
185 * Start extended attribute support on an FS.
186 */
187int
188ufs_extattr_start(struct mount *mp, struct proc *p)
189{
190	struct ufsmount	*ump;
191	int	error = 0;
192
193	ump = VFSTOUFS(mp);
194
195	ufs_extattr_uepm_lock(ump, p);
196
197	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) {
198		error = EOPNOTSUPP;
199		goto unlock;
200	}
201	if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED) {
202		error = EBUSY;
203		goto unlock;
204	}
205
206	ump->um_extattr.uepm_flags |= UFS_EXTATTR_UEPM_STARTED;
207
208	crhold(p->p_ucred);
209	ump->um_extattr.uepm_ucred = p->p_ucred;
210
211unlock:
212	ufs_extattr_uepm_unlock(ump, p);
213
214	return (error);
215}
216
#ifdef FFS_EXTATTR_AUTOSTART
/*
 * Helper routine: look up the name dirname in the locked directory
 * start_dvp and hand back, through vp, the locked vnode of the inode it
 * names.  Symlinks are not followed and the result may be a vnode of any
 * type.
 *
 * lockparent selects what happens to the lock on start_dvp:
 *	UE_GETDIR_LOCKPARENT		the parent stays locked
 *	UE_GETDIR_LOCKPARENT_DONT	the parent is unlocked, on failure
 *					as well as on success
 *
 * When the target is the parent itself (i.e., "."), there will be two
 * references and one lock, which the caller may have to special-case.
 *
 * Returns 0 on success, or the error from copystr() or ufs_lookup().
 */
#define	UE_GETDIR_LOCKPARENT	1
#define	UE_GETDIR_LOCKPARENT_DONT	2
static int
ufs_extattr_lookup(struct vnode *start_dvp, int lockparent, char *dirname,
    struct vnode **vp, struct proc *p)
{
	struct vop_cachedlookup_args lookup_args;
	struct componentname cn;
	struct vnode *found_vp;
	int error, want_locked, want_unlocked, parent_unlocked;

	want_locked = (lockparent == UE_GETDIR_LOCKPARENT);
	want_unlocked = (lockparent == UE_GETDIR_LOCKPARENT_DONT);

	/* Describe a single, final path component to look up. */
	bzero(&cn, sizeof(cn));
	cn.cn_nameiop = LOOKUP;
	cn.cn_flags = want_locked ? (ISLASTCN | LOCKPARENT) : ISLASTCN;
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_pnbuf = zalloc(namei_zone);
	cn.cn_nameptr = cn.cn_pnbuf;

	error = copystr(dirname, cn.cn_pnbuf, MAXPATHLEN,
	    (size_t *) &cn.cn_namelen);
	if (error) {
		if (want_unlocked)
			VOP_UNLOCK(start_dvp, 0, p);
		zfree(namei_zone, cn.cn_pnbuf);
		printf("ufs_extattr_lookup: copystr failed\n");
		return (error);
	}
	/* copystr() counted the terminating nul; the name length must not. */
	cn.cn_namelen--;

	lookup_args.a_desc = NULL;
	lookup_args.a_dvp = start_dvp;
	lookup_args.a_vpp = &found_vp;
	lookup_args.a_cnp = &cn;
	error = ufs_lookup(&lookup_args);
	zfree(namei_zone, cn.cn_pnbuf);

	/* Did ufs_lookup() report that it dropped the parent's lock? */
	parent_unlocked = ((cn.cn_flags & PDIRUNLOCK) != 0);

	if (error) {
		/*
		 * Failure: drop the parent's lock ourselves if that was
		 * requested and ufs_lookup() has not already done it.
		 */
		if (!parent_unlocked && want_unlocked)
			VOP_UNLOCK(start_dvp, 0, p);

		/*
		 * The parent must not have been unlocked behind our back
		 * when the caller asked for it to stay locked.
		 */
		if (parent_unlocked && want_locked)
			panic("ufs_extattr_lookup: lockparent but PDIRUNLOCK");

		printf("ufs_extattr_lookup: ufs_lookup failed (%d)\n", error);
		return (error);
	}

	/*
	 * A result equal to start_dvp is tolerated (a panic for that case
	 * used to be sketched here, disabled), so the consistency check
	 * below exempts it.
	 */
	if (found_vp != start_dvp && !parent_unlocked && want_unlocked)
		panic("ufs_extattr_lookup: !lockparent but !PDIRUNLOCK");

	if (parent_unlocked && want_locked)
		panic("ufs_extattr_lookup: lockparent but PDIRUNLOCK");

	*vp = found_vp;
	return (0);
}
#endif /* FFS_EXTATTR_AUTOSTART */
302
303/*
304 * Enable an EA using the passed file system, backing vnode, attribute name,
305 * namespace, and proc.  Will perform a VOP_OPEN() on the vp, so expects vp
306 * to be locked when passed in.  Will unlock vp, and grab its own reference,
307 * so the caller needs to vrele(), just not vput().  The unlock the vnode
308 * regardless of call success or failure.
309 */
310static int
311ufs_extattr_enable_with_open(struct ufsmount *ump, struct vnode *vp,
312    int namespace, const char *attrname, struct proc *p)
313{
314	int error;
315
316	error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p);
317	if (error) {
318		printf("ufs_extattr_enable_with_open.VOP_OPEN(): failed "
319		    "with %d\n", error);
320		VOP_UNLOCK(vp, 0, p);
321		return (error);
322	}
323
324	/*
325	 * XXX: Note, should VOP_CLOSE() if vfs_object_create() fails, but due
326	 * to a similar piece of code in vn_open(), we don't.
327	 */
328	if (vn_canvmio(vp) == TRUE)
329		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0) {
330			/*
331			 * XXX: bug replicated from vn_open(): should
332			 * VOP_CLOSE() here.
333			 */
334			VOP_UNLOCK(vp, 0, p);
335			return (error);
336		}
337
338	vp->v_writecount++;
339
340	vref(vp);
341
342	VOP_UNLOCK(vp, 0, p);
343
344	return (ufs_extattr_enable(ump, namespace, attrname, vp, p));
345}
346
347#ifdef FFS_EXTATTR_AUTOSTART
348/*
349 * Given a locked directory vnode, iterate over the names in the directory
350 * and use ufs_extattr_lookup() to retrieve locked vnodes of potential
351 * attribute files.  Then invoke ufs_extattr_enable_with_open() on each
352 * to attempt to start the attribute.  Leaves the directory locked on
353 * exit.
354 * XXX: Add a EA namespace argument
355 */
356static int
357ufs_extattr_iterate_directory(struct ufsmount *ump, struct vnode *dvp,
358    int namespace, struct proc *p)
359{
360	struct vop_readdir_args vargs;
361	struct dirent *dp, *edp;
362	struct vnode *attr_vp;
363	struct uio auio;
364	struct iovec aiov;
365	char *dirbuf;
366	int error, eofflag = 0;
367
368	if (dvp->v_type != VDIR)
369		return (ENOTDIR);
370
371	MALLOC(dirbuf, char *, DIRBLKSIZ, M_TEMP, M_WAITOK);
372
373	auio.uio_iov = &aiov;
374	auio.uio_iovcnt = 1;
375	auio.uio_rw = UIO_READ;
376	auio.uio_segflg = UIO_SYSSPACE;
377	auio.uio_procp = p;
378	auio.uio_offset = 0;
379
380	vargs.a_desc = NULL;
381	vargs.a_vp = dvp;
382	vargs.a_uio = &auio;
383	vargs.a_cred = p->p_ucred;
384	vargs.a_eofflag = &eofflag;
385	vargs.a_ncookies = NULL;
386	vargs.a_cookies = NULL;
387
388	while (!eofflag) {
389		auio.uio_resid = DIRBLKSIZ;
390		aiov.iov_base = dirbuf;
391		aiov.iov_len = DIRBLKSIZ;
392		error = ufs_readdir(&vargs);
393		if (error) {
394			printf("ufs_extattr_iterate_directory: ufs_readdir "
395			    "%d\n", error);
396			return (error);
397		}
398
399		edp = (struct dirent *)&dirbuf[DIRBLKSIZ];
400		for (dp = (struct dirent *)dirbuf; dp < edp; ) {
401#if (BYTE_ORDER == LITTLE_ENDIAN)
402			dp->d_type = dp->d_namlen;
403			dp->d_namlen = 0;
404#else
405			dp->d_type = 0;
406#endif
407			if (dp->d_reclen == 0)
408				break;
409			error = ufs_extattr_lookup(dvp, UE_GETDIR_LOCKPARENT,
410			    dp->d_name, &attr_vp, p);
411			if (error) {
412				printf("ufs_extattr_iterate_directory: lookup "
413				    "%s %d\n", dp->d_name, error);
414			} else if (attr_vp == dvp) {
415				vrele(attr_vp);
416			} else if (attr_vp->v_type != VREG) {
417/*
418 * Eventually, this will be uncommented, but in the mean time, the ".."
419 * entry causes unnecessary console warnings.
420				printf("ufs_extattr_iterate_directory: "
421				    "%s not VREG\n", dp->d_name);
422*/
423				vput(attr_vp);
424			} else {
425				error = ufs_extattr_enable_with_open(ump,
426				    attr_vp, namespace, dp->d_name, p);
427				vrele(attr_vp);
428				if (error) {
429					printf("ufs_extattr_iterate_directory: "
430					    "enable %s %d\n", dp->d_name,
431					    error);
432				} else {
433/*
434 * While it's nice to have some visual output here, skip for the time-being.
435 * Probably should be enabled by -v at boot.
436					printf("Autostarted %s\n", dp->d_name);
437 */
438				}
439			}
440			dp = (struct dirent *) ((char *)dp + dp->d_reclen);
441			if (dp >= edp)
442				break;
443		}
444	}
445	FREE(dirbuf, M_TEMP);
446
447	return (0);
448}
449
450/*
451 * Auto-start of extended attributes, to be executed (optionally) at
452 * mount-time.
453 */
454int
455ufs_extattr_autostart(struct mount *mp, struct proc *p)
456{
457	struct vnode *attr_dvp, /**attr_vp,*/ *rvp;
458	int error;
459
460	/*
461	 * Does UFS_EXTATTR_FSROOTSUBDIR exist off the file system root?
462	 * If so, automatically start EA's.
463	 */
464	error = VFS_ROOT(mp, &rvp);
465	if (error) {
466		printf("ufs_extattr_autostart.VFS_ROOT() returned %d\n", error);
467		return (error);
468	}
469
470	error = ufs_extattr_lookup(rvp, UE_GETDIR_LOCKPARENT_DONT,
471	    UFS_EXTATTR_FSROOTSUBDIR, &attr_dvp, p);
472	if (error) {
473		/* rvp ref'd but now unlocked */
474		vrele(rvp);
475		return (error);
476	}
477	if (rvp == attr_dvp) {
478		/* Should never happen. */
479		vrele(attr_dvp);
480		vput(rvp);
481		return (EINVAL);
482	}
483	vrele(rvp);
484
485	if (attr_dvp->v_type != VDIR) {
486		printf("ufs_extattr_autostart: %s != VDIR\n",
487		    UFS_EXTATTR_FSROOTSUBDIR);
488		goto return_vput;
489	}
490
491	error = ufs_extattr_start(mp, p);
492	if (error) {
493		printf("ufs_extattr_autostart: ufs_extattr_start failed (%d)\n",
494		    error);
495		goto return_vput;
496	}
497
498	/*
499	 * Iterate over the directory.  Eventually we will lookup sub-
500	 * directories and iterate over them independently with different
501	 * EA namespaces.
502	 *
503	 * XXX: Right now, assert that all attributes are in the system
504	 * namespace.
505	 */
506	error = ufs_extattr_iterate_directory(VFSTOUFS(mp), attr_dvp,
507	    EXTATTR_NAMESPACE_SYSTEM, p);
508	if (error)
509		printf("ufs_extattr_iterate_directory returned %d\n", error);
510
511	/* Mask startup failures. */
512	error = 0;
513
514return_vput:
515	vput(attr_dvp);
516
517	return (error);
518}
519#endif /* !FFS_EXTATTR_AUTOSTART */
520
521/*
522 * Stop extended attribute support on an FS.
523 */
524int
525ufs_extattr_stop(struct mount *mp, struct proc *p)
526{
527	struct ufs_extattr_list_entry	*uele;
528	struct ufsmount	*ump = VFSTOUFS(mp);
529	int	error = 0;
530
531	ufs_extattr_uepm_lock(ump, p);
532
533	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
534		error = EOPNOTSUPP;
535		goto unlock;
536	}
537
538	while (LIST_FIRST(&ump->um_extattr.uepm_list) != NULL) {
539		uele = LIST_FIRST(&ump->um_extattr.uepm_list);
540		ufs_extattr_disable(ump, uele->uele_namespace,
541		    uele->uele_attrname, p);
542	}
543
544	ump->um_extattr.uepm_flags &= ~UFS_EXTATTR_UEPM_STARTED;
545
546	crfree(ump->um_extattr.uepm_ucred);
547	ump->um_extattr.uepm_ucred = NULL;
548
549unlock:
550	ufs_extattr_uepm_unlock(ump, p);
551
552	return (error);
553}
554
555/*
556 * Enable a named attribute on the specified file system; provide an
557 * unlocked backing vnode to hold the attribute data.
558 */
559static int
560ufs_extattr_enable(struct ufsmount *ump, int namespace, const char *attrname,
561    struct vnode *backing_vnode, struct proc *p)
562{
563	struct ufs_extattr_list_entry	*attribute;
564	struct iovec	aiov;
565	struct uio	auio;
566	int	error = 0;
567
568	if (!ufs_extattr_valid_attrname(attrname))
569		return (EINVAL);
570	if (backing_vnode->v_type != VREG)
571		return (EINVAL);
572
573	MALLOC(attribute, struct ufs_extattr_list_entry *,
574	    sizeof(struct ufs_extattr_list_entry), M_UFS_EXTATTR, M_WAITOK);
575	if (attribute == NULL)
576		return (ENOMEM);
577
578	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
579		error = EOPNOTSUPP;
580		goto free_exit;
581	}
582
583	if (ufs_extattr_find_attr(ump, namespace, attrname)) {
584		error = EEXIST;
585		goto free_exit;
586	}
587
588	strncpy(attribute->uele_attrname, attrname, UFS_EXTATTR_MAXEXTATTRNAME);
589	attribute->uele_namespace = namespace;
590	bzero(&attribute->uele_fileheader,
591	    sizeof(struct ufs_extattr_fileheader));
592
593	attribute->uele_backing_vnode = backing_vnode;
594
595	auio.uio_iov = &aiov;
596	auio.uio_iovcnt = 1;
597	aiov.iov_base = (caddr_t) &attribute->uele_fileheader;
598	aiov.iov_len = sizeof(struct ufs_extattr_fileheader);
599	auio.uio_resid = sizeof(struct ufs_extattr_fileheader);
600	auio.uio_offset = (off_t) 0;
601	auio.uio_segflg = UIO_SYSSPACE;
602	auio.uio_rw = UIO_READ;
603	auio.uio_procp = (struct proc *) p;
604
605	VOP_LEASE(backing_vnode, p, p->p_cred->pc_ucred, LEASE_WRITE);
606	vn_lock(backing_vnode, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
607	error = VOP_READ(backing_vnode, &auio, IO_NODELOCKED,
608	    ump->um_extattr.uepm_ucred);
609	VOP_UNLOCK(backing_vnode, 0, p);
610
611	if (error)
612		goto free_exit;
613
614	if (auio.uio_resid != 0) {
615		printf("ufs_extattr_enable: malformed attribute header\n");
616		error = EINVAL;
617		goto free_exit;
618	}
619
620	if (attribute->uele_fileheader.uef_magic != UFS_EXTATTR_MAGIC) {
621		printf("ufs_extattr_enable: invalid attribute header magic\n");
622		error = EINVAL;
623		goto free_exit;
624	}
625
626	if (attribute->uele_fileheader.uef_version != UFS_EXTATTR_VERSION) {
627		printf("ufs_extattr_enable: incorrect attribute header "
628		    "version\n");
629		error = EINVAL;
630		goto free_exit;
631	}
632
633	backing_vnode->v_flag |= VSYSTEM;
634	LIST_INSERT_HEAD(&ump->um_extattr.uepm_list, attribute, uele_entries);
635
636	return (0);
637
638free_exit:
639	FREE(attribute, M_UFS_EXTATTR);
640	return (error);
641}
642
643/*
644 * Disable extended attribute support on an FS.
645 */
646static int
647ufs_extattr_disable(struct ufsmount *ump, int namespace, const char *attrname,
648    struct proc *p)
649{
650	struct ufs_extattr_list_entry	*uele;
651	int	error = 0;
652
653	if (!ufs_extattr_valid_attrname(attrname))
654		return (EINVAL);
655
656	uele = ufs_extattr_find_attr(ump, namespace, attrname);
657	if (!uele)
658		return (ENOENT);
659
660	LIST_REMOVE(uele, uele_entries);
661
662	uele->uele_backing_vnode->v_flag &= ~VSYSTEM;
663	error = vn_close(uele->uele_backing_vnode, FREAD|FWRITE, p->p_ucred, p);
664
665	FREE(uele, M_UFS_EXTATTR);
666
667	return (error);
668}
669
670/*
671 * VFS call to manage extended attributes in UFS.  If filename_vp is
672 * non-NULL, it must be passed in locked, and regardless of errors in
673 * processing, will be unlocked.
674 */
675int
676ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
677    int namespace, const char *attrname, struct proc *p)
678{
679	struct ufsmount	*ump = VFSTOUFS(mp);
680	int	error;
681
682	/*
683	 * Processes with privilege, but in jail, are not allowed to
684	 * configure extended attributes.
685	 */
686	if ((error = suser_xxx(p->p_cred->pc_ucred, p, 0))) {
687		if (filename_vp != NULL)
688			VOP_UNLOCK(filename_vp, 0, p);
689		return (error);
690	}
691
692	switch(cmd) {
693	case UFS_EXTATTR_CMD_START:
694		if (filename_vp != NULL) {
695			VOP_UNLOCK(filename_vp, 0, p);
696			return (EINVAL);
697		}
698		if (attrname != NULL)
699			return (EINVAL);
700
701		error = ufs_extattr_start(mp, p);
702
703		return (error);
704
705	case UFS_EXTATTR_CMD_STOP:
706		if (filename_vp != NULL) {
707			VOP_UNLOCK(filename_vp, 0, p);
708			return (EINVAL);
709		}
710		if (attrname != NULL)
711			return (EINVAL);
712
713		error = ufs_extattr_stop(mp, p);
714
715		return (error);
716
717	case UFS_EXTATTR_CMD_ENABLE:
718
719		if (filename_vp == NULL)
720			return (EINVAL);
721		if (attrname == NULL) {
722			VOP_UNLOCK(filename_vp, 0, p);
723			return (EINVAL);
724		}
725
726		/*
727		 * ufs_extattr_enable_with_open() will always unlock the
728		 * vnode, regardless of failure.
729		 */
730		ufs_extattr_uepm_lock(ump, p);
731		error = ufs_extattr_enable_with_open(ump, filename_vp,
732		    namespace, attrname, p);
733		ufs_extattr_uepm_unlock(ump, p);
734
735		return (error);
736
737	case UFS_EXTATTR_CMD_DISABLE:
738
739		if (filename_vp != NULL) {
740			VOP_UNLOCK(filename_vp, 0, p);
741			return (EINVAL);
742		}
743		if (attrname == NULL)
744			return (EINVAL);
745
746		ufs_extattr_uepm_lock(ump, p);
747		error = ufs_extattr_disable(ump, namespace, attrname, p);
748		ufs_extattr_uepm_unlock(ump, p);
749
750		return (error);
751
752	default:
753		return (EINVAL);
754	}
755}
756
757/*
758 * Credential check based on process requesting service, and per-attribute
759 * permissions.
760 */
761static int
762ufs_extattr_credcheck(struct vnode *vp, struct ufs_extattr_list_entry *uele,
763    struct ucred *cred, struct proc *p, int access)
764{
765
766	/*
767	 * Kernel-invoked always succeeds.
768	 */
769	if (cred == NULL)
770		return (0);
771
772	/*
773	 * Do not allow privileged processes in jail to directly
774	 * manipulate system attributes.
775	 *
776	 * XXX What capability should apply here?
777	 * Probably CAP_SYS_SETFFLAG.
778	 */
779	switch (uele->uele_namespace) {
780	case EXTATTR_NAMESPACE_SYSTEM:
781		return (suser_xxx(cred, p, 0));
782	case EXTATTR_NAMESPACE_USER:
783		return (VOP_ACCESS(vp, access, cred, p));
784	default:
785		return (EPERM);
786	}
787}
788
789/*
790 * Vnode operating to retrieve a named extended attribute.
791 */
792int
793ufs_vop_getextattr(struct vop_getextattr_args *ap)
794/*
795vop_getextattr {
796	IN struct vnode *a_vp;
797	IN int a_namespace;
798	IN const char *a_name;
799	INOUT struct uio *a_uio;
800	IN struct ucred *a_cred;
801	IN struct proc *a_p;
802};
803*/
804{
805	struct mount	*mp = ap->a_vp->v_mount;
806	struct ufsmount	*ump = VFSTOUFS(mp);
807	int	error;
808
809	ufs_extattr_uepm_lock(ump, ap->a_p);
810
811	error = ufs_extattr_get(ap->a_vp, ap->a_namespace, ap->a_name,
812	    ap->a_uio, ap->a_cred, ap->a_p);
813
814	ufs_extattr_uepm_unlock(ump, ap->a_p);
815
816	return (error);
817}
818
819/*
820 * Real work associated with retrieving a named attribute--assumes that
821 * the attribute lock has already been grabbed.
822 */
823static int
824ufs_extattr_get(struct vnode *vp, int namespace, const char *name,
825    struct uio *uio, struct ucred *cred, struct proc *p)
826{
827	struct ufs_extattr_list_entry	*attribute;
828	struct ufs_extattr_header	ueh;
829	struct iovec	local_aiov;
830	struct uio	local_aio;
831	struct mount	*mp = vp->v_mount;
832	struct ufsmount	*ump = VFSTOUFS(mp);
833	struct inode	*ip = VTOI(vp);
834	off_t	base_offset;
835	size_t	size, old_size;
836	int	error = 0;
837
838	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED))
839		return (EOPNOTSUPP);
840
841	if (strlen(name) == 0) {
842		/* XXX retrieve attribute lists. */
843		/* XXX should probably be checking for name == NULL? */
844		return (EINVAL);
845	}
846
847	attribute = ufs_extattr_find_attr(ump, namespace, name);
848	if (!attribute)
849		return (ENOENT);
850
851	if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IREAD)))
852		return (error);
853
854	/*
855	 * Allow only offsets of zero to encourage the read/replace
856	 * extended attribute semantic.  Otherwise we can't guarantee
857	 * atomicity, as we don't provide locks for extended attributes.
858	 */
859	if (uio->uio_offset != 0)
860		return (ENXIO);
861
862	/*
863	 * Find base offset of header in file based on file header size, and
864	 * data header size + maximum data size, indexed by inode number.
865	 */
866	base_offset = sizeof(struct ufs_extattr_fileheader) +
867	    ip->i_number * (sizeof(struct ufs_extattr_header) +
868	    attribute->uele_fileheader.uef_size);
869
870	/*
871	 * Read in the data header to see if the data is defined, and if so
872	 * how much.
873	 */
874	bzero(&ueh, sizeof(struct ufs_extattr_header));
875	local_aiov.iov_base = (caddr_t) &ueh;
876	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
877	local_aio.uio_iov = &local_aiov;
878	local_aio.uio_iovcnt = 1;
879	local_aio.uio_rw = UIO_READ;
880	local_aio.uio_segflg = UIO_SYSSPACE;
881	local_aio.uio_procp = p;
882	local_aio.uio_offset = base_offset;
883	local_aio.uio_resid = sizeof(struct ufs_extattr_header);
884
885	/*
886	 * Acquire locks.
887	 */
888	VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_READ);
889	/*
890	 * Don't need to get a lock on the backing file if the getattr is
891	 * being applied to the backing file, as the lock is already held.
892	 */
893	if (attribute->uele_backing_vnode != vp)
894		vn_lock(attribute->uele_backing_vnode, LK_SHARED |
895		    LK_NOPAUSE | LK_RETRY, p);
896
897	error = VOP_READ(attribute->uele_backing_vnode, &local_aio,
898	    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
899	if (error)
900		goto vopunlock_exit;
901
902	/* Defined? */
903	if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) {
904		error = ENOENT;
905		goto vopunlock_exit;
906	}
907
908	/* Valid for the current inode generation? */
909	if (ueh.ueh_i_gen != ip->i_gen) {
910		/*
911		 * The inode itself has a different generation number
912		 * than the attribute data.  For now, the best solution
913		 * is to coerce this to undefined, and let it get cleaned
914		 * up by the next write or extattrctl clean.
915		 */
916		printf("ufs_extattr_get: inode number inconsistency (%d, %d)\n",
917		    ueh.ueh_i_gen, ip->i_gen);
918		error = ENOENT;
919		goto vopunlock_exit;
920	}
921
922	/* Local size consistency check. */
923	if (ueh.ueh_len > attribute->uele_fileheader.uef_size) {
924		error = ENXIO;
925		goto vopunlock_exit;
926	}
927
928	/* Allow for offset into the attribute data. */
929	uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header);
930
931	/*
932	 * Figure out maximum to transfer -- use buffer size and local data
933	 * limit.
934	 */
935	size = MIN(uio->uio_resid, ueh.ueh_len);
936	old_size = uio->uio_resid;
937	uio->uio_resid = size;
938
939	error = VOP_READ(attribute->uele_backing_vnode, uio,
940	    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
941	if (error)
942		goto vopunlock_exit;
943
944	uio->uio_resid = old_size - (size - uio->uio_resid);
945
946vopunlock_exit:
947
948	uio->uio_offset = 0;
949
950	if (attribute->uele_backing_vnode != vp)
951		VOP_UNLOCK(attribute->uele_backing_vnode, 0, p);
952
953	return (error);
954}
955
956/*
957 * Vnode operation to set a named attribute.
958 */
959int
960ufs_vop_setextattr(struct vop_setextattr_args *ap)
961/*
962vop_setextattr {
963	IN struct vnode *a_vp;
964	IN int a_namespace;
965	IN const char *a_name;
966	INOUT struct uio *a_uio;
967	IN struct ucred *a_cred;
968	IN struct proc *a_p;
969};
970*/
971{
972	struct mount	*mp = ap->a_vp->v_mount;
973	struct ufsmount	*ump = VFSTOUFS(mp);
974
975	int	error;
976
977	ufs_extattr_uepm_lock(ump, ap->a_p);
978
979	if (ap->a_uio != NULL)
980		error = ufs_extattr_set(ap->a_vp, ap->a_namespace, ap->a_name,
981		    ap->a_uio, ap->a_cred, ap->a_p);
982	else
983		error = ufs_extattr_rm(ap->a_vp, ap->a_namespace, ap->a_name,
984		    ap->a_cred, ap->a_p);
985
986	ufs_extattr_uepm_unlock(ump, ap->a_p);
987
988	return (error);
989}
990
991/*
992 * Real work associated with setting a vnode's extended attributes;
993 * assumes that the attribute lock has already been grabbed.
994 */
995static int
996ufs_extattr_set(struct vnode *vp, int namespace, const char *name,
997    struct uio *uio, struct ucred *cred, struct proc *p)
998{
999	struct ufs_extattr_list_entry	*attribute;
1000	struct ufs_extattr_header	ueh;
1001	struct iovec	local_aiov;
1002	struct uio	local_aio;
1003	struct mount	*mp = vp->v_mount;
1004	struct ufsmount	*ump = VFSTOUFS(mp);
1005	struct inode	*ip = VTOI(vp);
1006	off_t	base_offset;
1007	int	error = 0;
1008
1009	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1010		return (EROFS);
1011	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED))
1012		return (EOPNOTSUPP);
1013	if (!ufs_extattr_valid_attrname(name))
1014		return (EINVAL);
1015
1016	attribute = ufs_extattr_find_attr(ump, namespace, name);
1017	if (!attribute)
1018		return (ENOENT);
1019
1020	if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IWRITE)))
1021		return (error);
1022
1023	/*
1024	 * Early rejection of invalid offsets/length.
1025	 * Reject: any offset but 0 (replace)
1026	 *	 Any size greater than attribute size limit
1027 	 */
1028	if (uio->uio_offset != 0 ||
1029	    uio->uio_resid > attribute->uele_fileheader.uef_size)
1030		return (ENXIO);
1031
1032	/*
1033	 * Find base offset of header in file based on file header size, and
1034	 * data header size + maximum data size, indexed by inode number.
1035	 */
1036	base_offset = sizeof(struct ufs_extattr_fileheader) +
1037	    ip->i_number * (sizeof(struct ufs_extattr_header) +
1038	    attribute->uele_fileheader.uef_size);
1039
1040	/*
1041	 * Write out a data header for the data.
1042	 */
1043	ueh.ueh_len = uio->uio_resid;
1044	ueh.ueh_flags = UFS_EXTATTR_ATTR_FLAG_INUSE;
1045	ueh.ueh_i_gen = ip->i_gen;
1046	local_aiov.iov_base = (caddr_t) &ueh;
1047	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
1048	local_aio.uio_iov = &local_aiov;
1049	local_aio.uio_iovcnt = 1;
1050	local_aio.uio_rw = UIO_WRITE;
1051	local_aio.uio_segflg = UIO_SYSSPACE;
1052	local_aio.uio_procp = p;
1053	local_aio.uio_offset = base_offset;
1054	local_aio.uio_resid = sizeof(struct ufs_extattr_header);
1055
1056	/*
1057	 * Acquire locks.
1058	 */
1059	VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_WRITE);
1060
1061	/*
1062	 * Don't need to get a lock on the backing file if the setattr is
1063	 * being applied to the backing file, as the lock is already held.
1064	 */
1065	if (attribute->uele_backing_vnode != vp)
1066		vn_lock(attribute->uele_backing_vnode,
1067		    LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, p);
1068
1069	error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio,
1070	    IO_NODELOCKED | IO_SYNC, ump->um_extattr.uepm_ucred);
1071	if (error)
1072		goto vopunlock_exit;
1073
1074	if (local_aio.uio_resid != 0) {
1075		error = ENXIO;
1076		goto vopunlock_exit;
1077	}
1078
1079	/*
1080	 * Write out user data.
1081	 */
1082	uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header);
1083
1084	error = VOP_WRITE(attribute->uele_backing_vnode, uio,
1085	    IO_NODELOCKED | IO_SYNC, ump->um_extattr.uepm_ucred);
1086
1087vopunlock_exit:
1088	uio->uio_offset = 0;
1089
1090	if (attribute->uele_backing_vnode != vp)
1091		VOP_UNLOCK(attribute->uele_backing_vnode, 0, p);
1092
1093	return (error);
1094}
1095
1096/*
1097 * Real work associated with removing an extended attribute from a vnode.
1098 * Assumes the attribute lock has already been grabbed.
1099 */
1100static int
1101ufs_extattr_rm(struct vnode *vp, int namespace, const char *name,
1102    struct ucred *cred, struct proc *p)
1103{
1104	struct ufs_extattr_list_entry	*attribute;
1105	struct ufs_extattr_header	ueh;
1106	struct iovec	local_aiov;
1107	struct uio	local_aio;
1108	struct mount	*mp = vp->v_mount;
1109	struct ufsmount	*ump = VFSTOUFS(mp);
1110	struct inode	*ip = VTOI(vp);
1111	off_t	base_offset;
1112	int	error = 0;
1113
1114	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1115		return (EROFS);
1116	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED))
1117		return (EOPNOTSUPP);
1118	if (!ufs_extattr_valid_attrname(name))
1119		return (EINVAL);
1120
1121	attribute = ufs_extattr_find_attr(ump, namespace, name);
1122	if (!attribute)
1123		return (ENOENT);
1124
1125	if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IWRITE)))
1126		return (error);
1127
1128	/*
1129	 * Find base offset of header in file based on file header size, and
1130	 * data header size + maximum data size, indexed by inode number.
1131	 */
1132	base_offset = sizeof(struct ufs_extattr_fileheader) +
1133	    ip->i_number * (sizeof(struct ufs_extattr_header) +
1134	    attribute->uele_fileheader.uef_size);
1135
1136	/*
1137	 * Check to see if currently defined.
1138	 */
1139	bzero(&ueh, sizeof(struct ufs_extattr_header));
1140
1141	local_aiov.iov_base = (caddr_t) &ueh;
1142	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
1143	local_aio.uio_iov = &local_aiov;
1144	local_aio.uio_iovcnt = 1;
1145	local_aio.uio_rw = UIO_READ;
1146	local_aio.uio_segflg = UIO_SYSSPACE;
1147	local_aio.uio_procp = p;
1148	local_aio.uio_offset = base_offset;
1149	local_aio.uio_resid = sizeof(struct ufs_extattr_header);
1150
1151	VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_WRITE);
1152
1153	/*
1154	 * Don't need to get the lock on the backing vnode if the vnode we're
1155	 * modifying is it, as we already hold the lock.
1156	 */
1157	if (attribute->uele_backing_vnode != vp)
1158		vn_lock(attribute->uele_backing_vnode,
1159		    LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, p);
1160
1161	error = VOP_READ(attribute->uele_backing_vnode, &local_aio,
1162	    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
1163	if (error)
1164		goto vopunlock_exit;
1165
1166	/* Defined? */
1167	if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) {
1168		error = ENOENT;
1169		goto vopunlock_exit;
1170	}
1171
1172	/* Valid for the current inode generation? */
1173	if (ueh.ueh_i_gen != ip->i_gen) {
1174		/*
1175		 * The inode itself has a different generation number than
1176		 * the attribute data.  For now, the best solution is to
1177		 * coerce this to undefined, and let it get cleaned up by
1178		 * the next write or extattrctl clean.
1179		 */
1180		printf("ufs_extattr_rm: inode number inconsistency (%d, %d)\n",
1181		    ueh.ueh_i_gen, ip->i_gen);
1182		error = ENOENT;
1183		goto vopunlock_exit;
1184	}
1185
1186	/* Flag it as not in use. */
1187	ueh.ueh_flags = 0;
1188	ueh.ueh_len = 0;
1189
1190	local_aiov.iov_base = (caddr_t) &ueh;
1191	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
1192	local_aio.uio_iov = &local_aiov;
1193	local_aio.uio_iovcnt = 1;
1194	local_aio.uio_rw = UIO_WRITE;
1195	local_aio.uio_segflg = UIO_SYSSPACE;
1196	local_aio.uio_procp = p;
1197	local_aio.uio_offset = base_offset;
1198	local_aio.uio_resid = sizeof(struct ufs_extattr_header);
1199
1200	error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio,
1201	    IO_NODELOCKED | IO_SYNC, ump->um_extattr.uepm_ucred);
1202	if (error)
1203		goto vopunlock_exit;
1204
1205	if (local_aio.uio_resid != 0)
1206		error = ENXIO;
1207
1208vopunlock_exit:
1209	VOP_UNLOCK(attribute->uele_backing_vnode, 0, p);
1210
1211	return (error);
1212}
1213
1214/*
1215 * Called by UFS when an inode is no longer active and should have its
1216 * attributes stripped.
1217 */
1218void
1219ufs_extattr_vnode_inactive(struct vnode *vp, struct proc *p)
1220{
1221	struct ufs_extattr_list_entry	*uele;
1222	struct mount	*mp = vp->v_mount;
1223	struct ufsmount	*ump = VFSTOUFS(mp);
1224
1225	ufs_extattr_uepm_lock(ump, p);
1226
1227	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
1228		ufs_extattr_uepm_unlock(ump, p);
1229		return;
1230	}
1231
1232	LIST_FOREACH(uele, &ump->um_extattr.uepm_list, uele_entries)
1233		ufs_extattr_rm(vp, uele->uele_namespace, uele->uele_attrname,
1234		    NULL, p);
1235
1236	ufs_extattr_uepm_unlock(ump, p);
1237}
1238
1239#endif /* !FFS_EXTATTR */
1240