vfs_syscalls.c revision 162649
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 162649 2006-09-26 04:15:59Z tegge $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/mac.h>
49#include <sys/malloc.h>
50#include <sys/mount.h>
51#include <sys/mutex.h>
52#include <sys/sysproto.h>
53#include <sys/namei.h>
54#include <sys/filedesc.h>
55#include <sys/kernel.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/limits.h>
59#include <sys/linker.h>
60#include <sys/stat.h>
61#include <sys/sx.h>
62#include <sys/unistd.h>
63#include <sys/vnode.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/extattr.h>
67#include <sys/jail.h>
68#include <sys/syscallsubr.h>
69#include <sys/sysctl.h>
70
71#include <machine/stdarg.h>
72
73#include <security/audit/audit.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/uma.h>
79
80static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83static int setfmode(struct thread *td, struct vnode *, int);
84static int setfflags(struct thread *td, struct vnode *, int);
85static int setutimes(struct thread *td, struct vnode *,
86    const struct timespec *, int, int);
87static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88    struct thread *td);
89
90static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
91    size_t nbytes, struct thread *td);
92
93int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94
95/*
96 * The module initialization routine for POSIX asynchronous I/O will
97 * set this to the version of AIO that it implements.  (Zero means
98 * that it is not implemented.)  This value is used here by pathconf()
99 * and in kern_descrip.c by fpathconf().
100 */
101int async_io_version;
102
103/*
104 * Sync each mounted filesystem.
105 */
106#ifndef _SYS_SYSPROTO_H_
107struct sync_args {
108	int     dummy;
109};
110#endif
111
112#ifdef DEBUG
113static int syncprt = 0;
114SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115#endif
116
117/* ARGSUSED */
118int
119sync(td, uap)
120	struct thread *td;
121	struct sync_args *uap;
122{
123	struct mount *mp, *nmp;
124	int vfslocked;
125
126	mtx_lock(&mountlist_mtx);
127	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129			nmp = TAILQ_NEXT(mp, mnt_list);
130			continue;
131		}
132		vfslocked = VFS_LOCK_GIANT(mp);
133		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
134		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
135			MNT_ILOCK(mp);
136			mp->mnt_noasync++;
137			mp->mnt_kern_flag &= ~MNTK_ASYNC;
138			MNT_IUNLOCK(mp);
139			vfs_msync(mp, MNT_NOWAIT);
140			VFS_SYNC(mp, MNT_NOWAIT, td);
141			MNT_ILOCK(mp);
142			mp->mnt_noasync--;
143			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
144			    mp->mnt_noasync == 0)
145				mp->mnt_kern_flag |= MNTK_ASYNC;
146			MNT_IUNLOCK(mp);
147			vn_finished_write(mp);
148		}
149		VFS_UNLOCK_GIANT(vfslocked);
150		mtx_lock(&mountlist_mtx);
151		nmp = TAILQ_NEXT(mp, mnt_list);
152		vfs_unbusy(mp, td);
153	}
154	mtx_unlock(&mountlist_mtx);
155	return (0);
156}
157
158/* XXX PRISON: could be per prison flag */
159static int prison_quotas;
160#if 0
161SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
162#endif
163
164/*
165 * Change filesystem quotas.
166 *
167 * MP SAFE
168 */
169#ifndef _SYS_SYSPROTO_H_
170struct quotactl_args {
171	char *path;
172	int cmd;
173	int uid;
174	caddr_t arg;
175};
176#endif
177int
178quotactl(td, uap)
179	struct thread *td;
180	register struct quotactl_args /* {
181		char *path;
182		int cmd;
183		int uid;
184		caddr_t arg;
185	} */ *uap;
186{
187	struct mount *mp, *vmp;
188	int vfslocked;
189	int error;
190	struct nameidata nd;
191
192	AUDIT_ARG(cmd, uap->cmd);
193	AUDIT_ARG(uid, uap->uid);
194	if (jailed(td->td_ucred) && !prison_quotas)
195		return (EPERM);
196	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
197	   UIO_USERSPACE, uap->path, td);
198	if ((error = namei(&nd)) != 0)
199		return (error);
200	vfslocked = NDHASGIANT(&nd);
201	NDFREE(&nd, NDF_ONLY_PNBUF);
202	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
203	mp = nd.ni_vp->v_mount;
204	vrele(nd.ni_vp);
205	if (error)
206		goto out;
207	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
208	vn_finished_write(vmp);
209out:
210	VFS_UNLOCK_GIANT(vfslocked);
211	return (error);
212}
213
214/*
215 * Get filesystem statistics.
216 */
217#ifndef _SYS_SYSPROTO_H_
218struct statfs_args {
219	char *path;
220	struct statfs *buf;
221};
222#endif
223int
224statfs(td, uap)
225	struct thread *td;
226	register struct statfs_args /* {
227		char *path;
228		struct statfs *buf;
229	} */ *uap;
230{
231	struct statfs sf;
232	int error;
233
234	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
235	if (error == 0)
236		error = copyout(&sf, uap->buf, sizeof(sf));
237	return (error);
238}
239
240int
241kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
242    struct statfs *buf)
243{
244	struct mount *mp;
245	struct statfs *sp, sb;
246	int vfslocked;
247	int error;
248	struct nameidata nd;
249
250	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
251	    pathseg, path, td);
252	error = namei(&nd);
253	if (error)
254		return (error);
255	vfslocked = NDHASGIANT(&nd);
256	mp = nd.ni_vp->v_mount;
257	vfs_ref(mp);
258	NDFREE(&nd, NDF_ONLY_PNBUF);
259	vput(nd.ni_vp);
260#ifdef MAC
261	error = mac_check_mount_stat(td->td_ucred, mp);
262	if (error)
263		goto out;
264#endif
265	/*
266	 * Set these in case the underlying filesystem fails to do so.
267	 */
268	sp = &mp->mnt_stat;
269	sp->f_version = STATFS_VERSION;
270	sp->f_namemax = NAME_MAX;
271	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
272	error = VFS_STATFS(mp, sp, td);
273	if (error)
274		goto out;
275	if (suser(td)) {
276		bcopy(sp, &sb, sizeof(sb));
277		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
278		prison_enforce_statfs(td->td_ucred, mp, &sb);
279		sp = &sb;
280	}
281	*buf = *sp;
282out:
283	vfs_rel(mp);
284	VFS_UNLOCK_GIANT(vfslocked);
285	if (mtx_owned(&Giant))
286		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
287	return (error);
288}
289
290/*
291 * Get filesystem statistics.
292 */
293#ifndef _SYS_SYSPROTO_H_
294struct fstatfs_args {
295	int fd;
296	struct statfs *buf;
297};
298#endif
299int
300fstatfs(td, uap)
301	struct thread *td;
302	register struct fstatfs_args /* {
303		int fd;
304		struct statfs *buf;
305	} */ *uap;
306{
307	struct statfs sf;
308	int error;
309
310	error = kern_fstatfs(td, uap->fd, &sf);
311	if (error == 0)
312		error = copyout(&sf, uap->buf, sizeof(sf));
313	return (error);
314}
315
316int
317kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
318{
319	struct file *fp;
320	struct mount *mp;
321	struct statfs *sp, sb;
322	int vfslocked;
323	struct vnode *vp;
324	int error;
325
326	AUDIT_ARG(fd, fd);
327	error = getvnode(td->td_proc->p_fd, fd, &fp);
328	if (error)
329		return (error);
330	vp = fp->f_vnode;
331	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
332	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
333#ifdef AUDIT
334	AUDIT_ARG(vnode, vp, ARG_VNODE1);
335#endif
336	mp = vp->v_mount;
337	if (mp)
338		vfs_ref(mp);
339	VOP_UNLOCK(vp, 0, td);
340	fdrop(fp, td);
341	if (vp->v_iflag & VI_DOOMED) {
342		error = EBADF;
343		goto out;
344	}
345#ifdef MAC
346	error = mac_check_mount_stat(td->td_ucred, mp);
347	if (error)
348		goto out;
349#endif
350	/*
351	 * Set these in case the underlying filesystem fails to do so.
352	 */
353	sp = &mp->mnt_stat;
354	sp->f_version = STATFS_VERSION;
355	sp->f_namemax = NAME_MAX;
356	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357	error = VFS_STATFS(mp, sp, td);
358	if (error)
359		goto out;
360	if (suser(td)) {
361		bcopy(sp, &sb, sizeof(sb));
362		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
363		prison_enforce_statfs(td->td_ucred, mp, &sb);
364		sp = &sb;
365	}
366	*buf = *sp;
367out:
368	if (mp)
369		vfs_rel(mp);
370	VFS_UNLOCK_GIANT(vfslocked);
371	return (error);
372}
373
374/*
375 * Get statistics on all filesystems.
376 */
377#ifndef _SYS_SYSPROTO_H_
378struct getfsstat_args {
379	struct statfs *buf;
380	long bufsize;
381	int flags;
382};
383#endif
384int
385getfsstat(td, uap)
386	struct thread *td;
387	register struct getfsstat_args /* {
388		struct statfs *buf;
389		long bufsize;
390		int flags;
391	} */ *uap;
392{
393
394	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
395	    uap->flags));
396}
397
398/*
399 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
400 * 	The caller is responsible for freeing memory which will be allocated
401 *	in '*buf'.
402 */
403int
404kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
405    enum uio_seg bufseg, int flags)
406{
407	struct mount *mp, *nmp;
408	struct statfs *sfsp, *sp, sb;
409	size_t count, maxcount;
410	int vfslocked;
411	int error;
412
413	maxcount = bufsize / sizeof(struct statfs);
414	if (bufsize == 0)
415		sfsp = NULL;
416	else if (bufseg == UIO_USERSPACE)
417		sfsp = *buf;
418	else /* if (bufseg == UIO_SYSSPACE) */ {
419		count = 0;
420		mtx_lock(&mountlist_mtx);
421		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
422			count++;
423		}
424		mtx_unlock(&mountlist_mtx);
425		if (maxcount > count)
426			maxcount = count;
427		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
428		    M_WAITOK);
429	}
430	count = 0;
431	mtx_lock(&mountlist_mtx);
432	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
433		if (prison_canseemount(td->td_ucred, mp) != 0) {
434			nmp = TAILQ_NEXT(mp, mnt_list);
435			continue;
436		}
437#ifdef MAC
438		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
439			nmp = TAILQ_NEXT(mp, mnt_list);
440			continue;
441		}
442#endif
443		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
444			nmp = TAILQ_NEXT(mp, mnt_list);
445			continue;
446		}
447		vfslocked = VFS_LOCK_GIANT(mp);
448		if (sfsp && count < maxcount) {
449			sp = &mp->mnt_stat;
450			/*
451			 * Set these in case the underlying filesystem
452			 * fails to do so.
453			 */
454			sp->f_version = STATFS_VERSION;
455			sp->f_namemax = NAME_MAX;
456			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
457			/*
458			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
459			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
460			 * overrides MNT_WAIT.
461			 */
462			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
463			    (flags & MNT_WAIT)) &&
464			    (error = VFS_STATFS(mp, sp, td))) {
465				VFS_UNLOCK_GIANT(vfslocked);
466				mtx_lock(&mountlist_mtx);
467				nmp = TAILQ_NEXT(mp, mnt_list);
468				vfs_unbusy(mp, td);
469				continue;
470			}
471			if (suser(td)) {
472				bcopy(sp, &sb, sizeof(sb));
473				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
474				prison_enforce_statfs(td->td_ucred, mp, &sb);
475				sp = &sb;
476			}
477			if (bufseg == UIO_SYSSPACE)
478				bcopy(sp, sfsp, sizeof(*sp));
479			else /* if (bufseg == UIO_USERSPACE) */ {
480				error = copyout(sp, sfsp, sizeof(*sp));
481				if (error) {
482					vfs_unbusy(mp, td);
483					VFS_UNLOCK_GIANT(vfslocked);
484					return (error);
485				}
486			}
487			sfsp++;
488		}
489		VFS_UNLOCK_GIANT(vfslocked);
490		count++;
491		mtx_lock(&mountlist_mtx);
492		nmp = TAILQ_NEXT(mp, mnt_list);
493		vfs_unbusy(mp, td);
494	}
495	mtx_unlock(&mountlist_mtx);
496	if (sfsp && count > maxcount)
497		td->td_retval[0] = maxcount;
498	else
499		td->td_retval[0] = count;
500	return (0);
501}
502
503#ifdef COMPAT_FREEBSD4
504/*
505 * Get old format filesystem statistics.
506 */
507static void cvtstatfs(struct statfs *, struct ostatfs *);
508
509#ifndef _SYS_SYSPROTO_H_
510struct freebsd4_statfs_args {
511	char *path;
512	struct ostatfs *buf;
513};
514#endif
515int
516freebsd4_statfs(td, uap)
517	struct thread *td;
518	struct freebsd4_statfs_args /* {
519		char *path;
520		struct ostatfs *buf;
521	} */ *uap;
522{
523	struct ostatfs osb;
524	struct statfs sf;
525	int error;
526
527	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
528	if (error)
529		return (error);
530	cvtstatfs(&sf, &osb);
531	return (copyout(&osb, uap->buf, sizeof(osb)));
532}
533
/*
 * Get old format filesystem statistics for an open file descriptor.
 */
537#ifndef _SYS_SYSPROTO_H_
538struct freebsd4_fstatfs_args {
539	int fd;
540	struct ostatfs *buf;
541};
542#endif
543int
544freebsd4_fstatfs(td, uap)
545	struct thread *td;
546	struct freebsd4_fstatfs_args /* {
547		int fd;
548		struct ostatfs *buf;
549	} */ *uap;
550{
551	struct ostatfs osb;
552	struct statfs sf;
553	int error;
554
555	error = kern_fstatfs(td, uap->fd, &sf);
556	if (error)
557		return (error);
558	cvtstatfs(&sf, &osb);
559	return (copyout(&osb, uap->buf, sizeof(osb)));
560}
561
562/*
563 * Get statistics on all filesystems.
564 */
565#ifndef _SYS_SYSPROTO_H_
566struct freebsd4_getfsstat_args {
567	struct ostatfs *buf;
568	long bufsize;
569	int flags;
570};
571#endif
572int
573freebsd4_getfsstat(td, uap)
574	struct thread *td;
575	register struct freebsd4_getfsstat_args /* {
576		struct ostatfs *buf;
577		long bufsize;
578		int flags;
579	} */ *uap;
580{
581	struct statfs *buf, *sp;
582	struct ostatfs osb;
583	size_t count, size;
584	int error;
585
586	count = uap->bufsize / sizeof(struct ostatfs);
587	size = count * sizeof(struct statfs);
588	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
589	if (size > 0) {
590		count = td->td_retval[0];
591		sp = buf;
592		while (count > 0 && error == 0) {
593			cvtstatfs(sp, &osb);
594			error = copyout(&osb, uap->buf, sizeof(osb));
595			sp++;
596			uap->buf++;
597			count--;
598		}
599		free(buf, M_TEMP);
600	}
601	return (error);
602}
603
604/*
605 * Implement fstatfs() for (NFS) file handles.
606 */
607#ifndef _SYS_SYSPROTO_H_
608struct freebsd4_fhstatfs_args {
609	struct fhandle *u_fhp;
610	struct ostatfs *buf;
611};
612#endif
613int
614freebsd4_fhstatfs(td, uap)
615	struct thread *td;
616	struct freebsd4_fhstatfs_args /* {
617		struct fhandle *u_fhp;
618		struct ostatfs *buf;
619	} */ *uap;
620{
621	struct ostatfs osb;
622	struct statfs sf;
623	fhandle_t fh;
624	int error;
625
626	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
627	if (error)
628		return (error);
629	error = kern_fhstatfs(td, fh, &sf);
630	if (error)
631		return (error);
632	cvtstatfs(&sf, &osb);
633	return (copyout(&osb, uap->buf, sizeof(osb)));
634}
635
636/*
637 * Convert a new format statfs structure to an old format statfs structure.
638 */
639static void
640cvtstatfs(nsp, osp)
641	struct statfs *nsp;
642	struct ostatfs *osp;
643{
644
645	bzero(osp, sizeof(*osp));
646	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
647	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
648	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
649	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
650	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
651	osp->f_files = MIN(nsp->f_files, LONG_MAX);
652	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
653	osp->f_owner = nsp->f_owner;
654	osp->f_type = nsp->f_type;
655	osp->f_flags = nsp->f_flags;
656	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
657	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
658	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
659	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
660	strlcpy(osp->f_fstypename, nsp->f_fstypename,
661	    MIN(MFSNAMELEN, OMFSNAMELEN));
662	strlcpy(osp->f_mntonname, nsp->f_mntonname,
663	    MIN(MNAMELEN, OMNAMELEN));
664	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
665	    MIN(MNAMELEN, OMNAMELEN));
666	osp->f_fsid = nsp->f_fsid;
667}
668#endif /* COMPAT_FREEBSD4 */
669
670/*
671 * Change current working directory to a given file descriptor.
672 */
673#ifndef _SYS_SYSPROTO_H_
674struct fchdir_args {
675	int	fd;
676};
677#endif
678int
679fchdir(td, uap)
680	struct thread *td;
681	struct fchdir_args /* {
682		int fd;
683	} */ *uap;
684{
685	register struct filedesc *fdp = td->td_proc->p_fd;
686	struct vnode *vp, *tdp, *vpold;
687	struct mount *mp;
688	struct file *fp;
689	int vfslocked;
690	int error;
691
692	AUDIT_ARG(fd, uap->fd);
693	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
694		return (error);
695	vp = fp->f_vnode;
696	VREF(vp);
697	fdrop(fp, td);
698	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
699	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
700	AUDIT_ARG(vnode, vp, ARG_VNODE1);
701	error = change_dir(vp, td);
702	while (!error && (mp = vp->v_mountedhere) != NULL) {
703		int tvfslocked;
704		if (vfs_busy(mp, 0, 0, td))
705			continue;
706		tvfslocked = VFS_LOCK_GIANT(mp);
707		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
708		vfs_unbusy(mp, td);
709		if (error) {
710			VFS_UNLOCK_GIANT(tvfslocked);
711			break;
712		}
713		vput(vp);
714		VFS_UNLOCK_GIANT(vfslocked);
715		vp = tdp;
716		vfslocked = tvfslocked;
717	}
718	if (error) {
719		vput(vp);
720		VFS_UNLOCK_GIANT(vfslocked);
721		return (error);
722	}
723	VOP_UNLOCK(vp, 0, td);
724	VFS_UNLOCK_GIANT(vfslocked);
725	FILEDESC_LOCK_FAST(fdp);
726	vpold = fdp->fd_cdir;
727	fdp->fd_cdir = vp;
728	FILEDESC_UNLOCK_FAST(fdp);
729	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
730	vrele(vpold);
731	VFS_UNLOCK_GIANT(vfslocked);
732	return (0);
733}
734
735/*
736 * Change current working directory (``.'').
737 */
738#ifndef _SYS_SYSPROTO_H_
739struct chdir_args {
740	char	*path;
741};
742#endif
743int
744chdir(td, uap)
745	struct thread *td;
746	struct chdir_args /* {
747		char *path;
748	} */ *uap;
749{
750
751	return (kern_chdir(td, uap->path, UIO_USERSPACE));
752}
753
754int
755kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
756{
757	register struct filedesc *fdp = td->td_proc->p_fd;
758	int error;
759	struct nameidata nd;
760	struct vnode *vp;
761	int vfslocked;
762
763	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
764	    pathseg, path, td);
765	if ((error = namei(&nd)) != 0)
766		return (error);
767	vfslocked = NDHASGIANT(&nd);
768	if ((error = change_dir(nd.ni_vp, td)) != 0) {
769		vput(nd.ni_vp);
770		VFS_UNLOCK_GIANT(vfslocked);
771		NDFREE(&nd, NDF_ONLY_PNBUF);
772		return (error);
773	}
774	VOP_UNLOCK(nd.ni_vp, 0, td);
775	VFS_UNLOCK_GIANT(vfslocked);
776	NDFREE(&nd, NDF_ONLY_PNBUF);
777	FILEDESC_LOCK_FAST(fdp);
778	vp = fdp->fd_cdir;
779	fdp->fd_cdir = nd.ni_vp;
780	FILEDESC_UNLOCK_FAST(fdp);
781	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
782	vrele(vp);
783	VFS_UNLOCK_GIANT(vfslocked);
784	return (0);
785}
786
/*
 * Helper for the stricter chroot(2) policy: refuse the operation if any
 * open file descriptor refers to a directory.
 */
791static int
792chroot_refuse_vdir_fds(fdp)
793	struct filedesc *fdp;
794{
795	struct vnode *vp;
796	struct file *fp;
797	int fd;
798
799	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
800	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
801		fp = fget_locked(fdp, fd);
802		if (fp == NULL)
803			continue;
804		if (fp->f_type == DTYPE_VNODE) {
805			vp = fp->f_vnode;
806			if (vp->v_type == VDIR)
807				return (EPERM);
808		}
809	}
810	return (0);
811}
812
813/*
814 * This sysctl determines if we will allow a process to chroot(2) if it
815 * has a directory open:
816 *	0: disallowed for all processes.
817 *	1: allowed for processes that were not already chroot(2)'ed.
818 *	2: allowed for all processes.
819 */
820
821static int chroot_allow_open_directories = 1;
822
823SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
824     &chroot_allow_open_directories, 0, "");
825
826/*
827 * Change notion of root (``/'') directory.
828 */
829#ifndef _SYS_SYSPROTO_H_
830struct chroot_args {
831	char	*path;
832};
833#endif
834int
835chroot(td, uap)
836	struct thread *td;
837	struct chroot_args /* {
838		char *path;
839	} */ *uap;
840{
841	int error;
842	struct nameidata nd;
843	int vfslocked;
844
845	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
846	if (error)
847		return (error);
848	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
849	    UIO_USERSPACE, uap->path, td);
850	error = namei(&nd);
851	if (error)
852		goto error;
853	vfslocked = NDHASGIANT(&nd);
854	if ((error = change_dir(nd.ni_vp, td)) != 0)
855		goto e_vunlock;
856#ifdef MAC
857	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
858		goto e_vunlock;
859#endif
860	VOP_UNLOCK(nd.ni_vp, 0, td);
861	error = change_root(nd.ni_vp, td);
862	vrele(nd.ni_vp);
863	VFS_UNLOCK_GIANT(vfslocked);
864	NDFREE(&nd, NDF_ONLY_PNBUF);
865	return (error);
866e_vunlock:
867	vput(nd.ni_vp);
868	VFS_UNLOCK_GIANT(vfslocked);
869error:
870	NDFREE(&nd, NDF_ONLY_PNBUF);
871	return (error);
872}
873
874/*
875 * Common routine for chroot and chdir.  Callers must provide a locked vnode
876 * instance.
877 */
878int
879change_dir(vp, td)
880	struct vnode *vp;
881	struct thread *td;
882{
883	int error;
884
885	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
886	if (vp->v_type != VDIR)
887		return (ENOTDIR);
888#ifdef MAC
889	error = mac_check_vnode_chdir(td->td_ucred, vp);
890	if (error)
891		return (error);
892#endif
893	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
894	return (error);
895}
896
/*
 * Common routine for chroot() and jail_attach().  The caller is
 * responsible for invoking suser() and mac_check_vnode_chroot() to
 * authorize this operation.
 */
902int
903change_root(vp, td)
904	struct vnode *vp;
905	struct thread *td;
906{
907	struct filedesc *fdp;
908	struct vnode *oldvp;
909	int vfslocked;
910	int error;
911
912	VFS_ASSERT_GIANT(vp->v_mount);
913	fdp = td->td_proc->p_fd;
914	FILEDESC_LOCK(fdp);
915	if (chroot_allow_open_directories == 0 ||
916	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
917		error = chroot_refuse_vdir_fds(fdp);
918		if (error) {
919			FILEDESC_UNLOCK(fdp);
920			return (error);
921		}
922	}
923	oldvp = fdp->fd_rdir;
924	fdp->fd_rdir = vp;
925	VREF(fdp->fd_rdir);
926	if (!fdp->fd_jdir) {
927		fdp->fd_jdir = vp;
928		VREF(fdp->fd_jdir);
929	}
930	FILEDESC_UNLOCK(fdp);
931	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
932	vrele(oldvp);
933	VFS_UNLOCK_GIANT(vfslocked);
934	return (0);
935}
936
937/*
938 * Check permissions, allocate an open file structure,
939 * and call the device open routine if any.
940 *
941 * MP SAFE
942 */
943#ifndef _SYS_SYSPROTO_H_
944struct open_args {
945	char	*path;
946	int	flags;
947	int	mode;
948};
949#endif
950int
951open(td, uap)
952	struct thread *td;
953	register struct open_args /* {
954		char *path;
955		int flags;
956		int mode;
957	} */ *uap;
958{
959
960	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
961}
962
963int
964kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
965    int mode)
966{
967	struct proc *p = td->td_proc;
968	struct filedesc *fdp = p->p_fd;
969	struct file *fp;
970	struct vnode *vp;
971	struct vattr vat;
972	struct mount *mp;
973	int cmode;
974	struct file *nfp;
975	int type, indx, error;
976	struct flock lf;
977	struct nameidata nd;
978	int vfslocked;
979
980	AUDIT_ARG(fflags, flags);
981	AUDIT_ARG(mode, mode);
982	if ((flags & O_ACCMODE) == O_ACCMODE)
983		return (EINVAL);
984	flags = FFLAGS(flags);
985	error = falloc(td, &nfp, &indx);
986	if (error)
987		return (error);
988	/* An extra reference on `nfp' has been held for us by falloc(). */
989	fp = nfp;
990	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
991	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
992	td->td_dupfd = -1;		/* XXX check for fdopen */
993	error = vn_open(&nd, &flags, cmode, indx);
994	if (error) {
995		/*
996		 * If the vn_open replaced the method vector, something
997		 * wonderous happened deep below and we just pass it up
998		 * pretending we know what we do.
999		 */
1000		if (error == ENXIO && fp->f_ops != &badfileops) {
1001			fdrop(fp, td);
1002			td->td_retval[0] = indx;
1003			return (0);
1004		}
1005
1006		/*
1007		 * release our own reference
1008		 */
1009		fdrop(fp, td);
1010
1011		/*
1012		 * handle special fdopen() case.  bleh.  dupfdopen() is
1013		 * responsible for dropping the old contents of ofiles[indx]
1014		 * if it succeeds.
1015		 */
1016		if ((error == ENODEV || error == ENXIO) &&
1017		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1018		    (error =
1019			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1020			td->td_retval[0] = indx;
1021			return (0);
1022		}
1023		/*
1024		 * Clean up the descriptor, but only if another thread hadn't
1025		 * replaced or closed it.
1026		 */
1027		fdclose(fdp, fp, indx, td);
1028
1029		if (error == ERESTART)
1030			error = EINTR;
1031		return (error);
1032	}
1033	td->td_dupfd = 0;
1034	vfslocked = NDHASGIANT(&nd);
1035	NDFREE(&nd, NDF_ONLY_PNBUF);
1036	vp = nd.ni_vp;
1037
1038	/*
1039	 * There should be 2 references on the file, one from the descriptor
1040	 * table, and one for us.
1041	 *
1042	 * Handle the case where someone closed the file (via its file
1043	 * descriptor) while we were blocked.  The end result should look
1044	 * like opening the file succeeded but it was immediately closed.
1045	 * We call vn_close() manually because we haven't yet hooked up
1046	 * the various 'struct file' fields.
1047	 */
1048	FILEDESC_LOCK(fdp);
1049	FILE_LOCK(fp);
1050	if (fp->f_count == 1) {
1051		mp = vp->v_mount;
1052		KASSERT(fdp->fd_ofiles[indx] != fp,
1053		    ("Open file descriptor lost all refs"));
1054		FILE_UNLOCK(fp);
1055		FILEDESC_UNLOCK(fdp);
1056		VOP_UNLOCK(vp, 0, td);
1057		vn_close(vp, flags & FMASK, fp->f_cred, td);
1058		VFS_UNLOCK_GIANT(vfslocked);
1059		fdrop(fp, td);
1060		td->td_retval[0] = indx;
1061		return (0);
1062	}
1063	fp->f_vnode = vp;
1064	if (fp->f_data == NULL)
1065		fp->f_data = vp;
1066	fp->f_flag = flags & FMASK;
1067	if (fp->f_ops == &badfileops)
1068		fp->f_ops = &vnops;
1069	fp->f_seqcount = 1;
1070	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1071	FILE_UNLOCK(fp);
1072	FILEDESC_UNLOCK(fdp);
1073
1074	VOP_UNLOCK(vp, 0, td);
1075	if (flags & (O_EXLOCK | O_SHLOCK)) {
1076		lf.l_whence = SEEK_SET;
1077		lf.l_start = 0;
1078		lf.l_len = 0;
1079		if (flags & O_EXLOCK)
1080			lf.l_type = F_WRLCK;
1081		else
1082			lf.l_type = F_RDLCK;
1083		type = F_FLOCK;
1084		if ((flags & FNONBLOCK) == 0)
1085			type |= F_WAIT;
1086		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1087			    type)) != 0)
1088			goto bad;
1089		fp->f_flag |= FHASLOCK;
1090	}
1091	if (flags & O_TRUNC) {
1092		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1093			goto bad;
1094		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1095		VATTR_NULL(&vat);
1096		vat.va_size = 0;
1097		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1098#ifdef MAC
1099		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1100		if (error == 0)
1101#endif
1102			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1103		VOP_UNLOCK(vp, 0, td);
1104		vn_finished_write(mp);
1105		if (error)
1106			goto bad;
1107	}
1108	VFS_UNLOCK_GIANT(vfslocked);
1109	/*
1110	 * Release our private reference, leaving the one associated with
1111	 * the descriptor table intact.
1112	 */
1113	fdrop(fp, td);
1114	td->td_retval[0] = indx;
1115	return (0);
1116bad:
1117	VFS_UNLOCK_GIANT(vfslocked);
1118	fdclose(fdp, fp, indx, td);
1119	fdrop(fp, td);
1120	return (error);
1121}
1122
1123#ifdef COMPAT_43
1124/*
1125 * Create a file.
1126 *
1127 * MP SAFE
1128 */
1129#ifndef _SYS_SYSPROTO_H_
1130struct ocreat_args {
1131	char	*path;
1132	int	mode;
1133};
1134#endif
1135int
1136ocreat(td, uap)
1137	struct thread *td;
1138	register struct ocreat_args /* {
1139		char *path;
1140		int mode;
1141	} */ *uap;
1142{
1143
1144	return (kern_open(td, uap->path, UIO_USERSPACE,
1145	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1146}
1147#endif /* COMPAT_43 */
1148
1149/*
1150 * Create a special file.
1151 */
1152#ifndef _SYS_SYSPROTO_H_
1153struct mknod_args {
1154	char	*path;
1155	int	mode;
1156	int	dev;
1157};
1158#endif
1159int
1160mknod(td, uap)
1161	struct thread *td;
1162	register struct mknod_args /* {
1163		char *path;
1164		int mode;
1165		int dev;
1166	} */ *uap;
1167{
1168
1169	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1170}
1171
1172int
1173kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1174    int dev)
1175{
1176	struct vnode *vp;
1177	struct mount *mp;
1178	struct vattr vattr;
1179	int error;
1180	int whiteout = 0;
1181	struct nameidata nd;
1182	int vfslocked;
1183
1184	AUDIT_ARG(mode, mode);
1185	AUDIT_ARG(dev, dev);
1186	switch (mode & S_IFMT) {
1187	case S_IFCHR:
1188	case S_IFBLK:
1189		error = suser(td);
1190		break;
1191	default:
1192		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1193		break;
1194	}
1195	if (error)
1196		return (error);
1197restart:
1198	bwillwrite();
1199	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1200	    pathseg, path, td);
1201	if ((error = namei(&nd)) != 0)
1202		return (error);
1203	vfslocked = NDHASGIANT(&nd);
1204	vp = nd.ni_vp;
1205	if (vp != NULL) {
1206		NDFREE(&nd, NDF_ONLY_PNBUF);
1207		if (vp == nd.ni_dvp)
1208			vrele(nd.ni_dvp);
1209		else
1210			vput(nd.ni_dvp);
1211		vrele(vp);
1212		VFS_UNLOCK_GIANT(vfslocked);
1213		return (EEXIST);
1214	} else {
1215		VATTR_NULL(&vattr);
1216		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1217		vattr.va_mode = (mode & ALLPERMS) &
1218		    ~td->td_proc->p_fd->fd_cmask;
1219		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1220		vattr.va_rdev = dev;
1221		whiteout = 0;
1222
1223		switch (mode & S_IFMT) {
1224		case S_IFMT:	/* used by badsect to flag bad sectors */
1225			vattr.va_type = VBAD;
1226			break;
1227		case S_IFCHR:
1228			vattr.va_type = VCHR;
1229			break;
1230		case S_IFBLK:
1231			vattr.va_type = VBLK;
1232			break;
1233		case S_IFWHT:
1234			whiteout = 1;
1235			break;
1236		default:
1237			error = EINVAL;
1238			break;
1239		}
1240	}
1241	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1242		NDFREE(&nd, NDF_ONLY_PNBUF);
1243		vput(nd.ni_dvp);
1244		VFS_UNLOCK_GIANT(vfslocked);
1245		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1246			return (error);
1247		goto restart;
1248	}
1249#ifdef MAC
1250	if (error == 0 && !whiteout)
1251		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1252		    &nd.ni_cnd, &vattr);
1253#endif
1254	if (!error) {
1255		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1256		if (whiteout)
1257			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1258		else {
1259			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1260						&nd.ni_cnd, &vattr);
1261			if (error == 0)
1262				vput(nd.ni_vp);
1263		}
1264	}
1265	NDFREE(&nd, NDF_ONLY_PNBUF);
1266	vput(nd.ni_dvp);
1267	vn_finished_write(mp);
1268	VFS_UNLOCK_GIANT(vfslocked);
1269	return (error);
1270}
1271
1272/*
1273 * Create a named pipe.
1274 */
1275#ifndef _SYS_SYSPROTO_H_
1276struct mkfifo_args {
1277	char	*path;
1278	int	mode;
1279};
1280#endif
1281int
1282mkfifo(td, uap)
1283	struct thread *td;
1284	register struct mkfifo_args /* {
1285		char *path;
1286		int mode;
1287	} */ *uap;
1288{
1289
1290	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1291}
1292
1293int
1294kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1295{
1296	struct mount *mp;
1297	struct vattr vattr;
1298	int error;
1299	struct nameidata nd;
1300	int vfslocked;
1301
1302	AUDIT_ARG(mode, mode);
1303restart:
1304	bwillwrite();
1305	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1306	    pathseg, path, td);
1307	if ((error = namei(&nd)) != 0)
1308		return (error);
1309	vfslocked = NDHASGIANT(&nd);
1310	if (nd.ni_vp != NULL) {
1311		NDFREE(&nd, NDF_ONLY_PNBUF);
1312		if (nd.ni_vp == nd.ni_dvp)
1313			vrele(nd.ni_dvp);
1314		else
1315			vput(nd.ni_dvp);
1316		vrele(nd.ni_vp);
1317		VFS_UNLOCK_GIANT(vfslocked);
1318		return (EEXIST);
1319	}
1320	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1321		NDFREE(&nd, NDF_ONLY_PNBUF);
1322		vput(nd.ni_dvp);
1323		VFS_UNLOCK_GIANT(vfslocked);
1324		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1325			return (error);
1326		goto restart;
1327	}
1328	VATTR_NULL(&vattr);
1329	vattr.va_type = VFIFO;
1330	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1331	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1332	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1333#ifdef MAC
1334	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1335	    &vattr);
1336	if (error)
1337		goto out;
1338#endif
1339	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1340	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1341	if (error == 0)
1342		vput(nd.ni_vp);
1343#ifdef MAC
1344out:
1345#endif
1346	vput(nd.ni_dvp);
1347	vn_finished_write(mp);
1348	VFS_UNLOCK_GIANT(vfslocked);
1349	NDFREE(&nd, NDF_ONLY_PNBUF);
1350	return (error);
1351}
1352
1353/*
1354 * Make a hard file link.
1355 */
1356#ifndef _SYS_SYSPROTO_H_
1357struct link_args {
1358	char	*path;
1359	char	*link;
1360};
1361#endif
1362int
1363link(td, uap)
1364	struct thread *td;
1365	register struct link_args /* {
1366		char *path;
1367		char *link;
1368	} */ *uap;
1369{
1370	int error;
1371
1372	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1373	return (error);
1374}
1375
1376static int hardlink_check_uid = 0;
1377SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1378    &hardlink_check_uid, 0,
1379    "Unprivileged processes cannot create hard links to files owned by other "
1380    "users");
1381static int hardlink_check_gid = 0;
1382SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1383    &hardlink_check_gid, 0,
1384    "Unprivileged processes cannot create hard links to files owned by other "
1385    "groups");
1386
1387static int
1388can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1389{
1390	struct vattr va;
1391	int error;
1392
1393	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1394		return (0);
1395
1396	if (!hardlink_check_uid && !hardlink_check_gid)
1397		return (0);
1398
1399	error = VOP_GETATTR(vp, &va, cred, td);
1400	if (error != 0)
1401		return (error);
1402
1403	if (hardlink_check_uid) {
1404		if (cred->cr_uid != va.va_uid)
1405			return (EPERM);
1406	}
1407
1408	if (hardlink_check_gid) {
1409		if (!groupmember(va.va_gid, cred))
1410			return (EPERM);
1411	}
1412
1413	return (0);
1414}
1415
1416int
1417kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1418{
1419	struct vnode *vp;
1420	struct mount *mp;
1421	struct nameidata nd;
1422	int vfslocked;
1423	int lvfslocked;
1424	int error;
1425
1426	bwillwrite();
1427	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1428	if ((error = namei(&nd)) != 0)
1429		return (error);
1430	vfslocked = NDHASGIANT(&nd);
1431	NDFREE(&nd, NDF_ONLY_PNBUF);
1432	vp = nd.ni_vp;
1433	if (vp->v_type == VDIR) {
1434		vrele(vp);
1435		VFS_UNLOCK_GIANT(vfslocked);
1436		return (EPERM);		/* POSIX */
1437	}
1438	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1439		vrele(vp);
1440		VFS_UNLOCK_GIANT(vfslocked);
1441		return (error);
1442	}
1443	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1444	    segflg, link, td);
1445	if ((error = namei(&nd)) == 0) {
1446		lvfslocked = NDHASGIANT(&nd);
1447		if (nd.ni_vp != NULL) {
1448			if (nd.ni_dvp == nd.ni_vp)
1449				vrele(nd.ni_dvp);
1450			else
1451				vput(nd.ni_dvp);
1452			vrele(nd.ni_vp);
1453			error = EEXIST;
1454		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1455		    == 0) {
1456			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1457			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1458			error = can_hardlink(vp, td, td->td_ucred);
1459			if (error == 0)
1460#ifdef MAC
1461				error = mac_check_vnode_link(td->td_ucred,
1462				    nd.ni_dvp, vp, &nd.ni_cnd);
1463			if (error == 0)
1464#endif
1465				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1466			VOP_UNLOCK(vp, 0, td);
1467			vput(nd.ni_dvp);
1468		}
1469		NDFREE(&nd, NDF_ONLY_PNBUF);
1470		VFS_UNLOCK_GIANT(lvfslocked);
1471	}
1472	vrele(vp);
1473	vn_finished_write(mp);
1474	VFS_UNLOCK_GIANT(vfslocked);
1475	return (error);
1476}
1477
1478/*
1479 * Make a symbolic link.
1480 */
1481#ifndef _SYS_SYSPROTO_H_
1482struct symlink_args {
1483	char	*path;
1484	char	*link;
1485};
1486#endif
1487int
1488symlink(td, uap)
1489	struct thread *td;
1490	register struct symlink_args /* {
1491		char *path;
1492		char *link;
1493	} */ *uap;
1494{
1495
1496	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1497}
1498
1499int
1500kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1501{
1502	struct mount *mp;
1503	struct vattr vattr;
1504	char *syspath;
1505	int error;
1506	struct nameidata nd;
1507	int vfslocked;
1508
1509	if (segflg == UIO_SYSSPACE) {
1510		syspath = path;
1511	} else {
1512		syspath = uma_zalloc(namei_zone, M_WAITOK);
1513		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1514			goto out;
1515	}
1516	AUDIT_ARG(text, syspath);
1517restart:
1518	bwillwrite();
1519	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1520	    segflg, link, td);
1521	if ((error = namei(&nd)) != 0)
1522		goto out;
1523	vfslocked = NDHASGIANT(&nd);
1524	if (nd.ni_vp) {
1525		NDFREE(&nd, NDF_ONLY_PNBUF);
1526		if (nd.ni_vp == nd.ni_dvp)
1527			vrele(nd.ni_dvp);
1528		else
1529			vput(nd.ni_dvp);
1530		vrele(nd.ni_vp);
1531		VFS_UNLOCK_GIANT(vfslocked);
1532		error = EEXIST;
1533		goto out;
1534	}
1535	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1536		NDFREE(&nd, NDF_ONLY_PNBUF);
1537		vput(nd.ni_dvp);
1538		VFS_UNLOCK_GIANT(vfslocked);
1539		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1540			goto out;
1541		goto restart;
1542	}
1543	VATTR_NULL(&vattr);
1544	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1545	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1546	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1547#ifdef MAC
1548	vattr.va_type = VLNK;
1549	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1550	    &vattr);
1551	if (error)
1552		goto out2;
1553#endif
1554	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1555	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1556	if (error == 0)
1557		vput(nd.ni_vp);
1558#ifdef MAC
1559out2:
1560#endif
1561	NDFREE(&nd, NDF_ONLY_PNBUF);
1562	vput(nd.ni_dvp);
1563	vn_finished_write(mp);
1564	VFS_UNLOCK_GIANT(vfslocked);
1565out:
1566	if (segflg != UIO_SYSSPACE)
1567		uma_zfree(namei_zone, syspath);
1568	return (error);
1569}
1570
1571/*
1572 * Delete a whiteout from the filesystem.
1573 */
1574int
1575undelete(td, uap)
1576	struct thread *td;
1577	register struct undelete_args /* {
1578		char *path;
1579	} */ *uap;
1580{
1581	int error;
1582	struct mount *mp;
1583	struct nameidata nd;
1584	int vfslocked;
1585
1586restart:
1587	bwillwrite();
1588	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1589	    UIO_USERSPACE, uap->path, td);
1590	error = namei(&nd);
1591	if (error)
1592		return (error);
1593	vfslocked = NDHASGIANT(&nd);
1594
1595	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1596		NDFREE(&nd, NDF_ONLY_PNBUF);
1597		if (nd.ni_vp == nd.ni_dvp)
1598			vrele(nd.ni_dvp);
1599		else
1600			vput(nd.ni_dvp);
1601		if (nd.ni_vp)
1602			vrele(nd.ni_vp);
1603		VFS_UNLOCK_GIANT(vfslocked);
1604		return (EEXIST);
1605	}
1606	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1607		NDFREE(&nd, NDF_ONLY_PNBUF);
1608		vput(nd.ni_dvp);
1609		VFS_UNLOCK_GIANT(vfslocked);
1610		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1611			return (error);
1612		goto restart;
1613	}
1614	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1615	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1616	NDFREE(&nd, NDF_ONLY_PNBUF);
1617	vput(nd.ni_dvp);
1618	vn_finished_write(mp);
1619	VFS_UNLOCK_GIANT(vfslocked);
1620	return (error);
1621}
1622
1623/*
1624 * Delete a name from the filesystem.
1625 */
1626#ifndef _SYS_SYSPROTO_H_
1627struct unlink_args {
1628	char	*path;
1629};
1630#endif
1631int
1632unlink(td, uap)
1633	struct thread *td;
1634	struct unlink_args /* {
1635		char *path;
1636	} */ *uap;
1637{
1638	int error;
1639
1640	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1641	return (error);
1642}
1643
1644int
1645kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1646{
1647	struct mount *mp;
1648	struct vnode *vp;
1649	int error;
1650	struct nameidata nd;
1651	int vfslocked;
1652
1653restart:
1654	bwillwrite();
1655	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1656	    pathseg, path, td);
1657	if ((error = namei(&nd)) != 0)
1658		return (error == EINVAL ? EPERM : error);
1659	vfslocked = NDHASGIANT(&nd);
1660	vp = nd.ni_vp;
1661	if (vp->v_type == VDIR)
1662		error = EPERM;		/* POSIX */
1663	else {
1664		/*
1665		 * The root of a mounted filesystem cannot be deleted.
1666		 *
1667		 * XXX: can this only be a VDIR case?
1668		 */
1669		if (vp->v_vflag & VV_ROOT)
1670			error = EBUSY;
1671	}
1672	if (error == 0) {
1673		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1674			NDFREE(&nd, NDF_ONLY_PNBUF);
1675			vput(nd.ni_dvp);
1676			if (vp == nd.ni_dvp)
1677				vrele(vp);
1678			else
1679				vput(vp);
1680			VFS_UNLOCK_GIANT(vfslocked);
1681			if ((error = vn_start_write(NULL, &mp,
1682			    V_XSLEEP | PCATCH)) != 0)
1683				return (error);
1684			goto restart;
1685		}
1686#ifdef MAC
1687		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1688		    &nd.ni_cnd);
1689		if (error)
1690			goto out;
1691#endif
1692		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1693		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1694#ifdef MAC
1695out:
1696#endif
1697		vn_finished_write(mp);
1698	}
1699	NDFREE(&nd, NDF_ONLY_PNBUF);
1700	vput(nd.ni_dvp);
1701	if (vp == nd.ni_dvp)
1702		vrele(vp);
1703	else
1704		vput(vp);
1705	VFS_UNLOCK_GIANT(vfslocked);
1706	return (error);
1707}
1708
1709/*
1710 * Reposition read/write file offset.
1711 */
1712#ifndef _SYS_SYSPROTO_H_
1713struct lseek_args {
1714	int	fd;
1715	int	pad;
1716	off_t	offset;
1717	int	whence;
1718};
1719#endif
1720int
1721lseek(td, uap)
1722	struct thread *td;
1723	register struct lseek_args /* {
1724		int fd;
1725		int pad;
1726		off_t offset;
1727		int whence;
1728	} */ *uap;
1729{
1730	struct ucred *cred = td->td_ucred;
1731	struct file *fp;
1732	struct vnode *vp;
1733	struct vattr vattr;
1734	off_t offset;
1735	int error, noneg;
1736	int vfslocked;
1737
1738	if ((error = fget(td, uap->fd, &fp)) != 0)
1739		return (error);
1740	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1741		fdrop(fp, td);
1742		return (ESPIPE);
1743	}
1744	vp = fp->f_vnode;
1745	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1746	noneg = (vp->v_type != VCHR);
1747	offset = uap->offset;
1748	switch (uap->whence) {
1749	case L_INCR:
1750		if (noneg &&
1751		    (fp->f_offset < 0 ||
1752		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1753			error = EOVERFLOW;
1754			break;
1755		}
1756		offset += fp->f_offset;
1757		break;
1758	case L_XTND:
1759		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1760		error = VOP_GETATTR(vp, &vattr, cred, td);
1761		VOP_UNLOCK(vp, 0, td);
1762		if (error)
1763			break;
1764		if (noneg &&
1765		    (vattr.va_size > OFF_MAX ||
1766		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1767			error = EOVERFLOW;
1768			break;
1769		}
1770		offset += vattr.va_size;
1771		break;
1772	case L_SET:
1773		break;
1774	default:
1775		error = EINVAL;
1776	}
1777	if (error == 0 && noneg && offset < 0)
1778		error = EINVAL;
1779	if (error != 0)
1780		goto drop;
1781	fp->f_offset = offset;
1782	*(off_t *)(td->td_retval) = fp->f_offset;
1783drop:
1784	fdrop(fp, td);
1785	VFS_UNLOCK_GIANT(vfslocked);
1786	return (error);
1787}
1788
#if defined(COMPAT_43)
/*
 * Reposition read/write file offset.
 *
 * COMPAT_43 variant taking a long offset: copies fd, offset and whence
 * into a struct lseek_args and calls lseek().
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args args;

	args.fd = uap->fd;
	args.offset = uap->offset;
	args.whence = uap->whence;
	return (lseek(td, &args));
}
#endif /* COMPAT_43 */
1824
1825/*
1826 * Check access permissions using passed credentials.
1827 */
1828static int
1829vn_access(vp, user_flags, cred, td)
1830	struct vnode	*vp;
1831	int		user_flags;
1832	struct ucred	*cred;
1833	struct thread	*td;
1834{
1835	int error, flags;
1836
1837	/* Flags == 0 means only check for existence. */
1838	error = 0;
1839	if (user_flags) {
1840		flags = 0;
1841		if (user_flags & R_OK)
1842			flags |= VREAD;
1843		if (user_flags & W_OK)
1844			flags |= VWRITE;
1845		if (user_flags & X_OK)
1846			flags |= VEXEC;
1847#ifdef MAC
1848		error = mac_check_vnode_access(cred, vp, flags);
1849		if (error)
1850			return (error);
1851#endif
1852		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1853			error = VOP_ACCESS(vp, flags, cred, td);
1854	}
1855	return (error);
1856}
1857
1858/*
1859 * Check access permissions using "real" credentials.
1860 */
1861#ifndef _SYS_SYSPROTO_H_
1862struct access_args {
1863	char	*path;
1864	int	flags;
1865};
1866#endif
1867int
1868access(td, uap)
1869	struct thread *td;
1870	register struct access_args /* {
1871		char *path;
1872		int flags;
1873	} */ *uap;
1874{
1875
1876	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1877}
1878
1879int
1880kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1881{
1882	struct ucred *cred, *tmpcred;
1883	register struct vnode *vp;
1884	struct nameidata nd;
1885	int vfslocked;
1886	int error;
1887
1888	/*
1889	 * Create and modify a temporary credential instead of one that
1890	 * is potentially shared.  This could also mess up socket
1891	 * buffer accounting which can run in an interrupt context.
1892	 */
1893	cred = td->td_ucred;
1894	tmpcred = crdup(cred);
1895	tmpcred->cr_uid = cred->cr_ruid;
1896	tmpcred->cr_groups[0] = cred->cr_rgid;
1897	td->td_ucred = tmpcred;
1898	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1899	    pathseg, path, td);
1900	if ((error = namei(&nd)) != 0)
1901		goto out1;
1902	vfslocked = NDHASGIANT(&nd);
1903	vp = nd.ni_vp;
1904
1905	error = vn_access(vp, flags, tmpcred, td);
1906	NDFREE(&nd, NDF_ONLY_PNBUF);
1907	vput(vp);
1908	VFS_UNLOCK_GIANT(vfslocked);
1909out1:
1910	td->td_ucred = cred;
1911	crfree(tmpcred);
1912	return (error);
1913}
1914
1915/*
1916 * Check access permissions using "effective" credentials.
1917 */
1918#ifndef _SYS_SYSPROTO_H_
1919struct eaccess_args {
1920	char	*path;
1921	int	flags;
1922};
1923#endif
1924int
1925eaccess(td, uap)
1926	struct thread *td;
1927	register struct eaccess_args /* {
1928		char *path;
1929		int flags;
1930	} */ *uap;
1931{
1932
1933	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1934}
1935
1936int
1937kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1938{
1939	struct nameidata nd;
1940	struct vnode *vp;
1941	int vfslocked;
1942	int error;
1943
1944	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1945	    pathseg, path, td);
1946	if ((error = namei(&nd)) != 0)
1947		return (error);
1948	vp = nd.ni_vp;
1949	vfslocked = NDHASGIANT(&nd);
1950	error = vn_access(vp, flags, td->td_ucred, td);
1951	NDFREE(&nd, NDF_ONLY_PNBUF);
1952	vput(vp);
1953	VFS_UNLOCK_GIANT(vfslocked);
1954	return (error);
1955}
1956
1957#if defined(COMPAT_43)
1958/*
1959 * Get file status; this version follows links.
1960 */
1961#ifndef _SYS_SYSPROTO_H_
1962struct ostat_args {
1963	char	*path;
1964	struct ostat *ub;
1965};
1966#endif
1967int
1968ostat(td, uap)
1969	struct thread *td;
1970	register struct ostat_args /* {
1971		char *path;
1972		struct ostat *ub;
1973	} */ *uap;
1974{
1975	struct stat sb;
1976	struct ostat osb;
1977	int error;
1978
1979	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1980	if (error)
1981		return (error);
1982	cvtstat(&sb, &osb);
1983	error = copyout(&osb, uap->ub, sizeof (osb));
1984	return (error);
1985}
1986
1987/*
1988 * Get file status; this version does not follow links.
1989 */
1990#ifndef _SYS_SYSPROTO_H_
1991struct olstat_args {
1992	char	*path;
1993	struct ostat *ub;
1994};
1995#endif
1996int
1997olstat(td, uap)
1998	struct thread *td;
1999	register struct olstat_args /* {
2000		char *path;
2001		struct ostat *ub;
2002	} */ *uap;
2003{
2004	struct stat sb;
2005	struct ostat osb;
2006	int error;
2007
2008	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2009	if (error)
2010		return (error);
2011	cvtstat(&sb, &osb);
2012	error = copyout(&osb, uap->ub, sizeof (osb));
2013	return (error);
2014}
2015
2016/*
2017 * Convert from an old to a new stat structure.
2018 */
2019void
2020cvtstat(st, ost)
2021	struct stat *st;
2022	struct ostat *ost;
2023{
2024
2025	ost->st_dev = st->st_dev;
2026	ost->st_ino = st->st_ino;
2027	ost->st_mode = st->st_mode;
2028	ost->st_nlink = st->st_nlink;
2029	ost->st_uid = st->st_uid;
2030	ost->st_gid = st->st_gid;
2031	ost->st_rdev = st->st_rdev;
2032	if (st->st_size < (quad_t)1 << 32)
2033		ost->st_size = st->st_size;
2034	else
2035		ost->st_size = -2;
2036	ost->st_atime = st->st_atime;
2037	ost->st_mtime = st->st_mtime;
2038	ost->st_ctime = st->st_ctime;
2039	ost->st_blksize = st->st_blksize;
2040	ost->st_blocks = st->st_blocks;
2041	ost->st_flags = st->st_flags;
2042	ost->st_gen = st->st_gen;
2043}
2044#endif /* COMPAT_43 */
2045
2046/*
2047 * Get file status; this version follows links.
2048 */
2049#ifndef _SYS_SYSPROTO_H_
2050struct stat_args {
2051	char	*path;
2052	struct stat *ub;
2053};
2054#endif
2055int
2056stat(td, uap)
2057	struct thread *td;
2058	register struct stat_args /* {
2059		char *path;
2060		struct stat *ub;
2061	} */ *uap;
2062{
2063	struct stat sb;
2064	int error;
2065
2066	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2067	if (error == 0)
2068		error = copyout(&sb, uap->ub, sizeof (sb));
2069	return (error);
2070}
2071
2072int
2073kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2074{
2075	struct nameidata nd;
2076	struct stat sb;
2077	int error, vfslocked;
2078
2079	NDINIT(&nd, LOOKUP,
2080	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2081	    pathseg, path, td);
2082	if ((error = namei(&nd)) != 0)
2083		return (error);
2084	vfslocked = NDHASGIANT(&nd);
2085	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2086	NDFREE(&nd, NDF_ONLY_PNBUF);
2087	vput(nd.ni_vp);
2088	VFS_UNLOCK_GIANT(vfslocked);
2089	if (mtx_owned(&Giant))
2090		printf("stat(%d): %s\n", vfslocked, path);
2091	if (error)
2092		return (error);
2093	*sbp = sb;
2094	return (0);
2095}
2096
2097/*
2098 * Get file status; this version does not follow links.
2099 */
2100#ifndef _SYS_SYSPROTO_H_
2101struct lstat_args {
2102	char	*path;
2103	struct stat *ub;
2104};
2105#endif
2106int
2107lstat(td, uap)
2108	struct thread *td;
2109	register struct lstat_args /* {
2110		char *path;
2111		struct stat *ub;
2112	} */ *uap;
2113{
2114	struct stat sb;
2115	int error;
2116
2117	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2118	if (error == 0)
2119		error = copyout(&sb, uap->ub, sizeof (sb));
2120	return (error);
2121}
2122
2123int
2124kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2125{
2126	struct vnode *vp;
2127	struct stat sb;
2128	struct nameidata nd;
2129	int error, vfslocked;
2130
2131	NDINIT(&nd, LOOKUP,
2132	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2133	    pathseg, path, td);
2134	if ((error = namei(&nd)) != 0)
2135		return (error);
2136	vfslocked = NDHASGIANT(&nd);
2137	vp = nd.ni_vp;
2138	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2139	NDFREE(&nd, NDF_ONLY_PNBUF);
2140	vput(vp);
2141	VFS_UNLOCK_GIANT(vfslocked);
2142	if (error)
2143		return (error);
2144	*sbp = sb;
2145	return (0);
2146}
2147
2148/*
2149 * Implementation of the NetBSD [l]stat() functions.
2150 */
2151void
2152cvtnstat(sb, nsb)
2153	struct stat *sb;
2154	struct nstat *nsb;
2155{
2156	bzero(nsb, sizeof *nsb);
2157	nsb->st_dev = sb->st_dev;
2158	nsb->st_ino = sb->st_ino;
2159	nsb->st_mode = sb->st_mode;
2160	nsb->st_nlink = sb->st_nlink;
2161	nsb->st_uid = sb->st_uid;
2162	nsb->st_gid = sb->st_gid;
2163	nsb->st_rdev = sb->st_rdev;
2164	nsb->st_atimespec = sb->st_atimespec;
2165	nsb->st_mtimespec = sb->st_mtimespec;
2166	nsb->st_ctimespec = sb->st_ctimespec;
2167	nsb->st_size = sb->st_size;
2168	nsb->st_blocks = sb->st_blocks;
2169	nsb->st_blksize = sb->st_blksize;
2170	nsb->st_flags = sb->st_flags;
2171	nsb->st_gen = sb->st_gen;
2172	nsb->st_birthtimespec = sb->st_birthtimespec;
2173}
2174
2175#ifndef _SYS_SYSPROTO_H_
2176struct nstat_args {
2177	char	*path;
2178	struct nstat *ub;
2179};
2180#endif
2181int
2182nstat(td, uap)
2183	struct thread *td;
2184	register struct nstat_args /* {
2185		char *path;
2186		struct nstat *ub;
2187	} */ *uap;
2188{
2189	struct stat sb;
2190	struct nstat nsb;
2191	int error;
2192
2193	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2194	if (error)
2195		return (error);
2196	cvtnstat(&sb, &nsb);
2197	error = copyout(&nsb, uap->ub, sizeof (nsb));
2198	return (error);
2199}
2200
2201/*
2202 * NetBSD lstat.  Get file status; this version does not follow links.
2203 */
2204#ifndef _SYS_SYSPROTO_H_
2205struct lstat_args {
2206	char	*path;
2207	struct stat *ub;
2208};
2209#endif
2210int
2211nlstat(td, uap)
2212	struct thread *td;
2213	register struct nlstat_args /* {
2214		char *path;
2215		struct nstat *ub;
2216	} */ *uap;
2217{
2218	struct stat sb;
2219	struct nstat nsb;
2220	int error;
2221
2222	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2223	if (error)
2224		return (error);
2225	cvtnstat(&sb, &nsb);
2226	error = copyout(&nsb, uap->ub, sizeof (nsb));
2227	return (error);
2228}
2229
2230/*
2231 * Get configurable pathname variables.
2232 */
2233#ifndef _SYS_SYSPROTO_H_
2234struct pathconf_args {
2235	char	*path;
2236	int	name;
2237};
2238#endif
2239int
2240pathconf(td, uap)
2241	struct thread *td;
2242	register struct pathconf_args /* {
2243		char *path;
2244		int name;
2245	} */ *uap;
2246{
2247
2248	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2249}
2250
2251int
2252kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2253{
2254	struct nameidata nd;
2255	int error, vfslocked;
2256
2257	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2258	    pathseg, path, td);
2259	if ((error = namei(&nd)) != 0)
2260		return (error);
2261	vfslocked = NDHASGIANT(&nd);
2262	NDFREE(&nd, NDF_ONLY_PNBUF);
2263
2264	/* If asynchronous I/O is available, it works for all files. */
2265	if (name == _PC_ASYNC_IO)
2266		td->td_retval[0] = async_io_version;
2267	else
2268		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2269	vput(nd.ni_vp);
2270	VFS_UNLOCK_GIANT(vfslocked);
2271	return (error);
2272}
2273
2274/*
2275 * Return target name of a symbolic link.
2276 */
2277#ifndef _SYS_SYSPROTO_H_
2278struct readlink_args {
2279	char	*path;
2280	char	*buf;
2281	int	count;
2282};
2283#endif
2284int
2285readlink(td, uap)
2286	struct thread *td;
2287	register struct readlink_args /* {
2288		char *path;
2289		char *buf;
2290		int count;
2291	} */ *uap;
2292{
2293
2294	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2295	    UIO_USERSPACE, uap->count));
2296}
2297
2298int
2299kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2300    enum uio_seg bufseg, int count)
2301{
2302	register struct vnode *vp;
2303	struct iovec aiov;
2304	struct uio auio;
2305	int error;
2306	struct nameidata nd;
2307	int vfslocked;
2308
2309	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2310	    pathseg, path, td);
2311	if ((error = namei(&nd)) != 0)
2312		return (error);
2313	NDFREE(&nd, NDF_ONLY_PNBUF);
2314	vfslocked = NDHASGIANT(&nd);
2315	vp = nd.ni_vp;
2316#ifdef MAC
2317	error = mac_check_vnode_readlink(td->td_ucred, vp);
2318	if (error) {
2319		vput(vp);
2320		VFS_UNLOCK_GIANT(vfslocked);
2321		return (error);
2322	}
2323#endif
2324	if (vp->v_type != VLNK)
2325		error = EINVAL;
2326	else {
2327		aiov.iov_base = buf;
2328		aiov.iov_len = count;
2329		auio.uio_iov = &aiov;
2330		auio.uio_iovcnt = 1;
2331		auio.uio_offset = 0;
2332		auio.uio_rw = UIO_READ;
2333		auio.uio_segflg = bufseg;
2334		auio.uio_td = td;
2335		auio.uio_resid = count;
2336		error = VOP_READLINK(vp, &auio, td->td_ucred);
2337	}
2338	vput(vp);
2339	VFS_UNLOCK_GIANT(vfslocked);
2340	td->td_retval[0] = count - auio.uio_resid;
2341	return (error);
2342}
2343
2344/*
2345 * Common implementation code for chflags() and fchflags().
2346 */
2347static int
2348setfflags(td, vp, flags)
2349	struct thread *td;
2350	struct vnode *vp;
2351	int flags;
2352{
2353	int error;
2354	struct mount *mp;
2355	struct vattr vattr;
2356
2357	/*
2358	 * Prevent non-root users from setting flags on devices.  When
2359	 * a device is reused, users can retain ownership of the device
2360	 * if they are allowed to set flags and programs assume that
2361	 * chown can't fail when done as root.
2362	 */
2363	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2364		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2365		if (error)
2366			return (error);
2367	}
2368
2369	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2370		return (error);
2371	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2372	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2373	VATTR_NULL(&vattr);
2374	vattr.va_flags = flags;
2375#ifdef MAC
2376	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2377	if (error == 0)
2378#endif
2379		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2380	VOP_UNLOCK(vp, 0, td);
2381	vn_finished_write(mp);
2382	return (error);
2383}
2384
2385/*
2386 * Change flags of a file given a path name.
2387 */
2388#ifndef _SYS_SYSPROTO_H_
2389struct chflags_args {
2390	char	*path;
2391	int	flags;
2392};
2393#endif
2394int
2395chflags(td, uap)
2396	struct thread *td;
2397	register struct chflags_args /* {
2398		char *path;
2399		int flags;
2400	} */ *uap;
2401{
2402	int error;
2403	struct nameidata nd;
2404	int vfslocked;
2405
2406	AUDIT_ARG(fflags, uap->flags);
2407	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2408	    uap->path, td);
2409	if ((error = namei(&nd)) != 0)
2410		return (error);
2411	NDFREE(&nd, NDF_ONLY_PNBUF);
2412	vfslocked = NDHASGIANT(&nd);
2413	error = setfflags(td, nd.ni_vp, uap->flags);
2414	vrele(nd.ni_vp);
2415	VFS_UNLOCK_GIANT(vfslocked);
2416	return (error);
2417}
2418
2419/*
2420 * Same as chflags() but doesn't follow symlinks.
2421 */
2422int
2423lchflags(td, uap)
2424	struct thread *td;
2425	register struct lchflags_args /* {
2426		char *path;
2427		int flags;
2428	} */ *uap;
2429{
2430	int error;
2431	struct nameidata nd;
2432	int vfslocked;
2433
2434	AUDIT_ARG(fflags, uap->flags);
2435	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2436	    uap->path, td);
2437	if ((error = namei(&nd)) != 0)
2438		return (error);
2439	vfslocked = NDHASGIANT(&nd);
2440	NDFREE(&nd, NDF_ONLY_PNBUF);
2441	error = setfflags(td, nd.ni_vp, uap->flags);
2442	vrele(nd.ni_vp);
2443	VFS_UNLOCK_GIANT(vfslocked);
2444	return (error);
2445}
2446
2447/*
2448 * Change flags of a file given a file descriptor.
2449 */
2450#ifndef _SYS_SYSPROTO_H_
2451struct fchflags_args {
2452	int	fd;
2453	int	flags;
2454};
2455#endif
2456int
2457fchflags(td, uap)
2458	struct thread *td;
2459	register struct fchflags_args /* {
2460		int fd;
2461		int flags;
2462	} */ *uap;
2463{
2464	struct file *fp;
2465	int vfslocked;
2466	int error;
2467
2468	AUDIT_ARG(fd, uap->fd);
2469	AUDIT_ARG(fflags, uap->flags);
2470	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2471		return (error);
2472	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2473#ifdef AUDIT
2474	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2475	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2476	VOP_UNLOCK(fp->f_vnode, 0, td);
2477#endif
2478	error = setfflags(td, fp->f_vnode, uap->flags);
2479	VFS_UNLOCK_GIANT(vfslocked);
2480	fdrop(fp, td);
2481	return (error);
2482}
2483
2484/*
2485 * Common implementation code for chmod(), lchmod() and fchmod().
2486 */
2487static int
2488setfmode(td, vp, mode)
2489	struct thread *td;
2490	struct vnode *vp;
2491	int mode;
2492{
2493	int error;
2494	struct mount *mp;
2495	struct vattr vattr;
2496
2497	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2498		return (error);
2499	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2500	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2501	VATTR_NULL(&vattr);
2502	vattr.va_mode = mode & ALLPERMS;
2503#ifdef MAC
2504	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2505	if (error == 0)
2506#endif
2507		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2508	VOP_UNLOCK(vp, 0, td);
2509	vn_finished_write(mp);
2510	return (error);
2511}
2512
2513/*
2514 * Change mode of a file given path name.
2515 */
2516#ifndef _SYS_SYSPROTO_H_
2517struct chmod_args {
2518	char	*path;
2519	int	mode;
2520};
2521#endif
2522int
2523chmod(td, uap)
2524	struct thread *td;
2525	register struct chmod_args /* {
2526		char *path;
2527		int mode;
2528	} */ *uap;
2529{
2530
2531	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2532}
2533
2534int
2535kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2536{
2537	int error;
2538	struct nameidata nd;
2539	int vfslocked;
2540
2541	AUDIT_ARG(mode, mode);
2542	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2543	if ((error = namei(&nd)) != 0)
2544		return (error);
2545	vfslocked = NDHASGIANT(&nd);
2546	NDFREE(&nd, NDF_ONLY_PNBUF);
2547	error = setfmode(td, nd.ni_vp, mode);
2548	vrele(nd.ni_vp);
2549	VFS_UNLOCK_GIANT(vfslocked);
2550	return (error);
2551}
2552
2553/*
2554 * Change mode of a file given path name (don't follow links.)
2555 */
2556#ifndef _SYS_SYSPROTO_H_
2557struct lchmod_args {
2558	char	*path;
2559	int	mode;
2560};
2561#endif
2562int
2563lchmod(td, uap)
2564	struct thread *td;
2565	register struct lchmod_args /* {
2566		char *path;
2567		int mode;
2568	} */ *uap;
2569{
2570	int error;
2571	struct nameidata nd;
2572	int vfslocked;
2573
2574	AUDIT_ARG(mode, (mode_t)uap->mode);
2575	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2576	    uap->path, td);
2577	if ((error = namei(&nd)) != 0)
2578		return (error);
2579	vfslocked = NDHASGIANT(&nd);
2580	NDFREE(&nd, NDF_ONLY_PNBUF);
2581	error = setfmode(td, nd.ni_vp, uap->mode);
2582	vrele(nd.ni_vp);
2583	VFS_UNLOCK_GIANT(vfslocked);
2584	return (error);
2585}
2586
2587/*
2588 * Change mode of a file given a file descriptor.
2589 */
2590#ifndef _SYS_SYSPROTO_H_
2591struct fchmod_args {
2592	int	fd;
2593	int	mode;
2594};
2595#endif
2596int
2597fchmod(td, uap)
2598	struct thread *td;
2599	register struct fchmod_args /* {
2600		int fd;
2601		int mode;
2602	} */ *uap;
2603{
2604	struct file *fp;
2605	int vfslocked;
2606	int error;
2607
2608	AUDIT_ARG(fd, uap->fd);
2609	AUDIT_ARG(mode, uap->mode);
2610	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2611		return (error);
2612	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2613#ifdef AUDIT
2614	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2615	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2616	VOP_UNLOCK(fp->f_vnode, 0, td);
2617#endif
2618	error = setfmode(td, fp->f_vnode, uap->mode);
2619	VFS_UNLOCK_GIANT(vfslocked);
2620	fdrop(fp, td);
2621	return (error);
2622}
2623
2624/*
2625 * Common implementation for chown(), lchown(), and fchown()
2626 */
2627static int
2628setfown(td, vp, uid, gid)
2629	struct thread *td;
2630	struct vnode *vp;
2631	uid_t uid;
2632	gid_t gid;
2633{
2634	int error;
2635	struct mount *mp;
2636	struct vattr vattr;
2637
2638	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2639		return (error);
2640	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2641	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2642	VATTR_NULL(&vattr);
2643	vattr.va_uid = uid;
2644	vattr.va_gid = gid;
2645#ifdef MAC
2646	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2647	    vattr.va_gid);
2648	if (error == 0)
2649#endif
2650		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2651	VOP_UNLOCK(vp, 0, td);
2652	vn_finished_write(mp);
2653	return (error);
2654}
2655
2656/*
2657 * Set ownership given a path name.
2658 */
2659#ifndef _SYS_SYSPROTO_H_
2660struct chown_args {
2661	char	*path;
2662	int	uid;
2663	int	gid;
2664};
2665#endif
2666int
2667chown(td, uap)
2668	struct thread *td;
2669	register struct chown_args /* {
2670		char *path;
2671		int uid;
2672		int gid;
2673	} */ *uap;
2674{
2675
2676	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2677}
2678
2679int
2680kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2681    int gid)
2682{
2683	int error;
2684	struct nameidata nd;
2685	int vfslocked;
2686
2687	AUDIT_ARG(owner, uid, gid);
2688	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2689	if ((error = namei(&nd)) != 0)
2690		return (error);
2691	vfslocked = NDHASGIANT(&nd);
2692	NDFREE(&nd, NDF_ONLY_PNBUF);
2693	error = setfown(td, nd.ni_vp, uid, gid);
2694	vrele(nd.ni_vp);
2695	VFS_UNLOCK_GIANT(vfslocked);
2696	return (error);
2697}
2698
2699/*
2700 * Set ownership given a path name, do not cross symlinks.
2701 */
2702#ifndef _SYS_SYSPROTO_H_
2703struct lchown_args {
2704	char	*path;
2705	int	uid;
2706	int	gid;
2707};
2708#endif
2709int
2710lchown(td, uap)
2711	struct thread *td;
2712	register struct lchown_args /* {
2713		char *path;
2714		int uid;
2715		int gid;
2716	} */ *uap;
2717{
2718
2719	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2720}
2721
2722int
2723kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2724    int gid)
2725{
2726	int error;
2727	struct nameidata nd;
2728	int vfslocked;
2729
2730	AUDIT_ARG(owner, uid, gid);
2731	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2732	if ((error = namei(&nd)) != 0)
2733		return (error);
2734	vfslocked = NDHASGIANT(&nd);
2735	NDFREE(&nd, NDF_ONLY_PNBUF);
2736	error = setfown(td, nd.ni_vp, uid, gid);
2737	vrele(nd.ni_vp);
2738	VFS_UNLOCK_GIANT(vfslocked);
2739	return (error);
2740}
2741
2742/*
2743 * Set ownership given a file descriptor.
2744 */
2745#ifndef _SYS_SYSPROTO_H_
2746struct fchown_args {
2747	int	fd;
2748	int	uid;
2749	int	gid;
2750};
2751#endif
2752int
2753fchown(td, uap)
2754	struct thread *td;
2755	register struct fchown_args /* {
2756		int fd;
2757		int uid;
2758		int gid;
2759	} */ *uap;
2760{
2761	struct file *fp;
2762	int vfslocked;
2763	int error;
2764
2765	AUDIT_ARG(fd, uap->fd);
2766	AUDIT_ARG(owner, uap->uid, uap->gid);
2767	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2768		return (error);
2769	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2770#ifdef AUDIT
2771	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2772	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2773	VOP_UNLOCK(fp->f_vnode, 0, td);
2774#endif
2775	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2776	VFS_UNLOCK_GIANT(vfslocked);
2777	fdrop(fp, td);
2778	return (error);
2779}
2780
2781/*
2782 * Common implementation code for utimes(), lutimes(), and futimes().
2783 */
2784static int
2785getutimes(usrtvp, tvpseg, tsp)
2786	const struct timeval *usrtvp;
2787	enum uio_seg tvpseg;
2788	struct timespec *tsp;
2789{
2790	struct timeval tv[2];
2791	const struct timeval *tvp;
2792	int error;
2793
2794	if (usrtvp == NULL) {
2795		microtime(&tv[0]);
2796		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2797		tsp[1] = tsp[0];
2798	} else {
2799		if (tvpseg == UIO_SYSSPACE) {
2800			tvp = usrtvp;
2801		} else {
2802			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2803				return (error);
2804			tvp = tv;
2805		}
2806
2807		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2808		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2809			return (EINVAL);
2810		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2811		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2812	}
2813	return (0);
2814}
2815
2816/*
2817 * Common implementation code for utimes(), lutimes(), and futimes().
2818 */
2819static int
2820setutimes(td, vp, ts, numtimes, nullflag)
2821	struct thread *td;
2822	struct vnode *vp;
2823	const struct timespec *ts;
2824	int numtimes;
2825	int nullflag;
2826{
2827	int error, setbirthtime;
2828	struct mount *mp;
2829	struct vattr vattr;
2830
2831	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2832		return (error);
2833	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2834	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2835	setbirthtime = 0;
2836	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2837	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2838		setbirthtime = 1;
2839	VATTR_NULL(&vattr);
2840	vattr.va_atime = ts[0];
2841	vattr.va_mtime = ts[1];
2842	if (setbirthtime)
2843		vattr.va_birthtime = ts[1];
2844	if (numtimes > 2)
2845		vattr.va_birthtime = ts[2];
2846	if (nullflag)
2847		vattr.va_vaflags |= VA_UTIMES_NULL;
2848#ifdef MAC
2849	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2850	    vattr.va_mtime);
2851#endif
2852	if (error == 0)
2853		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2854	VOP_UNLOCK(vp, 0, td);
2855	vn_finished_write(mp);
2856	return (error);
2857}
2858
2859/*
2860 * Set the access and modification times of a file.
2861 */
2862#ifndef _SYS_SYSPROTO_H_
2863struct utimes_args {
2864	char	*path;
2865	struct	timeval *tptr;
2866};
2867#endif
2868int
2869utimes(td, uap)
2870	struct thread *td;
2871	register struct utimes_args /* {
2872		char *path;
2873		struct timeval *tptr;
2874	} */ *uap;
2875{
2876
2877	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2878	    UIO_USERSPACE));
2879}
2880
2881int
2882kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2883    struct timeval *tptr, enum uio_seg tptrseg)
2884{
2885	struct timespec ts[2];
2886	int error;
2887	struct nameidata nd;
2888	int vfslocked;
2889
2890	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2891		return (error);
2892	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2893	if ((error = namei(&nd)) != 0)
2894		return (error);
2895	vfslocked = NDHASGIANT(&nd);
2896	NDFREE(&nd, NDF_ONLY_PNBUF);
2897	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2898	vrele(nd.ni_vp);
2899	VFS_UNLOCK_GIANT(vfslocked);
2900	return (error);
2901}
2902
2903/*
2904 * Set the access and modification times of a file.
2905 */
2906#ifndef _SYS_SYSPROTO_H_
2907struct lutimes_args {
2908	char	*path;
2909	struct	timeval *tptr;
2910};
2911#endif
2912int
2913lutimes(td, uap)
2914	struct thread *td;
2915	register struct lutimes_args /* {
2916		char *path;
2917		struct timeval *tptr;
2918	} */ *uap;
2919{
2920
2921	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2922	    UIO_USERSPACE));
2923}
2924
2925int
2926kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2927    struct timeval *tptr, enum uio_seg tptrseg)
2928{
2929	struct timespec ts[2];
2930	int error;
2931	struct nameidata nd;
2932	int vfslocked;
2933
2934	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2935		return (error);
2936	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2937	if ((error = namei(&nd)) != 0)
2938		return (error);
2939	vfslocked = NDHASGIANT(&nd);
2940	NDFREE(&nd, NDF_ONLY_PNBUF);
2941	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2942	vrele(nd.ni_vp);
2943	VFS_UNLOCK_GIANT(vfslocked);
2944	return (error);
2945}
2946
2947/*
2948 * Set the access and modification times of a file.
2949 */
2950#ifndef _SYS_SYSPROTO_H_
2951struct futimes_args {
2952	int	fd;
2953	struct	timeval *tptr;
2954};
2955#endif
2956int
2957futimes(td, uap)
2958	struct thread *td;
2959	register struct futimes_args /* {
2960		int  fd;
2961		struct timeval *tptr;
2962	} */ *uap;
2963{
2964
2965	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2966}
2967
2968int
2969kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2970    enum uio_seg tptrseg)
2971{
2972	struct timespec ts[2];
2973	struct file *fp;
2974	int vfslocked;
2975	int error;
2976
2977	AUDIT_ARG(fd, fd);
2978	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2979		return (error);
2980	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2981		return (error);
2982	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2983#ifdef AUDIT
2984	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2985	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2986	VOP_UNLOCK(fp->f_vnode, 0, td);
2987#endif
2988	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2989	VFS_UNLOCK_GIANT(vfslocked);
2990	fdrop(fp, td);
2991	return (error);
2992}
2993
2994/*
2995 * Truncate a file given its path name.
2996 */
2997#ifndef _SYS_SYSPROTO_H_
2998struct truncate_args {
2999	char	*path;
3000	int	pad;
3001	off_t	length;
3002};
3003#endif
3004int
3005truncate(td, uap)
3006	struct thread *td;
3007	register struct truncate_args /* {
3008		char *path;
3009		int pad;
3010		off_t length;
3011	} */ *uap;
3012{
3013
3014	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3015}
3016
3017int
3018kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3019{
3020	struct mount *mp;
3021	struct vnode *vp;
3022	struct vattr vattr;
3023	int error;
3024	struct nameidata nd;
3025	int vfslocked;
3026
3027	if (length < 0)
3028		return(EINVAL);
3029	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3030	if ((error = namei(&nd)) != 0)
3031		return (error);
3032	vfslocked = NDHASGIANT(&nd);
3033	vp = nd.ni_vp;
3034	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3035		vrele(vp);
3036		VFS_UNLOCK_GIANT(vfslocked);
3037		return (error);
3038	}
3039	NDFREE(&nd, NDF_ONLY_PNBUF);
3040	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3041	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3042	if (vp->v_type == VDIR)
3043		error = EISDIR;
3044#ifdef MAC
3045	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3046	}
3047#endif
3048	else if ((error = vn_writechk(vp)) == 0 &&
3049	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3050		VATTR_NULL(&vattr);
3051		vattr.va_size = length;
3052		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3053	}
3054	vput(vp);
3055	vn_finished_write(mp);
3056	VFS_UNLOCK_GIANT(vfslocked);
3057	return (error);
3058}
3059
3060/*
3061 * Truncate a file given a file descriptor.
3062 */
3063#ifndef _SYS_SYSPROTO_H_
3064struct ftruncate_args {
3065	int	fd;
3066	int	pad;
3067	off_t	length;
3068};
3069#endif
3070int
3071ftruncate(td, uap)
3072	struct thread *td;
3073	register struct ftruncate_args /* {
3074		int fd;
3075		int pad;
3076		off_t length;
3077	} */ *uap;
3078{
3079	struct mount *mp;
3080	struct vattr vattr;
3081	struct vnode *vp;
3082	struct file *fp;
3083	int vfslocked;
3084	int error;
3085
3086	AUDIT_ARG(fd, uap->fd);
3087	if (uap->length < 0)
3088		return(EINVAL);
3089	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3090		return (error);
3091	if ((fp->f_flag & FWRITE) == 0) {
3092		fdrop(fp, td);
3093		return (EINVAL);
3094	}
3095	vp = fp->f_vnode;
3096	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3097	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3098		goto drop;
3099	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3100	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3101	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3102	if (vp->v_type == VDIR)
3103		error = EISDIR;
3104#ifdef MAC
3105	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3106	    vp))) {
3107	}
3108#endif
3109	else if ((error = vn_writechk(vp)) == 0) {
3110		VATTR_NULL(&vattr);
3111		vattr.va_size = uap->length;
3112		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3113	}
3114	VOP_UNLOCK(vp, 0, td);
3115	vn_finished_write(mp);
3116drop:
3117	VFS_UNLOCK_GIANT(vfslocked);
3118	fdrop(fp, td);
3119	return (error);
3120}
3121
3122#if defined(COMPAT_43)
3123/*
3124 * Truncate a file given its path name.
3125 */
3126#ifndef _SYS_SYSPROTO_H_
3127struct otruncate_args {
3128	char	*path;
3129	long	length;
3130};
3131#endif
3132int
3133otruncate(td, uap)
3134	struct thread *td;
3135	register struct otruncate_args /* {
3136		char *path;
3137		long length;
3138	} */ *uap;
3139{
3140	struct truncate_args /* {
3141		char *path;
3142		int pad;
3143		off_t length;
3144	} */ nuap;
3145
3146	nuap.path = uap->path;
3147	nuap.length = uap->length;
3148	return (truncate(td, &nuap));
3149}
3150
3151/*
3152 * Truncate a file given a file descriptor.
3153 */
3154#ifndef _SYS_SYSPROTO_H_
3155struct oftruncate_args {
3156	int	fd;
3157	long	length;
3158};
3159#endif
3160int
3161oftruncate(td, uap)
3162	struct thread *td;
3163	register struct oftruncate_args /* {
3164		int fd;
3165		long length;
3166	} */ *uap;
3167{
3168	struct ftruncate_args /* {
3169		int fd;
3170		int pad;
3171		off_t length;
3172	} */ nuap;
3173
3174	nuap.fd = uap->fd;
3175	nuap.length = uap->length;
3176	return (ftruncate(td, &nuap));
3177}
3178#endif /* COMPAT_43 */
3179
3180/*
3181 * Sync an open file.
3182 */
3183#ifndef _SYS_SYSPROTO_H_
3184struct fsync_args {
3185	int	fd;
3186};
3187#endif
3188int
3189fsync(td, uap)
3190	struct thread *td;
3191	struct fsync_args /* {
3192		int fd;
3193	} */ *uap;
3194{
3195	struct vnode *vp;
3196	struct mount *mp;
3197	struct file *fp;
3198	int vfslocked;
3199	int error;
3200
3201	AUDIT_ARG(fd, uap->fd);
3202	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3203		return (error);
3204	vp = fp->f_vnode;
3205	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3206	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3207		goto drop;
3208	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3209	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3210	if (vp->v_object != NULL) {
3211		VM_OBJECT_LOCK(vp->v_object);
3212		vm_object_page_clean(vp->v_object, 0, 0, 0);
3213		VM_OBJECT_UNLOCK(vp->v_object);
3214	}
3215	error = VOP_FSYNC(vp, MNT_WAIT, td);
3216
3217	VOP_UNLOCK(vp, 0, td);
3218	vn_finished_write(mp);
3219drop:
3220	VFS_UNLOCK_GIANT(vfslocked);
3221	fdrop(fp, td);
3222	return (error);
3223}
3224
3225/*
3226 * Rename files.  Source and destination must either both be directories,
3227 * or both not be directories.  If target is a directory, it must be empty.
3228 */
3229#ifndef _SYS_SYSPROTO_H_
3230struct rename_args {
3231	char	*from;
3232	char	*to;
3233};
3234#endif
3235int
3236rename(td, uap)
3237	struct thread *td;
3238	register struct rename_args /* {
3239		char *from;
3240		char *to;
3241	} */ *uap;
3242{
3243
3244	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3245}
3246
3247int
3248kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3249{
3250	struct mount *mp = NULL;
3251	struct vnode *tvp, *fvp, *tdvp;
3252	struct nameidata fromnd, tond;
3253	int tvfslocked;
3254	int fvfslocked;
3255	int error;
3256
3257	bwillwrite();
3258#ifdef MAC
3259	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3260	    AUDITVNODE1, pathseg, from, td);
3261#else
3262	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3263	    AUDITVNODE1, pathseg, from, td);
3264#endif
3265	if ((error = namei(&fromnd)) != 0)
3266		return (error);
3267	fvfslocked = NDHASGIANT(&fromnd);
3268	tvfslocked = 0;
3269#ifdef MAC
3270	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3271	    fromnd.ni_vp, &fromnd.ni_cnd);
3272	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3273	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3274#endif
3275	fvp = fromnd.ni_vp;
3276	if (error == 0)
3277		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3278	if (error != 0) {
3279		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3280		vrele(fromnd.ni_dvp);
3281		vrele(fvp);
3282		goto out1;
3283	}
3284	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3285	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3286	if (fromnd.ni_vp->v_type == VDIR)
3287		tond.ni_cnd.cn_flags |= WILLBEDIR;
3288	if ((error = namei(&tond)) != 0) {
3289		/* Translate error code for rename("dir1", "dir2/."). */
3290		if (error == EISDIR && fvp->v_type == VDIR)
3291			error = EINVAL;
3292		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3293		vrele(fromnd.ni_dvp);
3294		vrele(fvp);
3295		vn_finished_write(mp);
3296		goto out1;
3297	}
3298	tvfslocked = NDHASGIANT(&tond);
3299	tdvp = tond.ni_dvp;
3300	tvp = tond.ni_vp;
3301	if (tvp != NULL) {
3302		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3303			error = ENOTDIR;
3304			goto out;
3305		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3306			error = EISDIR;
3307			goto out;
3308		}
3309	}
3310	if (fvp == tdvp)
3311		error = EINVAL;
3312	/*
3313	 * If the source is the same as the destination (that is, if they
3314	 * are links to the same vnode), then there is nothing to do.
3315	 */
3316	if (fvp == tvp)
3317		error = -1;
3318#ifdef MAC
3319	else
3320		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3321		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3322#endif
3323out:
3324	if (!error) {
3325		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3326		if (fromnd.ni_dvp != tdvp) {
3327			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3328		}
3329		if (tvp) {
3330			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3331		}
3332		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3333				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3334		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3335		NDFREE(&tond, NDF_ONLY_PNBUF);
3336	} else {
3337		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3338		NDFREE(&tond, NDF_ONLY_PNBUF);
3339		if (tvp)
3340			vput(tvp);
3341		if (tdvp == tvp)
3342			vrele(tdvp);
3343		else
3344			vput(tdvp);
3345		vrele(fromnd.ni_dvp);
3346		vrele(fvp);
3347	}
3348	vrele(tond.ni_startdir);
3349	vn_finished_write(mp);
3350out1:
3351	if (fromnd.ni_startdir)
3352		vrele(fromnd.ni_startdir);
3353	VFS_UNLOCK_GIANT(fvfslocked);
3354	VFS_UNLOCK_GIANT(tvfslocked);
3355	if (error == -1)
3356		return (0);
3357	return (error);
3358}
3359
3360/*
3361 * Make a directory file.
3362 */
3363#ifndef _SYS_SYSPROTO_H_
3364struct mkdir_args {
3365	char	*path;
3366	int	mode;
3367};
3368#endif
3369int
3370mkdir(td, uap)
3371	struct thread *td;
3372	register struct mkdir_args /* {
3373		char *path;
3374		int mode;
3375	} */ *uap;
3376{
3377
3378	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3379}
3380
3381int
3382kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3383{
3384	struct mount *mp;
3385	struct vnode *vp;
3386	struct vattr vattr;
3387	int error;
3388	struct nameidata nd;
3389	int vfslocked;
3390
3391	AUDIT_ARG(mode, mode);
3392restart:
3393	bwillwrite();
3394	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3395	    segflg, path, td);
3396	nd.ni_cnd.cn_flags |= WILLBEDIR;
3397	if ((error = namei(&nd)) != 0)
3398		return (error);
3399	vfslocked = NDHASGIANT(&nd);
3400	vp = nd.ni_vp;
3401	if (vp != NULL) {
3402		NDFREE(&nd, NDF_ONLY_PNBUF);
3403		/*
3404		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3405		 * the strange behaviour of leaving the vnode unlocked
3406		 * if the target is the same vnode as the parent.
3407		 */
3408		if (vp == nd.ni_dvp)
3409			vrele(nd.ni_dvp);
3410		else
3411			vput(nd.ni_dvp);
3412		vrele(vp);
3413		VFS_UNLOCK_GIANT(vfslocked);
3414		return (EEXIST);
3415	}
3416	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3417		NDFREE(&nd, NDF_ONLY_PNBUF);
3418		vput(nd.ni_dvp);
3419		VFS_UNLOCK_GIANT(vfslocked);
3420		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3421			return (error);
3422		goto restart;
3423	}
3424	VATTR_NULL(&vattr);
3425	vattr.va_type = VDIR;
3426	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3427	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3428	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3429#ifdef MAC
3430	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3431	    &vattr);
3432	if (error)
3433		goto out;
3434#endif
3435	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3436	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3437#ifdef MAC
3438out:
3439#endif
3440	NDFREE(&nd, NDF_ONLY_PNBUF);
3441	vput(nd.ni_dvp);
3442	if (!error)
3443		vput(nd.ni_vp);
3444	vn_finished_write(mp);
3445	VFS_UNLOCK_GIANT(vfslocked);
3446	return (error);
3447}
3448
3449/*
3450 * Remove a directory file.
3451 */
3452#ifndef _SYS_SYSPROTO_H_
3453struct rmdir_args {
3454	char	*path;
3455};
3456#endif
3457int
3458rmdir(td, uap)
3459	struct thread *td;
3460	struct rmdir_args /* {
3461		char *path;
3462	} */ *uap;
3463{
3464
3465	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3466}
3467
3468int
3469kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3470{
3471	struct mount *mp;
3472	struct vnode *vp;
3473	int error;
3474	struct nameidata nd;
3475	int vfslocked;
3476
3477restart:
3478	bwillwrite();
3479	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3480	    pathseg, path, td);
3481	if ((error = namei(&nd)) != 0)
3482		return (error);
3483	vfslocked = NDHASGIANT(&nd);
3484	vp = nd.ni_vp;
3485	if (vp->v_type != VDIR) {
3486		error = ENOTDIR;
3487		goto out;
3488	}
3489	/*
3490	 * No rmdir "." please.
3491	 */
3492	if (nd.ni_dvp == vp) {
3493		error = EINVAL;
3494		goto out;
3495	}
3496	/*
3497	 * The root of a mounted filesystem cannot be deleted.
3498	 */
3499	if (vp->v_vflag & VV_ROOT) {
3500		error = EBUSY;
3501		goto out;
3502	}
3503#ifdef MAC
3504	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3505	    &nd.ni_cnd);
3506	if (error)
3507		goto out;
3508#endif
3509	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3510		NDFREE(&nd, NDF_ONLY_PNBUF);
3511		vput(vp);
3512		if (nd.ni_dvp == vp)
3513			vrele(nd.ni_dvp);
3514		else
3515			vput(nd.ni_dvp);
3516		VFS_UNLOCK_GIANT(vfslocked);
3517		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3518			return (error);
3519		goto restart;
3520	}
3521	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3522	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3523	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3524	vn_finished_write(mp);
3525out:
3526	NDFREE(&nd, NDF_ONLY_PNBUF);
3527	vput(vp);
3528	if (nd.ni_dvp == vp)
3529		vrele(nd.ni_dvp);
3530	else
3531		vput(nd.ni_dvp);
3532	VFS_UNLOCK_GIANT(vfslocked);
3533	return (error);
3534}
3535
3536#ifdef COMPAT_43
3537/*
3538 * Read a block of directory entries in a filesystem independent format.
3539 */
3540#ifndef _SYS_SYSPROTO_H_
3541struct ogetdirentries_args {
3542	int	fd;
3543	char	*buf;
3544	u_int	count;
3545	long	*basep;
3546};
3547#endif
3548int
3549ogetdirentries(td, uap)
3550	struct thread *td;
3551	register struct ogetdirentries_args /* {
3552		int fd;
3553		char *buf;
3554		u_int count;
3555		long *basep;
3556	} */ *uap;
3557{
3558	struct vnode *vp;
3559	struct file *fp;
3560	struct uio auio, kuio;
3561	struct iovec aiov, kiov;
3562	struct dirent *dp, *edp;
3563	caddr_t dirbuf;
3564	int error, eofflag, readcnt, vfslocked;
3565	long loff;
3566
3567	/* XXX arbitrary sanity limit on `count'. */
3568	if (uap->count > 64 * 1024)
3569		return (EINVAL);
3570	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3571		return (error);
3572	if ((fp->f_flag & FREAD) == 0) {
3573		fdrop(fp, td);
3574		return (EBADF);
3575	}
3576	vp = fp->f_vnode;
3577unionread:
3578	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3579	if (vp->v_type != VDIR) {
3580		VFS_UNLOCK_GIANT(vfslocked);
3581		fdrop(fp, td);
3582		return (EINVAL);
3583	}
3584	aiov.iov_base = uap->buf;
3585	aiov.iov_len = uap->count;
3586	auio.uio_iov = &aiov;
3587	auio.uio_iovcnt = 1;
3588	auio.uio_rw = UIO_READ;
3589	auio.uio_segflg = UIO_USERSPACE;
3590	auio.uio_td = td;
3591	auio.uio_resid = uap->count;
3592	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3593	loff = auio.uio_offset = fp->f_offset;
3594#ifdef MAC
3595	error = mac_check_vnode_readdir(td->td_ucred, vp);
3596	if (error) {
3597		VOP_UNLOCK(vp, 0, td);
3598		VFS_UNLOCK_GIANT(vfslocked);
3599		fdrop(fp, td);
3600		return (error);
3601	}
3602#endif
3603#	if (BYTE_ORDER != LITTLE_ENDIAN)
3604		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3605			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3606			    NULL, NULL);
3607			fp->f_offset = auio.uio_offset;
3608		} else
3609#	endif
3610	{
3611		kuio = auio;
3612		kuio.uio_iov = &kiov;
3613		kuio.uio_segflg = UIO_SYSSPACE;
3614		kiov.iov_len = uap->count;
3615		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3616		kiov.iov_base = dirbuf;
3617		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3618			    NULL, NULL);
3619		fp->f_offset = kuio.uio_offset;
3620		if (error == 0) {
3621			readcnt = uap->count - kuio.uio_resid;
3622			edp = (struct dirent *)&dirbuf[readcnt];
3623			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3624#				if (BYTE_ORDER == LITTLE_ENDIAN)
3625					/*
3626					 * The expected low byte of
3627					 * dp->d_namlen is our dp->d_type.
3628					 * The high MBZ byte of dp->d_namlen
3629					 * is our dp->d_namlen.
3630					 */
3631					dp->d_type = dp->d_namlen;
3632					dp->d_namlen = 0;
3633#				else
3634					/*
3635					 * The dp->d_type is the high byte
3636					 * of the expected dp->d_namlen,
3637					 * so must be zero'ed.
3638					 */
3639					dp->d_type = 0;
3640#				endif
3641				if (dp->d_reclen > 0) {
3642					dp = (struct dirent *)
3643					    ((char *)dp + dp->d_reclen);
3644				} else {
3645					error = EIO;
3646					break;
3647				}
3648			}
3649			if (dp >= edp)
3650				error = uiomove(dirbuf, readcnt, &auio);
3651		}
3652		FREE(dirbuf, M_TEMP);
3653	}
3654	VOP_UNLOCK(vp, 0, td);
3655	if (error) {
3656		VFS_UNLOCK_GIANT(vfslocked);
3657		fdrop(fp, td);
3658		return (error);
3659	}
3660	if (uap->count == auio.uio_resid) {
3661		if (union_dircheckp) {
3662			error = union_dircheckp(td, &vp, fp);
3663			if (error == -1) {
3664				VFS_UNLOCK_GIANT(vfslocked);
3665				goto unionread;
3666			}
3667			if (error) {
3668				VFS_UNLOCK_GIANT(vfslocked);
3669				fdrop(fp, td);
3670				return (error);
3671			}
3672		}
3673		/*
3674		 * XXX We could delay dropping the lock above but
3675		 * union_dircheckp complicates things.
3676		 */
3677		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3678		if ((vp->v_vflag & VV_ROOT) &&
3679		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3680			struct vnode *tvp = vp;
3681			vp = vp->v_mount->mnt_vnodecovered;
3682			VREF(vp);
3683			fp->f_vnode = vp;
3684			fp->f_data = vp;
3685			fp->f_offset = 0;
3686			vput(tvp);
3687			VFS_UNLOCK_GIANT(vfslocked);
3688			goto unionread;
3689		}
3690		VOP_UNLOCK(vp, 0, td);
3691	}
3692	VFS_UNLOCK_GIANT(vfslocked);
3693	error = copyout(&loff, uap->basep, sizeof(long));
3694	fdrop(fp, td);
3695	td->td_retval[0] = uap->count - auio.uio_resid;
3696	return (error);
3697}
3698#endif /* COMPAT_43 */
3699
3700/*
3701 * Read a block of directory entries in a filesystem independent format.
3702 */
3703#ifndef _SYS_SYSPROTO_H_
3704struct getdirentries_args {
3705	int	fd;
3706	char	*buf;
3707	u_int	count;
3708	long	*basep;
3709};
3710#endif
3711int
3712getdirentries(td, uap)
3713	struct thread *td;
3714	register struct getdirentries_args /* {
3715		int fd;
3716		char *buf;
3717		u_int count;
3718		long *basep;
3719	} */ *uap;
3720{
3721	struct vnode *vp;
3722	struct file *fp;
3723	struct uio auio;
3724	struct iovec aiov;
3725	int vfslocked;
3726	long loff;
3727	int error, eofflag;
3728
3729	AUDIT_ARG(fd, uap->fd);
3730	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3731		return (error);
3732	if ((fp->f_flag & FREAD) == 0) {
3733		fdrop(fp, td);
3734		return (EBADF);
3735	}
3736	vp = fp->f_vnode;
3737unionread:
3738	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3739	if (vp->v_type != VDIR) {
3740		error = EINVAL;
3741		goto fail;
3742	}
3743	aiov.iov_base = uap->buf;
3744	aiov.iov_len = uap->count;
3745	auio.uio_iov = &aiov;
3746	auio.uio_iovcnt = 1;
3747	auio.uio_rw = UIO_READ;
3748	auio.uio_segflg = UIO_USERSPACE;
3749	auio.uio_td = td;
3750	auio.uio_resid = uap->count;
3751	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3752	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3753	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3754	loff = auio.uio_offset = fp->f_offset;
3755#ifdef MAC
3756	error = mac_check_vnode_readdir(td->td_ucred, vp);
3757	if (error == 0)
3758#endif
3759		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3760		    NULL);
3761	fp->f_offset = auio.uio_offset;
3762	VOP_UNLOCK(vp, 0, td);
3763	if (error)
3764		goto fail;
3765	if (uap->count == auio.uio_resid) {
3766		if (union_dircheckp) {
3767			error = union_dircheckp(td, &vp, fp);
3768			if (error == -1) {
3769				VFS_UNLOCK_GIANT(vfslocked);
3770				goto unionread;
3771			}
3772			if (error)
3773				goto fail;
3774		}
3775		/*
3776		 * XXX We could delay dropping the lock above but
3777		 * union_dircheckp complicates things.
3778		 */
3779		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3780		if ((vp->v_vflag & VV_ROOT) &&
3781		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3782			struct vnode *tvp = vp;
3783			vp = vp->v_mount->mnt_vnodecovered;
3784			VREF(vp);
3785			fp->f_vnode = vp;
3786			fp->f_data = vp;
3787			fp->f_offset = 0;
3788			vput(tvp);
3789			VFS_UNLOCK_GIANT(vfslocked);
3790			goto unionread;
3791		}
3792		VOP_UNLOCK(vp, 0, td);
3793	}
3794	if (uap->basep != NULL) {
3795		error = copyout(&loff, uap->basep, sizeof(long));
3796	}
3797	td->td_retval[0] = uap->count - auio.uio_resid;
3798fail:
3799	VFS_UNLOCK_GIANT(vfslocked);
3800	fdrop(fp, td);
3801	return (error);
3802}
3803#ifndef _SYS_SYSPROTO_H_
3804struct getdents_args {
3805	int fd;
3806	char *buf;
3807	size_t count;
3808};
3809#endif
3810int
3811getdents(td, uap)
3812	struct thread *td;
3813	register struct getdents_args /* {
3814		int fd;
3815		char *buf;
3816		u_int count;
3817	} */ *uap;
3818{
3819	struct getdirentries_args ap;
3820	ap.fd = uap->fd;
3821	ap.buf = uap->buf;
3822	ap.count = uap->count;
3823	ap.basep = NULL;
3824	return (getdirentries(td, &ap));
3825}
3826
3827/*
3828 * Set the mode mask for creation of filesystem nodes.
3829 *
3830 * MP SAFE
3831 */
3832#ifndef _SYS_SYSPROTO_H_
3833struct umask_args {
3834	int	newmask;
3835};
3836#endif
3837int
3838umask(td, uap)
3839	struct thread *td;
3840	struct umask_args /* {
3841		int newmask;
3842	} */ *uap;
3843{
3844	register struct filedesc *fdp;
3845
3846	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3847	fdp = td->td_proc->p_fd;
3848	td->td_retval[0] = fdp->fd_cmask;
3849	fdp->fd_cmask = uap->newmask & ALLPERMS;
3850	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3851	return (0);
3852}
3853
3854/*
3855 * Void all references to file by ripping underlying filesystem
3856 * away from vnode.
3857 */
3858#ifndef _SYS_SYSPROTO_H_
3859struct revoke_args {
3860	char	*path;
3861};
3862#endif
3863int
3864revoke(td, uap)
3865	struct thread *td;
3866	register struct revoke_args /* {
3867		char *path;
3868	} */ *uap;
3869{
3870	struct vnode *vp;
3871	struct vattr vattr;
3872	int error;
3873	struct nameidata nd;
3874	int vfslocked;
3875
3876	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3877	    UIO_USERSPACE, uap->path, td);
3878	if ((error = namei(&nd)) != 0)
3879		return (error);
3880	vfslocked = NDHASGIANT(&nd);
3881	vp = nd.ni_vp;
3882	NDFREE(&nd, NDF_ONLY_PNBUF);
3883	if (vp->v_type != VCHR) {
3884		error = EINVAL;
3885		goto out;
3886	}
3887#ifdef MAC
3888	error = mac_check_vnode_revoke(td->td_ucred, vp);
3889	if (error)
3890		goto out;
3891#endif
3892	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3893	if (error)
3894		goto out;
3895	if (td->td_ucred->cr_uid != vattr.va_uid) {
3896		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3897		if (error)
3898			goto out;
3899	}
3900	if (vcount(vp) > 1)
3901		VOP_REVOKE(vp, REVOKEALL);
3902out:
3903	vput(vp);
3904	VFS_UNLOCK_GIANT(vfslocked);
3905	return (error);
3906}
3907
3908/*
3909 * Convert a user file descriptor to a kernel file entry.
3910 * A reference on the file entry is held upon returning.
3911 */
3912int
3913getvnode(fdp, fd, fpp)
3914	struct filedesc *fdp;
3915	int fd;
3916	struct file **fpp;
3917{
3918	int error;
3919	struct file *fp;
3920
3921	fp = NULL;
3922	if (fdp == NULL)
3923		error = EBADF;
3924	else {
3925		FILEDESC_LOCK(fdp);
3926		if ((u_int)fd >= fdp->fd_nfiles ||
3927		    (fp = fdp->fd_ofiles[fd]) == NULL)
3928			error = EBADF;
3929		else if (fp->f_vnode == NULL) {
3930			fp = NULL;
3931			error = EINVAL;
3932		} else {
3933			fhold(fp);
3934			error = 0;
3935		}
3936		FILEDESC_UNLOCK(fdp);
3937	}
3938	*fpp = fp;
3939	return (error);
3940}
3941
3942/*
3943 * Get (NFS) file handle
3944 */
3945#ifndef _SYS_SYSPROTO_H_
3946struct lgetfh_args {
3947	char	*fname;
3948	fhandle_t *fhp;
3949};
3950#endif
3951int
3952lgetfh(td, uap)
3953	struct thread *td;
3954	register struct lgetfh_args *uap;
3955{
3956	struct nameidata nd;
3957	fhandle_t fh;
3958	register struct vnode *vp;
3959	int vfslocked;
3960	int error;
3961
3962	error = suser(td);
3963	if (error)
3964		return (error);
3965	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3966	    UIO_USERSPACE, uap->fname, td);
3967	error = namei(&nd);
3968	if (error)
3969		return (error);
3970	vfslocked = NDHASGIANT(&nd);
3971	NDFREE(&nd, NDF_ONLY_PNBUF);
3972	vp = nd.ni_vp;
3973	bzero(&fh, sizeof(fh));
3974	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3975	error = VFS_VPTOFH(vp, &fh.fh_fid);
3976	vput(vp);
3977	VFS_UNLOCK_GIANT(vfslocked);
3978	if (error)
3979		return (error);
3980	error = copyout(&fh, uap->fhp, sizeof (fh));
3981	return (error);
3982}
3983
3984#ifndef _SYS_SYSPROTO_H_
3985struct getfh_args {
3986	char	*fname;
3987	fhandle_t *fhp;
3988};
3989#endif
3990int
3991getfh(td, uap)
3992	struct thread *td;
3993	register struct getfh_args *uap;
3994{
3995	struct nameidata nd;
3996	fhandle_t fh;
3997	register struct vnode *vp;
3998	int vfslocked;
3999	int error;
4000
4001	error = suser(td);
4002	if (error)
4003		return (error);
4004	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4005	    UIO_USERSPACE, uap->fname, td);
4006	error = namei(&nd);
4007	if (error)
4008		return (error);
4009	vfslocked = NDHASGIANT(&nd);
4010	NDFREE(&nd, NDF_ONLY_PNBUF);
4011	vp = nd.ni_vp;
4012	bzero(&fh, sizeof(fh));
4013	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4014	error = VFS_VPTOFH(vp, &fh.fh_fid);
4015	vput(vp);
4016	VFS_UNLOCK_GIANT(vfslocked);
4017	if (error)
4018		return (error);
4019	error = copyout(&fh, uap->fhp, sizeof (fh));
4020	return (error);
4021}
4022
4023/*
4024 * syscall for the rpc.lockd to use to translate a NFS file handle into
4025 * an open descriptor.
4026 *
4027 * warning: do not remove the suser() call or this becomes one giant
4028 * security hole.
4029 *
4030 * MP SAFE
4031 */
4032#ifndef _SYS_SYSPROTO_H_
4033struct fhopen_args {
4034	const struct fhandle *u_fhp;
4035	int flags;
4036};
4037#endif
4038int
4039fhopen(td, uap)
4040	struct thread *td;
4041	struct fhopen_args /* {
4042		const struct fhandle *u_fhp;
4043		int flags;
4044	} */ *uap;
4045{
4046	struct proc *p = td->td_proc;
4047	struct mount *mp;
4048	struct vnode *vp;
4049	struct fhandle fhp;
4050	struct vattr vat;
4051	struct vattr *vap = &vat;
4052	struct flock lf;
4053	struct file *fp;
4054	register struct filedesc *fdp = p->p_fd;
4055	int fmode, mode, error, type;
4056	struct file *nfp;
4057	int vfslocked;
4058	int indx;
4059
4060	error = suser(td);
4061	if (error)
4062		return (error);
4063	fmode = FFLAGS(uap->flags);
4064	/* why not allow a non-read/write open for our lockd? */
4065	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4066		return (EINVAL);
4067	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4068	if (error)
4069		return(error);
4070	/* find the mount point */
4071	mp = vfs_getvfs(&fhp.fh_fsid);
4072	if (mp == NULL)
4073		return (ESTALE);
4074	vfslocked = VFS_LOCK_GIANT(mp);
4075	/* now give me my vnode, it gets returned to me locked */
4076	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4077	if (error)
4078		goto out;
4079	/*
4080	 * from now on we have to make sure not
4081	 * to forget about the vnode
4082	 * any error that causes an abort must vput(vp)
4083	 * just set error = err and 'goto bad;'.
4084	 */
4085
4086	/*
4087	 * from vn_open
4088	 */
4089	if (vp->v_type == VLNK) {
4090		error = EMLINK;
4091		goto bad;
4092	}
4093	if (vp->v_type == VSOCK) {
4094		error = EOPNOTSUPP;
4095		goto bad;
4096	}
4097	mode = 0;
4098	if (fmode & (FWRITE | O_TRUNC)) {
4099		if (vp->v_type == VDIR) {
4100			error = EISDIR;
4101			goto bad;
4102		}
4103		error = vn_writechk(vp);
4104		if (error)
4105			goto bad;
4106		mode |= VWRITE;
4107	}
4108	if (fmode & FREAD)
4109		mode |= VREAD;
4110	if (fmode & O_APPEND)
4111		mode |= VAPPEND;
4112#ifdef MAC
4113	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4114	if (error)
4115		goto bad;
4116#endif
4117	if (mode) {
4118		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4119		if (error)
4120			goto bad;
4121	}
4122	if (fmode & O_TRUNC) {
4123		VOP_UNLOCK(vp, 0, td);				/* XXX */
4124		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4125			vrele(vp);
4126			goto out;
4127		}
4128		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4129		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4130#ifdef MAC
4131		/*
4132		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4133		 * should be right.
4134		 */
4135		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4136		if (error == 0) {
4137#endif
4138			VATTR_NULL(vap);
4139			vap->va_size = 0;
4140			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4141#ifdef MAC
4142		}
4143#endif
4144		vn_finished_write(mp);
4145		if (error)
4146			goto bad;
4147	}
4148	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4149	if (error)
4150		goto bad;
4151
4152	if (fmode & FWRITE)
4153		vp->v_writecount++;
4154
4155	/*
4156	 * end of vn_open code
4157	 */
4158
4159	if ((error = falloc(td, &nfp, &indx)) != 0) {
4160		if (fmode & FWRITE)
4161			vp->v_writecount--;
4162		goto bad;
4163	}
4164	/* An extra reference on `nfp' has been held for us by falloc(). */
4165	fp = nfp;
4166
4167	nfp->f_vnode = vp;
4168	nfp->f_data = vp;
4169	nfp->f_flag = fmode & FMASK;
4170	nfp->f_ops = &vnops;
4171	nfp->f_type = DTYPE_VNODE;
4172	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4173		lf.l_whence = SEEK_SET;
4174		lf.l_start = 0;
4175		lf.l_len = 0;
4176		if (fmode & O_EXLOCK)
4177			lf.l_type = F_WRLCK;
4178		else
4179			lf.l_type = F_RDLCK;
4180		type = F_FLOCK;
4181		if ((fmode & FNONBLOCK) == 0)
4182			type |= F_WAIT;
4183		VOP_UNLOCK(vp, 0, td);
4184		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4185			    type)) != 0) {
4186			/*
4187			 * The lock request failed.  Normally close the
4188			 * descriptor but handle the case where someone might
4189			 * have dup()d or close()d it when we weren't looking.
4190			 */
4191			fdclose(fdp, fp, indx, td);
4192
4193			/*
4194			 * release our private reference
4195			 */
4196			fdrop(fp, td);
4197			goto out;
4198		}
4199		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4200		fp->f_flag |= FHASLOCK;
4201	}
4202
4203	VOP_UNLOCK(vp, 0, td);
4204	fdrop(fp, td);
4205	vfs_rel(mp);
4206	VFS_UNLOCK_GIANT(vfslocked);
4207	td->td_retval[0] = indx;
4208	return (0);
4209
4210bad:
4211	vput(vp);
4212out:
4213	vfs_rel(mp);
4214	VFS_UNLOCK_GIANT(vfslocked);
4215	return (error);
4216}
4217
4218/*
4219 * Stat an (NFS) file handle.
4220 *
4221 * MP SAFE
4222 */
4223#ifndef _SYS_SYSPROTO_H_
4224struct fhstat_args {
4225	struct fhandle *u_fhp;
4226	struct stat *sb;
4227};
4228#endif
4229int
4230fhstat(td, uap)
4231	struct thread *td;
4232	register struct fhstat_args /* {
4233		struct fhandle *u_fhp;
4234		struct stat *sb;
4235	} */ *uap;
4236{
4237	struct stat sb;
4238	fhandle_t fh;
4239	struct mount *mp;
4240	struct vnode *vp;
4241	int vfslocked;
4242	int error;
4243
4244	error = suser(td);
4245	if (error)
4246		return (error);
4247	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4248	if (error)
4249		return (error);
4250	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4251		return (ESTALE);
4252	vfslocked = VFS_LOCK_GIANT(mp);
4253	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4254		vfs_rel(mp);
4255		VFS_UNLOCK_GIANT(vfslocked);
4256		return (error);
4257	}
4258	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4259	vput(vp);
4260	vfs_rel(mp);
4261	VFS_UNLOCK_GIANT(vfslocked);
4262	if (error)
4263		return (error);
4264	error = copyout(&sb, uap->sb, sizeof(sb));
4265	return (error);
4266}
4267
4268/*
4269 * Implement fstatfs() for (NFS) file handles.
4270 *
4271 * MP SAFE
4272 */
4273#ifndef _SYS_SYSPROTO_H_
4274struct fhstatfs_args {
4275	struct fhandle *u_fhp;
4276	struct statfs *buf;
4277};
4278#endif
4279int
4280fhstatfs(td, uap)
4281	struct thread *td;
4282	struct fhstatfs_args /* {
4283		struct fhandle *u_fhp;
4284		struct statfs *buf;
4285	} */ *uap;
4286{
4287	struct statfs sf;
4288	fhandle_t fh;
4289	int error;
4290
4291	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4292	if (error)
4293		return (error);
4294	error = kern_fhstatfs(td, fh, &sf);
4295	if (error)
4296		return (error);
4297	return (copyout(&sf, uap->buf, sizeof(sf)));
4298}
4299
4300int
4301kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4302{
4303	struct statfs *sp;
4304	struct mount *mp;
4305	struct vnode *vp;
4306	int vfslocked;
4307	int error;
4308
4309	error = suser(td);
4310	if (error)
4311		return (error);
4312	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4313		return (ESTALE);
4314	vfslocked = VFS_LOCK_GIANT(mp);
4315	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4316	if (error) {
4317		VFS_UNLOCK_GIANT(vfslocked);
4318		vfs_rel(mp);
4319		return (error);
4320	}
4321	vput(vp);
4322	error = prison_canseemount(td->td_ucred, mp);
4323	if (error)
4324		goto out;
4325#ifdef MAC
4326	error = mac_check_mount_stat(td->td_ucred, mp);
4327	if (error)
4328		goto out;
4329#endif
4330	/*
4331	 * Set these in case the underlying filesystem fails to do so.
4332	 */
4333	sp = &mp->mnt_stat;
4334	sp->f_version = STATFS_VERSION;
4335	sp->f_namemax = NAME_MAX;
4336	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4337	error = VFS_STATFS(mp, sp, td);
4338	if (error == 0)
4339		*buf = *sp;
4340out:
4341	vfs_rel(mp);
4342	VFS_UNLOCK_GIANT(vfslocked);
4343	return (error);
4344}
4345
4346/*
4347 * Syscall to push extended attribute configuration information into the
4348 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4349 * a command (int cmd), and attribute name and misc data.  For now, the
4350 * attribute name is left in userspace for consumption by the VFS_op.
4351 * It will probably be changed to be copied into sysspace by the
4352 * syscall in the future, once issues with various consumers of the
4353 * attribute code have raised their hands.
4354 *
4355 * Currently this is used only by UFS Extended Attributes.
4356 */
4357int
4358extattrctl(td, uap)
4359	struct thread *td;
4360	struct extattrctl_args /* {
4361		const char *path;
4362		int cmd;
4363		const char *filename;
4364		int attrnamespace;
4365		const char *attrname;
4366	} */ *uap;
4367{
4368	struct vnode *filename_vp;
4369	struct nameidata nd;
4370	struct mount *mp, *mp_writable;
4371	char attrname[EXTATTR_MAXNAMELEN];
4372	int vfslocked, fnvfslocked, error;
4373
4374	AUDIT_ARG(cmd, uap->cmd);
4375	AUDIT_ARG(value, uap->attrnamespace);
4376	/*
4377	 * uap->attrname is not always defined.  We check again later when we
4378	 * invoke the VFS call so as to pass in NULL there if needed.
4379	 */
4380	if (uap->attrname != NULL) {
4381		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4382		    NULL);
4383		if (error)
4384			return (error);
4385	}
4386	AUDIT_ARG(text, attrname);
4387
4388	vfslocked = fnvfslocked = 0;
4389	/*
4390	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4391	 * which VFS_EXTATTRCTL() will later release.
4392	 */
4393	filename_vp = NULL;
4394	if (uap->filename != NULL) {
4395		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4396		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4397		error = namei(&nd);
4398		if (error)
4399			return (error);
4400		fnvfslocked = NDHASGIANT(&nd);
4401		filename_vp = nd.ni_vp;
4402		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4403	}
4404
4405	/* uap->path is always defined. */
4406	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4407	    uap->path, td);
4408	error = namei(&nd);
4409	if (error) {
4410		if (filename_vp != NULL)
4411			vput(filename_vp);
4412		goto out;
4413	}
4414	vfslocked = NDHASGIANT(&nd);
4415	mp = nd.ni_vp->v_mount;
4416	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4417	NDFREE(&nd, 0);
4418	if (error) {
4419		if (filename_vp != NULL)
4420			vput(filename_vp);
4421		goto out;
4422	}
4423
4424	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4425	    uap->attrname != NULL ? attrname : NULL, td);
4426
4427	vn_finished_write(mp_writable);
4428	/*
4429	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4430	 * filename_vp, so vrele it if it is defined.
4431	 */
4432	if (filename_vp != NULL)
4433		vrele(filename_vp);
4434out:
4435	VFS_UNLOCK_GIANT(fnvfslocked);
4436	VFS_UNLOCK_GIANT(vfslocked);
4437	return (error);
4438}
4439
4440/*-
4441 * Set a named extended attribute on a file or directory
4442 *
4443 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4444 *            kernelspace string pointer "attrname", userspace buffer
4445 *            pointer "data", buffer length "nbytes", thread "td".
4446 * Returns: 0 on success, an error number otherwise
4447 * Locks: none
4448 * References: vp must be a valid reference for the duration of the call
4449 */
4450static int
4451extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4452    void *data, size_t nbytes, struct thread *td)
4453{
4454	struct mount *mp;
4455	struct uio auio;
4456	struct iovec aiov;
4457	ssize_t cnt;
4458	int error;
4459
4460	VFS_ASSERT_GIANT(vp->v_mount);
4461	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4462	if (error)
4463		return (error);
4464	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4465	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4466
4467	aiov.iov_base = data;
4468	aiov.iov_len = nbytes;
4469	auio.uio_iov = &aiov;
4470	auio.uio_iovcnt = 1;
4471	auio.uio_offset = 0;
4472	if (nbytes > INT_MAX) {
4473		error = EINVAL;
4474		goto done;
4475	}
4476	auio.uio_resid = nbytes;
4477	auio.uio_rw = UIO_WRITE;
4478	auio.uio_segflg = UIO_USERSPACE;
4479	auio.uio_td = td;
4480	cnt = nbytes;
4481
4482#ifdef MAC
4483	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4484	    attrname, &auio);
4485	if (error)
4486		goto done;
4487#endif
4488
4489	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4490	    td->td_ucred, td);
4491	cnt -= auio.uio_resid;
4492	td->td_retval[0] = cnt;
4493
4494done:
4495	VOP_UNLOCK(vp, 0, td);
4496	vn_finished_write(mp);
4497	return (error);
4498}
4499
4500int
4501extattr_set_fd(td, uap)
4502	struct thread *td;
4503	struct extattr_set_fd_args /* {
4504		int fd;
4505		int attrnamespace;
4506		const char *attrname;
4507		void *data;
4508		size_t nbytes;
4509	} */ *uap;
4510{
4511	struct file *fp;
4512	char attrname[EXTATTR_MAXNAMELEN];
4513	int vfslocked, error;
4514
4515	AUDIT_ARG(fd, uap->fd);
4516	AUDIT_ARG(value, uap->attrnamespace);
4517	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4518	if (error)
4519		return (error);
4520	AUDIT_ARG(text, attrname);
4521
4522	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4523	if (error)
4524		return (error);
4525
4526	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4527	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4528	    attrname, uap->data, uap->nbytes, td);
4529	fdrop(fp, td);
4530	VFS_UNLOCK_GIANT(vfslocked);
4531
4532	return (error);
4533}
4534
4535int
4536extattr_set_file(td, uap)
4537	struct thread *td;
4538	struct extattr_set_file_args /* {
4539		const char *path;
4540		int attrnamespace;
4541		const char *attrname;
4542		void *data;
4543		size_t nbytes;
4544	} */ *uap;
4545{
4546	struct nameidata nd;
4547	char attrname[EXTATTR_MAXNAMELEN];
4548	int vfslocked, error;
4549
4550	AUDIT_ARG(value, uap->attrnamespace);
4551	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4552	if (error)
4553		return (error);
4554	AUDIT_ARG(text, attrname);
4555
4556	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4557	    uap->path, td);
4558	error = namei(&nd);
4559	if (error)
4560		return (error);
4561	NDFREE(&nd, NDF_ONLY_PNBUF);
4562
4563	vfslocked = NDHASGIANT(&nd);
4564	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4565	    uap->data, uap->nbytes, td);
4566
4567	vrele(nd.ni_vp);
4568	VFS_UNLOCK_GIANT(vfslocked);
4569	return (error);
4570}
4571
4572int
4573extattr_set_link(td, uap)
4574	struct thread *td;
4575	struct extattr_set_link_args /* {
4576		const char *path;
4577		int attrnamespace;
4578		const char *attrname;
4579		void *data;
4580		size_t nbytes;
4581	} */ *uap;
4582{
4583	struct nameidata nd;
4584	char attrname[EXTATTR_MAXNAMELEN];
4585	int vfslocked, error;
4586
4587	AUDIT_ARG(value, uap->attrnamespace);
4588	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4589	if (error)
4590		return (error);
4591	AUDIT_ARG(text, attrname);
4592
4593	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4594	    uap->path, td);
4595	error = namei(&nd);
4596	if (error)
4597		return (error);
4598	NDFREE(&nd, NDF_ONLY_PNBUF);
4599
4600	vfslocked = NDHASGIANT(&nd);
4601	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4602	    uap->data, uap->nbytes, td);
4603
4604	vrele(nd.ni_vp);
4605	VFS_UNLOCK_GIANT(vfslocked);
4606	return (error);
4607}
4608
4609/*-
4610 * Get a named extended attribute on a file or directory
4611 *
4612 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4613 *            kernelspace string pointer "attrname", userspace buffer
4614 *            pointer "data", buffer length "nbytes", thread "td".
4615 * Returns: 0 on success, an error number otherwise
4616 * Locks: none
4617 * References: vp must be a valid reference for the duration of the call
4618 */
4619static int
4620extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4621    void *data, size_t nbytes, struct thread *td)
4622{
4623	struct uio auio, *auiop;
4624	struct iovec aiov;
4625	ssize_t cnt;
4626	size_t size, *sizep;
4627	int error;
4628
4629	VFS_ASSERT_GIANT(vp->v_mount);
4630	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4631	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4632
4633	/*
4634	 * Slightly unusual semantics: if the user provides a NULL data
4635	 * pointer, they don't want to receive the data, just the
4636	 * maximum read length.
4637	 */
4638	auiop = NULL;
4639	sizep = NULL;
4640	cnt = 0;
4641	if (data != NULL) {
4642		aiov.iov_base = data;
4643		aiov.iov_len = nbytes;
4644		auio.uio_iov = &aiov;
4645		auio.uio_iovcnt = 1;
4646		auio.uio_offset = 0;
4647		if (nbytes > INT_MAX) {
4648			error = EINVAL;
4649			goto done;
4650		}
4651		auio.uio_resid = nbytes;
4652		auio.uio_rw = UIO_READ;
4653		auio.uio_segflg = UIO_USERSPACE;
4654		auio.uio_td = td;
4655		auiop = &auio;
4656		cnt = nbytes;
4657	} else
4658		sizep = &size;
4659
4660#ifdef MAC
4661	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4662	    attrname, &auio);
4663	if (error)
4664		goto done;
4665#endif
4666
4667	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4668	    td->td_ucred, td);
4669
4670	if (auiop != NULL) {
4671		cnt -= auio.uio_resid;
4672		td->td_retval[0] = cnt;
4673	} else
4674		td->td_retval[0] = size;
4675
4676done:
4677	VOP_UNLOCK(vp, 0, td);
4678	return (error);
4679}
4680
4681int
4682extattr_get_fd(td, uap)
4683	struct thread *td;
4684	struct extattr_get_fd_args /* {
4685		int fd;
4686		int attrnamespace;
4687		const char *attrname;
4688		void *data;
4689		size_t nbytes;
4690	} */ *uap;
4691{
4692	struct file *fp;
4693	char attrname[EXTATTR_MAXNAMELEN];
4694	int vfslocked, error;
4695
4696	AUDIT_ARG(fd, uap->fd);
4697	AUDIT_ARG(value, uap->attrnamespace);
4698	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4699	if (error)
4700		return (error);
4701	AUDIT_ARG(text, attrname);
4702
4703	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4704	if (error)
4705		return (error);
4706
4707	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4708	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4709	    attrname, uap->data, uap->nbytes, td);
4710
4711	fdrop(fp, td);
4712	VFS_UNLOCK_GIANT(vfslocked);
4713	return (error);
4714}
4715
4716int
4717extattr_get_file(td, uap)
4718	struct thread *td;
4719	struct extattr_get_file_args /* {
4720		const char *path;
4721		int attrnamespace;
4722		const char *attrname;
4723		void *data;
4724		size_t nbytes;
4725	} */ *uap;
4726{
4727	struct nameidata nd;
4728	char attrname[EXTATTR_MAXNAMELEN];
4729	int vfslocked, error;
4730
4731	AUDIT_ARG(value, uap->attrnamespace);
4732	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4733	if (error)
4734		return (error);
4735	AUDIT_ARG(text, attrname);
4736
4737	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4738	    uap->path, td);
4739	error = namei(&nd);
4740	if (error)
4741		return (error);
4742	NDFREE(&nd, NDF_ONLY_PNBUF);
4743
4744	vfslocked = NDHASGIANT(&nd);
4745	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4746	    uap->data, uap->nbytes, td);
4747
4748	vrele(nd.ni_vp);
4749	VFS_UNLOCK_GIANT(vfslocked);
4750	return (error);
4751}
4752
4753int
4754extattr_get_link(td, uap)
4755	struct thread *td;
4756	struct extattr_get_link_args /* {
4757		const char *path;
4758		int attrnamespace;
4759		const char *attrname;
4760		void *data;
4761		size_t nbytes;
4762	} */ *uap;
4763{
4764	struct nameidata nd;
4765	char attrname[EXTATTR_MAXNAMELEN];
4766	int vfslocked, error;
4767
4768	AUDIT_ARG(value, uap->attrnamespace);
4769	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4770	if (error)
4771		return (error);
4772	AUDIT_ARG(text, attrname);
4773
4774	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4775	    uap->path, td);
4776	error = namei(&nd);
4777	if (error)
4778		return (error);
4779	NDFREE(&nd, NDF_ONLY_PNBUF);
4780
4781	vfslocked = NDHASGIANT(&nd);
4782	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4783	    uap->data, uap->nbytes, td);
4784
4785	vrele(nd.ni_vp);
4786	VFS_UNLOCK_GIANT(vfslocked);
4787	return (error);
4788}
4789
4790/*
4791 * extattr_delete_vp(): Delete a named extended attribute on a file or
4792 *                      directory
4793 *
4794 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4795 *            kernelspace string pointer "attrname", proc "p"
4796 * Returns: 0 on success, an error number otherwise
4797 * Locks: none
4798 * References: vp must be a valid reference for the duration of the call
4799 */
4800static int
4801extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4802    struct thread *td)
4803{
4804	struct mount *mp;
4805	int error;
4806
4807	VFS_ASSERT_GIANT(vp->v_mount);
4808	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4809	if (error)
4810		return (error);
4811	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4812	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4813
4814#ifdef MAC
4815	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4816	    attrname);
4817	if (error)
4818		goto done;
4819#endif
4820
4821	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4822	    td);
4823	if (error == EOPNOTSUPP)
4824		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4825		    td->td_ucred, td);
4826#ifdef MAC
4827done:
4828#endif
4829	VOP_UNLOCK(vp, 0, td);
4830	vn_finished_write(mp);
4831	return (error);
4832}
4833
4834int
4835extattr_delete_fd(td, uap)
4836	struct thread *td;
4837	struct extattr_delete_fd_args /* {
4838		int fd;
4839		int attrnamespace;
4840		const char *attrname;
4841	} */ *uap;
4842{
4843	struct file *fp;
4844	char attrname[EXTATTR_MAXNAMELEN];
4845	int vfslocked, error;
4846
4847	AUDIT_ARG(fd, uap->fd);
4848	AUDIT_ARG(value, uap->attrnamespace);
4849	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4850	if (error)
4851		return (error);
4852	AUDIT_ARG(text, attrname);
4853
4854	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4855	if (error)
4856		return (error);
4857
4858	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4859	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4860	    attrname, td);
4861	fdrop(fp, td);
4862	VFS_UNLOCK_GIANT(vfslocked);
4863	return (error);
4864}
4865
4866int
4867extattr_delete_file(td, uap)
4868	struct thread *td;
4869	struct extattr_delete_file_args /* {
4870		const char *path;
4871		int attrnamespace;
4872		const char *attrname;
4873	} */ *uap;
4874{
4875	struct nameidata nd;
4876	char attrname[EXTATTR_MAXNAMELEN];
4877	int vfslocked, error;
4878
4879	AUDIT_ARG(value, uap->attrnamespace);
4880	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4881	if (error)
4882		return(error);
4883	AUDIT_ARG(text, attrname);
4884
4885	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4886	    uap->path, td);
4887	error = namei(&nd);
4888	if (error)
4889		return(error);
4890	NDFREE(&nd, NDF_ONLY_PNBUF);
4891
4892	vfslocked = NDHASGIANT(&nd);
4893	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4894	vrele(nd.ni_vp);
4895	VFS_UNLOCK_GIANT(vfslocked);
4896	return(error);
4897}
4898
4899int
4900extattr_delete_link(td, uap)
4901	struct thread *td;
4902	struct extattr_delete_link_args /* {
4903		const char *path;
4904		int attrnamespace;
4905		const char *attrname;
4906	} */ *uap;
4907{
4908	struct nameidata nd;
4909	char attrname[EXTATTR_MAXNAMELEN];
4910	int vfslocked, error;
4911
4912	AUDIT_ARG(value, uap->attrnamespace);
4913	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4914	if (error)
4915		return(error);
4916	AUDIT_ARG(text, attrname);
4917
4918	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4919	    uap->path, td);
4920	error = namei(&nd);
4921	if (error)
4922		return(error);
4923	NDFREE(&nd, NDF_ONLY_PNBUF);
4924
4925	vfslocked = NDHASGIANT(&nd);
4926	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4927	vrele(nd.ni_vp);
4928	VFS_UNLOCK_GIANT(vfslocked);
4929	return(error);
4930}
4931
4932/*-
4933 * Retrieve a list of extended attributes on a file or directory.
4934 *
4935 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4936 *            userspace buffer pointer "data", buffer length "nbytes",
4937 *            thread "td".
4938 * Returns: 0 on success, an error number otherwise
4939 * Locks: none
4940 * References: vp must be a valid reference for the duration of the call
4941 */
4942static int
4943extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4944    size_t nbytes, struct thread *td)
4945{
4946	struct uio auio, *auiop;
4947	size_t size, *sizep;
4948	struct iovec aiov;
4949	ssize_t cnt;
4950	int error;
4951
4952	VFS_ASSERT_GIANT(vp->v_mount);
4953	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4954	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4955
4956	auiop = NULL;
4957	sizep = NULL;
4958	cnt = 0;
4959	if (data != NULL) {
4960		aiov.iov_base = data;
4961		aiov.iov_len = nbytes;
4962		auio.uio_iov = &aiov;
4963		auio.uio_iovcnt = 1;
4964		auio.uio_offset = 0;
4965		if (nbytes > INT_MAX) {
4966			error = EINVAL;
4967			goto done;
4968		}
4969		auio.uio_resid = nbytes;
4970		auio.uio_rw = UIO_READ;
4971		auio.uio_segflg = UIO_USERSPACE;
4972		auio.uio_td = td;
4973		auiop = &auio;
4974		cnt = nbytes;
4975	} else
4976		sizep = &size;
4977
4978#ifdef MAC
4979	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4980	if (error)
4981		goto done;
4982#endif
4983
4984	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4985	    td->td_ucred, td);
4986
4987	if (auiop != NULL) {
4988		cnt -= auio.uio_resid;
4989		td->td_retval[0] = cnt;
4990	} else
4991		td->td_retval[0] = size;
4992
4993done:
4994	VOP_UNLOCK(vp, 0, td);
4995	return (error);
4996}
4997
4998
4999int
5000extattr_list_fd(td, uap)
5001	struct thread *td;
5002	struct extattr_list_fd_args /* {
5003		int fd;
5004		int attrnamespace;
5005		void *data;
5006		size_t nbytes;
5007	} */ *uap;
5008{
5009	struct file *fp;
5010	int vfslocked, error;
5011
5012	AUDIT_ARG(fd, uap->fd);
5013	AUDIT_ARG(value, uap->attrnamespace);
5014	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5015	if (error)
5016		return (error);
5017
5018	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5019	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5020	    uap->nbytes, td);
5021
5022	fdrop(fp, td);
5023	VFS_UNLOCK_GIANT(vfslocked);
5024	return (error);
5025}
5026
5027int
5028extattr_list_file(td, uap)
5029	struct thread*td;
5030	struct extattr_list_file_args /* {
5031		const char *path;
5032		int attrnamespace;
5033		void *data;
5034		size_t nbytes;
5035	} */ *uap;
5036{
5037	struct nameidata nd;
5038	int vfslocked, error;
5039
5040	AUDIT_ARG(value, uap->attrnamespace);
5041	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5042	    uap->path, td);
5043	error = namei(&nd);
5044	if (error)
5045		return (error);
5046	NDFREE(&nd, NDF_ONLY_PNBUF);
5047
5048	vfslocked = NDHASGIANT(&nd);
5049	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5050	    uap->nbytes, td);
5051
5052	vrele(nd.ni_vp);
5053	VFS_UNLOCK_GIANT(vfslocked);
5054	return (error);
5055}
5056
5057int
5058extattr_list_link(td, uap)
5059	struct thread*td;
5060	struct extattr_list_link_args /* {
5061		const char *path;
5062		int attrnamespace;
5063		void *data;
5064		size_t nbytes;
5065	} */ *uap;
5066{
5067	struct nameidata nd;
5068	int vfslocked, error;
5069
5070	AUDIT_ARG(value, uap->attrnamespace);
5071	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5072	    uap->path, td);
5073	error = namei(&nd);
5074	if (error)
5075		return (error);
5076	NDFREE(&nd, NDF_ONLY_PNBUF);
5077
5078	vfslocked = NDHASGIANT(&nd);
5079	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5080	    uap->nbytes, td);
5081
5082	vrele(nd.ni_vp);
5083	VFS_UNLOCK_GIANT(vfslocked);
5084	return (error);
5085}
5086