vfs_syscalls.c revision 167232
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 167232 2007-03-05 13:10:58Z rwatson $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/mutex.h>
51#include <sys/sysproto.h>
52#include <sys/namei.h>
53#include <sys/filedesc.h>
54#include <sys/kernel.h>
55#include <sys/fcntl.h>
56#include <sys/file.h>
57#include <sys/limits.h>
58#include <sys/linker.h>
59#include <sys/stat.h>
60#include <sys/sx.h>
61#include <sys/unistd.h>
62#include <sys/vnode.h>
63#include <sys/priv.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/jail.h>
67#include <sys/syscallsubr.h>
68#include <sys/sysctl.h>
69
70#include <machine/stdarg.h>
71
72#include <security/audit/audit.h>
73#include <security/mac/mac_framework.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/uma.h>
79
80static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83static int setfmode(struct thread *td, struct vnode *, int);
84static int setfflags(struct thread *td, struct vnode *, int);
85static int setutimes(struct thread *td, struct vnode *,
86    const struct timespec *, int, int);
87static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88    struct thread *td);
89
90/*
91 * The module initialization routine for POSIX asynchronous I/O will
92 * set this to the version of AIO that it implements.  (Zero means
93 * that it is not implemented.)  This value is used here by pathconf()
94 * and in kern_descrip.c by fpathconf().
95 */
96int async_io_version;
97
98#ifdef DEBUG
99static int syncprt = 0;
100SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
101#endif
102
103/*
104 * Sync each mounted filesystem.
105 */
106#ifndef _SYS_SYSPROTO_H_
107struct sync_args {
108	int     dummy;
109};
110#endif
111/* ARGSUSED */
112int
113sync(td, uap)
114	struct thread *td;
115	struct sync_args *uap;
116{
117	struct mount *mp, *nmp;
118	int vfslocked;
119
120	mtx_lock(&mountlist_mtx);
121	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
122		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
123			nmp = TAILQ_NEXT(mp, mnt_list);
124			continue;
125		}
126		vfslocked = VFS_LOCK_GIANT(mp);
127		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
128		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
129			MNT_ILOCK(mp);
130			mp->mnt_noasync++;
131			mp->mnt_kern_flag &= ~MNTK_ASYNC;
132			MNT_IUNLOCK(mp);
133			vfs_msync(mp, MNT_NOWAIT);
134			VFS_SYNC(mp, MNT_NOWAIT, td);
135			MNT_ILOCK(mp);
136			mp->mnt_noasync--;
137			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
138			    mp->mnt_noasync == 0)
139				mp->mnt_kern_flag |= MNTK_ASYNC;
140			MNT_IUNLOCK(mp);
141			vn_finished_write(mp);
142		}
143		VFS_UNLOCK_GIANT(vfslocked);
144		mtx_lock(&mountlist_mtx);
145		nmp = TAILQ_NEXT(mp, mnt_list);
146		vfs_unbusy(mp, td);
147	}
148	mtx_unlock(&mountlist_mtx);
149	return (0);
150}
151
152/* XXX PRISON: could be per prison flag */
153static int prison_quotas;
154#if 0
155SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156#endif
157
158/*
159 * Change filesystem quotas.
160 */
161#ifndef _SYS_SYSPROTO_H_
162struct quotactl_args {
163	char *path;
164	int cmd;
165	int uid;
166	caddr_t arg;
167};
168#endif
169int
170quotactl(td, uap)
171	struct thread *td;
172	register struct quotactl_args /* {
173		char *path;
174		int cmd;
175		int uid;
176		caddr_t arg;
177	} */ *uap;
178{
179	struct mount *mp, *vmp;
180	int vfslocked;
181	int error;
182	struct nameidata nd;
183
184	AUDIT_ARG(cmd, uap->cmd);
185	AUDIT_ARG(uid, uap->uid);
186	if (jailed(td->td_ucred) && !prison_quotas)
187		return (EPERM);
188	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
189	   UIO_USERSPACE, uap->path, td);
190	if ((error = namei(&nd)) != 0)
191		return (error);
192	vfslocked = NDHASGIANT(&nd);
193	NDFREE(&nd, NDF_ONLY_PNBUF);
194	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
195	mp = nd.ni_vp->v_mount;
196	vrele(nd.ni_vp);
197	if (error)
198		goto out;
199	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
200	vn_finished_write(vmp);
201out:
202	VFS_UNLOCK_GIANT(vfslocked);
203	return (error);
204}
205
206/*
207 * Get filesystem statistics.
208 */
209#ifndef _SYS_SYSPROTO_H_
210struct statfs_args {
211	char *path;
212	struct statfs *buf;
213};
214#endif
215int
216statfs(td, uap)
217	struct thread *td;
218	register struct statfs_args /* {
219		char *path;
220		struct statfs *buf;
221	} */ *uap;
222{
223	struct statfs sf;
224	int error;
225
226	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
227	if (error == 0)
228		error = copyout(&sf, uap->buf, sizeof(sf));
229	return (error);
230}
231
232int
233kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
234    struct statfs *buf)
235{
236	struct mount *mp;
237	struct statfs *sp, sb;
238	int vfslocked;
239	int error;
240	struct nameidata nd;
241
242	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
243	    pathseg, path, td);
244	error = namei(&nd);
245	if (error)
246		return (error);
247	vfslocked = NDHASGIANT(&nd);
248	mp = nd.ni_vp->v_mount;
249	vfs_ref(mp);
250	NDFREE(&nd, NDF_ONLY_PNBUF);
251	vput(nd.ni_vp);
252#ifdef MAC
253	error = mac_check_mount_stat(td->td_ucred, mp);
254	if (error)
255		goto out;
256#endif
257	/*
258	 * Set these in case the underlying filesystem fails to do so.
259	 */
260	sp = &mp->mnt_stat;
261	sp->f_version = STATFS_VERSION;
262	sp->f_namemax = NAME_MAX;
263	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
264	error = VFS_STATFS(mp, sp, td);
265	if (error)
266		goto out;
267	if (priv_check(td, PRIV_VFS_GENERATION)) {
268		bcopy(sp, &sb, sizeof(sb));
269		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
270		prison_enforce_statfs(td->td_ucred, mp, &sb);
271		sp = &sb;
272	}
273	*buf = *sp;
274out:
275	vfs_rel(mp);
276	VFS_UNLOCK_GIANT(vfslocked);
277	if (mtx_owned(&Giant))
278		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
279	return (error);
280}
281
282/*
283 * Get filesystem statistics.
284 */
285#ifndef _SYS_SYSPROTO_H_
286struct fstatfs_args {
287	int fd;
288	struct statfs *buf;
289};
290#endif
291int
292fstatfs(td, uap)
293	struct thread *td;
294	register struct fstatfs_args /* {
295		int fd;
296		struct statfs *buf;
297	} */ *uap;
298{
299	struct statfs sf;
300	int error;
301
302	error = kern_fstatfs(td, uap->fd, &sf);
303	if (error == 0)
304		error = copyout(&sf, uap->buf, sizeof(sf));
305	return (error);
306}
307
308int
309kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
310{
311	struct file *fp;
312	struct mount *mp;
313	struct statfs *sp, sb;
314	int vfslocked;
315	struct vnode *vp;
316	int error;
317
318	AUDIT_ARG(fd, fd);
319	error = getvnode(td->td_proc->p_fd, fd, &fp);
320	if (error)
321		return (error);
322	vp = fp->f_vnode;
323	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
324	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
325#ifdef AUDIT
326	AUDIT_ARG(vnode, vp, ARG_VNODE1);
327#endif
328	mp = vp->v_mount;
329	if (mp)
330		vfs_ref(mp);
331	VOP_UNLOCK(vp, 0, td);
332	fdrop(fp, td);
333	if (vp->v_iflag & VI_DOOMED) {
334		error = EBADF;
335		goto out;
336	}
337#ifdef MAC
338	error = mac_check_mount_stat(td->td_ucred, mp);
339	if (error)
340		goto out;
341#endif
342	/*
343	 * Set these in case the underlying filesystem fails to do so.
344	 */
345	sp = &mp->mnt_stat;
346	sp->f_version = STATFS_VERSION;
347	sp->f_namemax = NAME_MAX;
348	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
349	error = VFS_STATFS(mp, sp, td);
350	if (error)
351		goto out;
352	if (priv_check(td, PRIV_VFS_GENERATION)) {
353		bcopy(sp, &sb, sizeof(sb));
354		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
355		prison_enforce_statfs(td->td_ucred, mp, &sb);
356		sp = &sb;
357	}
358	*buf = *sp;
359out:
360	if (mp)
361		vfs_rel(mp);
362	VFS_UNLOCK_GIANT(vfslocked);
363	return (error);
364}
365
366/*
367 * Get statistics on all filesystems.
368 */
369#ifndef _SYS_SYSPROTO_H_
370struct getfsstat_args {
371	struct statfs *buf;
372	long bufsize;
373	int flags;
374};
375#endif
376int
377getfsstat(td, uap)
378	struct thread *td;
379	register struct getfsstat_args /* {
380		struct statfs *buf;
381		long bufsize;
382		int flags;
383	} */ *uap;
384{
385
386	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
387	    uap->flags));
388}
389
390/*
391 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
392 * 	The caller is responsible for freeing memory which will be allocated
393 *	in '*buf'.
394 */
395int
396kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
397    enum uio_seg bufseg, int flags)
398{
399	struct mount *mp, *nmp;
400	struct statfs *sfsp, *sp, sb;
401	size_t count, maxcount;
402	int vfslocked;
403	int error;
404
405	maxcount = bufsize / sizeof(struct statfs);
406	if (bufsize == 0)
407		sfsp = NULL;
408	else if (bufseg == UIO_USERSPACE)
409		sfsp = *buf;
410	else /* if (bufseg == UIO_SYSSPACE) */ {
411		count = 0;
412		mtx_lock(&mountlist_mtx);
413		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
414			count++;
415		}
416		mtx_unlock(&mountlist_mtx);
417		if (maxcount > count)
418			maxcount = count;
419		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
420		    M_WAITOK);
421	}
422	count = 0;
423	mtx_lock(&mountlist_mtx);
424	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
425		if (prison_canseemount(td->td_ucred, mp) != 0) {
426			nmp = TAILQ_NEXT(mp, mnt_list);
427			continue;
428		}
429#ifdef MAC
430		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
431			nmp = TAILQ_NEXT(mp, mnt_list);
432			continue;
433		}
434#endif
435		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
436			nmp = TAILQ_NEXT(mp, mnt_list);
437			continue;
438		}
439		vfslocked = VFS_LOCK_GIANT(mp);
440		if (sfsp && count < maxcount) {
441			sp = &mp->mnt_stat;
442			/*
443			 * Set these in case the underlying filesystem
444			 * fails to do so.
445			 */
446			sp->f_version = STATFS_VERSION;
447			sp->f_namemax = NAME_MAX;
448			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
449			/*
450			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
451			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
452			 * overrides MNT_WAIT.
453			 */
454			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
455			    (flags & MNT_WAIT)) &&
456			    (error = VFS_STATFS(mp, sp, td))) {
457				VFS_UNLOCK_GIANT(vfslocked);
458				mtx_lock(&mountlist_mtx);
459				nmp = TAILQ_NEXT(mp, mnt_list);
460				vfs_unbusy(mp, td);
461				continue;
462			}
463			if (priv_check(td, PRIV_VFS_GENERATION)) {
464				bcopy(sp, &sb, sizeof(sb));
465				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
466				prison_enforce_statfs(td->td_ucred, mp, &sb);
467				sp = &sb;
468			}
469			if (bufseg == UIO_SYSSPACE)
470				bcopy(sp, sfsp, sizeof(*sp));
471			else /* if (bufseg == UIO_USERSPACE) */ {
472				error = copyout(sp, sfsp, sizeof(*sp));
473				if (error) {
474					vfs_unbusy(mp, td);
475					VFS_UNLOCK_GIANT(vfslocked);
476					return (error);
477				}
478			}
479			sfsp++;
480		}
481		VFS_UNLOCK_GIANT(vfslocked);
482		count++;
483		mtx_lock(&mountlist_mtx);
484		nmp = TAILQ_NEXT(mp, mnt_list);
485		vfs_unbusy(mp, td);
486	}
487	mtx_unlock(&mountlist_mtx);
488	if (sfsp && count > maxcount)
489		td->td_retval[0] = maxcount;
490	else
491		td->td_retval[0] = count;
492	return (0);
493}
494
495#ifdef COMPAT_FREEBSD4
496/*
497 * Get old format filesystem statistics.
498 */
499static void cvtstatfs(struct statfs *, struct ostatfs *);
500
501#ifndef _SYS_SYSPROTO_H_
502struct freebsd4_statfs_args {
503	char *path;
504	struct ostatfs *buf;
505};
506#endif
507int
508freebsd4_statfs(td, uap)
509	struct thread *td;
510	struct freebsd4_statfs_args /* {
511		char *path;
512		struct ostatfs *buf;
513	} */ *uap;
514{
515	struct ostatfs osb;
516	struct statfs sf;
517	int error;
518
519	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
520	if (error)
521		return (error);
522	cvtstatfs(&sf, &osb);
523	return (copyout(&osb, uap->buf, sizeof(osb)));
524}
525
526/*
527 * Get filesystem statistics.
528 */
529#ifndef _SYS_SYSPROTO_H_
530struct freebsd4_fstatfs_args {
531	int fd;
532	struct ostatfs *buf;
533};
534#endif
535int
536freebsd4_fstatfs(td, uap)
537	struct thread *td;
538	struct freebsd4_fstatfs_args /* {
539		int fd;
540		struct ostatfs *buf;
541	} */ *uap;
542{
543	struct ostatfs osb;
544	struct statfs sf;
545	int error;
546
547	error = kern_fstatfs(td, uap->fd, &sf);
548	if (error)
549		return (error);
550	cvtstatfs(&sf, &osb);
551	return (copyout(&osb, uap->buf, sizeof(osb)));
552}
553
554/*
555 * Get statistics on all filesystems.
556 */
557#ifndef _SYS_SYSPROTO_H_
558struct freebsd4_getfsstat_args {
559	struct ostatfs *buf;
560	long bufsize;
561	int flags;
562};
563#endif
564int
565freebsd4_getfsstat(td, uap)
566	struct thread *td;
567	register struct freebsd4_getfsstat_args /* {
568		struct ostatfs *buf;
569		long bufsize;
570		int flags;
571	} */ *uap;
572{
573	struct statfs *buf, *sp;
574	struct ostatfs osb;
575	size_t count, size;
576	int error;
577
578	count = uap->bufsize / sizeof(struct ostatfs);
579	size = count * sizeof(struct statfs);
580	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
581	if (size > 0) {
582		count = td->td_retval[0];
583		sp = buf;
584		while (count > 0 && error == 0) {
585			cvtstatfs(sp, &osb);
586			error = copyout(&osb, uap->buf, sizeof(osb));
587			sp++;
588			uap->buf++;
589			count--;
590		}
591		free(buf, M_TEMP);
592	}
593	return (error);
594}
595
596/*
597 * Implement fstatfs() for (NFS) file handles.
598 */
599#ifndef _SYS_SYSPROTO_H_
600struct freebsd4_fhstatfs_args {
601	struct fhandle *u_fhp;
602	struct ostatfs *buf;
603};
604#endif
605int
606freebsd4_fhstatfs(td, uap)
607	struct thread *td;
608	struct freebsd4_fhstatfs_args /* {
609		struct fhandle *u_fhp;
610		struct ostatfs *buf;
611	} */ *uap;
612{
613	struct ostatfs osb;
614	struct statfs sf;
615	fhandle_t fh;
616	int error;
617
618	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
619	if (error)
620		return (error);
621	error = kern_fhstatfs(td, fh, &sf);
622	if (error)
623		return (error);
624	cvtstatfs(&sf, &osb);
625	return (copyout(&osb, uap->buf, sizeof(osb)));
626}
627
628/*
629 * Convert a new format statfs structure to an old format statfs structure.
630 */
631static void
632cvtstatfs(nsp, osp)
633	struct statfs *nsp;
634	struct ostatfs *osp;
635{
636
637	bzero(osp, sizeof(*osp));
638	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
639	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
640	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
641	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
642	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
643	osp->f_files = MIN(nsp->f_files, LONG_MAX);
644	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
645	osp->f_owner = nsp->f_owner;
646	osp->f_type = nsp->f_type;
647	osp->f_flags = nsp->f_flags;
648	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
649	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
650	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
651	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
652	strlcpy(osp->f_fstypename, nsp->f_fstypename,
653	    MIN(MFSNAMELEN, OMFSNAMELEN));
654	strlcpy(osp->f_mntonname, nsp->f_mntonname,
655	    MIN(MNAMELEN, OMNAMELEN));
656	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
657	    MIN(MNAMELEN, OMNAMELEN));
658	osp->f_fsid = nsp->f_fsid;
659}
660#endif /* COMPAT_FREEBSD4 */
661
662/*
663 * Change current working directory to a given file descriptor.
664 */
665#ifndef _SYS_SYSPROTO_H_
666struct fchdir_args {
667	int	fd;
668};
669#endif
670int
671fchdir(td, uap)
672	struct thread *td;
673	struct fchdir_args /* {
674		int fd;
675	} */ *uap;
676{
677	register struct filedesc *fdp = td->td_proc->p_fd;
678	struct vnode *vp, *tdp, *vpold;
679	struct mount *mp;
680	struct file *fp;
681	int vfslocked;
682	int error;
683
684	AUDIT_ARG(fd, uap->fd);
685	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
686		return (error);
687	vp = fp->f_vnode;
688	VREF(vp);
689	fdrop(fp, td);
690	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
691	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
692	AUDIT_ARG(vnode, vp, ARG_VNODE1);
693	error = change_dir(vp, td);
694	while (!error && (mp = vp->v_mountedhere) != NULL) {
695		int tvfslocked;
696		if (vfs_busy(mp, 0, 0, td))
697			continue;
698		tvfslocked = VFS_LOCK_GIANT(mp);
699		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
700		vfs_unbusy(mp, td);
701		if (error) {
702			VFS_UNLOCK_GIANT(tvfslocked);
703			break;
704		}
705		vput(vp);
706		VFS_UNLOCK_GIANT(vfslocked);
707		vp = tdp;
708		vfslocked = tvfslocked;
709	}
710	if (error) {
711		vput(vp);
712		VFS_UNLOCK_GIANT(vfslocked);
713		return (error);
714	}
715	VOP_UNLOCK(vp, 0, td);
716	VFS_UNLOCK_GIANT(vfslocked);
717	FILEDESC_LOCK_FAST(fdp);
718	vpold = fdp->fd_cdir;
719	fdp->fd_cdir = vp;
720	FILEDESC_UNLOCK_FAST(fdp);
721	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
722	vrele(vpold);
723	VFS_UNLOCK_GIANT(vfslocked);
724	return (0);
725}
726
727/*
728 * Change current working directory (``.'').
729 */
730#ifndef _SYS_SYSPROTO_H_
731struct chdir_args {
732	char	*path;
733};
734#endif
735int
736chdir(td, uap)
737	struct thread *td;
738	struct chdir_args /* {
739		char *path;
740	} */ *uap;
741{
742
743	return (kern_chdir(td, uap->path, UIO_USERSPACE));
744}
745
746int
747kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
748{
749	register struct filedesc *fdp = td->td_proc->p_fd;
750	int error;
751	struct nameidata nd;
752	struct vnode *vp;
753	int vfslocked;
754
755	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
756	    pathseg, path, td);
757	if ((error = namei(&nd)) != 0)
758		return (error);
759	vfslocked = NDHASGIANT(&nd);
760	if ((error = change_dir(nd.ni_vp, td)) != 0) {
761		vput(nd.ni_vp);
762		VFS_UNLOCK_GIANT(vfslocked);
763		NDFREE(&nd, NDF_ONLY_PNBUF);
764		return (error);
765	}
766	VOP_UNLOCK(nd.ni_vp, 0, td);
767	VFS_UNLOCK_GIANT(vfslocked);
768	NDFREE(&nd, NDF_ONLY_PNBUF);
769	FILEDESC_LOCK_FAST(fdp);
770	vp = fdp->fd_cdir;
771	fdp->fd_cdir = nd.ni_vp;
772	FILEDESC_UNLOCK_FAST(fdp);
773	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
774	vrele(vp);
775	VFS_UNLOCK_GIANT(vfslocked);
776	return (0);
777}
778
779/*
780 * Helper function for raised chroot(2) security function:  Refuse if
781 * any filedescriptors are open directories.
782 */
783static int
784chroot_refuse_vdir_fds(fdp)
785	struct filedesc *fdp;
786{
787	struct vnode *vp;
788	struct file *fp;
789	int fd;
790
791	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
792	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
793		fp = fget_locked(fdp, fd);
794		if (fp == NULL)
795			continue;
796		if (fp->f_type == DTYPE_VNODE) {
797			vp = fp->f_vnode;
798			if (vp->v_type == VDIR)
799				return (EPERM);
800		}
801	}
802	return (0);
803}
804
805/*
806 * This sysctl determines if we will allow a process to chroot(2) if it
807 * has a directory open:
808 *	0: disallowed for all processes.
809 *	1: allowed for processes that were not already chroot(2)'ed.
810 *	2: allowed for all processes.
811 */
812
813static int chroot_allow_open_directories = 1;
814
815SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
816     &chroot_allow_open_directories, 0, "");
817
818/*
819 * Change notion of root (``/'') directory.
820 */
821#ifndef _SYS_SYSPROTO_H_
822struct chroot_args {
823	char	*path;
824};
825#endif
826int
827chroot(td, uap)
828	struct thread *td;
829	struct chroot_args /* {
830		char *path;
831	} */ *uap;
832{
833	int error;
834	struct nameidata nd;
835	int vfslocked;
836
837	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
838	    SUSER_ALLOWJAIL);
839	if (error)
840		return (error);
841	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
842	    UIO_USERSPACE, uap->path, td);
843	error = namei(&nd);
844	if (error)
845		goto error;
846	vfslocked = NDHASGIANT(&nd);
847	if ((error = change_dir(nd.ni_vp, td)) != 0)
848		goto e_vunlock;
849#ifdef MAC
850	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
851		goto e_vunlock;
852#endif
853	VOP_UNLOCK(nd.ni_vp, 0, td);
854	error = change_root(nd.ni_vp, td);
855	vrele(nd.ni_vp);
856	VFS_UNLOCK_GIANT(vfslocked);
857	NDFREE(&nd, NDF_ONLY_PNBUF);
858	return (error);
859e_vunlock:
860	vput(nd.ni_vp);
861	VFS_UNLOCK_GIANT(vfslocked);
862error:
863	NDFREE(&nd, NDF_ONLY_PNBUF);
864	return (error);
865}
866
867/*
868 * Common routine for chroot and chdir.  Callers must provide a locked vnode
869 * instance.
870 */
871int
872change_dir(vp, td)
873	struct vnode *vp;
874	struct thread *td;
875{
876	int error;
877
878	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
879	if (vp->v_type != VDIR)
880		return (ENOTDIR);
881#ifdef MAC
882	error = mac_check_vnode_chdir(td->td_ucred, vp);
883	if (error)
884		return (error);
885#endif
886	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
887	return (error);
888}
889
890/*
891 * Common routine for kern_chroot() and jail_attach().  The caller is
892 * responsible for invoking priv_check() and mac_check_chroot() to authorize
893 * this operation.
894 */
895int
896change_root(vp, td)
897	struct vnode *vp;
898	struct thread *td;
899{
900	struct filedesc *fdp;
901	struct vnode *oldvp;
902	int vfslocked;
903	int error;
904
905	VFS_ASSERT_GIANT(vp->v_mount);
906	fdp = td->td_proc->p_fd;
907	FILEDESC_LOCK(fdp);
908	if (chroot_allow_open_directories == 0 ||
909	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
910		error = chroot_refuse_vdir_fds(fdp);
911		if (error) {
912			FILEDESC_UNLOCK(fdp);
913			return (error);
914		}
915	}
916	oldvp = fdp->fd_rdir;
917	fdp->fd_rdir = vp;
918	VREF(fdp->fd_rdir);
919	if (!fdp->fd_jdir) {
920		fdp->fd_jdir = vp;
921		VREF(fdp->fd_jdir);
922	}
923	FILEDESC_UNLOCK(fdp);
924	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
925	vrele(oldvp);
926	VFS_UNLOCK_GIANT(vfslocked);
927	return (0);
928}
929
930/*
931 * Check permissions, allocate an open file structure, and call the device
932 * open routine if any.
933 */
934#ifndef _SYS_SYSPROTO_H_
935struct open_args {
936	char	*path;
937	int	flags;
938	int	mode;
939};
940#endif
941int
942open(td, uap)
943	struct thread *td;
944	register struct open_args /* {
945		char *path;
946		int flags;
947		int mode;
948	} */ *uap;
949{
950
951	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
952}
953
954int
955kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
956    int mode)
957{
958	struct proc *p = td->td_proc;
959	struct filedesc *fdp = p->p_fd;
960	struct file *fp;
961	struct vnode *vp;
962	struct vattr vat;
963	struct mount *mp;
964	int cmode;
965	struct file *nfp;
966	int type, indx, error;
967	struct flock lf;
968	struct nameidata nd;
969	int vfslocked;
970
971	AUDIT_ARG(fflags, flags);
972	AUDIT_ARG(mode, mode);
973	if ((flags & O_ACCMODE) == O_ACCMODE)
974		return (EINVAL);
975	flags = FFLAGS(flags);
976	error = falloc(td, &nfp, &indx);
977	if (error)
978		return (error);
979	/* An extra reference on `nfp' has been held for us by falloc(). */
980	fp = nfp;
981	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
982	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
983	td->td_dupfd = -1;		/* XXX check for fdopen */
984	error = vn_open(&nd, &flags, cmode, indx);
985	if (error) {
986		/*
987		 * If the vn_open replaced the method vector, something
988		 * wonderous happened deep below and we just pass it up
989		 * pretending we know what we do.
990		 */
991		if (error == ENXIO && fp->f_ops != &badfileops) {
992			fdrop(fp, td);
993			td->td_retval[0] = indx;
994			return (0);
995		}
996
997		/*
998		 * release our own reference
999		 */
1000		fdrop(fp, td);
1001
1002		/*
1003		 * handle special fdopen() case.  bleh.  dupfdopen() is
1004		 * responsible for dropping the old contents of ofiles[indx]
1005		 * if it succeeds.
1006		 */
1007		if ((error == ENODEV || error == ENXIO) &&
1008		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1009		    (error =
1010			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1011			td->td_retval[0] = indx;
1012			return (0);
1013		}
1014		/*
1015		 * Clean up the descriptor, but only if another thread hadn't
1016		 * replaced or closed it.
1017		 */
1018		fdclose(fdp, fp, indx, td);
1019
1020		if (error == ERESTART)
1021			error = EINTR;
1022		return (error);
1023	}
1024	td->td_dupfd = 0;
1025	vfslocked = NDHASGIANT(&nd);
1026	NDFREE(&nd, NDF_ONLY_PNBUF);
1027	vp = nd.ni_vp;
1028
1029	/*
1030	 * There should be 2 references on the file, one from the descriptor
1031	 * table, and one for us.
1032	 *
1033	 * Handle the case where someone closed the file (via its file
1034	 * descriptor) while we were blocked.  The end result should look
1035	 * like opening the file succeeded but it was immediately closed.
1036	 * We call vn_close() manually because we haven't yet hooked up
1037	 * the various 'struct file' fields.
1038	 */
1039	FILEDESC_LOCK(fdp);
1040	FILE_LOCK(fp);
1041	if (fp->f_count == 1) {
1042		mp = vp->v_mount;
1043		KASSERT(fdp->fd_ofiles[indx] != fp,
1044		    ("Open file descriptor lost all refs"));
1045		FILE_UNLOCK(fp);
1046		FILEDESC_UNLOCK(fdp);
1047		VOP_UNLOCK(vp, 0, td);
1048		vn_close(vp, flags & FMASK, fp->f_cred, td);
1049		VFS_UNLOCK_GIANT(vfslocked);
1050		fdrop(fp, td);
1051		td->td_retval[0] = indx;
1052		return (0);
1053	}
1054	fp->f_vnode = vp;
1055	if (fp->f_data == NULL)
1056		fp->f_data = vp;
1057	fp->f_flag = flags & FMASK;
1058	if (fp->f_ops == &badfileops)
1059		fp->f_ops = &vnops;
1060	fp->f_seqcount = 1;
1061	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1062	FILE_UNLOCK(fp);
1063	FILEDESC_UNLOCK(fdp);
1064
1065	VOP_UNLOCK(vp, 0, td);
1066	if (flags & (O_EXLOCK | O_SHLOCK)) {
1067		lf.l_whence = SEEK_SET;
1068		lf.l_start = 0;
1069		lf.l_len = 0;
1070		if (flags & O_EXLOCK)
1071			lf.l_type = F_WRLCK;
1072		else
1073			lf.l_type = F_RDLCK;
1074		type = F_FLOCK;
1075		if ((flags & FNONBLOCK) == 0)
1076			type |= F_WAIT;
1077		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1078			    type)) != 0)
1079			goto bad;
1080		fp->f_flag |= FHASLOCK;
1081	}
1082	if (flags & O_TRUNC) {
1083		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1084			goto bad;
1085		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1086		VATTR_NULL(&vat);
1087		vat.va_size = 0;
1088		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1089#ifdef MAC
1090		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1091		if (error == 0)
1092#endif
1093			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1094		VOP_UNLOCK(vp, 0, td);
1095		vn_finished_write(mp);
1096		if (error)
1097			goto bad;
1098	}
1099	VFS_UNLOCK_GIANT(vfslocked);
1100	/*
1101	 * Release our private reference, leaving the one associated with
1102	 * the descriptor table intact.
1103	 */
1104	fdrop(fp, td);
1105	td->td_retval[0] = indx;
1106	return (0);
1107bad:
1108	VFS_UNLOCK_GIANT(vfslocked);
1109	fdclose(fdp, fp, indx, td);
1110	fdrop(fp, td);
1111	return (error);
1112}
1113
#ifdef COMPAT_43
/*
 * Old creat(2): implemented as kern_open() with the flags
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	const int oflags = O_WRONLY | O_CREAT | O_TRUNC;

	return (kern_open(td, uap->path, UIO_USERSPACE, oflags, uap->mode));
}
#endif /* COMPAT_43 */
1137
1138/*
1139 * Create a special file.
1140 */
1141#ifndef _SYS_SYSPROTO_H_
1142struct mknod_args {
1143	char	*path;
1144	int	mode;
1145	int	dev;
1146};
1147#endif
1148int
1149mknod(td, uap)
1150	struct thread *td;
1151	register struct mknod_args /* {
1152		char *path;
1153		int mode;
1154		int dev;
1155	} */ *uap;
1156{
1157
1158	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1159}
1160
1161int
1162kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1163    int dev)
1164{
1165	struct vnode *vp;
1166	struct mount *mp;
1167	struct vattr vattr;
1168	int error;
1169	int whiteout = 0;
1170	struct nameidata nd;
1171	int vfslocked;
1172
1173	AUDIT_ARG(mode, mode);
1174	AUDIT_ARG(dev, dev);
1175	switch (mode & S_IFMT) {
1176	case S_IFCHR:
1177	case S_IFBLK:
1178		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1179		break;
1180	case S_IFMT:
1181		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1182		break;
1183	case S_IFWHT:
1184		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1185		break;
1186	default:
1187		error = EINVAL;
1188		break;
1189	}
1190	if (error)
1191		return (error);
1192restart:
1193	bwillwrite();
1194	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1195	    pathseg, path, td);
1196	if ((error = namei(&nd)) != 0)
1197		return (error);
1198	vfslocked = NDHASGIANT(&nd);
1199	vp = nd.ni_vp;
1200	if (vp != NULL) {
1201		NDFREE(&nd, NDF_ONLY_PNBUF);
1202		if (vp == nd.ni_dvp)
1203			vrele(nd.ni_dvp);
1204		else
1205			vput(nd.ni_dvp);
1206		vrele(vp);
1207		VFS_UNLOCK_GIANT(vfslocked);
1208		return (EEXIST);
1209	} else {
1210		VATTR_NULL(&vattr);
1211		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1212		vattr.va_mode = (mode & ALLPERMS) &
1213		    ~td->td_proc->p_fd->fd_cmask;
1214		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1215		vattr.va_rdev = dev;
1216		whiteout = 0;
1217
1218		switch (mode & S_IFMT) {
1219		case S_IFMT:	/* used by badsect to flag bad sectors */
1220			vattr.va_type = VBAD;
1221			break;
1222		case S_IFCHR:
1223			vattr.va_type = VCHR;
1224			break;
1225		case S_IFBLK:
1226			vattr.va_type = VBLK;
1227			break;
1228		case S_IFWHT:
1229			whiteout = 1;
1230			break;
1231		default:
1232			panic("kern_mknod: invalid mode");
1233		}
1234	}
1235	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1236		NDFREE(&nd, NDF_ONLY_PNBUF);
1237		vput(nd.ni_dvp);
1238		VFS_UNLOCK_GIANT(vfslocked);
1239		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1240			return (error);
1241		goto restart;
1242	}
1243#ifdef MAC
1244	if (error == 0 && !whiteout)
1245		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1246		    &nd.ni_cnd, &vattr);
1247#endif
1248	if (!error) {
1249		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1250		if (whiteout)
1251			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1252		else {
1253			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1254						&nd.ni_cnd, &vattr);
1255			if (error == 0)
1256				vput(nd.ni_vp);
1257		}
1258	}
1259	NDFREE(&nd, NDF_ONLY_PNBUF);
1260	vput(nd.ni_dvp);
1261	vn_finished_write(mp);
1262	VFS_UNLOCK_GIANT(vfslocked);
1263	return (error);
1264}
1265
1266/*
1267 * Create a named pipe.
1268 */
1269#ifndef _SYS_SYSPROTO_H_
1270struct mkfifo_args {
1271	char	*path;
1272	int	mode;
1273};
1274#endif
1275int
1276mkfifo(td, uap)
1277	struct thread *td;
1278	register struct mkfifo_args /* {
1279		char *path;
1280		int mode;
1281	} */ *uap;
1282{
1283
1284	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1285}
1286
1287int
1288kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1289{
1290	struct mount *mp;
1291	struct vattr vattr;
1292	int error;
1293	struct nameidata nd;
1294	int vfslocked;
1295
1296	AUDIT_ARG(mode, mode);
1297restart:
1298	bwillwrite();
1299	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1300	    pathseg, path, td);
1301	if ((error = namei(&nd)) != 0)
1302		return (error);
1303	vfslocked = NDHASGIANT(&nd);
1304	if (nd.ni_vp != NULL) {
1305		NDFREE(&nd, NDF_ONLY_PNBUF);
1306		if (nd.ni_vp == nd.ni_dvp)
1307			vrele(nd.ni_dvp);
1308		else
1309			vput(nd.ni_dvp);
1310		vrele(nd.ni_vp);
1311		VFS_UNLOCK_GIANT(vfslocked);
1312		return (EEXIST);
1313	}
1314	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1315		NDFREE(&nd, NDF_ONLY_PNBUF);
1316		vput(nd.ni_dvp);
1317		VFS_UNLOCK_GIANT(vfslocked);
1318		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1319			return (error);
1320		goto restart;
1321	}
1322	VATTR_NULL(&vattr);
1323	vattr.va_type = VFIFO;
1324	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1325	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1326	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1327#ifdef MAC
1328	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1329	    &vattr);
1330	if (error)
1331		goto out;
1332#endif
1333	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1334	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1335	if (error == 0)
1336		vput(nd.ni_vp);
1337#ifdef MAC
1338out:
1339#endif
1340	vput(nd.ni_dvp);
1341	vn_finished_write(mp);
1342	VFS_UNLOCK_GIANT(vfslocked);
1343	NDFREE(&nd, NDF_ONLY_PNBUF);
1344	return (error);
1345}
1346
1347/*
1348 * Make a hard file link.
1349 */
1350#ifndef _SYS_SYSPROTO_H_
1351struct link_args {
1352	char	*path;
1353	char	*link;
1354};
1355#endif
1356int
1357link(td, uap)
1358	struct thread *td;
1359	register struct link_args /* {
1360		char *path;
1361		char *link;
1362	} */ *uap;
1363{
1364	int error;
1365
1366	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1367	return (error);
1368}
1369
1370static int hardlink_check_uid = 0;
1371SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1372    &hardlink_check_uid, 0,
1373    "Unprivileged processes cannot create hard links to files owned by other "
1374    "users");
1375static int hardlink_check_gid = 0;
1376SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1377    &hardlink_check_gid, 0,
1378    "Unprivileged processes cannot create hard links to files owned by other "
1379    "groups");
1380
1381static int
1382can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1383{
1384	struct vattr va;
1385	int error;
1386
1387	if (!hardlink_check_uid && !hardlink_check_gid)
1388		return (0);
1389
1390	error = VOP_GETATTR(vp, &va, cred, td);
1391	if (error != 0)
1392		return (error);
1393
1394	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1395		error = priv_check_cred(cred, PRIV_VFS_LINK,
1396		    SUSER_ALLOWJAIL);
1397		if (error)
1398			return (error);
1399	}
1400
1401	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1402		error = priv_check_cred(cred, PRIV_VFS_LINK,
1403		    SUSER_ALLOWJAIL);
1404		if (error)
1405			return (error);
1406	}
1407
1408	return (0);
1409}
1410
1411int
1412kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1413{
1414	struct vnode *vp;
1415	struct mount *mp;
1416	struct nameidata nd;
1417	int vfslocked;
1418	int lvfslocked;
1419	int error;
1420
1421	bwillwrite();
1422	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1423	if ((error = namei(&nd)) != 0)
1424		return (error);
1425	vfslocked = NDHASGIANT(&nd);
1426	NDFREE(&nd, NDF_ONLY_PNBUF);
1427	vp = nd.ni_vp;
1428	if (vp->v_type == VDIR) {
1429		vrele(vp);
1430		VFS_UNLOCK_GIANT(vfslocked);
1431		return (EPERM);		/* POSIX */
1432	}
1433	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1434		vrele(vp);
1435		VFS_UNLOCK_GIANT(vfslocked);
1436		return (error);
1437	}
1438	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1439	    segflg, link, td);
1440	if ((error = namei(&nd)) == 0) {
1441		lvfslocked = NDHASGIANT(&nd);
1442		if (nd.ni_vp != NULL) {
1443			if (nd.ni_dvp == nd.ni_vp)
1444				vrele(nd.ni_dvp);
1445			else
1446				vput(nd.ni_dvp);
1447			vrele(nd.ni_vp);
1448			error = EEXIST;
1449		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1450		    == 0) {
1451			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1452			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1453			error = can_hardlink(vp, td, td->td_ucred);
1454			if (error == 0)
1455#ifdef MAC
1456				error = mac_check_vnode_link(td->td_ucred,
1457				    nd.ni_dvp, vp, &nd.ni_cnd);
1458			if (error == 0)
1459#endif
1460				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1461			VOP_UNLOCK(vp, 0, td);
1462			vput(nd.ni_dvp);
1463		}
1464		NDFREE(&nd, NDF_ONLY_PNBUF);
1465		VFS_UNLOCK_GIANT(lvfslocked);
1466	}
1467	vrele(vp);
1468	vn_finished_write(mp);
1469	VFS_UNLOCK_GIANT(vfslocked);
1470	return (error);
1471}
1472
1473/*
1474 * Make a symbolic link.
1475 */
1476#ifndef _SYS_SYSPROTO_H_
1477struct symlink_args {
1478	char	*path;
1479	char	*link;
1480};
1481#endif
1482int
1483symlink(td, uap)
1484	struct thread *td;
1485	register struct symlink_args /* {
1486		char *path;
1487		char *link;
1488	} */ *uap;
1489{
1490
1491	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1492}
1493
1494int
1495kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1496{
1497	struct mount *mp;
1498	struct vattr vattr;
1499	char *syspath;
1500	int error;
1501	struct nameidata nd;
1502	int vfslocked;
1503
1504	if (segflg == UIO_SYSSPACE) {
1505		syspath = path;
1506	} else {
1507		syspath = uma_zalloc(namei_zone, M_WAITOK);
1508		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1509			goto out;
1510	}
1511	AUDIT_ARG(text, syspath);
1512restart:
1513	bwillwrite();
1514	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1515	    segflg, link, td);
1516	if ((error = namei(&nd)) != 0)
1517		goto out;
1518	vfslocked = NDHASGIANT(&nd);
1519	if (nd.ni_vp) {
1520		NDFREE(&nd, NDF_ONLY_PNBUF);
1521		if (nd.ni_vp == nd.ni_dvp)
1522			vrele(nd.ni_dvp);
1523		else
1524			vput(nd.ni_dvp);
1525		vrele(nd.ni_vp);
1526		VFS_UNLOCK_GIANT(vfslocked);
1527		error = EEXIST;
1528		goto out;
1529	}
1530	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1531		NDFREE(&nd, NDF_ONLY_PNBUF);
1532		vput(nd.ni_dvp);
1533		VFS_UNLOCK_GIANT(vfslocked);
1534		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1535			goto out;
1536		goto restart;
1537	}
1538	VATTR_NULL(&vattr);
1539	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1540	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1541	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1542#ifdef MAC
1543	vattr.va_type = VLNK;
1544	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1545	    &vattr);
1546	if (error)
1547		goto out2;
1548#endif
1549	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1550	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1551	if (error == 0)
1552		vput(nd.ni_vp);
1553#ifdef MAC
1554out2:
1555#endif
1556	NDFREE(&nd, NDF_ONLY_PNBUF);
1557	vput(nd.ni_dvp);
1558	vn_finished_write(mp);
1559	VFS_UNLOCK_GIANT(vfslocked);
1560out:
1561	if (segflg != UIO_SYSSPACE)
1562		uma_zfree(namei_zone, syspath);
1563	return (error);
1564}
1565
1566/*
1567 * Delete a whiteout from the filesystem.
1568 */
1569int
1570undelete(td, uap)
1571	struct thread *td;
1572	register struct undelete_args /* {
1573		char *path;
1574	} */ *uap;
1575{
1576	int error;
1577	struct mount *mp;
1578	struct nameidata nd;
1579	int vfslocked;
1580
1581restart:
1582	bwillwrite();
1583	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1584	    UIO_USERSPACE, uap->path, td);
1585	error = namei(&nd);
1586	if (error)
1587		return (error);
1588	vfslocked = NDHASGIANT(&nd);
1589
1590	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1591		NDFREE(&nd, NDF_ONLY_PNBUF);
1592		if (nd.ni_vp == nd.ni_dvp)
1593			vrele(nd.ni_dvp);
1594		else
1595			vput(nd.ni_dvp);
1596		if (nd.ni_vp)
1597			vrele(nd.ni_vp);
1598		VFS_UNLOCK_GIANT(vfslocked);
1599		return (EEXIST);
1600	}
1601	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1602		NDFREE(&nd, NDF_ONLY_PNBUF);
1603		vput(nd.ni_dvp);
1604		VFS_UNLOCK_GIANT(vfslocked);
1605		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1606			return (error);
1607		goto restart;
1608	}
1609	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1610	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1611	NDFREE(&nd, NDF_ONLY_PNBUF);
1612	vput(nd.ni_dvp);
1613	vn_finished_write(mp);
1614	VFS_UNLOCK_GIANT(vfslocked);
1615	return (error);
1616}
1617
1618/*
1619 * Delete a name from the filesystem.
1620 */
1621#ifndef _SYS_SYSPROTO_H_
1622struct unlink_args {
1623	char	*path;
1624};
1625#endif
1626int
1627unlink(td, uap)
1628	struct thread *td;
1629	struct unlink_args /* {
1630		char *path;
1631	} */ *uap;
1632{
1633	int error;
1634
1635	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1636	return (error);
1637}
1638
1639int
1640kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1641{
1642	struct mount *mp;
1643	struct vnode *vp;
1644	int error;
1645	struct nameidata nd;
1646	int vfslocked;
1647
1648restart:
1649	bwillwrite();
1650	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1651	    pathseg, path, td);
1652	if ((error = namei(&nd)) != 0)
1653		return (error == EINVAL ? EPERM : error);
1654	vfslocked = NDHASGIANT(&nd);
1655	vp = nd.ni_vp;
1656	if (vp->v_type == VDIR)
1657		error = EPERM;		/* POSIX */
1658	else {
1659		/*
1660		 * The root of a mounted filesystem cannot be deleted.
1661		 *
1662		 * XXX: can this only be a VDIR case?
1663		 */
1664		if (vp->v_vflag & VV_ROOT)
1665			error = EBUSY;
1666	}
1667	if (error == 0) {
1668		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1669			NDFREE(&nd, NDF_ONLY_PNBUF);
1670			vput(nd.ni_dvp);
1671			if (vp == nd.ni_dvp)
1672				vrele(vp);
1673			else
1674				vput(vp);
1675			VFS_UNLOCK_GIANT(vfslocked);
1676			if ((error = vn_start_write(NULL, &mp,
1677			    V_XSLEEP | PCATCH)) != 0)
1678				return (error);
1679			goto restart;
1680		}
1681#ifdef MAC
1682		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1683		    &nd.ni_cnd);
1684		if (error)
1685			goto out;
1686#endif
1687		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1688		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1689#ifdef MAC
1690out:
1691#endif
1692		vn_finished_write(mp);
1693	}
1694	NDFREE(&nd, NDF_ONLY_PNBUF);
1695	vput(nd.ni_dvp);
1696	if (vp == nd.ni_dvp)
1697		vrele(vp);
1698	else
1699		vput(vp);
1700	VFS_UNLOCK_GIANT(vfslocked);
1701	return (error);
1702}
1703
1704/*
1705 * Reposition read/write file offset.
1706 */
1707#ifndef _SYS_SYSPROTO_H_
1708struct lseek_args {
1709	int	fd;
1710	int	pad;
1711	off_t	offset;
1712	int	whence;
1713};
1714#endif
1715int
1716lseek(td, uap)
1717	struct thread *td;
1718	register struct lseek_args /* {
1719		int fd;
1720		int pad;
1721		off_t offset;
1722		int whence;
1723	} */ *uap;
1724{
1725	struct ucred *cred = td->td_ucred;
1726	struct file *fp;
1727	struct vnode *vp;
1728	struct vattr vattr;
1729	off_t offset;
1730	int error, noneg;
1731	int vfslocked;
1732
1733	if ((error = fget(td, uap->fd, &fp)) != 0)
1734		return (error);
1735	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1736		fdrop(fp, td);
1737		return (ESPIPE);
1738	}
1739	vp = fp->f_vnode;
1740	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1741	noneg = (vp->v_type != VCHR);
1742	offset = uap->offset;
1743	switch (uap->whence) {
1744	case L_INCR:
1745		if (noneg &&
1746		    (fp->f_offset < 0 ||
1747		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1748			error = EOVERFLOW;
1749			break;
1750		}
1751		offset += fp->f_offset;
1752		break;
1753	case L_XTND:
1754		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1755		error = VOP_GETATTR(vp, &vattr, cred, td);
1756		VOP_UNLOCK(vp, 0, td);
1757		if (error)
1758			break;
1759		if (noneg &&
1760		    (vattr.va_size > OFF_MAX ||
1761		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1762			error = EOVERFLOW;
1763			break;
1764		}
1765		offset += vattr.va_size;
1766		break;
1767	case L_SET:
1768		break;
1769	default:
1770		error = EINVAL;
1771	}
1772	if (error == 0 && noneg && offset < 0)
1773		error = EINVAL;
1774	if (error != 0)
1775		goto drop;
1776	fp->f_offset = offset;
1777	*(off_t *)(td->td_retval) = fp->f_offset;
1778drop:
1779	fdrop(fp, td);
1780	VFS_UNLOCK_GIANT(vfslocked);
1781	return (error);
1782}
1783
1784#if defined(COMPAT_43)
1785/*
1786 * Reposition read/write file offset.
1787 */
1788#ifndef _SYS_SYSPROTO_H_
1789struct olseek_args {
1790	int	fd;
1791	long	offset;
1792	int	whence;
1793};
1794#endif
1795int
1796olseek(td, uap)
1797	struct thread *td;
1798	register struct olseek_args /* {
1799		int fd;
1800		long offset;
1801		int whence;
1802	} */ *uap;
1803{
1804	struct lseek_args /* {
1805		int fd;
1806		int pad;
1807		off_t offset;
1808		int whence;
1809	} */ nuap;
1810	int error;
1811
1812	nuap.fd = uap->fd;
1813	nuap.offset = uap->offset;
1814	nuap.whence = uap->whence;
1815	error = lseek(td, &nuap);
1816	return (error);
1817}
1818#endif /* COMPAT_43 */
1819
1820/*
1821 * Check access permissions using passed credentials.
1822 */
1823static int
1824vn_access(vp, user_flags, cred, td)
1825	struct vnode	*vp;
1826	int		user_flags;
1827	struct ucred	*cred;
1828	struct thread	*td;
1829{
1830	int error, flags;
1831
1832	/* Flags == 0 means only check for existence. */
1833	error = 0;
1834	if (user_flags) {
1835		flags = 0;
1836		if (user_flags & R_OK)
1837			flags |= VREAD;
1838		if (user_flags & W_OK)
1839			flags |= VWRITE;
1840		if (user_flags & X_OK)
1841			flags |= VEXEC;
1842#ifdef MAC
1843		error = mac_check_vnode_access(cred, vp, flags);
1844		if (error)
1845			return (error);
1846#endif
1847		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1848			error = VOP_ACCESS(vp, flags, cred, td);
1849	}
1850	return (error);
1851}
1852
1853/*
1854 * Check access permissions using "real" credentials.
1855 */
1856#ifndef _SYS_SYSPROTO_H_
1857struct access_args {
1858	char	*path;
1859	int	flags;
1860};
1861#endif
1862int
1863access(td, uap)
1864	struct thread *td;
1865	register struct access_args /* {
1866		char *path;
1867		int flags;
1868	} */ *uap;
1869{
1870
1871	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1872}
1873
1874int
1875kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1876{
1877	struct ucred *cred, *tmpcred;
1878	register struct vnode *vp;
1879	struct nameidata nd;
1880	int vfslocked;
1881	int error;
1882
1883	/*
1884	 * Create and modify a temporary credential instead of one that
1885	 * is potentially shared.  This could also mess up socket
1886	 * buffer accounting which can run in an interrupt context.
1887	 */
1888	cred = td->td_ucred;
1889	tmpcred = crdup(cred);
1890	tmpcred->cr_uid = cred->cr_ruid;
1891	tmpcred->cr_groups[0] = cred->cr_rgid;
1892	td->td_ucred = tmpcred;
1893	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1894	    pathseg, path, td);
1895	if ((error = namei(&nd)) != 0)
1896		goto out1;
1897	vfslocked = NDHASGIANT(&nd);
1898	vp = nd.ni_vp;
1899
1900	error = vn_access(vp, flags, tmpcred, td);
1901	NDFREE(&nd, NDF_ONLY_PNBUF);
1902	vput(vp);
1903	VFS_UNLOCK_GIANT(vfslocked);
1904out1:
1905	td->td_ucred = cred;
1906	crfree(tmpcred);
1907	return (error);
1908}
1909
1910/*
1911 * Check access permissions using "effective" credentials.
1912 */
1913#ifndef _SYS_SYSPROTO_H_
1914struct eaccess_args {
1915	char	*path;
1916	int	flags;
1917};
1918#endif
1919int
1920eaccess(td, uap)
1921	struct thread *td;
1922	register struct eaccess_args /* {
1923		char *path;
1924		int flags;
1925	} */ *uap;
1926{
1927
1928	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1929}
1930
1931int
1932kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1933{
1934	struct nameidata nd;
1935	struct vnode *vp;
1936	int vfslocked;
1937	int error;
1938
1939	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1940	    pathseg, path, td);
1941	if ((error = namei(&nd)) != 0)
1942		return (error);
1943	vp = nd.ni_vp;
1944	vfslocked = NDHASGIANT(&nd);
1945	error = vn_access(vp, flags, td->td_ucred, td);
1946	NDFREE(&nd, NDF_ONLY_PNBUF);
1947	vput(vp);
1948	VFS_UNLOCK_GIANT(vfslocked);
1949	return (error);
1950}
1951
1952#if defined(COMPAT_43)
1953/*
1954 * Get file status; this version follows links.
1955 */
1956#ifndef _SYS_SYSPROTO_H_
1957struct ostat_args {
1958	char	*path;
1959	struct ostat *ub;
1960};
1961#endif
1962int
1963ostat(td, uap)
1964	struct thread *td;
1965	register struct ostat_args /* {
1966		char *path;
1967		struct ostat *ub;
1968	} */ *uap;
1969{
1970	struct stat sb;
1971	struct ostat osb;
1972	int error;
1973
1974	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1975	if (error)
1976		return (error);
1977	cvtstat(&sb, &osb);
1978	error = copyout(&osb, uap->ub, sizeof (osb));
1979	return (error);
1980}
1981
1982/*
1983 * Get file status; this version does not follow links.
1984 */
1985#ifndef _SYS_SYSPROTO_H_
1986struct olstat_args {
1987	char	*path;
1988	struct ostat *ub;
1989};
1990#endif
1991int
1992olstat(td, uap)
1993	struct thread *td;
1994	register struct olstat_args /* {
1995		char *path;
1996		struct ostat *ub;
1997	} */ *uap;
1998{
1999	struct stat sb;
2000	struct ostat osb;
2001	int error;
2002
2003	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2004	if (error)
2005		return (error);
2006	cvtstat(&sb, &osb);
2007	error = copyout(&osb, uap->ub, sizeof (osb));
2008	return (error);
2009}
2010
2011/*
2012 * Convert from an old to a new stat structure.
2013 */
2014void
2015cvtstat(st, ost)
2016	struct stat *st;
2017	struct ostat *ost;
2018{
2019
2020	ost->st_dev = st->st_dev;
2021	ost->st_ino = st->st_ino;
2022	ost->st_mode = st->st_mode;
2023	ost->st_nlink = st->st_nlink;
2024	ost->st_uid = st->st_uid;
2025	ost->st_gid = st->st_gid;
2026	ost->st_rdev = st->st_rdev;
2027	if (st->st_size < (quad_t)1 << 32)
2028		ost->st_size = st->st_size;
2029	else
2030		ost->st_size = -2;
2031	ost->st_atime = st->st_atime;
2032	ost->st_mtime = st->st_mtime;
2033	ost->st_ctime = st->st_ctime;
2034	ost->st_blksize = st->st_blksize;
2035	ost->st_blocks = st->st_blocks;
2036	ost->st_flags = st->st_flags;
2037	ost->st_gen = st->st_gen;
2038}
2039#endif /* COMPAT_43 */
2040
2041/*
2042 * Get file status; this version follows links.
2043 */
2044#ifndef _SYS_SYSPROTO_H_
2045struct stat_args {
2046	char	*path;
2047	struct stat *ub;
2048};
2049#endif
2050int
2051stat(td, uap)
2052	struct thread *td;
2053	register struct stat_args /* {
2054		char *path;
2055		struct stat *ub;
2056	} */ *uap;
2057{
2058	struct stat sb;
2059	int error;
2060
2061	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2062	if (error == 0)
2063		error = copyout(&sb, uap->ub, sizeof (sb));
2064	return (error);
2065}
2066
2067int
2068kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2069{
2070	struct nameidata nd;
2071	struct stat sb;
2072	int error, vfslocked;
2073
2074	NDINIT(&nd, LOOKUP,
2075	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2076	    pathseg, path, td);
2077	if ((error = namei(&nd)) != 0)
2078		return (error);
2079	vfslocked = NDHASGIANT(&nd);
2080	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2081	NDFREE(&nd, NDF_ONLY_PNBUF);
2082	vput(nd.ni_vp);
2083	VFS_UNLOCK_GIANT(vfslocked);
2084	if (mtx_owned(&Giant))
2085		printf("stat(%d): %s\n", vfslocked, path);
2086	if (error)
2087		return (error);
2088	*sbp = sb;
2089	return (0);
2090}
2091
2092/*
2093 * Get file status; this version does not follow links.
2094 */
2095#ifndef _SYS_SYSPROTO_H_
2096struct lstat_args {
2097	char	*path;
2098	struct stat *ub;
2099};
2100#endif
2101int
2102lstat(td, uap)
2103	struct thread *td;
2104	register struct lstat_args /* {
2105		char *path;
2106		struct stat *ub;
2107	} */ *uap;
2108{
2109	struct stat sb;
2110	int error;
2111
2112	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2113	if (error == 0)
2114		error = copyout(&sb, uap->ub, sizeof (sb));
2115	return (error);
2116}
2117
2118int
2119kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2120{
2121	struct vnode *vp;
2122	struct stat sb;
2123	struct nameidata nd;
2124	int error, vfslocked;
2125
2126	NDINIT(&nd, LOOKUP,
2127	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2128	    pathseg, path, td);
2129	if ((error = namei(&nd)) != 0)
2130		return (error);
2131	vfslocked = NDHASGIANT(&nd);
2132	vp = nd.ni_vp;
2133	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2134	NDFREE(&nd, NDF_ONLY_PNBUF);
2135	vput(vp);
2136	VFS_UNLOCK_GIANT(vfslocked);
2137	if (error)
2138		return (error);
2139	*sbp = sb;
2140	return (0);
2141}
2142
2143/*
2144 * Implementation of the NetBSD [l]stat() functions.
2145 */
2146void
2147cvtnstat(sb, nsb)
2148	struct stat *sb;
2149	struct nstat *nsb;
2150{
2151	bzero(nsb, sizeof *nsb);
2152	nsb->st_dev = sb->st_dev;
2153	nsb->st_ino = sb->st_ino;
2154	nsb->st_mode = sb->st_mode;
2155	nsb->st_nlink = sb->st_nlink;
2156	nsb->st_uid = sb->st_uid;
2157	nsb->st_gid = sb->st_gid;
2158	nsb->st_rdev = sb->st_rdev;
2159	nsb->st_atimespec = sb->st_atimespec;
2160	nsb->st_mtimespec = sb->st_mtimespec;
2161	nsb->st_ctimespec = sb->st_ctimespec;
2162	nsb->st_size = sb->st_size;
2163	nsb->st_blocks = sb->st_blocks;
2164	nsb->st_blksize = sb->st_blksize;
2165	nsb->st_flags = sb->st_flags;
2166	nsb->st_gen = sb->st_gen;
2167	nsb->st_birthtimespec = sb->st_birthtimespec;
2168}
2169
2170#ifndef _SYS_SYSPROTO_H_
2171struct nstat_args {
2172	char	*path;
2173	struct nstat *ub;
2174};
2175#endif
2176int
2177nstat(td, uap)
2178	struct thread *td;
2179	register struct nstat_args /* {
2180		char *path;
2181		struct nstat *ub;
2182	} */ *uap;
2183{
2184	struct stat sb;
2185	struct nstat nsb;
2186	int error;
2187
2188	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2189	if (error)
2190		return (error);
2191	cvtnstat(&sb, &nsb);
2192	error = copyout(&nsb, uap->ub, sizeof (nsb));
2193	return (error);
2194}
2195
2196/*
2197 * NetBSD lstat.  Get file status; this version does not follow links.
2198 */
2199#ifndef _SYS_SYSPROTO_H_
2200struct lstat_args {
2201	char	*path;
2202	struct stat *ub;
2203};
2204#endif
2205int
2206nlstat(td, uap)
2207	struct thread *td;
2208	register struct nlstat_args /* {
2209		char *path;
2210		struct nstat *ub;
2211	} */ *uap;
2212{
2213	struct stat sb;
2214	struct nstat nsb;
2215	int error;
2216
2217	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2218	if (error)
2219		return (error);
2220	cvtnstat(&sb, &nsb);
2221	error = copyout(&nsb, uap->ub, sizeof (nsb));
2222	return (error);
2223}
2224
2225/*
2226 * Get configurable pathname variables.
2227 */
2228#ifndef _SYS_SYSPROTO_H_
2229struct pathconf_args {
2230	char	*path;
2231	int	name;
2232};
2233#endif
2234int
2235pathconf(td, uap)
2236	struct thread *td;
2237	register struct pathconf_args /* {
2238		char *path;
2239		int name;
2240	} */ *uap;
2241{
2242
2243	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2244}
2245
2246int
2247kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2248{
2249	struct nameidata nd;
2250	int error, vfslocked;
2251
2252	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2253	    pathseg, path, td);
2254	if ((error = namei(&nd)) != 0)
2255		return (error);
2256	vfslocked = NDHASGIANT(&nd);
2257	NDFREE(&nd, NDF_ONLY_PNBUF);
2258
2259	/* If asynchronous I/O is available, it works for all files. */
2260	if (name == _PC_ASYNC_IO)
2261		td->td_retval[0] = async_io_version;
2262	else
2263		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2264	vput(nd.ni_vp);
2265	VFS_UNLOCK_GIANT(vfslocked);
2266	return (error);
2267}
2268
2269/*
2270 * Return target name of a symbolic link.
2271 */
2272#ifndef _SYS_SYSPROTO_H_
2273struct readlink_args {
2274	char	*path;
2275	char	*buf;
2276	int	count;
2277};
2278#endif
2279int
2280readlink(td, uap)
2281	struct thread *td;
2282	register struct readlink_args /* {
2283		char *path;
2284		char *buf;
2285		int count;
2286	} */ *uap;
2287{
2288
2289	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2290	    UIO_USERSPACE, uap->count));
2291}
2292
2293int
2294kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2295    enum uio_seg bufseg, int count)
2296{
2297	register struct vnode *vp;
2298	struct iovec aiov;
2299	struct uio auio;
2300	int error;
2301	struct nameidata nd;
2302	int vfslocked;
2303
2304	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2305	    pathseg, path, td);
2306	if ((error = namei(&nd)) != 0)
2307		return (error);
2308	NDFREE(&nd, NDF_ONLY_PNBUF);
2309	vfslocked = NDHASGIANT(&nd);
2310	vp = nd.ni_vp;
2311#ifdef MAC
2312	error = mac_check_vnode_readlink(td->td_ucred, vp);
2313	if (error) {
2314		vput(vp);
2315		VFS_UNLOCK_GIANT(vfslocked);
2316		return (error);
2317	}
2318#endif
2319	if (vp->v_type != VLNK)
2320		error = EINVAL;
2321	else {
2322		aiov.iov_base = buf;
2323		aiov.iov_len = count;
2324		auio.uio_iov = &aiov;
2325		auio.uio_iovcnt = 1;
2326		auio.uio_offset = 0;
2327		auio.uio_rw = UIO_READ;
2328		auio.uio_segflg = bufseg;
2329		auio.uio_td = td;
2330		auio.uio_resid = count;
2331		error = VOP_READLINK(vp, &auio, td->td_ucred);
2332	}
2333	vput(vp);
2334	VFS_UNLOCK_GIANT(vfslocked);
2335	td->td_retval[0] = count - auio.uio_resid;
2336	return (error);
2337}
2338
2339/*
2340 * Common implementation code for chflags() and fchflags().
2341 */
2342static int
2343setfflags(td, vp, flags)
2344	struct thread *td;
2345	struct vnode *vp;
2346	int flags;
2347{
2348	int error;
2349	struct mount *mp;
2350	struct vattr vattr;
2351
2352	/*
2353	 * Prevent non-root users from setting flags on devices.  When
2354	 * a device is reused, users can retain ownership of the device
2355	 * if they are allowed to set flags and programs assume that
2356	 * chown can't fail when done as root.
2357	 */
2358	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2359		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2360		    SUSER_ALLOWJAIL);
2361		if (error)
2362			return (error);
2363	}
2364
2365	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2366		return (error);
2367	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2368	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2369	VATTR_NULL(&vattr);
2370	vattr.va_flags = flags;
2371#ifdef MAC
2372	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2373	if (error == 0)
2374#endif
2375		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2376	VOP_UNLOCK(vp, 0, td);
2377	vn_finished_write(mp);
2378	return (error);
2379}
2380
2381/*
2382 * Change flags of a file given a path name.
2383 */
2384#ifndef _SYS_SYSPROTO_H_
2385struct chflags_args {
2386	char	*path;
2387	int	flags;
2388};
2389#endif
2390int
2391chflags(td, uap)
2392	struct thread *td;
2393	register struct chflags_args /* {
2394		char *path;
2395		int flags;
2396	} */ *uap;
2397{
2398	int error;
2399	struct nameidata nd;
2400	int vfslocked;
2401
2402	AUDIT_ARG(fflags, uap->flags);
2403	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2404	    uap->path, td);
2405	if ((error = namei(&nd)) != 0)
2406		return (error);
2407	NDFREE(&nd, NDF_ONLY_PNBUF);
2408	vfslocked = NDHASGIANT(&nd);
2409	error = setfflags(td, nd.ni_vp, uap->flags);
2410	vrele(nd.ni_vp);
2411	VFS_UNLOCK_GIANT(vfslocked);
2412	return (error);
2413}
2414
2415/*
2416 * Same as chflags() but doesn't follow symlinks.
2417 */
2418int
2419lchflags(td, uap)
2420	struct thread *td;
2421	register struct lchflags_args /* {
2422		char *path;
2423		int flags;
2424	} */ *uap;
2425{
2426	int error;
2427	struct nameidata nd;
2428	int vfslocked;
2429
2430	AUDIT_ARG(fflags, uap->flags);
2431	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2432	    uap->path, td);
2433	if ((error = namei(&nd)) != 0)
2434		return (error);
2435	vfslocked = NDHASGIANT(&nd);
2436	NDFREE(&nd, NDF_ONLY_PNBUF);
2437	error = setfflags(td, nd.ni_vp, uap->flags);
2438	vrele(nd.ni_vp);
2439	VFS_UNLOCK_GIANT(vfslocked);
2440	return (error);
2441}
2442
2443/*
2444 * Change flags of a file given a file descriptor.
2445 */
2446#ifndef _SYS_SYSPROTO_H_
2447struct fchflags_args {
2448	int	fd;
2449	int	flags;
2450};
2451#endif
2452int
2453fchflags(td, uap)
2454	struct thread *td;
2455	register struct fchflags_args /* {
2456		int fd;
2457		int flags;
2458	} */ *uap;
2459{
2460	struct file *fp;
2461	int vfslocked;
2462	int error;
2463
2464	AUDIT_ARG(fd, uap->fd);
2465	AUDIT_ARG(fflags, uap->flags);
2466	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2467		return (error);
2468	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2469#ifdef AUDIT
2470	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2471	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2472	VOP_UNLOCK(fp->f_vnode, 0, td);
2473#endif
2474	error = setfflags(td, fp->f_vnode, uap->flags);
2475	VFS_UNLOCK_GIANT(vfslocked);
2476	fdrop(fp, td);
2477	return (error);
2478}
2479
2480/*
2481 * Common implementation code for chmod(), lchmod() and fchmod().
2482 */
2483static int
2484setfmode(td, vp, mode)
2485	struct thread *td;
2486	struct vnode *vp;
2487	int mode;
2488{
2489	int error;
2490	struct mount *mp;
2491	struct vattr vattr;
2492
2493	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2494		return (error);
2495	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2496	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2497	VATTR_NULL(&vattr);
2498	vattr.va_mode = mode & ALLPERMS;
2499#ifdef MAC
2500	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2501	if (error == 0)
2502#endif
2503		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2504	VOP_UNLOCK(vp, 0, td);
2505	vn_finished_write(mp);
2506	return (error);
2507}
2508
2509/*
2510 * Change mode of a file given path name.
2511 */
2512#ifndef _SYS_SYSPROTO_H_
2513struct chmod_args {
2514	char	*path;
2515	int	mode;
2516};
2517#endif
2518int
2519chmod(td, uap)
2520	struct thread *td;
2521	register struct chmod_args /* {
2522		char *path;
2523		int mode;
2524	} */ *uap;
2525{
2526
2527	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2528}
2529
2530int
2531kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2532{
2533	int error;
2534	struct nameidata nd;
2535	int vfslocked;
2536
2537	AUDIT_ARG(mode, mode);
2538	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2539	if ((error = namei(&nd)) != 0)
2540		return (error);
2541	vfslocked = NDHASGIANT(&nd);
2542	NDFREE(&nd, NDF_ONLY_PNBUF);
2543	error = setfmode(td, nd.ni_vp, mode);
2544	vrele(nd.ni_vp);
2545	VFS_UNLOCK_GIANT(vfslocked);
2546	return (error);
2547}
2548
2549/*
2550 * Change mode of a file given path name (don't follow links.)
2551 */
2552#ifndef _SYS_SYSPROTO_H_
2553struct lchmod_args {
2554	char	*path;
2555	int	mode;
2556};
2557#endif
2558int
2559lchmod(td, uap)
2560	struct thread *td;
2561	register struct lchmod_args /* {
2562		char *path;
2563		int mode;
2564	} */ *uap;
2565{
2566	int error;
2567	struct nameidata nd;
2568	int vfslocked;
2569
2570	AUDIT_ARG(mode, (mode_t)uap->mode);
2571	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2572	    uap->path, td);
2573	if ((error = namei(&nd)) != 0)
2574		return (error);
2575	vfslocked = NDHASGIANT(&nd);
2576	NDFREE(&nd, NDF_ONLY_PNBUF);
2577	error = setfmode(td, nd.ni_vp, uap->mode);
2578	vrele(nd.ni_vp);
2579	VFS_UNLOCK_GIANT(vfslocked);
2580	return (error);
2581}
2582
2583/*
2584 * Change mode of a file given a file descriptor.
2585 */
2586#ifndef _SYS_SYSPROTO_H_
2587struct fchmod_args {
2588	int	fd;
2589	int	mode;
2590};
2591#endif
2592int
2593fchmod(td, uap)
2594	struct thread *td;
2595	register struct fchmod_args /* {
2596		int fd;
2597		int mode;
2598	} */ *uap;
2599{
2600	struct file *fp;
2601	int vfslocked;
2602	int error;
2603
2604	AUDIT_ARG(fd, uap->fd);
2605	AUDIT_ARG(mode, uap->mode);
2606	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2607		return (error);
2608	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2609#ifdef AUDIT
2610	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2611	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2612	VOP_UNLOCK(fp->f_vnode, 0, td);
2613#endif
2614	error = setfmode(td, fp->f_vnode, uap->mode);
2615	VFS_UNLOCK_GIANT(vfslocked);
2616	fdrop(fp, td);
2617	return (error);
2618}
2619
2620/*
2621 * Common implementation for chown(), lchown(), and fchown()
2622 */
2623static int
2624setfown(td, vp, uid, gid)
2625	struct thread *td;
2626	struct vnode *vp;
2627	uid_t uid;
2628	gid_t gid;
2629{
2630	int error;
2631	struct mount *mp;
2632	struct vattr vattr;
2633
2634	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2635		return (error);
2636	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2637	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2638	VATTR_NULL(&vattr);
2639	vattr.va_uid = uid;
2640	vattr.va_gid = gid;
2641#ifdef MAC
2642	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2643	    vattr.va_gid);
2644	if (error == 0)
2645#endif
2646		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2647	VOP_UNLOCK(vp, 0, td);
2648	vn_finished_write(mp);
2649	return (error);
2650}
2651
2652/*
2653 * Set ownership given a path name.
2654 */
2655#ifndef _SYS_SYSPROTO_H_
2656struct chown_args {
2657	char	*path;
2658	int	uid;
2659	int	gid;
2660};
2661#endif
2662int
2663chown(td, uap)
2664	struct thread *td;
2665	register struct chown_args /* {
2666		char *path;
2667		int uid;
2668		int gid;
2669	} */ *uap;
2670{
2671
2672	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2673}
2674
2675int
2676kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2677    int gid)
2678{
2679	int error;
2680	struct nameidata nd;
2681	int vfslocked;
2682
2683	AUDIT_ARG(owner, uid, gid);
2684	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2685	if ((error = namei(&nd)) != 0)
2686		return (error);
2687	vfslocked = NDHASGIANT(&nd);
2688	NDFREE(&nd, NDF_ONLY_PNBUF);
2689	error = setfown(td, nd.ni_vp, uid, gid);
2690	vrele(nd.ni_vp);
2691	VFS_UNLOCK_GIANT(vfslocked);
2692	return (error);
2693}
2694
2695/*
2696 * Set ownership given a path name, do not cross symlinks.
2697 */
2698#ifndef _SYS_SYSPROTO_H_
2699struct lchown_args {
2700	char	*path;
2701	int	uid;
2702	int	gid;
2703};
2704#endif
2705int
2706lchown(td, uap)
2707	struct thread *td;
2708	register struct lchown_args /* {
2709		char *path;
2710		int uid;
2711		int gid;
2712	} */ *uap;
2713{
2714
2715	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2716}
2717
2718int
2719kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2720    int gid)
2721{
2722	int error;
2723	struct nameidata nd;
2724	int vfslocked;
2725
2726	AUDIT_ARG(owner, uid, gid);
2727	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2728	if ((error = namei(&nd)) != 0)
2729		return (error);
2730	vfslocked = NDHASGIANT(&nd);
2731	NDFREE(&nd, NDF_ONLY_PNBUF);
2732	error = setfown(td, nd.ni_vp, uid, gid);
2733	vrele(nd.ni_vp);
2734	VFS_UNLOCK_GIANT(vfslocked);
2735	return (error);
2736}
2737
2738/*
2739 * Set ownership given a file descriptor.
2740 */
2741#ifndef _SYS_SYSPROTO_H_
2742struct fchown_args {
2743	int	fd;
2744	int	uid;
2745	int	gid;
2746};
2747#endif
2748int
2749fchown(td, uap)
2750	struct thread *td;
2751	register struct fchown_args /* {
2752		int fd;
2753		int uid;
2754		int gid;
2755	} */ *uap;
2756{
2757	struct file *fp;
2758	int vfslocked;
2759	int error;
2760
2761	AUDIT_ARG(fd, uap->fd);
2762	AUDIT_ARG(owner, uap->uid, uap->gid);
2763	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2764		return (error);
2765	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2766#ifdef AUDIT
2767	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2768	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2769	VOP_UNLOCK(fp->f_vnode, 0, td);
2770#endif
2771	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2772	VFS_UNLOCK_GIANT(vfslocked);
2773	fdrop(fp, td);
2774	return (error);
2775}
2776
2777/*
2778 * Common implementation code for utimes(), lutimes(), and futimes().
2779 */
2780static int
2781getutimes(usrtvp, tvpseg, tsp)
2782	const struct timeval *usrtvp;
2783	enum uio_seg tvpseg;
2784	struct timespec *tsp;
2785{
2786	struct timeval tv[2];
2787	const struct timeval *tvp;
2788	int error;
2789
2790	if (usrtvp == NULL) {
2791		microtime(&tv[0]);
2792		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2793		tsp[1] = tsp[0];
2794	} else {
2795		if (tvpseg == UIO_SYSSPACE) {
2796			tvp = usrtvp;
2797		} else {
2798			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2799				return (error);
2800			tvp = tv;
2801		}
2802
2803		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2804		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2805			return (EINVAL);
2806		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2807		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2808	}
2809	return (0);
2810}
2811
2812/*
2813 * Common implementation code for utimes(), lutimes(), and futimes().
2814 */
2815static int
2816setutimes(td, vp, ts, numtimes, nullflag)
2817	struct thread *td;
2818	struct vnode *vp;
2819	const struct timespec *ts;
2820	int numtimes;
2821	int nullflag;
2822{
2823	int error, setbirthtime;
2824	struct mount *mp;
2825	struct vattr vattr;
2826
2827	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2828		return (error);
2829	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2830	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2831	setbirthtime = 0;
2832	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2833	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2834		setbirthtime = 1;
2835	VATTR_NULL(&vattr);
2836	vattr.va_atime = ts[0];
2837	vattr.va_mtime = ts[1];
2838	if (setbirthtime)
2839		vattr.va_birthtime = ts[1];
2840	if (numtimes > 2)
2841		vattr.va_birthtime = ts[2];
2842	if (nullflag)
2843		vattr.va_vaflags |= VA_UTIMES_NULL;
2844#ifdef MAC
2845	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2846	    vattr.va_mtime);
2847#endif
2848	if (error == 0)
2849		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2850	VOP_UNLOCK(vp, 0, td);
2851	vn_finished_write(mp);
2852	return (error);
2853}
2854
2855/*
2856 * Set the access and modification times of a file.
2857 */
2858#ifndef _SYS_SYSPROTO_H_
2859struct utimes_args {
2860	char	*path;
2861	struct	timeval *tptr;
2862};
2863#endif
2864int
2865utimes(td, uap)
2866	struct thread *td;
2867	register struct utimes_args /* {
2868		char *path;
2869		struct timeval *tptr;
2870	} */ *uap;
2871{
2872
2873	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2874	    UIO_USERSPACE));
2875}
2876
2877int
2878kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2879    struct timeval *tptr, enum uio_seg tptrseg)
2880{
2881	struct timespec ts[2];
2882	int error;
2883	struct nameidata nd;
2884	int vfslocked;
2885
2886	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2887		return (error);
2888	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2889	if ((error = namei(&nd)) != 0)
2890		return (error);
2891	vfslocked = NDHASGIANT(&nd);
2892	NDFREE(&nd, NDF_ONLY_PNBUF);
2893	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2894	vrele(nd.ni_vp);
2895	VFS_UNLOCK_GIANT(vfslocked);
2896	return (error);
2897}
2898
2899/*
2900 * Set the access and modification times of a file.
2901 */
2902#ifndef _SYS_SYSPROTO_H_
2903struct lutimes_args {
2904	char	*path;
2905	struct	timeval *tptr;
2906};
2907#endif
2908int
2909lutimes(td, uap)
2910	struct thread *td;
2911	register struct lutimes_args /* {
2912		char *path;
2913		struct timeval *tptr;
2914	} */ *uap;
2915{
2916
2917	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2918	    UIO_USERSPACE));
2919}
2920
2921int
2922kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2923    struct timeval *tptr, enum uio_seg tptrseg)
2924{
2925	struct timespec ts[2];
2926	int error;
2927	struct nameidata nd;
2928	int vfslocked;
2929
2930	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2931		return (error);
2932	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2933	if ((error = namei(&nd)) != 0)
2934		return (error);
2935	vfslocked = NDHASGIANT(&nd);
2936	NDFREE(&nd, NDF_ONLY_PNBUF);
2937	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2938	vrele(nd.ni_vp);
2939	VFS_UNLOCK_GIANT(vfslocked);
2940	return (error);
2941}
2942
2943/*
2944 * Set the access and modification times of a file.
2945 */
2946#ifndef _SYS_SYSPROTO_H_
2947struct futimes_args {
2948	int	fd;
2949	struct	timeval *tptr;
2950};
2951#endif
2952int
2953futimes(td, uap)
2954	struct thread *td;
2955	register struct futimes_args /* {
2956		int  fd;
2957		struct timeval *tptr;
2958	} */ *uap;
2959{
2960
2961	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2962}
2963
2964int
2965kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2966    enum uio_seg tptrseg)
2967{
2968	struct timespec ts[2];
2969	struct file *fp;
2970	int vfslocked;
2971	int error;
2972
2973	AUDIT_ARG(fd, fd);
2974	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2975		return (error);
2976	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2977		return (error);
2978	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2979#ifdef AUDIT
2980	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2981	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2982	VOP_UNLOCK(fp->f_vnode, 0, td);
2983#endif
2984	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2985	VFS_UNLOCK_GIANT(vfslocked);
2986	fdrop(fp, td);
2987	return (error);
2988}
2989
2990/*
2991 * Truncate a file given its path name.
2992 */
2993#ifndef _SYS_SYSPROTO_H_
2994struct truncate_args {
2995	char	*path;
2996	int	pad;
2997	off_t	length;
2998};
2999#endif
3000int
3001truncate(td, uap)
3002	struct thread *td;
3003	register struct truncate_args /* {
3004		char *path;
3005		int pad;
3006		off_t length;
3007	} */ *uap;
3008{
3009
3010	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3011}
3012
3013int
3014kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3015{
3016	struct mount *mp;
3017	struct vnode *vp;
3018	struct vattr vattr;
3019	int error;
3020	struct nameidata nd;
3021	int vfslocked;
3022
3023	if (length < 0)
3024		return(EINVAL);
3025	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3026	if ((error = namei(&nd)) != 0)
3027		return (error);
3028	vfslocked = NDHASGIANT(&nd);
3029	vp = nd.ni_vp;
3030	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3031		vrele(vp);
3032		VFS_UNLOCK_GIANT(vfslocked);
3033		return (error);
3034	}
3035	NDFREE(&nd, NDF_ONLY_PNBUF);
3036	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3037	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3038	if (vp->v_type == VDIR)
3039		error = EISDIR;
3040#ifdef MAC
3041	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3042	}
3043#endif
3044	else if ((error = vn_writechk(vp)) == 0 &&
3045	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3046		VATTR_NULL(&vattr);
3047		vattr.va_size = length;
3048		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3049	}
3050	vput(vp);
3051	vn_finished_write(mp);
3052	VFS_UNLOCK_GIANT(vfslocked);
3053	return (error);
3054}
3055
3056/*
3057 * Truncate a file given a file descriptor.
3058 */
3059#ifndef _SYS_SYSPROTO_H_
3060struct ftruncate_args {
3061	int	fd;
3062	int	pad;
3063	off_t	length;
3064};
3065#endif
3066int
3067ftruncate(td, uap)
3068	struct thread *td;
3069	register struct ftruncate_args /* {
3070		int fd;
3071		int pad;
3072		off_t length;
3073	} */ *uap;
3074{
3075	struct mount *mp;
3076	struct vattr vattr;
3077	struct vnode *vp;
3078	struct file *fp;
3079	int vfslocked;
3080	int error;
3081
3082	AUDIT_ARG(fd, uap->fd);
3083	if (uap->length < 0)
3084		return(EINVAL);
3085	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3086		return (error);
3087	if ((fp->f_flag & FWRITE) == 0) {
3088		fdrop(fp, td);
3089		return (EINVAL);
3090	}
3091	vp = fp->f_vnode;
3092	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3093	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3094		goto drop;
3095	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3096	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3097	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3098	if (vp->v_type == VDIR)
3099		error = EISDIR;
3100#ifdef MAC
3101	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3102	    vp))) {
3103	}
3104#endif
3105	else if ((error = vn_writechk(vp)) == 0) {
3106		VATTR_NULL(&vattr);
3107		vattr.va_size = uap->length;
3108		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3109	}
3110	VOP_UNLOCK(vp, 0, td);
3111	vn_finished_write(mp);
3112drop:
3113	VFS_UNLOCK_GIANT(vfslocked);
3114	fdrop(fp, td);
3115	return (error);
3116}
3117
3118#if defined(COMPAT_43)
3119/*
3120 * Truncate a file given its path name.
3121 */
3122#ifndef _SYS_SYSPROTO_H_
3123struct otruncate_args {
3124	char	*path;
3125	long	length;
3126};
3127#endif
3128int
3129otruncate(td, uap)
3130	struct thread *td;
3131	register struct otruncate_args /* {
3132		char *path;
3133		long length;
3134	} */ *uap;
3135{
3136	struct truncate_args /* {
3137		char *path;
3138		int pad;
3139		off_t length;
3140	} */ nuap;
3141
3142	nuap.path = uap->path;
3143	nuap.length = uap->length;
3144	return (truncate(td, &nuap));
3145}
3146
3147/*
3148 * Truncate a file given a file descriptor.
3149 */
3150#ifndef _SYS_SYSPROTO_H_
3151struct oftruncate_args {
3152	int	fd;
3153	long	length;
3154};
3155#endif
3156int
3157oftruncate(td, uap)
3158	struct thread *td;
3159	register struct oftruncate_args /* {
3160		int fd;
3161		long length;
3162	} */ *uap;
3163{
3164	struct ftruncate_args /* {
3165		int fd;
3166		int pad;
3167		off_t length;
3168	} */ nuap;
3169
3170	nuap.fd = uap->fd;
3171	nuap.length = uap->length;
3172	return (ftruncate(td, &nuap));
3173}
3174#endif /* COMPAT_43 */
3175
3176/*
3177 * Sync an open file.
3178 */
3179#ifndef _SYS_SYSPROTO_H_
3180struct fsync_args {
3181	int	fd;
3182};
3183#endif
3184int
3185fsync(td, uap)
3186	struct thread *td;
3187	struct fsync_args /* {
3188		int fd;
3189	} */ *uap;
3190{
3191	struct vnode *vp;
3192	struct mount *mp;
3193	struct file *fp;
3194	int vfslocked;
3195	int error;
3196
3197	AUDIT_ARG(fd, uap->fd);
3198	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3199		return (error);
3200	vp = fp->f_vnode;
3201	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3202	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3203		goto drop;
3204	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3205	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3206	if (vp->v_object != NULL) {
3207		VM_OBJECT_LOCK(vp->v_object);
3208		vm_object_page_clean(vp->v_object, 0, 0, 0);
3209		VM_OBJECT_UNLOCK(vp->v_object);
3210	}
3211	error = VOP_FSYNC(vp, MNT_WAIT, td);
3212
3213	VOP_UNLOCK(vp, 0, td);
3214	vn_finished_write(mp);
3215drop:
3216	VFS_UNLOCK_GIANT(vfslocked);
3217	fdrop(fp, td);
3218	return (error);
3219}
3220
3221/*
3222 * Rename files.  Source and destination must either both be directories, or
3223 * both not be directories.  If target is a directory, it must be empty.
3224 */
3225#ifndef _SYS_SYSPROTO_H_
3226struct rename_args {
3227	char	*from;
3228	char	*to;
3229};
3230#endif
3231int
3232rename(td, uap)
3233	struct thread *td;
3234	register struct rename_args /* {
3235		char *from;
3236		char *to;
3237	} */ *uap;
3238{
3239
3240	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3241}
3242
3243int
3244kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3245{
3246	struct mount *mp = NULL;
3247	struct vnode *tvp, *fvp, *tdvp;
3248	struct nameidata fromnd, tond;
3249	int tvfslocked;
3250	int fvfslocked;
3251	int error;
3252
3253	bwillwrite();
3254#ifdef MAC
3255	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3256	    AUDITVNODE1, pathseg, from, td);
3257#else
3258	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3259	    AUDITVNODE1, pathseg, from, td);
3260#endif
3261	if ((error = namei(&fromnd)) != 0)
3262		return (error);
3263	fvfslocked = NDHASGIANT(&fromnd);
3264	tvfslocked = 0;
3265#ifdef MAC
3266	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3267	    fromnd.ni_vp, &fromnd.ni_cnd);
3268	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3269	if (fromnd.ni_dvp != fromnd.ni_vp)
3270		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3271#endif
3272	fvp = fromnd.ni_vp;
3273	if (error == 0)
3274		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3275	if (error != 0) {
3276		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3277		vrele(fromnd.ni_dvp);
3278		vrele(fvp);
3279		goto out1;
3280	}
3281	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3282	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3283	if (fromnd.ni_vp->v_type == VDIR)
3284		tond.ni_cnd.cn_flags |= WILLBEDIR;
3285	if ((error = namei(&tond)) != 0) {
3286		/* Translate error code for rename("dir1", "dir2/."). */
3287		if (error == EISDIR && fvp->v_type == VDIR)
3288			error = EINVAL;
3289		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3290		vrele(fromnd.ni_dvp);
3291		vrele(fvp);
3292		vn_finished_write(mp);
3293		goto out1;
3294	}
3295	tvfslocked = NDHASGIANT(&tond);
3296	tdvp = tond.ni_dvp;
3297	tvp = tond.ni_vp;
3298	if (tvp != NULL) {
3299		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3300			error = ENOTDIR;
3301			goto out;
3302		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3303			error = EISDIR;
3304			goto out;
3305		}
3306	}
3307	if (fvp == tdvp)
3308		error = EINVAL;
3309	/*
3310	 * If the source is the same as the destination (that is, if they
3311	 * are links to the same vnode), then there is nothing to do.
3312	 */
3313	if (fvp == tvp)
3314		error = -1;
3315#ifdef MAC
3316	else
3317		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3318		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3319#endif
3320out:
3321	if (!error) {
3322		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3323		if (fromnd.ni_dvp != tdvp) {
3324			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3325		}
3326		if (tvp) {
3327			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3328		}
3329		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3330				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3331		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3332		NDFREE(&tond, NDF_ONLY_PNBUF);
3333	} else {
3334		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3335		NDFREE(&tond, NDF_ONLY_PNBUF);
3336		if (tvp)
3337			vput(tvp);
3338		if (tdvp == tvp)
3339			vrele(tdvp);
3340		else
3341			vput(tdvp);
3342		vrele(fromnd.ni_dvp);
3343		vrele(fvp);
3344	}
3345	vrele(tond.ni_startdir);
3346	vn_finished_write(mp);
3347out1:
3348	if (fromnd.ni_startdir)
3349		vrele(fromnd.ni_startdir);
3350	VFS_UNLOCK_GIANT(fvfslocked);
3351	VFS_UNLOCK_GIANT(tvfslocked);
3352	if (error == -1)
3353		return (0);
3354	return (error);
3355}
3356
3357/*
3358 * Make a directory file.
3359 */
3360#ifndef _SYS_SYSPROTO_H_
3361struct mkdir_args {
3362	char	*path;
3363	int	mode;
3364};
3365#endif
3366int
3367mkdir(td, uap)
3368	struct thread *td;
3369	register struct mkdir_args /* {
3370		char *path;
3371		int mode;
3372	} */ *uap;
3373{
3374
3375	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3376}
3377
3378int
3379kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3380{
3381	struct mount *mp;
3382	struct vnode *vp;
3383	struct vattr vattr;
3384	int error;
3385	struct nameidata nd;
3386	int vfslocked;
3387
3388	AUDIT_ARG(mode, mode);
3389restart:
3390	bwillwrite();
3391	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3392	    segflg, path, td);
3393	nd.ni_cnd.cn_flags |= WILLBEDIR;
3394	if ((error = namei(&nd)) != 0)
3395		return (error);
3396	vfslocked = NDHASGIANT(&nd);
3397	vp = nd.ni_vp;
3398	if (vp != NULL) {
3399		NDFREE(&nd, NDF_ONLY_PNBUF);
3400		/*
3401		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3402		 * the strange behaviour of leaving the vnode unlocked
3403		 * if the target is the same vnode as the parent.
3404		 */
3405		if (vp == nd.ni_dvp)
3406			vrele(nd.ni_dvp);
3407		else
3408			vput(nd.ni_dvp);
3409		vrele(vp);
3410		VFS_UNLOCK_GIANT(vfslocked);
3411		return (EEXIST);
3412	}
3413	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3414		NDFREE(&nd, NDF_ONLY_PNBUF);
3415		vput(nd.ni_dvp);
3416		VFS_UNLOCK_GIANT(vfslocked);
3417		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3418			return (error);
3419		goto restart;
3420	}
3421	VATTR_NULL(&vattr);
3422	vattr.va_type = VDIR;
3423	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3424	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3425	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3426#ifdef MAC
3427	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3428	    &vattr);
3429	if (error)
3430		goto out;
3431#endif
3432	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3433	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3434#ifdef MAC
3435out:
3436#endif
3437	NDFREE(&nd, NDF_ONLY_PNBUF);
3438	vput(nd.ni_dvp);
3439	if (!error)
3440		vput(nd.ni_vp);
3441	vn_finished_write(mp);
3442	VFS_UNLOCK_GIANT(vfslocked);
3443	return (error);
3444}
3445
3446/*
3447 * Remove a directory file.
3448 */
3449#ifndef _SYS_SYSPROTO_H_
3450struct rmdir_args {
3451	char	*path;
3452};
3453#endif
3454int
3455rmdir(td, uap)
3456	struct thread *td;
3457	struct rmdir_args /* {
3458		char *path;
3459	} */ *uap;
3460{
3461
3462	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3463}
3464
3465int
3466kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3467{
3468	struct mount *mp;
3469	struct vnode *vp;
3470	int error;
3471	struct nameidata nd;
3472	int vfslocked;
3473
3474restart:
3475	bwillwrite();
3476	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3477	    pathseg, path, td);
3478	if ((error = namei(&nd)) != 0)
3479		return (error);
3480	vfslocked = NDHASGIANT(&nd);
3481	vp = nd.ni_vp;
3482	if (vp->v_type != VDIR) {
3483		error = ENOTDIR;
3484		goto out;
3485	}
3486	/*
3487	 * No rmdir "." please.
3488	 */
3489	if (nd.ni_dvp == vp) {
3490		error = EINVAL;
3491		goto out;
3492	}
3493	/*
3494	 * The root of a mounted filesystem cannot be deleted.
3495	 */
3496	if (vp->v_vflag & VV_ROOT) {
3497		error = EBUSY;
3498		goto out;
3499	}
3500#ifdef MAC
3501	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3502	    &nd.ni_cnd);
3503	if (error)
3504		goto out;
3505#endif
3506	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3507		NDFREE(&nd, NDF_ONLY_PNBUF);
3508		vput(vp);
3509		if (nd.ni_dvp == vp)
3510			vrele(nd.ni_dvp);
3511		else
3512			vput(nd.ni_dvp);
3513		VFS_UNLOCK_GIANT(vfslocked);
3514		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3515			return (error);
3516		goto restart;
3517	}
3518	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3519	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3520	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3521	vn_finished_write(mp);
3522out:
3523	NDFREE(&nd, NDF_ONLY_PNBUF);
3524	vput(vp);
3525	if (nd.ni_dvp == vp)
3526		vrele(nd.ni_dvp);
3527	else
3528		vput(nd.ni_dvp);
3529	VFS_UNLOCK_GIANT(vfslocked);
3530	return (error);
3531}
3532
3533#ifdef COMPAT_43
3534/*
3535 * Read a block of directory entries in a filesystem independent format.
3536 */
3537#ifndef _SYS_SYSPROTO_H_
3538struct ogetdirentries_args {
3539	int	fd;
3540	char	*buf;
3541	u_int	count;
3542	long	*basep;
3543};
3544#endif
3545int
3546ogetdirentries(td, uap)
3547	struct thread *td;
3548	register struct ogetdirentries_args /* {
3549		int fd;
3550		char *buf;
3551		u_int count;
3552		long *basep;
3553	} */ *uap;
3554{
3555	struct vnode *vp;
3556	struct file *fp;
3557	struct uio auio, kuio;
3558	struct iovec aiov, kiov;
3559	struct dirent *dp, *edp;
3560	caddr_t dirbuf;
3561	int error, eofflag, readcnt, vfslocked;
3562	long loff;
3563
3564	/* XXX arbitrary sanity limit on `count'. */
3565	if (uap->count > 64 * 1024)
3566		return (EINVAL);
3567	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3568		return (error);
3569	if ((fp->f_flag & FREAD) == 0) {
3570		fdrop(fp, td);
3571		return (EBADF);
3572	}
3573	vp = fp->f_vnode;
3574unionread:
3575	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3576	if (vp->v_type != VDIR) {
3577		VFS_UNLOCK_GIANT(vfslocked);
3578		fdrop(fp, td);
3579		return (EINVAL);
3580	}
3581	aiov.iov_base = uap->buf;
3582	aiov.iov_len = uap->count;
3583	auio.uio_iov = &aiov;
3584	auio.uio_iovcnt = 1;
3585	auio.uio_rw = UIO_READ;
3586	auio.uio_segflg = UIO_USERSPACE;
3587	auio.uio_td = td;
3588	auio.uio_resid = uap->count;
3589	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3590	loff = auio.uio_offset = fp->f_offset;
3591#ifdef MAC
3592	error = mac_check_vnode_readdir(td->td_ucred, vp);
3593	if (error) {
3594		VOP_UNLOCK(vp, 0, td);
3595		VFS_UNLOCK_GIANT(vfslocked);
3596		fdrop(fp, td);
3597		return (error);
3598	}
3599#endif
3600#	if (BYTE_ORDER != LITTLE_ENDIAN)
3601		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3602			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3603			    NULL, NULL);
3604			fp->f_offset = auio.uio_offset;
3605		} else
3606#	endif
3607	{
3608		kuio = auio;
3609		kuio.uio_iov = &kiov;
3610		kuio.uio_segflg = UIO_SYSSPACE;
3611		kiov.iov_len = uap->count;
3612		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3613		kiov.iov_base = dirbuf;
3614		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3615			    NULL, NULL);
3616		fp->f_offset = kuio.uio_offset;
3617		if (error == 0) {
3618			readcnt = uap->count - kuio.uio_resid;
3619			edp = (struct dirent *)&dirbuf[readcnt];
3620			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3621#				if (BYTE_ORDER == LITTLE_ENDIAN)
3622					/*
3623					 * The expected low byte of
3624					 * dp->d_namlen is our dp->d_type.
3625					 * The high MBZ byte of dp->d_namlen
3626					 * is our dp->d_namlen.
3627					 */
3628					dp->d_type = dp->d_namlen;
3629					dp->d_namlen = 0;
3630#				else
3631					/*
3632					 * The dp->d_type is the high byte
3633					 * of the expected dp->d_namlen,
3634					 * so must be zero'ed.
3635					 */
3636					dp->d_type = 0;
3637#				endif
3638				if (dp->d_reclen > 0) {
3639					dp = (struct dirent *)
3640					    ((char *)dp + dp->d_reclen);
3641				} else {
3642					error = EIO;
3643					break;
3644				}
3645			}
3646			if (dp >= edp)
3647				error = uiomove(dirbuf, readcnt, &auio);
3648		}
3649		FREE(dirbuf, M_TEMP);
3650	}
3651	if (error) {
3652		VOP_UNLOCK(vp, 0, td);
3653		VFS_UNLOCK_GIANT(vfslocked);
3654		fdrop(fp, td);
3655		return (error);
3656	}
3657	if (uap->count == auio.uio_resid &&
3658	    (vp->v_vflag & VV_ROOT) &&
3659	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3660		struct vnode *tvp = vp;
3661		vp = vp->v_mount->mnt_vnodecovered;
3662		VREF(vp);
3663		fp->f_vnode = vp;
3664		fp->f_data = vp;
3665		fp->f_offset = 0;
3666		vput(tvp);
3667		VFS_UNLOCK_GIANT(vfslocked);
3668		goto unionread;
3669	}
3670	VOP_UNLOCK(vp, 0, td);
3671	VFS_UNLOCK_GIANT(vfslocked);
3672	error = copyout(&loff, uap->basep, sizeof(long));
3673	fdrop(fp, td);
3674	td->td_retval[0] = uap->count - auio.uio_resid;
3675	return (error);
3676}
3677#endif /* COMPAT_43 */
3678
3679/*
3680 * Read a block of directory entries in a filesystem independent format.
3681 */
3682#ifndef _SYS_SYSPROTO_H_
3683struct getdirentries_args {
3684	int	fd;
3685	char	*buf;
3686	u_int	count;
3687	long	*basep;
3688};
3689#endif
3690int
3691getdirentries(td, uap)
3692	struct thread *td;
3693	register struct getdirentries_args /* {
3694		int fd;
3695		char *buf;
3696		u_int count;
3697		long *basep;
3698	} */ *uap;
3699{
3700	struct vnode *vp;
3701	struct file *fp;
3702	struct uio auio;
3703	struct iovec aiov;
3704	int vfslocked;
3705	long loff;
3706	int error, eofflag;
3707
3708	AUDIT_ARG(fd, uap->fd);
3709	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3710		return (error);
3711	if ((fp->f_flag & FREAD) == 0) {
3712		fdrop(fp, td);
3713		return (EBADF);
3714	}
3715	vp = fp->f_vnode;
3716unionread:
3717	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3718	if (vp->v_type != VDIR) {
3719		VFS_UNLOCK_GIANT(vfslocked);
3720		error = EINVAL;
3721		goto fail;
3722	}
3723	aiov.iov_base = uap->buf;
3724	aiov.iov_len = uap->count;
3725	auio.uio_iov = &aiov;
3726	auio.uio_iovcnt = 1;
3727	auio.uio_rw = UIO_READ;
3728	auio.uio_segflg = UIO_USERSPACE;
3729	auio.uio_td = td;
3730	auio.uio_resid = uap->count;
3731	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3732	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3733	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3734	loff = auio.uio_offset = fp->f_offset;
3735#ifdef MAC
3736	error = mac_check_vnode_readdir(td->td_ucred, vp);
3737	if (error == 0)
3738#endif
3739		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3740		    NULL);
3741	fp->f_offset = auio.uio_offset;
3742	if (error) {
3743		VOP_UNLOCK(vp, 0, td);
3744		VFS_UNLOCK_GIANT(vfslocked);
3745		goto fail;
3746	}
3747	if (uap->count == auio.uio_resid &&
3748	    (vp->v_vflag & VV_ROOT) &&
3749	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3750		struct vnode *tvp = vp;
3751		vp = vp->v_mount->mnt_vnodecovered;
3752		VREF(vp);
3753		fp->f_vnode = vp;
3754		fp->f_data = vp;
3755		fp->f_offset = 0;
3756		vput(tvp);
3757		VFS_UNLOCK_GIANT(vfslocked);
3758		goto unionread;
3759	}
3760	VOP_UNLOCK(vp, 0, td);
3761	VFS_UNLOCK_GIANT(vfslocked);
3762	if (uap->basep != NULL) {
3763		error = copyout(&loff, uap->basep, sizeof(long));
3764	}
3765	td->td_retval[0] = uap->count - auio.uio_resid;
3766fail:
3767	fdrop(fp, td);
3768	return (error);
3769}
3770
3771#ifndef _SYS_SYSPROTO_H_
3772struct getdents_args {
3773	int fd;
3774	char *buf;
3775	size_t count;
3776};
3777#endif
3778int
3779getdents(td, uap)
3780	struct thread *td;
3781	register struct getdents_args /* {
3782		int fd;
3783		char *buf;
3784		u_int count;
3785	} */ *uap;
3786{
3787	struct getdirentries_args ap;
3788	ap.fd = uap->fd;
3789	ap.buf = uap->buf;
3790	ap.count = uap->count;
3791	ap.basep = NULL;
3792	return (getdirentries(td, &ap));
3793}
3794
3795/*
3796 * Set the mode mask for creation of filesystem nodes.
3797 */
3798#ifndef _SYS_SYSPROTO_H_
3799struct umask_args {
3800	int	newmask;
3801};
3802#endif
3803int
3804umask(td, uap)
3805	struct thread *td;
3806	struct umask_args /* {
3807		int newmask;
3808	} */ *uap;
3809{
3810	register struct filedesc *fdp;
3811
3812	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3813	fdp = td->td_proc->p_fd;
3814	td->td_retval[0] = fdp->fd_cmask;
3815	fdp->fd_cmask = uap->newmask & ALLPERMS;
3816	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3817	return (0);
3818}
3819
3820/*
3821 * Void all references to file by ripping underlying filesystem away from
3822 * vnode.
3823 */
3824#ifndef _SYS_SYSPROTO_H_
3825struct revoke_args {
3826	char	*path;
3827};
3828#endif
3829int
3830revoke(td, uap)
3831	struct thread *td;
3832	register struct revoke_args /* {
3833		char *path;
3834	} */ *uap;
3835{
3836	struct vnode *vp;
3837	struct vattr vattr;
3838	int error;
3839	struct nameidata nd;
3840	int vfslocked;
3841
3842	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3843	    UIO_USERSPACE, uap->path, td);
3844	if ((error = namei(&nd)) != 0)
3845		return (error);
3846	vfslocked = NDHASGIANT(&nd);
3847	vp = nd.ni_vp;
3848	NDFREE(&nd, NDF_ONLY_PNBUF);
3849	if (vp->v_type != VCHR) {
3850		error = EINVAL;
3851		goto out;
3852	}
3853#ifdef MAC
3854	error = mac_check_vnode_revoke(td->td_ucred, vp);
3855	if (error)
3856		goto out;
3857#endif
3858	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3859	if (error)
3860		goto out;
3861	if (td->td_ucred->cr_uid != vattr.va_uid) {
3862		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3863		    SUSER_ALLOWJAIL);
3864		if (error)
3865			goto out;
3866	}
3867	if (vcount(vp) > 1)
3868		VOP_REVOKE(vp, REVOKEALL);
3869out:
3870	vput(vp);
3871	VFS_UNLOCK_GIANT(vfslocked);
3872	return (error);
3873}
3874
3875/*
3876 * Convert a user file descriptor to a kernel file entry.
3877 * A reference on the file entry is held upon returning.
3878 */
3879int
3880getvnode(fdp, fd, fpp)
3881	struct filedesc *fdp;
3882	int fd;
3883	struct file **fpp;
3884{
3885	int error;
3886	struct file *fp;
3887
3888	fp = NULL;
3889	if (fdp == NULL)
3890		error = EBADF;
3891	else {
3892		FILEDESC_LOCK(fdp);
3893		if ((u_int)fd >= fdp->fd_nfiles ||
3894		    (fp = fdp->fd_ofiles[fd]) == NULL)
3895			error = EBADF;
3896		else if (fp->f_vnode == NULL) {
3897			fp = NULL;
3898			error = EINVAL;
3899		} else {
3900			fhold(fp);
3901			error = 0;
3902		}
3903		FILEDESC_UNLOCK(fdp);
3904	}
3905	*fpp = fp;
3906	return (error);
3907}
3908
3909/*
3910 * Get an (NFS) file handle.
3911 */
3912#ifndef _SYS_SYSPROTO_H_
3913struct lgetfh_args {
3914	char	*fname;
3915	fhandle_t *fhp;
3916};
3917#endif
3918int
3919lgetfh(td, uap)
3920	struct thread *td;
3921	register struct lgetfh_args *uap;
3922{
3923	struct nameidata nd;
3924	fhandle_t fh;
3925	register struct vnode *vp;
3926	int vfslocked;
3927	int error;
3928
3929	error = priv_check(td, PRIV_VFS_GETFH);
3930	if (error)
3931		return (error);
3932	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3933	    UIO_USERSPACE, uap->fname, td);
3934	error = namei(&nd);
3935	if (error)
3936		return (error);
3937	vfslocked = NDHASGIANT(&nd);
3938	NDFREE(&nd, NDF_ONLY_PNBUF);
3939	vp = nd.ni_vp;
3940	bzero(&fh, sizeof(fh));
3941	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3942	error = VOP_VPTOFH(vp, &fh.fh_fid);
3943	vput(vp);
3944	VFS_UNLOCK_GIANT(vfslocked);
3945	if (error)
3946		return (error);
3947	error = copyout(&fh, uap->fhp, sizeof (fh));
3948	return (error);
3949}
3950
3951#ifndef _SYS_SYSPROTO_H_
3952struct getfh_args {
3953	char	*fname;
3954	fhandle_t *fhp;
3955};
3956#endif
3957int
3958getfh(td, uap)
3959	struct thread *td;
3960	register struct getfh_args *uap;
3961{
3962	struct nameidata nd;
3963	fhandle_t fh;
3964	register struct vnode *vp;
3965	int vfslocked;
3966	int error;
3967
3968	error = priv_check(td, PRIV_VFS_GETFH);
3969	if (error)
3970		return (error);
3971	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3972	    UIO_USERSPACE, uap->fname, td);
3973	error = namei(&nd);
3974	if (error)
3975		return (error);
3976	vfslocked = NDHASGIANT(&nd);
3977	NDFREE(&nd, NDF_ONLY_PNBUF);
3978	vp = nd.ni_vp;
3979	bzero(&fh, sizeof(fh));
3980	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3981	error = VOP_VPTOFH(vp, &fh.fh_fid);
3982	vput(vp);
3983	VFS_UNLOCK_GIANT(vfslocked);
3984	if (error)
3985		return (error);
3986	error = copyout(&fh, uap->fhp, sizeof (fh));
3987	return (error);
3988}
3989
/*
 * Syscall for rpc.lockd to use to translate an NFS file handle into an
 * open descriptor.
 *
 * Warning: do not remove the priv_check() call or this becomes one giant
 * security hole.
 */
3997#ifndef _SYS_SYSPROTO_H_
3998struct fhopen_args {
3999	const struct fhandle *u_fhp;
4000	int flags;
4001};
4002#endif
4003int
4004fhopen(td, uap)
4005	struct thread *td;
4006	struct fhopen_args /* {
4007		const struct fhandle *u_fhp;
4008		int flags;
4009	} */ *uap;
4010{
4011	struct proc *p = td->td_proc;
4012	struct mount *mp;
4013	struct vnode *vp;
4014	struct fhandle fhp;
4015	struct vattr vat;
4016	struct vattr *vap = &vat;
4017	struct flock lf;
4018	struct file *fp;
4019	register struct filedesc *fdp = p->p_fd;
4020	int fmode, mode, error, type;
4021	struct file *nfp;
4022	int vfslocked;
4023	int indx;
4024
4025	error = priv_check(td, PRIV_VFS_FHOPEN);
4026	if (error)
4027		return (error);
4028	fmode = FFLAGS(uap->flags);
4029	/* why not allow a non-read/write open for our lockd? */
4030	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4031		return (EINVAL);
4032	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4033	if (error)
4034		return(error);
4035	/* find the mount point */
4036	mp = vfs_getvfs(&fhp.fh_fsid);
4037	if (mp == NULL)
4038		return (ESTALE);
4039	vfslocked = VFS_LOCK_GIANT(mp);
4040	/* now give me my vnode, it gets returned to me locked */
4041	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4042	if (error)
4043		goto out;
4044	/*
4045	 * from now on we have to make sure not
4046	 * to forget about the vnode
4047	 * any error that causes an abort must vput(vp)
4048	 * just set error = err and 'goto bad;'.
4049	 */
4050
4051	/*
4052	 * from vn_open
4053	 */
4054	if (vp->v_type == VLNK) {
4055		error = EMLINK;
4056		goto bad;
4057	}
4058	if (vp->v_type == VSOCK) {
4059		error = EOPNOTSUPP;
4060		goto bad;
4061	}
4062	mode = 0;
4063	if (fmode & (FWRITE | O_TRUNC)) {
4064		if (vp->v_type == VDIR) {
4065			error = EISDIR;
4066			goto bad;
4067		}
4068		error = vn_writechk(vp);
4069		if (error)
4070			goto bad;
4071		mode |= VWRITE;
4072	}
4073	if (fmode & FREAD)
4074		mode |= VREAD;
4075	if (fmode & O_APPEND)
4076		mode |= VAPPEND;
4077#ifdef MAC
4078	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4079	if (error)
4080		goto bad;
4081#endif
4082	if (mode) {
4083		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4084		if (error)
4085			goto bad;
4086	}
4087	if (fmode & O_TRUNC) {
4088		VOP_UNLOCK(vp, 0, td);				/* XXX */
4089		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4090			vrele(vp);
4091			goto out;
4092		}
4093		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4094		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4095#ifdef MAC
4096		/*
4097		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4098		 * should be right.
4099		 */
4100		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4101		if (error == 0) {
4102#endif
4103			VATTR_NULL(vap);
4104			vap->va_size = 0;
4105			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4106#ifdef MAC
4107		}
4108#endif
4109		vn_finished_write(mp);
4110		if (error)
4111			goto bad;
4112	}
4113	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4114	if (error)
4115		goto bad;
4116
4117	if (fmode & FWRITE)
4118		vp->v_writecount++;
4119
4120	/*
4121	 * end of vn_open code
4122	 */
4123
4124	if ((error = falloc(td, &nfp, &indx)) != 0) {
4125		if (fmode & FWRITE)
4126			vp->v_writecount--;
4127		goto bad;
4128	}
4129	/* An extra reference on `nfp' has been held for us by falloc(). */
4130	fp = nfp;
4131
4132	nfp->f_vnode = vp;
4133	nfp->f_data = vp;
4134	nfp->f_flag = fmode & FMASK;
4135	nfp->f_ops = &vnops;
4136	nfp->f_type = DTYPE_VNODE;
4137	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4138		lf.l_whence = SEEK_SET;
4139		lf.l_start = 0;
4140		lf.l_len = 0;
4141		if (fmode & O_EXLOCK)
4142			lf.l_type = F_WRLCK;
4143		else
4144			lf.l_type = F_RDLCK;
4145		type = F_FLOCK;
4146		if ((fmode & FNONBLOCK) == 0)
4147			type |= F_WAIT;
4148		VOP_UNLOCK(vp, 0, td);
4149		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4150			    type)) != 0) {
4151			/*
4152			 * The lock request failed.  Normally close the
4153			 * descriptor but handle the case where someone might
4154			 * have dup()d or close()d it when we weren't looking.
4155			 */
4156			fdclose(fdp, fp, indx, td);
4157
4158			/*
4159			 * release our private reference
4160			 */
4161			fdrop(fp, td);
4162			goto out;
4163		}
4164		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4165		fp->f_flag |= FHASLOCK;
4166	}
4167
4168	VOP_UNLOCK(vp, 0, td);
4169	fdrop(fp, td);
4170	vfs_rel(mp);
4171	VFS_UNLOCK_GIANT(vfslocked);
4172	td->td_retval[0] = indx;
4173	return (0);
4174
4175bad:
4176	vput(vp);
4177out:
4178	vfs_rel(mp);
4179	VFS_UNLOCK_GIANT(vfslocked);
4180	return (error);
4181}
4182
4183/*
4184 * Stat an (NFS) file handle.
4185 */
4186#ifndef _SYS_SYSPROTO_H_
4187struct fhstat_args {
4188	struct fhandle *u_fhp;
4189	struct stat *sb;
4190};
4191#endif
4192int
4193fhstat(td, uap)
4194	struct thread *td;
4195	register struct fhstat_args /* {
4196		struct fhandle *u_fhp;
4197		struct stat *sb;
4198	} */ *uap;
4199{
4200	struct stat sb;
4201	fhandle_t fh;
4202	struct mount *mp;
4203	struct vnode *vp;
4204	int vfslocked;
4205	int error;
4206
4207	error = priv_check(td, PRIV_VFS_FHSTAT);
4208	if (error)
4209		return (error);
4210	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4211	if (error)
4212		return (error);
4213	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4214		return (ESTALE);
4215	vfslocked = VFS_LOCK_GIANT(mp);
4216	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4217		vfs_rel(mp);
4218		VFS_UNLOCK_GIANT(vfslocked);
4219		return (error);
4220	}
4221	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4222	vput(vp);
4223	vfs_rel(mp);
4224	VFS_UNLOCK_GIANT(vfslocked);
4225	if (error)
4226		return (error);
4227	error = copyout(&sb, uap->sb, sizeof(sb));
4228	return (error);
4229}
4230
4231/*
4232 * Implement fstatfs() for (NFS) file handles.
4233 */
4234#ifndef _SYS_SYSPROTO_H_
4235struct fhstatfs_args {
4236	struct fhandle *u_fhp;
4237	struct statfs *buf;
4238};
4239#endif
4240int
4241fhstatfs(td, uap)
4242	struct thread *td;
4243	struct fhstatfs_args /* {
4244		struct fhandle *u_fhp;
4245		struct statfs *buf;
4246	} */ *uap;
4247{
4248	struct statfs sf;
4249	fhandle_t fh;
4250	int error;
4251
4252	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4253	if (error)
4254		return (error);
4255	error = kern_fhstatfs(td, fh, &sf);
4256	if (error)
4257		return (error);
4258	return (copyout(&sf, uap->buf, sizeof(sf)));
4259}
4260
4261int
4262kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4263{
4264	struct statfs *sp;
4265	struct mount *mp;
4266	struct vnode *vp;
4267	int vfslocked;
4268	int error;
4269
4270	error = priv_check(td, PRIV_VFS_FHSTATFS);
4271	if (error)
4272		return (error);
4273	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4274		return (ESTALE);
4275	vfslocked = VFS_LOCK_GIANT(mp);
4276	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4277	if (error) {
4278		VFS_UNLOCK_GIANT(vfslocked);
4279		vfs_rel(mp);
4280		return (error);
4281	}
4282	vput(vp);
4283	error = prison_canseemount(td->td_ucred, mp);
4284	if (error)
4285		goto out;
4286#ifdef MAC
4287	error = mac_check_mount_stat(td->td_ucred, mp);
4288	if (error)
4289		goto out;
4290#endif
4291	/*
4292	 * Set these in case the underlying filesystem fails to do so.
4293	 */
4294	sp = &mp->mnt_stat;
4295	sp->f_version = STATFS_VERSION;
4296	sp->f_namemax = NAME_MAX;
4297	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4298	error = VFS_STATFS(mp, sp, td);
4299	if (error == 0)
4300		*buf = *sp;
4301out:
4302	vfs_rel(mp);
4303	VFS_UNLOCK_GIANT(vfslocked);
4304	return (error);
4305}
4306