vfs_syscalls.c revision 168267
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 168267 2007-04-02 13:40:38Z jhb $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/mutex.h>
51#include <sys/sysproto.h>
52#include <sys/namei.h>
53#include <sys/filedesc.h>
54#include <sys/kernel.h>
55#include <sys/fcntl.h>
56#include <sys/file.h>
57#include <sys/limits.h>
58#include <sys/linker.h>
59#include <sys/stat.h>
60#include <sys/sx.h>
61#include <sys/unistd.h>
62#include <sys/vnode.h>
63#include <sys/priv.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/jail.h>
67#include <sys/syscallsubr.h>
68#include <sys/sysctl.h>
69
70#include <machine/stdarg.h>
71
72#include <security/audit/audit.h>
73#include <security/mac/mac_framework.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/uma.h>
79
80static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83static int setfmode(struct thread *td, struct vnode *, int);
84static int setfflags(struct thread *td, struct vnode *, int);
85static int setutimes(struct thread *td, struct vnode *,
86    const struct timespec *, int, int);
87static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88    struct thread *td);
89
90/*
91 * The module initialization routine for POSIX asynchronous I/O will
92 * set this to the version of AIO that it implements.  (Zero means
93 * that it is not implemented.)  This value is used here by pathconf()
94 * and in kern_descrip.c by fpathconf().
95 */
96int async_io_version;
97
98#ifdef DEBUG
99static int syncprt = 0;
100SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
101#endif
102
103/*
104 * Sync each mounted filesystem.
105 */
106#ifndef _SYS_SYSPROTO_H_
107struct sync_args {
108	int     dummy;
109};
110#endif
111/* ARGSUSED */
112int
113sync(td, uap)
114	struct thread *td;
115	struct sync_args *uap;
116{
117	struct mount *mp, *nmp;
118	int vfslocked;
119
120	mtx_lock(&mountlist_mtx);
121	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
122		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
123			nmp = TAILQ_NEXT(mp, mnt_list);
124			continue;
125		}
126		vfslocked = VFS_LOCK_GIANT(mp);
127		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
128		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
129			MNT_ILOCK(mp);
130			mp->mnt_noasync++;
131			mp->mnt_kern_flag &= ~MNTK_ASYNC;
132			MNT_IUNLOCK(mp);
133			vfs_msync(mp, MNT_NOWAIT);
134			VFS_SYNC(mp, MNT_NOWAIT, td);
135			MNT_ILOCK(mp);
136			mp->mnt_noasync--;
137			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
138			    mp->mnt_noasync == 0)
139				mp->mnt_kern_flag |= MNTK_ASYNC;
140			MNT_IUNLOCK(mp);
141			vn_finished_write(mp);
142		}
143		VFS_UNLOCK_GIANT(vfslocked);
144		mtx_lock(&mountlist_mtx);
145		nmp = TAILQ_NEXT(mp, mnt_list);
146		vfs_unbusy(mp, td);
147	}
148	mtx_unlock(&mountlist_mtx);
149	return (0);
150}
151
152/* XXX PRISON: could be per prison flag */
153static int prison_quotas;
154#if 0
155SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156#endif
157
158/*
159 * Change filesystem quotas.
160 */
161#ifndef _SYS_SYSPROTO_H_
162struct quotactl_args {
163	char *path;
164	int cmd;
165	int uid;
166	caddr_t arg;
167};
168#endif
169int
170quotactl(td, uap)
171	struct thread *td;
172	register struct quotactl_args /* {
173		char *path;
174		int cmd;
175		int uid;
176		caddr_t arg;
177	} */ *uap;
178{
179	struct mount *mp;
180	int vfslocked;
181	int error;
182	struct nameidata nd;
183
184	AUDIT_ARG(cmd, uap->cmd);
185	AUDIT_ARG(uid, uap->uid);
186	if (jailed(td->td_ucred) && !prison_quotas)
187		return (EPERM);
188	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
189	   UIO_USERSPACE, uap->path, td);
190	if ((error = namei(&nd)) != 0)
191		return (error);
192	vfslocked = NDHASGIANT(&nd);
193	NDFREE(&nd, NDF_ONLY_PNBUF);
194	mp = nd.ni_vp->v_mount;
195	if ((error = vfs_busy(mp, 0, NULL, td))) {
196		vrele(nd.ni_vp);
197		VFS_UNLOCK_GIANT(vfslocked);
198		return (error);
199	}
200	vrele(nd.ni_vp);
201	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202	vfs_unbusy(mp, td);
203	VFS_UNLOCK_GIANT(vfslocked);
204	return (error);
205}
206
207/*
208 * Get filesystem statistics.
209 */
210#ifndef _SYS_SYSPROTO_H_
211struct statfs_args {
212	char *path;
213	struct statfs *buf;
214};
215#endif
216int
217statfs(td, uap)
218	struct thread *td;
219	register struct statfs_args /* {
220		char *path;
221		struct statfs *buf;
222	} */ *uap;
223{
224	struct statfs sf;
225	int error;
226
227	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
228	if (error == 0)
229		error = copyout(&sf, uap->buf, sizeof(sf));
230	return (error);
231}
232
233int
234kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
235    struct statfs *buf)
236{
237	struct mount *mp;
238	struct statfs *sp, sb;
239	int vfslocked;
240	int error;
241	struct nameidata nd;
242
243	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
244	    pathseg, path, td);
245	error = namei(&nd);
246	if (error)
247		return (error);
248	vfslocked = NDHASGIANT(&nd);
249	mp = nd.ni_vp->v_mount;
250	vfs_ref(mp);
251	NDFREE(&nd, NDF_ONLY_PNBUF);
252	vput(nd.ni_vp);
253#ifdef MAC
254	error = mac_check_mount_stat(td->td_ucred, mp);
255	if (error)
256		goto out;
257#endif
258	/*
259	 * Set these in case the underlying filesystem fails to do so.
260	 */
261	sp = &mp->mnt_stat;
262	sp->f_version = STATFS_VERSION;
263	sp->f_namemax = NAME_MAX;
264	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
265	error = VFS_STATFS(mp, sp, td);
266	if (error)
267		goto out;
268	if (priv_check(td, PRIV_VFS_GENERATION)) {
269		bcopy(sp, &sb, sizeof(sb));
270		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
271		prison_enforce_statfs(td->td_ucred, mp, &sb);
272		sp = &sb;
273	}
274	*buf = *sp;
275out:
276	vfs_rel(mp);
277	VFS_UNLOCK_GIANT(vfslocked);
278	if (mtx_owned(&Giant))
279		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
280	return (error);
281}
282
283/*
284 * Get filesystem statistics.
285 */
286#ifndef _SYS_SYSPROTO_H_
287struct fstatfs_args {
288	int fd;
289	struct statfs *buf;
290};
291#endif
292int
293fstatfs(td, uap)
294	struct thread *td;
295	register struct fstatfs_args /* {
296		int fd;
297		struct statfs *buf;
298	} */ *uap;
299{
300	struct statfs sf;
301	int error;
302
303	error = kern_fstatfs(td, uap->fd, &sf);
304	if (error == 0)
305		error = copyout(&sf, uap->buf, sizeof(sf));
306	return (error);
307}
308
309int
310kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
311{
312	struct file *fp;
313	struct mount *mp;
314	struct statfs *sp, sb;
315	int vfslocked;
316	struct vnode *vp;
317	int error;
318
319	AUDIT_ARG(fd, fd);
320	error = getvnode(td->td_proc->p_fd, fd, &fp);
321	if (error)
322		return (error);
323	vp = fp->f_vnode;
324	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
325	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
326#ifdef AUDIT
327	AUDIT_ARG(vnode, vp, ARG_VNODE1);
328#endif
329	mp = vp->v_mount;
330	if (mp)
331		vfs_ref(mp);
332	VOP_UNLOCK(vp, 0, td);
333	fdrop(fp, td);
334	if (vp->v_iflag & VI_DOOMED) {
335		error = EBADF;
336		goto out;
337	}
338#ifdef MAC
339	error = mac_check_mount_stat(td->td_ucred, mp);
340	if (error)
341		goto out;
342#endif
343	/*
344	 * Set these in case the underlying filesystem fails to do so.
345	 */
346	sp = &mp->mnt_stat;
347	sp->f_version = STATFS_VERSION;
348	sp->f_namemax = NAME_MAX;
349	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
350	error = VFS_STATFS(mp, sp, td);
351	if (error)
352		goto out;
353	if (priv_check(td, PRIV_VFS_GENERATION)) {
354		bcopy(sp, &sb, sizeof(sb));
355		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
356		prison_enforce_statfs(td->td_ucred, mp, &sb);
357		sp = &sb;
358	}
359	*buf = *sp;
360out:
361	if (mp)
362		vfs_rel(mp);
363	VFS_UNLOCK_GIANT(vfslocked);
364	return (error);
365}
366
367/*
368 * Get statistics on all filesystems.
369 */
370#ifndef _SYS_SYSPROTO_H_
371struct getfsstat_args {
372	struct statfs *buf;
373	long bufsize;
374	int flags;
375};
376#endif
377int
378getfsstat(td, uap)
379	struct thread *td;
380	register struct getfsstat_args /* {
381		struct statfs *buf;
382		long bufsize;
383		int flags;
384	} */ *uap;
385{
386
387	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
388	    uap->flags));
389}
390
391/*
392 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
393 * 	The caller is responsible for freeing memory which will be allocated
394 *	in '*buf'.
395 */
396int
397kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
398    enum uio_seg bufseg, int flags)
399{
400	struct mount *mp, *nmp;
401	struct statfs *sfsp, *sp, sb;
402	size_t count, maxcount;
403	int vfslocked;
404	int error;
405
406	maxcount = bufsize / sizeof(struct statfs);
407	if (bufsize == 0)
408		sfsp = NULL;
409	else if (bufseg == UIO_USERSPACE)
410		sfsp = *buf;
411	else /* if (bufseg == UIO_SYSSPACE) */ {
412		count = 0;
413		mtx_lock(&mountlist_mtx);
414		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
415			count++;
416		}
417		mtx_unlock(&mountlist_mtx);
418		if (maxcount > count)
419			maxcount = count;
420		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
421		    M_WAITOK);
422	}
423	count = 0;
424	mtx_lock(&mountlist_mtx);
425	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
426		if (prison_canseemount(td->td_ucred, mp) != 0) {
427			nmp = TAILQ_NEXT(mp, mnt_list);
428			continue;
429		}
430#ifdef MAC
431		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
432			nmp = TAILQ_NEXT(mp, mnt_list);
433			continue;
434		}
435#endif
436		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
437			nmp = TAILQ_NEXT(mp, mnt_list);
438			continue;
439		}
440		vfslocked = VFS_LOCK_GIANT(mp);
441		if (sfsp && count < maxcount) {
442			sp = &mp->mnt_stat;
443			/*
444			 * Set these in case the underlying filesystem
445			 * fails to do so.
446			 */
447			sp->f_version = STATFS_VERSION;
448			sp->f_namemax = NAME_MAX;
449			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
450			/*
451			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
452			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
453			 * overrides MNT_WAIT.
454			 */
455			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
456			    (flags & MNT_WAIT)) &&
457			    (error = VFS_STATFS(mp, sp, td))) {
458				VFS_UNLOCK_GIANT(vfslocked);
459				mtx_lock(&mountlist_mtx);
460				nmp = TAILQ_NEXT(mp, mnt_list);
461				vfs_unbusy(mp, td);
462				continue;
463			}
464			if (priv_check(td, PRIV_VFS_GENERATION)) {
465				bcopy(sp, &sb, sizeof(sb));
466				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
467				prison_enforce_statfs(td->td_ucred, mp, &sb);
468				sp = &sb;
469			}
470			if (bufseg == UIO_SYSSPACE)
471				bcopy(sp, sfsp, sizeof(*sp));
472			else /* if (bufseg == UIO_USERSPACE) */ {
473				error = copyout(sp, sfsp, sizeof(*sp));
474				if (error) {
475					vfs_unbusy(mp, td);
476					VFS_UNLOCK_GIANT(vfslocked);
477					return (error);
478				}
479			}
480			sfsp++;
481		}
482		VFS_UNLOCK_GIANT(vfslocked);
483		count++;
484		mtx_lock(&mountlist_mtx);
485		nmp = TAILQ_NEXT(mp, mnt_list);
486		vfs_unbusy(mp, td);
487	}
488	mtx_unlock(&mountlist_mtx);
489	if (sfsp && count > maxcount)
490		td->td_retval[0] = maxcount;
491	else
492		td->td_retval[0] = count;
493	return (0);
494}
495
496#ifdef COMPAT_FREEBSD4
497/*
498 * Get old format filesystem statistics.
499 */
500static void cvtstatfs(struct statfs *, struct ostatfs *);
501
502#ifndef _SYS_SYSPROTO_H_
503struct freebsd4_statfs_args {
504	char *path;
505	struct ostatfs *buf;
506};
507#endif
508int
509freebsd4_statfs(td, uap)
510	struct thread *td;
511	struct freebsd4_statfs_args /* {
512		char *path;
513		struct ostatfs *buf;
514	} */ *uap;
515{
516	struct ostatfs osb;
517	struct statfs sf;
518	int error;
519
520	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
521	if (error)
522		return (error);
523	cvtstatfs(&sf, &osb);
524	return (copyout(&osb, uap->buf, sizeof(osb)));
525}
526
527/*
528 * Get filesystem statistics.
529 */
530#ifndef _SYS_SYSPROTO_H_
531struct freebsd4_fstatfs_args {
532	int fd;
533	struct ostatfs *buf;
534};
535#endif
536int
537freebsd4_fstatfs(td, uap)
538	struct thread *td;
539	struct freebsd4_fstatfs_args /* {
540		int fd;
541		struct ostatfs *buf;
542	} */ *uap;
543{
544	struct ostatfs osb;
545	struct statfs sf;
546	int error;
547
548	error = kern_fstatfs(td, uap->fd, &sf);
549	if (error)
550		return (error);
551	cvtstatfs(&sf, &osb);
552	return (copyout(&osb, uap->buf, sizeof(osb)));
553}
554
555/*
556 * Get statistics on all filesystems.
557 */
558#ifndef _SYS_SYSPROTO_H_
559struct freebsd4_getfsstat_args {
560	struct ostatfs *buf;
561	long bufsize;
562	int flags;
563};
564#endif
565int
566freebsd4_getfsstat(td, uap)
567	struct thread *td;
568	register struct freebsd4_getfsstat_args /* {
569		struct ostatfs *buf;
570		long bufsize;
571		int flags;
572	} */ *uap;
573{
574	struct statfs *buf, *sp;
575	struct ostatfs osb;
576	size_t count, size;
577	int error;
578
579	count = uap->bufsize / sizeof(struct ostatfs);
580	size = count * sizeof(struct statfs);
581	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
582	if (size > 0) {
583		count = td->td_retval[0];
584		sp = buf;
585		while (count > 0 && error == 0) {
586			cvtstatfs(sp, &osb);
587			error = copyout(&osb, uap->buf, sizeof(osb));
588			sp++;
589			uap->buf++;
590			count--;
591		}
592		free(buf, M_TEMP);
593	}
594	return (error);
595}
596
597/*
598 * Implement fstatfs() for (NFS) file handles.
599 */
600#ifndef _SYS_SYSPROTO_H_
601struct freebsd4_fhstatfs_args {
602	struct fhandle *u_fhp;
603	struct ostatfs *buf;
604};
605#endif
606int
607freebsd4_fhstatfs(td, uap)
608	struct thread *td;
609	struct freebsd4_fhstatfs_args /* {
610		struct fhandle *u_fhp;
611		struct ostatfs *buf;
612	} */ *uap;
613{
614	struct ostatfs osb;
615	struct statfs sf;
616	fhandle_t fh;
617	int error;
618
619	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
620	if (error)
621		return (error);
622	error = kern_fhstatfs(td, fh, &sf);
623	if (error)
624		return (error);
625	cvtstatfs(&sf, &osb);
626	return (copyout(&osb, uap->buf, sizeof(osb)));
627}
628
629/*
630 * Convert a new format statfs structure to an old format statfs structure.
631 */
632static void
633cvtstatfs(nsp, osp)
634	struct statfs *nsp;
635	struct ostatfs *osp;
636{
637
638	bzero(osp, sizeof(*osp));
639	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
640	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
641	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
642	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
643	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
644	osp->f_files = MIN(nsp->f_files, LONG_MAX);
645	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
646	osp->f_owner = nsp->f_owner;
647	osp->f_type = nsp->f_type;
648	osp->f_flags = nsp->f_flags;
649	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
650	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
651	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
652	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
653	strlcpy(osp->f_fstypename, nsp->f_fstypename,
654	    MIN(MFSNAMELEN, OMFSNAMELEN));
655	strlcpy(osp->f_mntonname, nsp->f_mntonname,
656	    MIN(MNAMELEN, OMNAMELEN));
657	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
658	    MIN(MNAMELEN, OMNAMELEN));
659	osp->f_fsid = nsp->f_fsid;
660}
661#endif /* COMPAT_FREEBSD4 */
662
663/*
664 * Change current working directory to a given file descriptor.
665 */
666#ifndef _SYS_SYSPROTO_H_
667struct fchdir_args {
668	int	fd;
669};
670#endif
671int
672fchdir(td, uap)
673	struct thread *td;
674	struct fchdir_args /* {
675		int fd;
676	} */ *uap;
677{
678	register struct filedesc *fdp = td->td_proc->p_fd;
679	struct vnode *vp, *tdp, *vpold;
680	struct mount *mp;
681	struct file *fp;
682	int vfslocked;
683	int error;
684
685	AUDIT_ARG(fd, uap->fd);
686	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
687		return (error);
688	vp = fp->f_vnode;
689	VREF(vp);
690	fdrop(fp, td);
691	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
692	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
693	AUDIT_ARG(vnode, vp, ARG_VNODE1);
694	error = change_dir(vp, td);
695	while (!error && (mp = vp->v_mountedhere) != NULL) {
696		int tvfslocked;
697		if (vfs_busy(mp, 0, 0, td))
698			continue;
699		tvfslocked = VFS_LOCK_GIANT(mp);
700		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
701		vfs_unbusy(mp, td);
702		if (error) {
703			VFS_UNLOCK_GIANT(tvfslocked);
704			break;
705		}
706		vput(vp);
707		VFS_UNLOCK_GIANT(vfslocked);
708		vp = tdp;
709		vfslocked = tvfslocked;
710	}
711	if (error) {
712		vput(vp);
713		VFS_UNLOCK_GIANT(vfslocked);
714		return (error);
715	}
716	VOP_UNLOCK(vp, 0, td);
717	VFS_UNLOCK_GIANT(vfslocked);
718	FILEDESC_LOCK_FAST(fdp);
719	vpold = fdp->fd_cdir;
720	fdp->fd_cdir = vp;
721	FILEDESC_UNLOCK_FAST(fdp);
722	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
723	vrele(vpold);
724	VFS_UNLOCK_GIANT(vfslocked);
725	return (0);
726}
727
728/*
729 * Change current working directory (``.'').
730 */
731#ifndef _SYS_SYSPROTO_H_
732struct chdir_args {
733	char	*path;
734};
735#endif
736int
737chdir(td, uap)
738	struct thread *td;
739	struct chdir_args /* {
740		char *path;
741	} */ *uap;
742{
743
744	return (kern_chdir(td, uap->path, UIO_USERSPACE));
745}
746
747int
748kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
749{
750	register struct filedesc *fdp = td->td_proc->p_fd;
751	int error;
752	struct nameidata nd;
753	struct vnode *vp;
754	int vfslocked;
755
756	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
757	    pathseg, path, td);
758	if ((error = namei(&nd)) != 0)
759		return (error);
760	vfslocked = NDHASGIANT(&nd);
761	if ((error = change_dir(nd.ni_vp, td)) != 0) {
762		vput(nd.ni_vp);
763		VFS_UNLOCK_GIANT(vfslocked);
764		NDFREE(&nd, NDF_ONLY_PNBUF);
765		return (error);
766	}
767	VOP_UNLOCK(nd.ni_vp, 0, td);
768	VFS_UNLOCK_GIANT(vfslocked);
769	NDFREE(&nd, NDF_ONLY_PNBUF);
770	FILEDESC_LOCK_FAST(fdp);
771	vp = fdp->fd_cdir;
772	fdp->fd_cdir = nd.ni_vp;
773	FILEDESC_UNLOCK_FAST(fdp);
774	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
775	vrele(vp);
776	VFS_UNLOCK_GIANT(vfslocked);
777	return (0);
778}
779
780/*
781 * Helper function for raised chroot(2) security function:  Refuse if
782 * any filedescriptors are open directories.
783 */
784static int
785chroot_refuse_vdir_fds(fdp)
786	struct filedesc *fdp;
787{
788	struct vnode *vp;
789	struct file *fp;
790	int fd;
791
792	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
793	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
794		fp = fget_locked(fdp, fd);
795		if (fp == NULL)
796			continue;
797		if (fp->f_type == DTYPE_VNODE) {
798			vp = fp->f_vnode;
799			if (vp->v_type == VDIR)
800				return (EPERM);
801		}
802	}
803	return (0);
804}
805
806/*
807 * This sysctl determines if we will allow a process to chroot(2) if it
808 * has a directory open:
809 *	0: disallowed for all processes.
810 *	1: allowed for processes that were not already chroot(2)'ed.
811 *	2: allowed for all processes.
812 */
813
814static int chroot_allow_open_directories = 1;
815
816SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
817     &chroot_allow_open_directories, 0, "");
818
819/*
820 * Change notion of root (``/'') directory.
821 */
822#ifndef _SYS_SYSPROTO_H_
823struct chroot_args {
824	char	*path;
825};
826#endif
827int
828chroot(td, uap)
829	struct thread *td;
830	struct chroot_args /* {
831		char *path;
832	} */ *uap;
833{
834	int error;
835	struct nameidata nd;
836	int vfslocked;
837
838	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
839	    SUSER_ALLOWJAIL);
840	if (error)
841		return (error);
842	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
843	    UIO_USERSPACE, uap->path, td);
844	error = namei(&nd);
845	if (error)
846		goto error;
847	vfslocked = NDHASGIANT(&nd);
848	if ((error = change_dir(nd.ni_vp, td)) != 0)
849		goto e_vunlock;
850#ifdef MAC
851	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
852		goto e_vunlock;
853#endif
854	VOP_UNLOCK(nd.ni_vp, 0, td);
855	error = change_root(nd.ni_vp, td);
856	vrele(nd.ni_vp);
857	VFS_UNLOCK_GIANT(vfslocked);
858	NDFREE(&nd, NDF_ONLY_PNBUF);
859	return (error);
860e_vunlock:
861	vput(nd.ni_vp);
862	VFS_UNLOCK_GIANT(vfslocked);
863error:
864	NDFREE(&nd, NDF_ONLY_PNBUF);
865	return (error);
866}
867
868/*
869 * Common routine for chroot and chdir.  Callers must provide a locked vnode
870 * instance.
871 */
872int
873change_dir(vp, td)
874	struct vnode *vp;
875	struct thread *td;
876{
877	int error;
878
879	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
880	if (vp->v_type != VDIR)
881		return (ENOTDIR);
882#ifdef MAC
883	error = mac_check_vnode_chdir(td->td_ucred, vp);
884	if (error)
885		return (error);
886#endif
887	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
888	return (error);
889}
890
891/*
892 * Common routine for kern_chroot() and jail_attach().  The caller is
893 * responsible for invoking priv_check() and mac_check_chroot() to authorize
894 * this operation.
895 */
896int
897change_root(vp, td)
898	struct vnode *vp;
899	struct thread *td;
900{
901	struct filedesc *fdp;
902	struct vnode *oldvp;
903	int vfslocked;
904	int error;
905
906	VFS_ASSERT_GIANT(vp->v_mount);
907	fdp = td->td_proc->p_fd;
908	FILEDESC_LOCK(fdp);
909	if (chroot_allow_open_directories == 0 ||
910	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
911		error = chroot_refuse_vdir_fds(fdp);
912		if (error) {
913			FILEDESC_UNLOCK(fdp);
914			return (error);
915		}
916	}
917	oldvp = fdp->fd_rdir;
918	fdp->fd_rdir = vp;
919	VREF(fdp->fd_rdir);
920	if (!fdp->fd_jdir) {
921		fdp->fd_jdir = vp;
922		VREF(fdp->fd_jdir);
923	}
924	FILEDESC_UNLOCK(fdp);
925	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
926	vrele(oldvp);
927	VFS_UNLOCK_GIANT(vfslocked);
928	return (0);
929}
930
931/*
932 * Check permissions, allocate an open file structure, and call the device
933 * open routine if any.
934 */
935#ifndef _SYS_SYSPROTO_H_
936struct open_args {
937	char	*path;
938	int	flags;
939	int	mode;
940};
941#endif
942int
943open(td, uap)
944	struct thread *td;
945	register struct open_args /* {
946		char *path;
947		int flags;
948		int mode;
949	} */ *uap;
950{
951
952	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
953}
954
955int
956kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
957    int mode)
958{
959	struct proc *p = td->td_proc;
960	struct filedesc *fdp = p->p_fd;
961	struct file *fp;
962	struct vnode *vp;
963	struct vattr vat;
964	struct mount *mp;
965	int cmode;
966	struct file *nfp;
967	int type, indx, error;
968	struct flock lf;
969	struct nameidata nd;
970	int vfslocked;
971
972	AUDIT_ARG(fflags, flags);
973	AUDIT_ARG(mode, mode);
974	if ((flags & O_ACCMODE) == O_ACCMODE)
975		return (EINVAL);
976	flags = FFLAGS(flags);
977	error = falloc(td, &nfp, &indx);
978	if (error)
979		return (error);
980	/* An extra reference on `nfp' has been held for us by falloc(). */
981	fp = nfp;
982	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
983	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
984	td->td_dupfd = -1;		/* XXX check for fdopen */
985	error = vn_open(&nd, &flags, cmode, indx);
986	if (error) {
987		/*
988		 * If the vn_open replaced the method vector, something
989		 * wonderous happened deep below and we just pass it up
990		 * pretending we know what we do.
991		 */
992		if (error == ENXIO && fp->f_ops != &badfileops) {
993			fdrop(fp, td);
994			td->td_retval[0] = indx;
995			return (0);
996		}
997
998		/*
999		 * handle special fdopen() case.  bleh.  dupfdopen() is
1000		 * responsible for dropping the old contents of ofiles[indx]
1001		 * if it succeeds.
1002		 */
1003		if ((error == ENODEV || error == ENXIO) &&
1004		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1005		    (error =
1006			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1007			td->td_retval[0] = indx;
1008			fdrop(fp, td);
1009			return (0);
1010		}
1011		/*
1012		 * Clean up the descriptor, but only if another thread hadn't
1013		 * replaced or closed it.
1014		 */
1015		fdclose(fdp, fp, indx, td);
1016		fdrop(fp, td);
1017
1018		if (error == ERESTART)
1019			error = EINTR;
1020		return (error);
1021	}
1022	td->td_dupfd = 0;
1023	vfslocked = NDHASGIANT(&nd);
1024	NDFREE(&nd, NDF_ONLY_PNBUF);
1025	vp = nd.ni_vp;
1026
1027	FILEDESC_LOCK_FAST(fdp);
1028	FILE_LOCK(fp);
1029	fp->f_vnode = vp;
1030	if (fp->f_data == NULL)
1031		fp->f_data = vp;
1032	fp->f_flag = flags & FMASK;
1033	if (fp->f_ops == &badfileops)
1034		fp->f_ops = &vnops;
1035	fp->f_seqcount = 1;
1036	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1037	FILE_UNLOCK(fp);
1038	FILEDESC_UNLOCK_FAST(fdp);
1039
1040	VOP_UNLOCK(vp, 0, td);
1041	if (flags & (O_EXLOCK | O_SHLOCK)) {
1042		lf.l_whence = SEEK_SET;
1043		lf.l_start = 0;
1044		lf.l_len = 0;
1045		if (flags & O_EXLOCK)
1046			lf.l_type = F_WRLCK;
1047		else
1048			lf.l_type = F_RDLCK;
1049		type = F_FLOCK;
1050		if ((flags & FNONBLOCK) == 0)
1051			type |= F_WAIT;
1052		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1053			    type)) != 0)
1054			goto bad;
1055		fp->f_flag |= FHASLOCK;
1056	}
1057	if (flags & O_TRUNC) {
1058		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1059			goto bad;
1060		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1061		VATTR_NULL(&vat);
1062		vat.va_size = 0;
1063		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1064#ifdef MAC
1065		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1066		if (error == 0)
1067#endif
1068			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1069		VOP_UNLOCK(vp, 0, td);
1070		vn_finished_write(mp);
1071		if (error)
1072			goto bad;
1073	}
1074	VFS_UNLOCK_GIANT(vfslocked);
1075	/*
1076	 * Release our private reference, leaving the one associated with
1077	 * the descriptor table intact.
1078	 */
1079	fdrop(fp, td);
1080	td->td_retval[0] = indx;
1081	return (0);
1082bad:
1083	VFS_UNLOCK_GIANT(vfslocked);
1084	fdclose(fdp, fp, indx, td);
1085	fdrop(fp, td);
1086	return (error);
1087}
1088
#ifdef COMPAT_43
/*
 * Old creat(): implemented as an open for writing with O_CREAT and
 * O_TRUNC on the user-supplied 'path'.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1112
1113/*
1114 * Create a special file.
1115 */
1116#ifndef _SYS_SYSPROTO_H_
1117struct mknod_args {
1118	char	*path;
1119	int	mode;
1120	int	dev;
1121};
1122#endif
1123int
1124mknod(td, uap)
1125	struct thread *td;
1126	register struct mknod_args /* {
1127		char *path;
1128		int mode;
1129		int dev;
1130	} */ *uap;
1131{
1132
1133	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1134}
1135
1136int
1137kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1138    int dev)
1139{
1140	struct vnode *vp;
1141	struct mount *mp;
1142	struct vattr vattr;
1143	int error;
1144	int whiteout = 0;
1145	struct nameidata nd;
1146	int vfslocked;
1147
1148	AUDIT_ARG(mode, mode);
1149	AUDIT_ARG(dev, dev);
1150	switch (mode & S_IFMT) {
1151	case S_IFCHR:
1152	case S_IFBLK:
1153		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1154		break;
1155	case S_IFMT:
1156		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1157		break;
1158	case S_IFWHT:
1159		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1160		break;
1161	default:
1162		error = EINVAL;
1163		break;
1164	}
1165	if (error)
1166		return (error);
1167restart:
1168	bwillwrite();
1169	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1170	    pathseg, path, td);
1171	if ((error = namei(&nd)) != 0)
1172		return (error);
1173	vfslocked = NDHASGIANT(&nd);
1174	vp = nd.ni_vp;
1175	if (vp != NULL) {
1176		NDFREE(&nd, NDF_ONLY_PNBUF);
1177		if (vp == nd.ni_dvp)
1178			vrele(nd.ni_dvp);
1179		else
1180			vput(nd.ni_dvp);
1181		vrele(vp);
1182		VFS_UNLOCK_GIANT(vfslocked);
1183		return (EEXIST);
1184	} else {
1185		VATTR_NULL(&vattr);
1186		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1187		vattr.va_mode = (mode & ALLPERMS) &
1188		    ~td->td_proc->p_fd->fd_cmask;
1189		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1190		vattr.va_rdev = dev;
1191		whiteout = 0;
1192
1193		switch (mode & S_IFMT) {
1194		case S_IFMT:	/* used by badsect to flag bad sectors */
1195			vattr.va_type = VBAD;
1196			break;
1197		case S_IFCHR:
1198			vattr.va_type = VCHR;
1199			break;
1200		case S_IFBLK:
1201			vattr.va_type = VBLK;
1202			break;
1203		case S_IFWHT:
1204			whiteout = 1;
1205			break;
1206		default:
1207			panic("kern_mknod: invalid mode");
1208		}
1209	}
1210	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1211		NDFREE(&nd, NDF_ONLY_PNBUF);
1212		vput(nd.ni_dvp);
1213		VFS_UNLOCK_GIANT(vfslocked);
1214		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1215			return (error);
1216		goto restart;
1217	}
1218#ifdef MAC
1219	if (error == 0 && !whiteout)
1220		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1221		    &nd.ni_cnd, &vattr);
1222#endif
1223	if (!error) {
1224		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1225		if (whiteout)
1226			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1227		else {
1228			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1229						&nd.ni_cnd, &vattr);
1230			if (error == 0)
1231				vput(nd.ni_vp);
1232		}
1233	}
1234	NDFREE(&nd, NDF_ONLY_PNBUF);
1235	vput(nd.ni_dvp);
1236	vn_finished_write(mp);
1237	VFS_UNLOCK_GIANT(vfslocked);
1238	return (error);
1239}
1240
1241/*
1242 * Create a named pipe.
1243 */
1244#ifndef _SYS_SYSPROTO_H_
1245struct mkfifo_args {
1246	char	*path;
1247	int	mode;
1248};
1249#endif
1250int
1251mkfifo(td, uap)
1252	struct thread *td;
1253	register struct mkfifo_args /* {
1254		char *path;
1255		int mode;
1256	} */ *uap;
1257{
1258
1259	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1260}
1261
1262int
1263kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1264{
1265	struct mount *mp;
1266	struct vattr vattr;
1267	int error;
1268	struct nameidata nd;
1269	int vfslocked;
1270
1271	AUDIT_ARG(mode, mode);
1272restart:
1273	bwillwrite();
1274	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1275	    pathseg, path, td);
1276	if ((error = namei(&nd)) != 0)
1277		return (error);
1278	vfslocked = NDHASGIANT(&nd);
1279	if (nd.ni_vp != NULL) {
1280		NDFREE(&nd, NDF_ONLY_PNBUF);
1281		if (nd.ni_vp == nd.ni_dvp)
1282			vrele(nd.ni_dvp);
1283		else
1284			vput(nd.ni_dvp);
1285		vrele(nd.ni_vp);
1286		VFS_UNLOCK_GIANT(vfslocked);
1287		return (EEXIST);
1288	}
1289	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1290		NDFREE(&nd, NDF_ONLY_PNBUF);
1291		vput(nd.ni_dvp);
1292		VFS_UNLOCK_GIANT(vfslocked);
1293		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1294			return (error);
1295		goto restart;
1296	}
1297	VATTR_NULL(&vattr);
1298	vattr.va_type = VFIFO;
1299	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1300	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1301	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1302#ifdef MAC
1303	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1304	    &vattr);
1305	if (error)
1306		goto out;
1307#endif
1308	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1309	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1310	if (error == 0)
1311		vput(nd.ni_vp);
1312#ifdef MAC
1313out:
1314#endif
1315	vput(nd.ni_dvp);
1316	vn_finished_write(mp);
1317	VFS_UNLOCK_GIANT(vfslocked);
1318	NDFREE(&nd, NDF_ONLY_PNBUF);
1319	return (error);
1320}
1321
1322/*
1323 * Make a hard file link.
1324 */
1325#ifndef _SYS_SYSPROTO_H_
1326struct link_args {
1327	char	*path;
1328	char	*link;
1329};
1330#endif
1331int
1332link(td, uap)
1333	struct thread *td;
1334	register struct link_args /* {
1335		char *path;
1336		char *link;
1337	} */ *uap;
1338{
1339	int error;
1340
1341	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1342	return (error);
1343}
1344
1345static int hardlink_check_uid = 0;
1346SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1347    &hardlink_check_uid, 0,
1348    "Unprivileged processes cannot create hard links to files owned by other "
1349    "users");
1350static int hardlink_check_gid = 0;
1351SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1352    &hardlink_check_gid, 0,
1353    "Unprivileged processes cannot create hard links to files owned by other "
1354    "groups");
1355
1356static int
1357can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1358{
1359	struct vattr va;
1360	int error;
1361
1362	if (!hardlink_check_uid && !hardlink_check_gid)
1363		return (0);
1364
1365	error = VOP_GETATTR(vp, &va, cred, td);
1366	if (error != 0)
1367		return (error);
1368
1369	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1370		error = priv_check_cred(cred, PRIV_VFS_LINK,
1371		    SUSER_ALLOWJAIL);
1372		if (error)
1373			return (error);
1374	}
1375
1376	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1377		error = priv_check_cred(cred, PRIV_VFS_LINK,
1378		    SUSER_ALLOWJAIL);
1379		if (error)
1380			return (error);
1381	}
1382
1383	return (0);
1384}
1385
1386int
1387kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1388{
1389	struct vnode *vp;
1390	struct mount *mp;
1391	struct nameidata nd;
1392	int vfslocked;
1393	int lvfslocked;
1394	int error;
1395
1396	bwillwrite();
1397	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1398	if ((error = namei(&nd)) != 0)
1399		return (error);
1400	vfslocked = NDHASGIANT(&nd);
1401	NDFREE(&nd, NDF_ONLY_PNBUF);
1402	vp = nd.ni_vp;
1403	if (vp->v_type == VDIR) {
1404		vrele(vp);
1405		VFS_UNLOCK_GIANT(vfslocked);
1406		return (EPERM);		/* POSIX */
1407	}
1408	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1409		vrele(vp);
1410		VFS_UNLOCK_GIANT(vfslocked);
1411		return (error);
1412	}
1413	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1414	    segflg, link, td);
1415	if ((error = namei(&nd)) == 0) {
1416		lvfslocked = NDHASGIANT(&nd);
1417		if (nd.ni_vp != NULL) {
1418			if (nd.ni_dvp == nd.ni_vp)
1419				vrele(nd.ni_dvp);
1420			else
1421				vput(nd.ni_dvp);
1422			vrele(nd.ni_vp);
1423			error = EEXIST;
1424		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1425		    == 0) {
1426			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1427			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1428			error = can_hardlink(vp, td, td->td_ucred);
1429			if (error == 0)
1430#ifdef MAC
1431				error = mac_check_vnode_link(td->td_ucred,
1432				    nd.ni_dvp, vp, &nd.ni_cnd);
1433			if (error == 0)
1434#endif
1435				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1436			VOP_UNLOCK(vp, 0, td);
1437			vput(nd.ni_dvp);
1438		}
1439		NDFREE(&nd, NDF_ONLY_PNBUF);
1440		VFS_UNLOCK_GIANT(lvfslocked);
1441	}
1442	vrele(vp);
1443	vn_finished_write(mp);
1444	VFS_UNLOCK_GIANT(vfslocked);
1445	return (error);
1446}
1447
1448/*
1449 * Make a symbolic link.
1450 */
1451#ifndef _SYS_SYSPROTO_H_
1452struct symlink_args {
1453	char	*path;
1454	char	*link;
1455};
1456#endif
1457int
1458symlink(td, uap)
1459	struct thread *td;
1460	register struct symlink_args /* {
1461		char *path;
1462		char *link;
1463	} */ *uap;
1464{
1465
1466	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1467}
1468
1469int
1470kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1471{
1472	struct mount *mp;
1473	struct vattr vattr;
1474	char *syspath;
1475	int error;
1476	struct nameidata nd;
1477	int vfslocked;
1478
1479	if (segflg == UIO_SYSSPACE) {
1480		syspath = path;
1481	} else {
1482		syspath = uma_zalloc(namei_zone, M_WAITOK);
1483		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1484			goto out;
1485	}
1486	AUDIT_ARG(text, syspath);
1487restart:
1488	bwillwrite();
1489	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1490	    segflg, link, td);
1491	if ((error = namei(&nd)) != 0)
1492		goto out;
1493	vfslocked = NDHASGIANT(&nd);
1494	if (nd.ni_vp) {
1495		NDFREE(&nd, NDF_ONLY_PNBUF);
1496		if (nd.ni_vp == nd.ni_dvp)
1497			vrele(nd.ni_dvp);
1498		else
1499			vput(nd.ni_dvp);
1500		vrele(nd.ni_vp);
1501		VFS_UNLOCK_GIANT(vfslocked);
1502		error = EEXIST;
1503		goto out;
1504	}
1505	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1506		NDFREE(&nd, NDF_ONLY_PNBUF);
1507		vput(nd.ni_dvp);
1508		VFS_UNLOCK_GIANT(vfslocked);
1509		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1510			goto out;
1511		goto restart;
1512	}
1513	VATTR_NULL(&vattr);
1514	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1515	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1516	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1517#ifdef MAC
1518	vattr.va_type = VLNK;
1519	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1520	    &vattr);
1521	if (error)
1522		goto out2;
1523#endif
1524	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1525	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1526	if (error == 0)
1527		vput(nd.ni_vp);
1528#ifdef MAC
1529out2:
1530#endif
1531	NDFREE(&nd, NDF_ONLY_PNBUF);
1532	vput(nd.ni_dvp);
1533	vn_finished_write(mp);
1534	VFS_UNLOCK_GIANT(vfslocked);
1535out:
1536	if (segflg != UIO_SYSSPACE)
1537		uma_zfree(namei_zone, syspath);
1538	return (error);
1539}
1540
1541/*
1542 * Delete a whiteout from the filesystem.
1543 */
1544int
1545undelete(td, uap)
1546	struct thread *td;
1547	register struct undelete_args /* {
1548		char *path;
1549	} */ *uap;
1550{
1551	int error;
1552	struct mount *mp;
1553	struct nameidata nd;
1554	int vfslocked;
1555
1556restart:
1557	bwillwrite();
1558	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1559	    UIO_USERSPACE, uap->path, td);
1560	error = namei(&nd);
1561	if (error)
1562		return (error);
1563	vfslocked = NDHASGIANT(&nd);
1564
1565	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1566		NDFREE(&nd, NDF_ONLY_PNBUF);
1567		if (nd.ni_vp == nd.ni_dvp)
1568			vrele(nd.ni_dvp);
1569		else
1570			vput(nd.ni_dvp);
1571		if (nd.ni_vp)
1572			vrele(nd.ni_vp);
1573		VFS_UNLOCK_GIANT(vfslocked);
1574		return (EEXIST);
1575	}
1576	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1577		NDFREE(&nd, NDF_ONLY_PNBUF);
1578		vput(nd.ni_dvp);
1579		VFS_UNLOCK_GIANT(vfslocked);
1580		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1581			return (error);
1582		goto restart;
1583	}
1584	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1585	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1586	NDFREE(&nd, NDF_ONLY_PNBUF);
1587	vput(nd.ni_dvp);
1588	vn_finished_write(mp);
1589	VFS_UNLOCK_GIANT(vfslocked);
1590	return (error);
1591}
1592
1593/*
1594 * Delete a name from the filesystem.
1595 */
1596#ifndef _SYS_SYSPROTO_H_
1597struct unlink_args {
1598	char	*path;
1599};
1600#endif
1601int
1602unlink(td, uap)
1603	struct thread *td;
1604	struct unlink_args /* {
1605		char *path;
1606	} */ *uap;
1607{
1608	int error;
1609
1610	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1611	return (error);
1612}
1613
1614int
1615kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1616{
1617	struct mount *mp;
1618	struct vnode *vp;
1619	int error;
1620	struct nameidata nd;
1621	int vfslocked;
1622
1623restart:
1624	bwillwrite();
1625	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1626	    pathseg, path, td);
1627	if ((error = namei(&nd)) != 0)
1628		return (error == EINVAL ? EPERM : error);
1629	vfslocked = NDHASGIANT(&nd);
1630	vp = nd.ni_vp;
1631	if (vp->v_type == VDIR)
1632		error = EPERM;		/* POSIX */
1633	else {
1634		/*
1635		 * The root of a mounted filesystem cannot be deleted.
1636		 *
1637		 * XXX: can this only be a VDIR case?
1638		 */
1639		if (vp->v_vflag & VV_ROOT)
1640			error = EBUSY;
1641	}
1642	if (error == 0) {
1643		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1644			NDFREE(&nd, NDF_ONLY_PNBUF);
1645			vput(nd.ni_dvp);
1646			if (vp == nd.ni_dvp)
1647				vrele(vp);
1648			else
1649				vput(vp);
1650			VFS_UNLOCK_GIANT(vfslocked);
1651			if ((error = vn_start_write(NULL, &mp,
1652			    V_XSLEEP | PCATCH)) != 0)
1653				return (error);
1654			goto restart;
1655		}
1656#ifdef MAC
1657		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1658		    &nd.ni_cnd);
1659		if (error)
1660			goto out;
1661#endif
1662		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1663		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1664#ifdef MAC
1665out:
1666#endif
1667		vn_finished_write(mp);
1668	}
1669	NDFREE(&nd, NDF_ONLY_PNBUF);
1670	vput(nd.ni_dvp);
1671	if (vp == nd.ni_dvp)
1672		vrele(vp);
1673	else
1674		vput(vp);
1675	VFS_UNLOCK_GIANT(vfslocked);
1676	return (error);
1677}
1678
1679/*
1680 * Reposition read/write file offset.
1681 */
1682#ifndef _SYS_SYSPROTO_H_
1683struct lseek_args {
1684	int	fd;
1685	int	pad;
1686	off_t	offset;
1687	int	whence;
1688};
1689#endif
1690int
1691lseek(td, uap)
1692	struct thread *td;
1693	register struct lseek_args /* {
1694		int fd;
1695		int pad;
1696		off_t offset;
1697		int whence;
1698	} */ *uap;
1699{
1700	struct ucred *cred = td->td_ucred;
1701	struct file *fp;
1702	struct vnode *vp;
1703	struct vattr vattr;
1704	off_t offset;
1705	int error, noneg;
1706	int vfslocked;
1707
1708	if ((error = fget(td, uap->fd, &fp)) != 0)
1709		return (error);
1710	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1711		fdrop(fp, td);
1712		return (ESPIPE);
1713	}
1714	vp = fp->f_vnode;
1715	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1716	noneg = (vp->v_type != VCHR);
1717	offset = uap->offset;
1718	switch (uap->whence) {
1719	case L_INCR:
1720		if (noneg &&
1721		    (fp->f_offset < 0 ||
1722		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1723			error = EOVERFLOW;
1724			break;
1725		}
1726		offset += fp->f_offset;
1727		break;
1728	case L_XTND:
1729		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1730		error = VOP_GETATTR(vp, &vattr, cred, td);
1731		VOP_UNLOCK(vp, 0, td);
1732		if (error)
1733			break;
1734		if (noneg &&
1735		    (vattr.va_size > OFF_MAX ||
1736		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1737			error = EOVERFLOW;
1738			break;
1739		}
1740		offset += vattr.va_size;
1741		break;
1742	case L_SET:
1743		break;
1744	default:
1745		error = EINVAL;
1746	}
1747	if (error == 0 && noneg && offset < 0)
1748		error = EINVAL;
1749	if (error != 0)
1750		goto drop;
1751	fp->f_offset = offset;
1752	*(off_t *)(td->td_retval) = fp->f_offset;
1753drop:
1754	fdrop(fp, td);
1755	VFS_UNLOCK_GIANT(vfslocked);
1756	return (error);
1757}
1758
#if defined(COMPAT_43)
/*
 * Old (COMPAT_43) lseek: reposition the read/write file offset.
 *
 * The offset arrives as a long.  The arguments are repacked into a
 * struct lseek_args (the pad member is left unset) and passed to lseek(),
 * whose result is returned.
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args largs;

	largs.fd = uap->fd;
	largs.offset = uap->offset;
	largs.whence = uap->whence;
	return (lseek(td, &largs));
}
#endif /* COMPAT_43 */
1794
1795/*
1796 * Check access permissions using passed credentials.
1797 */
1798static int
1799vn_access(vp, user_flags, cred, td)
1800	struct vnode	*vp;
1801	int		user_flags;
1802	struct ucred	*cred;
1803	struct thread	*td;
1804{
1805	int error, flags;
1806
1807	/* Flags == 0 means only check for existence. */
1808	error = 0;
1809	if (user_flags) {
1810		flags = 0;
1811		if (user_flags & R_OK)
1812			flags |= VREAD;
1813		if (user_flags & W_OK)
1814			flags |= VWRITE;
1815		if (user_flags & X_OK)
1816			flags |= VEXEC;
1817#ifdef MAC
1818		error = mac_check_vnode_access(cred, vp, flags);
1819		if (error)
1820			return (error);
1821#endif
1822		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1823			error = VOP_ACCESS(vp, flags, cred, td);
1824	}
1825	return (error);
1826}
1827
1828/*
1829 * Check access permissions using "real" credentials.
1830 */
1831#ifndef _SYS_SYSPROTO_H_
1832struct access_args {
1833	char	*path;
1834	int	flags;
1835};
1836#endif
1837int
1838access(td, uap)
1839	struct thread *td;
1840	register struct access_args /* {
1841		char *path;
1842		int flags;
1843	} */ *uap;
1844{
1845
1846	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1847}
1848
1849int
1850kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1851{
1852	struct ucred *cred, *tmpcred;
1853	register struct vnode *vp;
1854	struct nameidata nd;
1855	int vfslocked;
1856	int error;
1857
1858	/*
1859	 * Create and modify a temporary credential instead of one that
1860	 * is potentially shared.  This could also mess up socket
1861	 * buffer accounting which can run in an interrupt context.
1862	 */
1863	cred = td->td_ucred;
1864	tmpcred = crdup(cred);
1865	tmpcred->cr_uid = cred->cr_ruid;
1866	tmpcred->cr_groups[0] = cred->cr_rgid;
1867	td->td_ucred = tmpcred;
1868	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1869	    pathseg, path, td);
1870	if ((error = namei(&nd)) != 0)
1871		goto out1;
1872	vfslocked = NDHASGIANT(&nd);
1873	vp = nd.ni_vp;
1874
1875	error = vn_access(vp, flags, tmpcred, td);
1876	NDFREE(&nd, NDF_ONLY_PNBUF);
1877	vput(vp);
1878	VFS_UNLOCK_GIANT(vfslocked);
1879out1:
1880	td->td_ucred = cred;
1881	crfree(tmpcred);
1882	return (error);
1883}
1884
1885/*
1886 * Check access permissions using "effective" credentials.
1887 */
1888#ifndef _SYS_SYSPROTO_H_
1889struct eaccess_args {
1890	char	*path;
1891	int	flags;
1892};
1893#endif
1894int
1895eaccess(td, uap)
1896	struct thread *td;
1897	register struct eaccess_args /* {
1898		char *path;
1899		int flags;
1900	} */ *uap;
1901{
1902
1903	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1904}
1905
1906int
1907kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1908{
1909	struct nameidata nd;
1910	struct vnode *vp;
1911	int vfslocked;
1912	int error;
1913
1914	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1915	    pathseg, path, td);
1916	if ((error = namei(&nd)) != 0)
1917		return (error);
1918	vp = nd.ni_vp;
1919	vfslocked = NDHASGIANT(&nd);
1920	error = vn_access(vp, flags, td->td_ucred, td);
1921	NDFREE(&nd, NDF_ONLY_PNBUF);
1922	vput(vp);
1923	VFS_UNLOCK_GIANT(vfslocked);
1924	return (error);
1925}
1926
1927#if defined(COMPAT_43)
1928/*
1929 * Get file status; this version follows links.
1930 */
1931#ifndef _SYS_SYSPROTO_H_
1932struct ostat_args {
1933	char	*path;
1934	struct ostat *ub;
1935};
1936#endif
1937int
1938ostat(td, uap)
1939	struct thread *td;
1940	register struct ostat_args /* {
1941		char *path;
1942		struct ostat *ub;
1943	} */ *uap;
1944{
1945	struct stat sb;
1946	struct ostat osb;
1947	int error;
1948
1949	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1950	if (error)
1951		return (error);
1952	cvtstat(&sb, &osb);
1953	error = copyout(&osb, uap->ub, sizeof (osb));
1954	return (error);
1955}
1956
1957/*
1958 * Get file status; this version does not follow links.
1959 */
1960#ifndef _SYS_SYSPROTO_H_
1961struct olstat_args {
1962	char	*path;
1963	struct ostat *ub;
1964};
1965#endif
1966int
1967olstat(td, uap)
1968	struct thread *td;
1969	register struct olstat_args /* {
1970		char *path;
1971		struct ostat *ub;
1972	} */ *uap;
1973{
1974	struct stat sb;
1975	struct ostat osb;
1976	int error;
1977
1978	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1979	if (error)
1980		return (error);
1981	cvtstat(&sb, &osb);
1982	error = copyout(&osb, uap->ub, sizeof (osb));
1983	return (error);
1984}
1985
1986/*
1987 * Convert from an old to a new stat structure.
1988 */
1989void
1990cvtstat(st, ost)
1991	struct stat *st;
1992	struct ostat *ost;
1993{
1994
1995	ost->st_dev = st->st_dev;
1996	ost->st_ino = st->st_ino;
1997	ost->st_mode = st->st_mode;
1998	ost->st_nlink = st->st_nlink;
1999	ost->st_uid = st->st_uid;
2000	ost->st_gid = st->st_gid;
2001	ost->st_rdev = st->st_rdev;
2002	if (st->st_size < (quad_t)1 << 32)
2003		ost->st_size = st->st_size;
2004	else
2005		ost->st_size = -2;
2006	ost->st_atime = st->st_atime;
2007	ost->st_mtime = st->st_mtime;
2008	ost->st_ctime = st->st_ctime;
2009	ost->st_blksize = st->st_blksize;
2010	ost->st_blocks = st->st_blocks;
2011	ost->st_flags = st->st_flags;
2012	ost->st_gen = st->st_gen;
2013}
2014#endif /* COMPAT_43 */
2015
2016/*
2017 * Get file status; this version follows links.
2018 */
2019#ifndef _SYS_SYSPROTO_H_
2020struct stat_args {
2021	char	*path;
2022	struct stat *ub;
2023};
2024#endif
2025int
2026stat(td, uap)
2027	struct thread *td;
2028	register struct stat_args /* {
2029		char *path;
2030		struct stat *ub;
2031	} */ *uap;
2032{
2033	struct stat sb;
2034	int error;
2035
2036	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2037	if (error == 0)
2038		error = copyout(&sb, uap->ub, sizeof (sb));
2039	return (error);
2040}
2041
2042int
2043kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2044{
2045	struct nameidata nd;
2046	struct stat sb;
2047	int error, vfslocked;
2048
2049	NDINIT(&nd, LOOKUP,
2050	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2051	    pathseg, path, td);
2052	if ((error = namei(&nd)) != 0)
2053		return (error);
2054	vfslocked = NDHASGIANT(&nd);
2055	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2056	NDFREE(&nd, NDF_ONLY_PNBUF);
2057	vput(nd.ni_vp);
2058	VFS_UNLOCK_GIANT(vfslocked);
2059	if (mtx_owned(&Giant))
2060		printf("stat(%d): %s\n", vfslocked, path);
2061	if (error)
2062		return (error);
2063	*sbp = sb;
2064	return (0);
2065}
2066
2067/*
2068 * Get file status; this version does not follow links.
2069 */
2070#ifndef _SYS_SYSPROTO_H_
2071struct lstat_args {
2072	char	*path;
2073	struct stat *ub;
2074};
2075#endif
2076int
2077lstat(td, uap)
2078	struct thread *td;
2079	register struct lstat_args /* {
2080		char *path;
2081		struct stat *ub;
2082	} */ *uap;
2083{
2084	struct stat sb;
2085	int error;
2086
2087	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2088	if (error == 0)
2089		error = copyout(&sb, uap->ub, sizeof (sb));
2090	return (error);
2091}
2092
2093int
2094kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2095{
2096	struct vnode *vp;
2097	struct stat sb;
2098	struct nameidata nd;
2099	int error, vfslocked;
2100
2101	NDINIT(&nd, LOOKUP,
2102	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2103	    pathseg, path, td);
2104	if ((error = namei(&nd)) != 0)
2105		return (error);
2106	vfslocked = NDHASGIANT(&nd);
2107	vp = nd.ni_vp;
2108	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2109	NDFREE(&nd, NDF_ONLY_PNBUF);
2110	vput(vp);
2111	VFS_UNLOCK_GIANT(vfslocked);
2112	if (error)
2113		return (error);
2114	*sbp = sb;
2115	return (0);
2116}
2117
2118/*
2119 * Implementation of the NetBSD [l]stat() functions.
2120 */
2121void
2122cvtnstat(sb, nsb)
2123	struct stat *sb;
2124	struct nstat *nsb;
2125{
2126	bzero(nsb, sizeof *nsb);
2127	nsb->st_dev = sb->st_dev;
2128	nsb->st_ino = sb->st_ino;
2129	nsb->st_mode = sb->st_mode;
2130	nsb->st_nlink = sb->st_nlink;
2131	nsb->st_uid = sb->st_uid;
2132	nsb->st_gid = sb->st_gid;
2133	nsb->st_rdev = sb->st_rdev;
2134	nsb->st_atimespec = sb->st_atimespec;
2135	nsb->st_mtimespec = sb->st_mtimespec;
2136	nsb->st_ctimespec = sb->st_ctimespec;
2137	nsb->st_size = sb->st_size;
2138	nsb->st_blocks = sb->st_blocks;
2139	nsb->st_blksize = sb->st_blksize;
2140	nsb->st_flags = sb->st_flags;
2141	nsb->st_gen = sb->st_gen;
2142	nsb->st_birthtimespec = sb->st_birthtimespec;
2143}
2144
2145#ifndef _SYS_SYSPROTO_H_
2146struct nstat_args {
2147	char	*path;
2148	struct nstat *ub;
2149};
2150#endif
2151int
2152nstat(td, uap)
2153	struct thread *td;
2154	register struct nstat_args /* {
2155		char *path;
2156		struct nstat *ub;
2157	} */ *uap;
2158{
2159	struct stat sb;
2160	struct nstat nsb;
2161	int error;
2162
2163	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2164	if (error)
2165		return (error);
2166	cvtnstat(&sb, &nsb);
2167	error = copyout(&nsb, uap->ub, sizeof (nsb));
2168	return (error);
2169}
2170
2171/*
2172 * NetBSD lstat.  Get file status; this version does not follow links.
2173 */
2174#ifndef _SYS_SYSPROTO_H_
2175struct lstat_args {
2176	char	*path;
2177	struct stat *ub;
2178};
2179#endif
2180int
2181nlstat(td, uap)
2182	struct thread *td;
2183	register struct nlstat_args /* {
2184		char *path;
2185		struct nstat *ub;
2186	} */ *uap;
2187{
2188	struct stat sb;
2189	struct nstat nsb;
2190	int error;
2191
2192	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2193	if (error)
2194		return (error);
2195	cvtnstat(&sb, &nsb);
2196	error = copyout(&nsb, uap->ub, sizeof (nsb));
2197	return (error);
2198}
2199
2200/*
2201 * Get configurable pathname variables.
2202 */
2203#ifndef _SYS_SYSPROTO_H_
2204struct pathconf_args {
2205	char	*path;
2206	int	name;
2207};
2208#endif
2209int
2210pathconf(td, uap)
2211	struct thread *td;
2212	register struct pathconf_args /* {
2213		char *path;
2214		int name;
2215	} */ *uap;
2216{
2217
2218	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2219}
2220
2221int
2222kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2223{
2224	struct nameidata nd;
2225	int error, vfslocked;
2226
2227	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2228	    pathseg, path, td);
2229	if ((error = namei(&nd)) != 0)
2230		return (error);
2231	vfslocked = NDHASGIANT(&nd);
2232	NDFREE(&nd, NDF_ONLY_PNBUF);
2233
2234	/* If asynchronous I/O is available, it works for all files. */
2235	if (name == _PC_ASYNC_IO)
2236		td->td_retval[0] = async_io_version;
2237	else
2238		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2239	vput(nd.ni_vp);
2240	VFS_UNLOCK_GIANT(vfslocked);
2241	return (error);
2242}
2243
2244/*
2245 * Return target name of a symbolic link.
2246 */
2247#ifndef _SYS_SYSPROTO_H_
2248struct readlink_args {
2249	char	*path;
2250	char	*buf;
2251	int	count;
2252};
2253#endif
2254int
2255readlink(td, uap)
2256	struct thread *td;
2257	register struct readlink_args /* {
2258		char *path;
2259		char *buf;
2260		int count;
2261	} */ *uap;
2262{
2263
2264	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2265	    UIO_USERSPACE, uap->count));
2266}
2267
2268int
2269kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2270    enum uio_seg bufseg, int count)
2271{
2272	register struct vnode *vp;
2273	struct iovec aiov;
2274	struct uio auio;
2275	int error;
2276	struct nameidata nd;
2277	int vfslocked;
2278
2279	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2280	    pathseg, path, td);
2281	if ((error = namei(&nd)) != 0)
2282		return (error);
2283	NDFREE(&nd, NDF_ONLY_PNBUF);
2284	vfslocked = NDHASGIANT(&nd);
2285	vp = nd.ni_vp;
2286#ifdef MAC
2287	error = mac_check_vnode_readlink(td->td_ucred, vp);
2288	if (error) {
2289		vput(vp);
2290		VFS_UNLOCK_GIANT(vfslocked);
2291		return (error);
2292	}
2293#endif
2294	if (vp->v_type != VLNK)
2295		error = EINVAL;
2296	else {
2297		aiov.iov_base = buf;
2298		aiov.iov_len = count;
2299		auio.uio_iov = &aiov;
2300		auio.uio_iovcnt = 1;
2301		auio.uio_offset = 0;
2302		auio.uio_rw = UIO_READ;
2303		auio.uio_segflg = bufseg;
2304		auio.uio_td = td;
2305		auio.uio_resid = count;
2306		error = VOP_READLINK(vp, &auio, td->td_ucred);
2307	}
2308	vput(vp);
2309	VFS_UNLOCK_GIANT(vfslocked);
2310	td->td_retval[0] = count - auio.uio_resid;
2311	return (error);
2312}
2313
2314/*
2315 * Common implementation code for chflags() and fchflags().
2316 */
2317static int
2318setfflags(td, vp, flags)
2319	struct thread *td;
2320	struct vnode *vp;
2321	int flags;
2322{
2323	int error;
2324	struct mount *mp;
2325	struct vattr vattr;
2326
2327	/*
2328	 * Prevent non-root users from setting flags on devices.  When
2329	 * a device is reused, users can retain ownership of the device
2330	 * if they are allowed to set flags and programs assume that
2331	 * chown can't fail when done as root.
2332	 */
2333	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2334		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2335		    SUSER_ALLOWJAIL);
2336		if (error)
2337			return (error);
2338	}
2339
2340	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2341		return (error);
2342	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2343	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2344	VATTR_NULL(&vattr);
2345	vattr.va_flags = flags;
2346#ifdef MAC
2347	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2348	if (error == 0)
2349#endif
2350		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2351	VOP_UNLOCK(vp, 0, td);
2352	vn_finished_write(mp);
2353	return (error);
2354}
2355
2356/*
2357 * Change flags of a file given a path name.
2358 */
2359#ifndef _SYS_SYSPROTO_H_
2360struct chflags_args {
2361	char	*path;
2362	int	flags;
2363};
2364#endif
2365int
2366chflags(td, uap)
2367	struct thread *td;
2368	register struct chflags_args /* {
2369		char *path;
2370		int flags;
2371	} */ *uap;
2372{
2373	int error;
2374	struct nameidata nd;
2375	int vfslocked;
2376
2377	AUDIT_ARG(fflags, uap->flags);
2378	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2379	    uap->path, td);
2380	if ((error = namei(&nd)) != 0)
2381		return (error);
2382	NDFREE(&nd, NDF_ONLY_PNBUF);
2383	vfslocked = NDHASGIANT(&nd);
2384	error = setfflags(td, nd.ni_vp, uap->flags);
2385	vrele(nd.ni_vp);
2386	VFS_UNLOCK_GIANT(vfslocked);
2387	return (error);
2388}
2389
2390/*
2391 * Same as chflags() but doesn't follow symlinks.
2392 */
2393int
2394lchflags(td, uap)
2395	struct thread *td;
2396	register struct lchflags_args /* {
2397		char *path;
2398		int flags;
2399	} */ *uap;
2400{
2401	int error;
2402	struct nameidata nd;
2403	int vfslocked;
2404
2405	AUDIT_ARG(fflags, uap->flags);
2406	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2407	    uap->path, td);
2408	if ((error = namei(&nd)) != 0)
2409		return (error);
2410	vfslocked = NDHASGIANT(&nd);
2411	NDFREE(&nd, NDF_ONLY_PNBUF);
2412	error = setfflags(td, nd.ni_vp, uap->flags);
2413	vrele(nd.ni_vp);
2414	VFS_UNLOCK_GIANT(vfslocked);
2415	return (error);
2416}
2417
2418/*
2419 * Change flags of a file given a file descriptor.
2420 */
2421#ifndef _SYS_SYSPROTO_H_
2422struct fchflags_args {
2423	int	fd;
2424	int	flags;
2425};
2426#endif
2427int
2428fchflags(td, uap)
2429	struct thread *td;
2430	register struct fchflags_args /* {
2431		int fd;
2432		int flags;
2433	} */ *uap;
2434{
2435	struct file *fp;
2436	int vfslocked;
2437	int error;
2438
2439	AUDIT_ARG(fd, uap->fd);
2440	AUDIT_ARG(fflags, uap->flags);
2441	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2442		return (error);
2443	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2444#ifdef AUDIT
2445	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2446	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2447	VOP_UNLOCK(fp->f_vnode, 0, td);
2448#endif
2449	error = setfflags(td, fp->f_vnode, uap->flags);
2450	VFS_UNLOCK_GIANT(vfslocked);
2451	fdrop(fp, td);
2452	return (error);
2453}
2454
2455/*
2456 * Common implementation code for chmod(), lchmod() and fchmod().
2457 */
2458static int
2459setfmode(td, vp, mode)
2460	struct thread *td;
2461	struct vnode *vp;
2462	int mode;
2463{
2464	int error;
2465	struct mount *mp;
2466	struct vattr vattr;
2467
2468	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2469		return (error);
2470	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2471	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2472	VATTR_NULL(&vattr);
2473	vattr.va_mode = mode & ALLPERMS;
2474#ifdef MAC
2475	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2476	if (error == 0)
2477#endif
2478		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2479	VOP_UNLOCK(vp, 0, td);
2480	vn_finished_write(mp);
2481	return (error);
2482}
2483
2484/*
2485 * Change mode of a file given path name.
2486 */
2487#ifndef _SYS_SYSPROTO_H_
2488struct chmod_args {
2489	char	*path;
2490	int	mode;
2491};
2492#endif
2493int
2494chmod(td, uap)
2495	struct thread *td;
2496	register struct chmod_args /* {
2497		char *path;
2498		int mode;
2499	} */ *uap;
2500{
2501
2502	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2503}
2504
2505int
2506kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2507{
2508	int error;
2509	struct nameidata nd;
2510	int vfslocked;
2511
2512	AUDIT_ARG(mode, mode);
2513	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2514	if ((error = namei(&nd)) != 0)
2515		return (error);
2516	vfslocked = NDHASGIANT(&nd);
2517	NDFREE(&nd, NDF_ONLY_PNBUF);
2518	error = setfmode(td, nd.ni_vp, mode);
2519	vrele(nd.ni_vp);
2520	VFS_UNLOCK_GIANT(vfslocked);
2521	return (error);
2522}
2523
2524/*
2525 * Change mode of a file given path name (don't follow links.)
2526 */
2527#ifndef _SYS_SYSPROTO_H_
2528struct lchmod_args {
2529	char	*path;
2530	int	mode;
2531};
2532#endif
2533int
2534lchmod(td, uap)
2535	struct thread *td;
2536	register struct lchmod_args /* {
2537		char *path;
2538		int mode;
2539	} */ *uap;
2540{
2541	int error;
2542	struct nameidata nd;
2543	int vfslocked;
2544
2545	AUDIT_ARG(mode, (mode_t)uap->mode);
2546	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2547	    uap->path, td);
2548	if ((error = namei(&nd)) != 0)
2549		return (error);
2550	vfslocked = NDHASGIANT(&nd);
2551	NDFREE(&nd, NDF_ONLY_PNBUF);
2552	error = setfmode(td, nd.ni_vp, uap->mode);
2553	vrele(nd.ni_vp);
2554	VFS_UNLOCK_GIANT(vfslocked);
2555	return (error);
2556}
2557
2558/*
2559 * Change mode of a file given a file descriptor.
2560 */
2561#ifndef _SYS_SYSPROTO_H_
2562struct fchmod_args {
2563	int	fd;
2564	int	mode;
2565};
2566#endif
2567int
2568fchmod(td, uap)
2569	struct thread *td;
2570	register struct fchmod_args /* {
2571		int fd;
2572		int mode;
2573	} */ *uap;
2574{
2575	struct file *fp;
2576	int vfslocked;
2577	int error;
2578
2579	AUDIT_ARG(fd, uap->fd);
2580	AUDIT_ARG(mode, uap->mode);
2581	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2582		return (error);
2583	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2584#ifdef AUDIT
2585	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2586	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2587	VOP_UNLOCK(fp->f_vnode, 0, td);
2588#endif
2589	error = setfmode(td, fp->f_vnode, uap->mode);
2590	VFS_UNLOCK_GIANT(vfslocked);
2591	fdrop(fp, td);
2592	return (error);
2593}
2594
2595/*
2596 * Common implementation for chown(), lchown(), and fchown()
2597 */
2598static int
2599setfown(td, vp, uid, gid)
2600	struct thread *td;
2601	struct vnode *vp;
2602	uid_t uid;
2603	gid_t gid;
2604{
2605	int error;
2606	struct mount *mp;
2607	struct vattr vattr;
2608
2609	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2610		return (error);
2611	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2612	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2613	VATTR_NULL(&vattr);
2614	vattr.va_uid = uid;
2615	vattr.va_gid = gid;
2616#ifdef MAC
2617	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2618	    vattr.va_gid);
2619	if (error == 0)
2620#endif
2621		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2622	VOP_UNLOCK(vp, 0, td);
2623	vn_finished_write(mp);
2624	return (error);
2625}
2626
2627/*
2628 * Set ownership given a path name.
2629 */
2630#ifndef _SYS_SYSPROTO_H_
2631struct chown_args {
2632	char	*path;
2633	int	uid;
2634	int	gid;
2635};
2636#endif
2637int
2638chown(td, uap)
2639	struct thread *td;
2640	register struct chown_args /* {
2641		char *path;
2642		int uid;
2643		int gid;
2644	} */ *uap;
2645{
2646
2647	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2648}
2649
2650int
2651kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2652    int gid)
2653{
2654	int error;
2655	struct nameidata nd;
2656	int vfslocked;
2657
2658	AUDIT_ARG(owner, uid, gid);
2659	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2660	if ((error = namei(&nd)) != 0)
2661		return (error);
2662	vfslocked = NDHASGIANT(&nd);
2663	NDFREE(&nd, NDF_ONLY_PNBUF);
2664	error = setfown(td, nd.ni_vp, uid, gid);
2665	vrele(nd.ni_vp);
2666	VFS_UNLOCK_GIANT(vfslocked);
2667	return (error);
2668}
2669
2670/*
2671 * Set ownership given a path name, do not cross symlinks.
2672 */
2673#ifndef _SYS_SYSPROTO_H_
2674struct lchown_args {
2675	char	*path;
2676	int	uid;
2677	int	gid;
2678};
2679#endif
2680int
2681lchown(td, uap)
2682	struct thread *td;
2683	register struct lchown_args /* {
2684		char *path;
2685		int uid;
2686		int gid;
2687	} */ *uap;
2688{
2689
2690	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2691}
2692
2693int
2694kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2695    int gid)
2696{
2697	int error;
2698	struct nameidata nd;
2699	int vfslocked;
2700
2701	AUDIT_ARG(owner, uid, gid);
2702	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2703	if ((error = namei(&nd)) != 0)
2704		return (error);
2705	vfslocked = NDHASGIANT(&nd);
2706	NDFREE(&nd, NDF_ONLY_PNBUF);
2707	error = setfown(td, nd.ni_vp, uid, gid);
2708	vrele(nd.ni_vp);
2709	VFS_UNLOCK_GIANT(vfslocked);
2710	return (error);
2711}
2712
2713/*
2714 * Set ownership given a file descriptor.
2715 */
2716#ifndef _SYS_SYSPROTO_H_
2717struct fchown_args {
2718	int	fd;
2719	int	uid;
2720	int	gid;
2721};
2722#endif
2723int
2724fchown(td, uap)
2725	struct thread *td;
2726	register struct fchown_args /* {
2727		int fd;
2728		int uid;
2729		int gid;
2730	} */ *uap;
2731{
2732	struct file *fp;
2733	int vfslocked;
2734	int error;
2735
2736	AUDIT_ARG(fd, uap->fd);
2737	AUDIT_ARG(owner, uap->uid, uap->gid);
2738	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2739		return (error);
2740	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2741#ifdef AUDIT
2742	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2743	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2744	VOP_UNLOCK(fp->f_vnode, 0, td);
2745#endif
2746	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2747	VFS_UNLOCK_GIANT(vfslocked);
2748	fdrop(fp, td);
2749	return (error);
2750}
2751
2752/*
2753 * Common implementation code for utimes(), lutimes(), and futimes().
2754 */
2755static int
2756getutimes(usrtvp, tvpseg, tsp)
2757	const struct timeval *usrtvp;
2758	enum uio_seg tvpseg;
2759	struct timespec *tsp;
2760{
2761	struct timeval tv[2];
2762	const struct timeval *tvp;
2763	int error;
2764
2765	if (usrtvp == NULL) {
2766		microtime(&tv[0]);
2767		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2768		tsp[1] = tsp[0];
2769	} else {
2770		if (tvpseg == UIO_SYSSPACE) {
2771			tvp = usrtvp;
2772		} else {
2773			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2774				return (error);
2775			tvp = tv;
2776		}
2777
2778		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2779		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2780			return (EINVAL);
2781		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2782		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2783	}
2784	return (0);
2785}
2786
2787/*
2788 * Common implementation code for utimes(), lutimes(), and futimes().
2789 */
2790static int
2791setutimes(td, vp, ts, numtimes, nullflag)
2792	struct thread *td;
2793	struct vnode *vp;
2794	const struct timespec *ts;
2795	int numtimes;
2796	int nullflag;
2797{
2798	int error, setbirthtime;
2799	struct mount *mp;
2800	struct vattr vattr;
2801
2802	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2803		return (error);
2804	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2805	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2806	setbirthtime = 0;
2807	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2808	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2809		setbirthtime = 1;
2810	VATTR_NULL(&vattr);
2811	vattr.va_atime = ts[0];
2812	vattr.va_mtime = ts[1];
2813	if (setbirthtime)
2814		vattr.va_birthtime = ts[1];
2815	if (numtimes > 2)
2816		vattr.va_birthtime = ts[2];
2817	if (nullflag)
2818		vattr.va_vaflags |= VA_UTIMES_NULL;
2819#ifdef MAC
2820	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2821	    vattr.va_mtime);
2822#endif
2823	if (error == 0)
2824		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2825	VOP_UNLOCK(vp, 0, td);
2826	vn_finished_write(mp);
2827	return (error);
2828}
2829
2830/*
2831 * Set the access and modification times of a file.
2832 */
2833#ifndef _SYS_SYSPROTO_H_
2834struct utimes_args {
2835	char	*path;
2836	struct	timeval *tptr;
2837};
2838#endif
2839int
2840utimes(td, uap)
2841	struct thread *td;
2842	register struct utimes_args /* {
2843		char *path;
2844		struct timeval *tptr;
2845	} */ *uap;
2846{
2847
2848	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2849	    UIO_USERSPACE));
2850}
2851
2852int
2853kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2854    struct timeval *tptr, enum uio_seg tptrseg)
2855{
2856	struct timespec ts[2];
2857	int error;
2858	struct nameidata nd;
2859	int vfslocked;
2860
2861	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2862		return (error);
2863	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2864	if ((error = namei(&nd)) != 0)
2865		return (error);
2866	vfslocked = NDHASGIANT(&nd);
2867	NDFREE(&nd, NDF_ONLY_PNBUF);
2868	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2869	vrele(nd.ni_vp);
2870	VFS_UNLOCK_GIANT(vfslocked);
2871	return (error);
2872}
2873
2874/*
2875 * Set the access and modification times of a file.
2876 */
2877#ifndef _SYS_SYSPROTO_H_
2878struct lutimes_args {
2879	char	*path;
2880	struct	timeval *tptr;
2881};
2882#endif
2883int
2884lutimes(td, uap)
2885	struct thread *td;
2886	register struct lutimes_args /* {
2887		char *path;
2888		struct timeval *tptr;
2889	} */ *uap;
2890{
2891
2892	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2893	    UIO_USERSPACE));
2894}
2895
2896int
2897kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2898    struct timeval *tptr, enum uio_seg tptrseg)
2899{
2900	struct timespec ts[2];
2901	int error;
2902	struct nameidata nd;
2903	int vfslocked;
2904
2905	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2906		return (error);
2907	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2908	if ((error = namei(&nd)) != 0)
2909		return (error);
2910	vfslocked = NDHASGIANT(&nd);
2911	NDFREE(&nd, NDF_ONLY_PNBUF);
2912	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2913	vrele(nd.ni_vp);
2914	VFS_UNLOCK_GIANT(vfslocked);
2915	return (error);
2916}
2917
2918/*
2919 * Set the access and modification times of a file.
2920 */
2921#ifndef _SYS_SYSPROTO_H_
2922struct futimes_args {
2923	int	fd;
2924	struct	timeval *tptr;
2925};
2926#endif
2927int
2928futimes(td, uap)
2929	struct thread *td;
2930	register struct futimes_args /* {
2931		int  fd;
2932		struct timeval *tptr;
2933	} */ *uap;
2934{
2935
2936	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2937}
2938
2939int
2940kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2941    enum uio_seg tptrseg)
2942{
2943	struct timespec ts[2];
2944	struct file *fp;
2945	int vfslocked;
2946	int error;
2947
2948	AUDIT_ARG(fd, fd);
2949	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2950		return (error);
2951	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2952		return (error);
2953	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2954#ifdef AUDIT
2955	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2956	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2957	VOP_UNLOCK(fp->f_vnode, 0, td);
2958#endif
2959	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2960	VFS_UNLOCK_GIANT(vfslocked);
2961	fdrop(fp, td);
2962	return (error);
2963}
2964
2965/*
2966 * Truncate a file given its path name.
2967 */
2968#ifndef _SYS_SYSPROTO_H_
2969struct truncate_args {
2970	char	*path;
2971	int	pad;
2972	off_t	length;
2973};
2974#endif
2975int
2976truncate(td, uap)
2977	struct thread *td;
2978	register struct truncate_args /* {
2979		char *path;
2980		int pad;
2981		off_t length;
2982	} */ *uap;
2983{
2984
2985	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2986}
2987
2988int
2989kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2990{
2991	struct mount *mp;
2992	struct vnode *vp;
2993	struct vattr vattr;
2994	int error;
2995	struct nameidata nd;
2996	int vfslocked;
2997
2998	if (length < 0)
2999		return(EINVAL);
3000	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3001	if ((error = namei(&nd)) != 0)
3002		return (error);
3003	vfslocked = NDHASGIANT(&nd);
3004	vp = nd.ni_vp;
3005	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3006		vrele(vp);
3007		VFS_UNLOCK_GIANT(vfslocked);
3008		return (error);
3009	}
3010	NDFREE(&nd, NDF_ONLY_PNBUF);
3011	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3012	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3013	if (vp->v_type == VDIR)
3014		error = EISDIR;
3015#ifdef MAC
3016	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3017	}
3018#endif
3019	else if ((error = vn_writechk(vp)) == 0 &&
3020	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3021		VATTR_NULL(&vattr);
3022		vattr.va_size = length;
3023		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3024	}
3025	vput(vp);
3026	vn_finished_write(mp);
3027	VFS_UNLOCK_GIANT(vfslocked);
3028	return (error);
3029}
3030
3031/*
3032 * Truncate a file given a file descriptor.
3033 */
3034#ifndef _SYS_SYSPROTO_H_
3035struct ftruncate_args {
3036	int	fd;
3037	int	pad;
3038	off_t	length;
3039};
3040#endif
3041int
3042ftruncate(td, uap)
3043	struct thread *td;
3044	register struct ftruncate_args /* {
3045		int fd;
3046		int pad;
3047		off_t length;
3048	} */ *uap;
3049{
3050	struct mount *mp;
3051	struct vattr vattr;
3052	struct vnode *vp;
3053	struct file *fp;
3054	int vfslocked;
3055	int error;
3056
3057	AUDIT_ARG(fd, uap->fd);
3058	if (uap->length < 0)
3059		return(EINVAL);
3060	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3061		return (error);
3062	if ((fp->f_flag & FWRITE) == 0) {
3063		fdrop(fp, td);
3064		return (EINVAL);
3065	}
3066	vp = fp->f_vnode;
3067	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3068	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3069		goto drop;
3070	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3071	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3072	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3073	if (vp->v_type == VDIR)
3074		error = EISDIR;
3075#ifdef MAC
3076	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3077	    vp))) {
3078	}
3079#endif
3080	else if ((error = vn_writechk(vp)) == 0) {
3081		VATTR_NULL(&vattr);
3082		vattr.va_size = uap->length;
3083		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3084	}
3085	VOP_UNLOCK(vp, 0, td);
3086	vn_finished_write(mp);
3087drop:
3088	VFS_UNLOCK_GIANT(vfslocked);
3089	fdrop(fp, td);
3090	return (error);
3091}
3092
3093#if defined(COMPAT_43)
3094/*
3095 * Truncate a file given its path name.
3096 */
3097#ifndef _SYS_SYSPROTO_H_
3098struct otruncate_args {
3099	char	*path;
3100	long	length;
3101};
3102#endif
3103int
3104otruncate(td, uap)
3105	struct thread *td;
3106	register struct otruncate_args /* {
3107		char *path;
3108		long length;
3109	} */ *uap;
3110{
3111	struct truncate_args /* {
3112		char *path;
3113		int pad;
3114		off_t length;
3115	} */ nuap;
3116
3117	nuap.path = uap->path;
3118	nuap.length = uap->length;
3119	return (truncate(td, &nuap));
3120}
3121
3122/*
3123 * Truncate a file given a file descriptor.
3124 */
3125#ifndef _SYS_SYSPROTO_H_
3126struct oftruncate_args {
3127	int	fd;
3128	long	length;
3129};
3130#endif
3131int
3132oftruncate(td, uap)
3133	struct thread *td;
3134	register struct oftruncate_args /* {
3135		int fd;
3136		long length;
3137	} */ *uap;
3138{
3139	struct ftruncate_args /* {
3140		int fd;
3141		int pad;
3142		off_t length;
3143	} */ nuap;
3144
3145	nuap.fd = uap->fd;
3146	nuap.length = uap->length;
3147	return (ftruncate(td, &nuap));
3148}
3149#endif /* COMPAT_43 */
3150
3151/*
3152 * Sync an open file.
3153 */
3154#ifndef _SYS_SYSPROTO_H_
3155struct fsync_args {
3156	int	fd;
3157};
3158#endif
3159int
3160fsync(td, uap)
3161	struct thread *td;
3162	struct fsync_args /* {
3163		int fd;
3164	} */ *uap;
3165{
3166	struct vnode *vp;
3167	struct mount *mp;
3168	struct file *fp;
3169	int vfslocked;
3170	int error;
3171
3172	AUDIT_ARG(fd, uap->fd);
3173	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3174		return (error);
3175	vp = fp->f_vnode;
3176	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3177	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3178		goto drop;
3179	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3180	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3181	if (vp->v_object != NULL) {
3182		VM_OBJECT_LOCK(vp->v_object);
3183		vm_object_page_clean(vp->v_object, 0, 0, 0);
3184		VM_OBJECT_UNLOCK(vp->v_object);
3185	}
3186	error = VOP_FSYNC(vp, MNT_WAIT, td);
3187
3188	VOP_UNLOCK(vp, 0, td);
3189	vn_finished_write(mp);
3190drop:
3191	VFS_UNLOCK_GIANT(vfslocked);
3192	fdrop(fp, td);
3193	return (error);
3194}
3195
3196/*
3197 * Rename files.  Source and destination must either both be directories, or
3198 * both not be directories.  If target is a directory, it must be empty.
3199 */
3200#ifndef _SYS_SYSPROTO_H_
3201struct rename_args {
3202	char	*from;
3203	char	*to;
3204};
3205#endif
3206int
3207rename(td, uap)
3208	struct thread *td;
3209	register struct rename_args /* {
3210		char *from;
3211		char *to;
3212	} */ *uap;
3213{
3214
3215	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3216}
3217
3218int
3219kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3220{
3221	struct mount *mp = NULL;
3222	struct vnode *tvp, *fvp, *tdvp;
3223	struct nameidata fromnd, tond;
3224	int tvfslocked;
3225	int fvfslocked;
3226	int error;
3227
3228	bwillwrite();
3229#ifdef MAC
3230	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3231	    AUDITVNODE1, pathseg, from, td);
3232#else
3233	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3234	    AUDITVNODE1, pathseg, from, td);
3235#endif
3236	if ((error = namei(&fromnd)) != 0)
3237		return (error);
3238	fvfslocked = NDHASGIANT(&fromnd);
3239	tvfslocked = 0;
3240#ifdef MAC
3241	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3242	    fromnd.ni_vp, &fromnd.ni_cnd);
3243	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3244	if (fromnd.ni_dvp != fromnd.ni_vp)
3245		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3246#endif
3247	fvp = fromnd.ni_vp;
3248	if (error == 0)
3249		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3250	if (error != 0) {
3251		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3252		vrele(fromnd.ni_dvp);
3253		vrele(fvp);
3254		goto out1;
3255	}
3256	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3257	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3258	if (fromnd.ni_vp->v_type == VDIR)
3259		tond.ni_cnd.cn_flags |= WILLBEDIR;
3260	if ((error = namei(&tond)) != 0) {
3261		/* Translate error code for rename("dir1", "dir2/."). */
3262		if (error == EISDIR && fvp->v_type == VDIR)
3263			error = EINVAL;
3264		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3265		vrele(fromnd.ni_dvp);
3266		vrele(fvp);
3267		vn_finished_write(mp);
3268		goto out1;
3269	}
3270	tvfslocked = NDHASGIANT(&tond);
3271	tdvp = tond.ni_dvp;
3272	tvp = tond.ni_vp;
3273	if (tvp != NULL) {
3274		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3275			error = ENOTDIR;
3276			goto out;
3277		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3278			error = EISDIR;
3279			goto out;
3280		}
3281	}
3282	if (fvp == tdvp)
3283		error = EINVAL;
3284	/*
3285	 * If the source is the same as the destination (that is, if they
3286	 * are links to the same vnode), then there is nothing to do.
3287	 */
3288	if (fvp == tvp)
3289		error = -1;
3290#ifdef MAC
3291	else
3292		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3293		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3294#endif
3295out:
3296	if (!error) {
3297		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3298		if (fromnd.ni_dvp != tdvp) {
3299			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3300		}
3301		if (tvp) {
3302			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3303		}
3304		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3305				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3306		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3307		NDFREE(&tond, NDF_ONLY_PNBUF);
3308	} else {
3309		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3310		NDFREE(&tond, NDF_ONLY_PNBUF);
3311		if (tvp)
3312			vput(tvp);
3313		if (tdvp == tvp)
3314			vrele(tdvp);
3315		else
3316			vput(tdvp);
3317		vrele(fromnd.ni_dvp);
3318		vrele(fvp);
3319	}
3320	vrele(tond.ni_startdir);
3321	vn_finished_write(mp);
3322out1:
3323	if (fromnd.ni_startdir)
3324		vrele(fromnd.ni_startdir);
3325	VFS_UNLOCK_GIANT(fvfslocked);
3326	VFS_UNLOCK_GIANT(tvfslocked);
3327	if (error == -1)
3328		return (0);
3329	return (error);
3330}
3331
3332/*
3333 * Make a directory file.
3334 */
3335#ifndef _SYS_SYSPROTO_H_
3336struct mkdir_args {
3337	char	*path;
3338	int	mode;
3339};
3340#endif
3341int
3342mkdir(td, uap)
3343	struct thread *td;
3344	register struct mkdir_args /* {
3345		char *path;
3346		int mode;
3347	} */ *uap;
3348{
3349
3350	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3351}
3352
3353int
3354kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3355{
3356	struct mount *mp;
3357	struct vnode *vp;
3358	struct vattr vattr;
3359	int error;
3360	struct nameidata nd;
3361	int vfslocked;
3362
3363	AUDIT_ARG(mode, mode);
3364restart:
3365	bwillwrite();
3366	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3367	    segflg, path, td);
3368	nd.ni_cnd.cn_flags |= WILLBEDIR;
3369	if ((error = namei(&nd)) != 0)
3370		return (error);
3371	vfslocked = NDHASGIANT(&nd);
3372	vp = nd.ni_vp;
3373	if (vp != NULL) {
3374		NDFREE(&nd, NDF_ONLY_PNBUF);
3375		/*
3376		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3377		 * the strange behaviour of leaving the vnode unlocked
3378		 * if the target is the same vnode as the parent.
3379		 */
3380		if (vp == nd.ni_dvp)
3381			vrele(nd.ni_dvp);
3382		else
3383			vput(nd.ni_dvp);
3384		vrele(vp);
3385		VFS_UNLOCK_GIANT(vfslocked);
3386		return (EEXIST);
3387	}
3388	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3389		NDFREE(&nd, NDF_ONLY_PNBUF);
3390		vput(nd.ni_dvp);
3391		VFS_UNLOCK_GIANT(vfslocked);
3392		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3393			return (error);
3394		goto restart;
3395	}
3396	VATTR_NULL(&vattr);
3397	vattr.va_type = VDIR;
3398	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3399	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3400	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3401#ifdef MAC
3402	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3403	    &vattr);
3404	if (error)
3405		goto out;
3406#endif
3407	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3408	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3409#ifdef MAC
3410out:
3411#endif
3412	NDFREE(&nd, NDF_ONLY_PNBUF);
3413	vput(nd.ni_dvp);
3414	if (!error)
3415		vput(nd.ni_vp);
3416	vn_finished_write(mp);
3417	VFS_UNLOCK_GIANT(vfslocked);
3418	return (error);
3419}
3420
3421/*
3422 * Remove a directory file.
3423 */
3424#ifndef _SYS_SYSPROTO_H_
3425struct rmdir_args {
3426	char	*path;
3427};
3428#endif
3429int
3430rmdir(td, uap)
3431	struct thread *td;
3432	struct rmdir_args /* {
3433		char *path;
3434	} */ *uap;
3435{
3436
3437	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3438}
3439
3440int
3441kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3442{
3443	struct mount *mp;
3444	struct vnode *vp;
3445	int error;
3446	struct nameidata nd;
3447	int vfslocked;
3448
3449restart:
3450	bwillwrite();
3451	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3452	    pathseg, path, td);
3453	if ((error = namei(&nd)) != 0)
3454		return (error);
3455	vfslocked = NDHASGIANT(&nd);
3456	vp = nd.ni_vp;
3457	if (vp->v_type != VDIR) {
3458		error = ENOTDIR;
3459		goto out;
3460	}
3461	/*
3462	 * No rmdir "." please.
3463	 */
3464	if (nd.ni_dvp == vp) {
3465		error = EINVAL;
3466		goto out;
3467	}
3468	/*
3469	 * The root of a mounted filesystem cannot be deleted.
3470	 */
3471	if (vp->v_vflag & VV_ROOT) {
3472		error = EBUSY;
3473		goto out;
3474	}
3475#ifdef MAC
3476	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3477	    &nd.ni_cnd);
3478	if (error)
3479		goto out;
3480#endif
3481	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3482		NDFREE(&nd, NDF_ONLY_PNBUF);
3483		vput(vp);
3484		if (nd.ni_dvp == vp)
3485			vrele(nd.ni_dvp);
3486		else
3487			vput(nd.ni_dvp);
3488		VFS_UNLOCK_GIANT(vfslocked);
3489		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3490			return (error);
3491		goto restart;
3492	}
3493	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3494	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3495	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3496	vn_finished_write(mp);
3497out:
3498	NDFREE(&nd, NDF_ONLY_PNBUF);
3499	vput(vp);
3500	if (nd.ni_dvp == vp)
3501		vrele(nd.ni_dvp);
3502	else
3503		vput(nd.ni_dvp);
3504	VFS_UNLOCK_GIANT(vfslocked);
3505	return (error);
3506}
3507
3508#ifdef COMPAT_43
3509/*
3510 * Read a block of directory entries in a filesystem independent format.
3511 */
3512#ifndef _SYS_SYSPROTO_H_
3513struct ogetdirentries_args {
3514	int	fd;
3515	char	*buf;
3516	u_int	count;
3517	long	*basep;
3518};
3519#endif
3520int
3521ogetdirentries(td, uap)
3522	struct thread *td;
3523	register struct ogetdirentries_args /* {
3524		int fd;
3525		char *buf;
3526		u_int count;
3527		long *basep;
3528	} */ *uap;
3529{
3530	struct vnode *vp;
3531	struct file *fp;
3532	struct uio auio, kuio;
3533	struct iovec aiov, kiov;
3534	struct dirent *dp, *edp;
3535	caddr_t dirbuf;
3536	int error, eofflag, readcnt, vfslocked;
3537	long loff;
3538
3539	/* XXX arbitrary sanity limit on `count'. */
3540	if (uap->count > 64 * 1024)
3541		return (EINVAL);
3542	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3543		return (error);
3544	if ((fp->f_flag & FREAD) == 0) {
3545		fdrop(fp, td);
3546		return (EBADF);
3547	}
3548	vp = fp->f_vnode;
3549unionread:
3550	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3551	if (vp->v_type != VDIR) {
3552		VFS_UNLOCK_GIANT(vfslocked);
3553		fdrop(fp, td);
3554		return (EINVAL);
3555	}
3556	aiov.iov_base = uap->buf;
3557	aiov.iov_len = uap->count;
3558	auio.uio_iov = &aiov;
3559	auio.uio_iovcnt = 1;
3560	auio.uio_rw = UIO_READ;
3561	auio.uio_segflg = UIO_USERSPACE;
3562	auio.uio_td = td;
3563	auio.uio_resid = uap->count;
3564	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3565	loff = auio.uio_offset = fp->f_offset;
3566#ifdef MAC
3567	error = mac_check_vnode_readdir(td->td_ucred, vp);
3568	if (error) {
3569		VOP_UNLOCK(vp, 0, td);
3570		VFS_UNLOCK_GIANT(vfslocked);
3571		fdrop(fp, td);
3572		return (error);
3573	}
3574#endif
3575#	if (BYTE_ORDER != LITTLE_ENDIAN)
3576		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3577			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3578			    NULL, NULL);
3579			fp->f_offset = auio.uio_offset;
3580		} else
3581#	endif
3582	{
3583		kuio = auio;
3584		kuio.uio_iov = &kiov;
3585		kuio.uio_segflg = UIO_SYSSPACE;
3586		kiov.iov_len = uap->count;
3587		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3588		kiov.iov_base = dirbuf;
3589		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3590			    NULL, NULL);
3591		fp->f_offset = kuio.uio_offset;
3592		if (error == 0) {
3593			readcnt = uap->count - kuio.uio_resid;
3594			edp = (struct dirent *)&dirbuf[readcnt];
3595			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3596#				if (BYTE_ORDER == LITTLE_ENDIAN)
3597					/*
3598					 * The expected low byte of
3599					 * dp->d_namlen is our dp->d_type.
3600					 * The high MBZ byte of dp->d_namlen
3601					 * is our dp->d_namlen.
3602					 */
3603					dp->d_type = dp->d_namlen;
3604					dp->d_namlen = 0;
3605#				else
3606					/*
3607					 * The dp->d_type is the high byte
3608					 * of the expected dp->d_namlen,
3609					 * so must be zero'ed.
3610					 */
3611					dp->d_type = 0;
3612#				endif
3613				if (dp->d_reclen > 0) {
3614					dp = (struct dirent *)
3615					    ((char *)dp + dp->d_reclen);
3616				} else {
3617					error = EIO;
3618					break;
3619				}
3620			}
3621			if (dp >= edp)
3622				error = uiomove(dirbuf, readcnt, &auio);
3623		}
3624		FREE(dirbuf, M_TEMP);
3625	}
3626	if (error) {
3627		VOP_UNLOCK(vp, 0, td);
3628		VFS_UNLOCK_GIANT(vfslocked);
3629		fdrop(fp, td);
3630		return (error);
3631	}
3632	if (uap->count == auio.uio_resid &&
3633	    (vp->v_vflag & VV_ROOT) &&
3634	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3635		struct vnode *tvp = vp;
3636		vp = vp->v_mount->mnt_vnodecovered;
3637		VREF(vp);
3638		fp->f_vnode = vp;
3639		fp->f_data = vp;
3640		fp->f_offset = 0;
3641		vput(tvp);
3642		VFS_UNLOCK_GIANT(vfslocked);
3643		goto unionread;
3644	}
3645	VOP_UNLOCK(vp, 0, td);
3646	VFS_UNLOCK_GIANT(vfslocked);
3647	error = copyout(&loff, uap->basep, sizeof(long));
3648	fdrop(fp, td);
3649	td->td_retval[0] = uap->count - auio.uio_resid;
3650	return (error);
3651}
3652#endif /* COMPAT_43 */
3653
3654/*
3655 * Read a block of directory entries in a filesystem independent format.
3656 */
3657#ifndef _SYS_SYSPROTO_H_
3658struct getdirentries_args {
3659	int	fd;
3660	char	*buf;
3661	u_int	count;
3662	long	*basep;
3663};
3664#endif
3665int
3666getdirentries(td, uap)
3667	struct thread *td;
3668	register struct getdirentries_args /* {
3669		int fd;
3670		char *buf;
3671		u_int count;
3672		long *basep;
3673	} */ *uap;
3674{
3675	struct vnode *vp;
3676	struct file *fp;
3677	struct uio auio;
3678	struct iovec aiov;
3679	int vfslocked;
3680	long loff;
3681	int error, eofflag;
3682
3683	AUDIT_ARG(fd, uap->fd);
3684	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3685		return (error);
3686	if ((fp->f_flag & FREAD) == 0) {
3687		fdrop(fp, td);
3688		return (EBADF);
3689	}
3690	vp = fp->f_vnode;
3691unionread:
3692	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3693	if (vp->v_type != VDIR) {
3694		VFS_UNLOCK_GIANT(vfslocked);
3695		error = EINVAL;
3696		goto fail;
3697	}
3698	aiov.iov_base = uap->buf;
3699	aiov.iov_len = uap->count;
3700	auio.uio_iov = &aiov;
3701	auio.uio_iovcnt = 1;
3702	auio.uio_rw = UIO_READ;
3703	auio.uio_segflg = UIO_USERSPACE;
3704	auio.uio_td = td;
3705	auio.uio_resid = uap->count;
3706	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3707	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3708	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3709	loff = auio.uio_offset = fp->f_offset;
3710#ifdef MAC
3711	error = mac_check_vnode_readdir(td->td_ucred, vp);
3712	if (error == 0)
3713#endif
3714		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3715		    NULL);
3716	fp->f_offset = auio.uio_offset;
3717	if (error) {
3718		VOP_UNLOCK(vp, 0, td);
3719		VFS_UNLOCK_GIANT(vfslocked);
3720		goto fail;
3721	}
3722	if (uap->count == auio.uio_resid &&
3723	    (vp->v_vflag & VV_ROOT) &&
3724	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3725		struct vnode *tvp = vp;
3726		vp = vp->v_mount->mnt_vnodecovered;
3727		VREF(vp);
3728		fp->f_vnode = vp;
3729		fp->f_data = vp;
3730		fp->f_offset = 0;
3731		vput(tvp);
3732		VFS_UNLOCK_GIANT(vfslocked);
3733		goto unionread;
3734	}
3735	VOP_UNLOCK(vp, 0, td);
3736	VFS_UNLOCK_GIANT(vfslocked);
3737	if (uap->basep != NULL) {
3738		error = copyout(&loff, uap->basep, sizeof(long));
3739	}
3740	td->td_retval[0] = uap->count - auio.uio_resid;
3741fail:
3742	fdrop(fp, td);
3743	return (error);
3744}
3745
3746#ifndef _SYS_SYSPROTO_H_
3747struct getdents_args {
3748	int fd;
3749	char *buf;
3750	size_t count;
3751};
3752#endif
3753int
3754getdents(td, uap)
3755	struct thread *td;
3756	register struct getdents_args /* {
3757		int fd;
3758		char *buf;
3759		u_int count;
3760	} */ *uap;
3761{
3762	struct getdirentries_args ap;
3763	ap.fd = uap->fd;
3764	ap.buf = uap->buf;
3765	ap.count = uap->count;
3766	ap.basep = NULL;
3767	return (getdirentries(td, &ap));
3768}
3769
3770/*
3771 * Set the mode mask for creation of filesystem nodes.
3772 */
3773#ifndef _SYS_SYSPROTO_H_
3774struct umask_args {
3775	int	newmask;
3776};
3777#endif
3778int
3779umask(td, uap)
3780	struct thread *td;
3781	struct umask_args /* {
3782		int newmask;
3783	} */ *uap;
3784{
3785	register struct filedesc *fdp;
3786
3787	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3788	fdp = td->td_proc->p_fd;
3789	td->td_retval[0] = fdp->fd_cmask;
3790	fdp->fd_cmask = uap->newmask & ALLPERMS;
3791	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3792	return (0);
3793}
3794
3795/*
3796 * Void all references to file by ripping underlying filesystem away from
3797 * vnode.
3798 */
3799#ifndef _SYS_SYSPROTO_H_
3800struct revoke_args {
3801	char	*path;
3802};
3803#endif
3804int
3805revoke(td, uap)
3806	struct thread *td;
3807	register struct revoke_args /* {
3808		char *path;
3809	} */ *uap;
3810{
3811	struct vnode *vp;
3812	struct vattr vattr;
3813	int error;
3814	struct nameidata nd;
3815	int vfslocked;
3816
3817	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3818	    UIO_USERSPACE, uap->path, td);
3819	if ((error = namei(&nd)) != 0)
3820		return (error);
3821	vfslocked = NDHASGIANT(&nd);
3822	vp = nd.ni_vp;
3823	NDFREE(&nd, NDF_ONLY_PNBUF);
3824	if (vp->v_type != VCHR) {
3825		error = EINVAL;
3826		goto out;
3827	}
3828#ifdef MAC
3829	error = mac_check_vnode_revoke(td->td_ucred, vp);
3830	if (error)
3831		goto out;
3832#endif
3833	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3834	if (error)
3835		goto out;
3836	if (td->td_ucred->cr_uid != vattr.va_uid) {
3837		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3838		    SUSER_ALLOWJAIL);
3839		if (error)
3840			goto out;
3841	}
3842	if (vcount(vp) > 1)
3843		VOP_REVOKE(vp, REVOKEALL);
3844out:
3845	vput(vp);
3846	VFS_UNLOCK_GIANT(vfslocked);
3847	return (error);
3848}
3849
3850/*
3851 * Convert a user file descriptor to a kernel file entry.
3852 * A reference on the file entry is held upon returning.
3853 */
3854int
3855getvnode(fdp, fd, fpp)
3856	struct filedesc *fdp;
3857	int fd;
3858	struct file **fpp;
3859{
3860	int error;
3861	struct file *fp;
3862
3863	fp = NULL;
3864	if (fdp == NULL)
3865		error = EBADF;
3866	else {
3867		FILEDESC_LOCK(fdp);
3868		if ((u_int)fd >= fdp->fd_nfiles ||
3869		    (fp = fdp->fd_ofiles[fd]) == NULL)
3870			error = EBADF;
3871		else if (fp->f_vnode == NULL) {
3872			fp = NULL;
3873			error = EINVAL;
3874		} else {
3875			fhold(fp);
3876			error = 0;
3877		}
3878		FILEDESC_UNLOCK(fdp);
3879	}
3880	*fpp = fp;
3881	return (error);
3882}
3883
3884/*
3885 * Get an (NFS) file handle.
3886 */
3887#ifndef _SYS_SYSPROTO_H_
3888struct lgetfh_args {
3889	char	*fname;
3890	fhandle_t *fhp;
3891};
3892#endif
3893int
3894lgetfh(td, uap)
3895	struct thread *td;
3896	register struct lgetfh_args *uap;
3897{
3898	struct nameidata nd;
3899	fhandle_t fh;
3900	register struct vnode *vp;
3901	int vfslocked;
3902	int error;
3903
3904	error = priv_check(td, PRIV_VFS_GETFH);
3905	if (error)
3906		return (error);
3907	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3908	    UIO_USERSPACE, uap->fname, td);
3909	error = namei(&nd);
3910	if (error)
3911		return (error);
3912	vfslocked = NDHASGIANT(&nd);
3913	NDFREE(&nd, NDF_ONLY_PNBUF);
3914	vp = nd.ni_vp;
3915	bzero(&fh, sizeof(fh));
3916	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3917	error = VOP_VPTOFH(vp, &fh.fh_fid);
3918	vput(vp);
3919	VFS_UNLOCK_GIANT(vfslocked);
3920	if (error)
3921		return (error);
3922	error = copyout(&fh, uap->fhp, sizeof (fh));
3923	return (error);
3924}
3925
3926#ifndef _SYS_SYSPROTO_H_
3927struct getfh_args {
3928	char	*fname;
3929	fhandle_t *fhp;
3930};
3931#endif
3932int
3933getfh(td, uap)
3934	struct thread *td;
3935	register struct getfh_args *uap;
3936{
3937	struct nameidata nd;
3938	fhandle_t fh;
3939	register struct vnode *vp;
3940	int vfslocked;
3941	int error;
3942
3943	error = priv_check(td, PRIV_VFS_GETFH);
3944	if (error)
3945		return (error);
3946	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3947	    UIO_USERSPACE, uap->fname, td);
3948	error = namei(&nd);
3949	if (error)
3950		return (error);
3951	vfslocked = NDHASGIANT(&nd);
3952	NDFREE(&nd, NDF_ONLY_PNBUF);
3953	vp = nd.ni_vp;
3954	bzero(&fh, sizeof(fh));
3955	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3956	error = VOP_VPTOFH(vp, &fh.fh_fid);
3957	vput(vp);
3958	VFS_UNLOCK_GIANT(vfslocked);
3959	if (error)
3960		return (error);
3961	error = copyout(&fh, uap->fhp, sizeof (fh));
3962	return (error);
3963}
3964
3965/*
3966 * syscall for the rpc.lockd to use to translate a NFS file handle into an
3967 * open descriptor.
3968 *
3969 * warning: do not remove the priv_check() call or this becomes one giant
3970 * security hole.
3971 */
3972#ifndef _SYS_SYSPROTO_H_
3973struct fhopen_args {
3974	const struct fhandle *u_fhp;
3975	int flags;
3976};
3977#endif
3978int
3979fhopen(td, uap)
3980	struct thread *td;
3981	struct fhopen_args /* {
3982		const struct fhandle *u_fhp;
3983		int flags;
3984	} */ *uap;
3985{
3986	struct proc *p = td->td_proc;
3987	struct mount *mp;
3988	struct vnode *vp;
3989	struct fhandle fhp;
3990	struct vattr vat;
3991	struct vattr *vap = &vat;
3992	struct flock lf;
3993	struct file *fp;
3994	register struct filedesc *fdp = p->p_fd;
3995	int fmode, mode, error, type;
3996	struct file *nfp;
3997	int vfslocked;
3998	int indx;
3999
4000	error = priv_check(td, PRIV_VFS_FHOPEN);
4001	if (error)
4002		return (error);
4003	fmode = FFLAGS(uap->flags);
4004	/* why not allow a non-read/write open for our lockd? */
4005	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4006		return (EINVAL);
4007	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4008	if (error)
4009		return(error);
4010	/* find the mount point */
4011	mp = vfs_getvfs(&fhp.fh_fsid);
4012	if (mp == NULL)
4013		return (ESTALE);
4014	vfslocked = VFS_LOCK_GIANT(mp);
4015	/* now give me my vnode, it gets returned to me locked */
4016	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4017	if (error)
4018		goto out;
4019	/*
4020	 * from now on we have to make sure not
4021	 * to forget about the vnode
4022	 * any error that causes an abort must vput(vp)
4023	 * just set error = err and 'goto bad;'.
4024	 */
4025
4026	/*
4027	 * from vn_open
4028	 */
4029	if (vp->v_type == VLNK) {
4030		error = EMLINK;
4031		goto bad;
4032	}
4033	if (vp->v_type == VSOCK) {
4034		error = EOPNOTSUPP;
4035		goto bad;
4036	}
4037	mode = 0;
4038	if (fmode & (FWRITE | O_TRUNC)) {
4039		if (vp->v_type == VDIR) {
4040			error = EISDIR;
4041			goto bad;
4042		}
4043		error = vn_writechk(vp);
4044		if (error)
4045			goto bad;
4046		mode |= VWRITE;
4047	}
4048	if (fmode & FREAD)
4049		mode |= VREAD;
4050	if (fmode & O_APPEND)
4051		mode |= VAPPEND;
4052#ifdef MAC
4053	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4054	if (error)
4055		goto bad;
4056#endif
4057	if (mode) {
4058		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4059		if (error)
4060			goto bad;
4061	}
4062	if (fmode & O_TRUNC) {
4063		VOP_UNLOCK(vp, 0, td);				/* XXX */
4064		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4065			vrele(vp);
4066			goto out;
4067		}
4068		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4069		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4070#ifdef MAC
4071		/*
4072		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4073		 * should be right.
4074		 */
4075		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4076		if (error == 0) {
4077#endif
4078			VATTR_NULL(vap);
4079			vap->va_size = 0;
4080			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4081#ifdef MAC
4082		}
4083#endif
4084		vn_finished_write(mp);
4085		if (error)
4086			goto bad;
4087	}
4088	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4089	if (error)
4090		goto bad;
4091
4092	if (fmode & FWRITE)
4093		vp->v_writecount++;
4094
4095	/*
4096	 * end of vn_open code
4097	 */
4098
4099	if ((error = falloc(td, &nfp, &indx)) != 0) {
4100		if (fmode & FWRITE)
4101			vp->v_writecount--;
4102		goto bad;
4103	}
4104	/* An extra reference on `nfp' has been held for us by falloc(). */
4105	fp = nfp;
4106
4107	nfp->f_vnode = vp;
4108	nfp->f_data = vp;
4109	nfp->f_flag = fmode & FMASK;
4110	nfp->f_ops = &vnops;
4111	nfp->f_type = DTYPE_VNODE;
4112	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4113		lf.l_whence = SEEK_SET;
4114		lf.l_start = 0;
4115		lf.l_len = 0;
4116		if (fmode & O_EXLOCK)
4117			lf.l_type = F_WRLCK;
4118		else
4119			lf.l_type = F_RDLCK;
4120		type = F_FLOCK;
4121		if ((fmode & FNONBLOCK) == 0)
4122			type |= F_WAIT;
4123		VOP_UNLOCK(vp, 0, td);
4124		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4125			    type)) != 0) {
4126			/*
4127			 * The lock request failed.  Normally close the
4128			 * descriptor but handle the case where someone might
4129			 * have dup()d or close()d it when we weren't looking.
4130			 */
4131			fdclose(fdp, fp, indx, td);
4132
4133			/*
4134			 * release our private reference
4135			 */
4136			fdrop(fp, td);
4137			goto out;
4138		}
4139		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4140		fp->f_flag |= FHASLOCK;
4141	}
4142
4143	VOP_UNLOCK(vp, 0, td);
4144	fdrop(fp, td);
4145	vfs_rel(mp);
4146	VFS_UNLOCK_GIANT(vfslocked);
4147	td->td_retval[0] = indx;
4148	return (0);
4149
4150bad:
4151	vput(vp);
4152out:
4153	vfs_rel(mp);
4154	VFS_UNLOCK_GIANT(vfslocked);
4155	return (error);
4156}
4157
4158/*
4159 * Stat an (NFS) file handle.
4160 */
4161#ifndef _SYS_SYSPROTO_H_
4162struct fhstat_args {
4163	struct fhandle *u_fhp;
4164	struct stat *sb;
4165};
4166#endif
4167int
4168fhstat(td, uap)
4169	struct thread *td;
4170	register struct fhstat_args /* {
4171		struct fhandle *u_fhp;
4172		struct stat *sb;
4173	} */ *uap;
4174{
4175	struct stat sb;
4176	fhandle_t fh;
4177	struct mount *mp;
4178	struct vnode *vp;
4179	int vfslocked;
4180	int error;
4181
4182	error = priv_check(td, PRIV_VFS_FHSTAT);
4183	if (error)
4184		return (error);
4185	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4186	if (error)
4187		return (error);
4188	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4189		return (ESTALE);
4190	vfslocked = VFS_LOCK_GIANT(mp);
4191	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4192		vfs_rel(mp);
4193		VFS_UNLOCK_GIANT(vfslocked);
4194		return (error);
4195	}
4196	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4197	vput(vp);
4198	vfs_rel(mp);
4199	VFS_UNLOCK_GIANT(vfslocked);
4200	if (error)
4201		return (error);
4202	error = copyout(&sb, uap->sb, sizeof(sb));
4203	return (error);
4204}
4205
4206/*
4207 * Implement fstatfs() for (NFS) file handles.
4208 */
4209#ifndef _SYS_SYSPROTO_H_
4210struct fhstatfs_args {
4211	struct fhandle *u_fhp;
4212	struct statfs *buf;
4213};
4214#endif
4215int
4216fhstatfs(td, uap)
4217	struct thread *td;
4218	struct fhstatfs_args /* {
4219		struct fhandle *u_fhp;
4220		struct statfs *buf;
4221	} */ *uap;
4222{
4223	struct statfs sf;
4224	fhandle_t fh;
4225	int error;
4226
4227	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4228	if (error)
4229		return (error);
4230	error = kern_fhstatfs(td, fh, &sf);
4231	if (error)
4232		return (error);
4233	return (copyout(&sf, uap->buf, sizeof(sf)));
4234}
4235
4236int
4237kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4238{
4239	struct statfs *sp;
4240	struct mount *mp;
4241	struct vnode *vp;
4242	int vfslocked;
4243	int error;
4244
4245	error = priv_check(td, PRIV_VFS_FHSTATFS);
4246	if (error)
4247		return (error);
4248	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4249		return (ESTALE);
4250	vfslocked = VFS_LOCK_GIANT(mp);
4251	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4252	if (error) {
4253		VFS_UNLOCK_GIANT(vfslocked);
4254		vfs_rel(mp);
4255		return (error);
4256	}
4257	vput(vp);
4258	error = prison_canseemount(td->td_ucred, mp);
4259	if (error)
4260		goto out;
4261#ifdef MAC
4262	error = mac_check_mount_stat(td->td_ucred, mp);
4263	if (error)
4264		goto out;
4265#endif
4266	/*
4267	 * Set these in case the underlying filesystem fails to do so.
4268	 */
4269	sp = &mp->mnt_stat;
4270	sp->f_version = STATFS_VERSION;
4271	sp->f_namemax = NAME_MAX;
4272	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4273	error = VFS_STATFS(mp, sp, td);
4274	if (error == 0)
4275		*buf = *sp;
4276out:
4277	vfs_rel(mp);
4278	VFS_UNLOCK_GIANT(vfslocked);
4279	return (error);
4280}
4281