vfs_syscalls.c revision 166824
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 166824 2007-02-19 10:56:09Z kib $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/mutex.h>
51#include <sys/sysproto.h>
52#include <sys/namei.h>
53#include <sys/filedesc.h>
54#include <sys/kernel.h>
55#include <sys/fcntl.h>
56#include <sys/file.h>
57#include <sys/limits.h>
58#include <sys/linker.h>
59#include <sys/stat.h>
60#include <sys/sx.h>
61#include <sys/unistd.h>
62#include <sys/vnode.h>
63#include <sys/priv.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/jail.h>
67#include <sys/syscallsubr.h>
68#include <sys/sysctl.h>
69
70#include <machine/stdarg.h>
71
72#include <security/audit/audit.h>
73#include <security/mac/mac_framework.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/uma.h>
79
/*
 * Forward declarations of helper routines that are private to this file
 * (all of them have static linkage).
 */
80static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83static int setfmode(struct thread *td, struct vnode *, int);
84static int setfflags(struct thread *td, struct vnode *, int);
85static int setutimes(struct thread *td, struct vnode *,
86    const struct timespec *, int, int);
87static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88    struct thread *td);
89
90/*
 * Version of POSIX asynchronous I/O that is implemented.  The AIO
 * module's initialization routine stores the value here; zero means
 * that AIO is not implemented.  The value is consulted by pathconf()
 * in this file and by fpathconf() in kern_descrip.c.
 */
96int async_io_version;
97
98/*
99 * Sync each mounted filesystem.
100 */
101#ifndef _SYS_SYSPROTO_H_
102struct sync_args {
103	int     dummy;
104};
105#endif
106
107#ifdef DEBUG
108static int syncprt = 0;
109SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
110#endif
111
112/* ARGSUSED */
113int
114sync(td, uap)
115	struct thread *td;
116	struct sync_args *uap;
117{
118	struct mount *mp, *nmp;
119	int vfslocked;
120
121	mtx_lock(&mountlist_mtx);
122	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124			nmp = TAILQ_NEXT(mp, mnt_list);
125			continue;
126		}
127		vfslocked = VFS_LOCK_GIANT(mp);
128		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130			MNT_ILOCK(mp);
131			mp->mnt_noasync++;
132			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133			MNT_IUNLOCK(mp);
134			vfs_msync(mp, MNT_NOWAIT);
135			VFS_SYNC(mp, MNT_NOWAIT, td);
136			MNT_ILOCK(mp);
137			mp->mnt_noasync--;
138			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139			    mp->mnt_noasync == 0)
140				mp->mnt_kern_flag |= MNTK_ASYNC;
141			MNT_IUNLOCK(mp);
142			vn_finished_write(mp);
143		}
144		VFS_UNLOCK_GIANT(vfslocked);
145		mtx_lock(&mountlist_mtx);
146		nmp = TAILQ_NEXT(mp, mnt_list);
147		vfs_unbusy(mp, td);
148	}
149	mtx_unlock(&mountlist_mtx);
150	return (0);
151}
152
153/* XXX PRISON: could be per prison flag */
154static int prison_quotas;
155#if 0
156SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
157#endif
158
159/*
160 * Change filesystem quotas.
161 *
162 * MP SAFE
163 */
164#ifndef _SYS_SYSPROTO_H_
165struct quotactl_args {
166	char *path;
167	int cmd;
168	int uid;
169	caddr_t arg;
170};
171#endif
172int
173quotactl(td, uap)
174	struct thread *td;
175	register struct quotactl_args /* {
176		char *path;
177		int cmd;
178		int uid;
179		caddr_t arg;
180	} */ *uap;
181{
182	struct mount *mp, *vmp;
183	int vfslocked;
184	int error;
185	struct nameidata nd;
186
187	AUDIT_ARG(cmd, uap->cmd);
188	AUDIT_ARG(uid, uap->uid);
189	if (jailed(td->td_ucred) && !prison_quotas)
190		return (EPERM);
191	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
192	   UIO_USERSPACE, uap->path, td);
193	if ((error = namei(&nd)) != 0)
194		return (error);
195	vfslocked = NDHASGIANT(&nd);
196	NDFREE(&nd, NDF_ONLY_PNBUF);
197	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
198	mp = nd.ni_vp->v_mount;
199	vrele(nd.ni_vp);
200	if (error)
201		goto out;
202	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203	vn_finished_write(vmp);
204out:
205	VFS_UNLOCK_GIANT(vfslocked);
206	return (error);
207}
208
209/*
210 * Get filesystem statistics.
211 */
212#ifndef _SYS_SYSPROTO_H_
213struct statfs_args {
214	char *path;
215	struct statfs *buf;
216};
217#endif
218int
219statfs(td, uap)
220	struct thread *td;
221	register struct statfs_args /* {
222		char *path;
223		struct statfs *buf;
224	} */ *uap;
225{
226	struct statfs sf;
227	int error;
228
229	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
230	if (error == 0)
231		error = copyout(&sf, uap->buf, sizeof(sf));
232	return (error);
233}
234
235int
236kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
237    struct statfs *buf)
238{
239	struct mount *mp;
240	struct statfs *sp, sb;
241	int vfslocked;
242	int error;
243	struct nameidata nd;
244
245	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
246	    pathseg, path, td);
247	error = namei(&nd);
248	if (error)
249		return (error);
250	vfslocked = NDHASGIANT(&nd);
251	mp = nd.ni_vp->v_mount;
252	vfs_ref(mp);
253	NDFREE(&nd, NDF_ONLY_PNBUF);
254	vput(nd.ni_vp);
255#ifdef MAC
256	error = mac_check_mount_stat(td->td_ucred, mp);
257	if (error)
258		goto out;
259#endif
260	/*
261	 * Set these in case the underlying filesystem fails to do so.
262	 */
263	sp = &mp->mnt_stat;
264	sp->f_version = STATFS_VERSION;
265	sp->f_namemax = NAME_MAX;
266	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
267	error = VFS_STATFS(mp, sp, td);
268	if (error)
269		goto out;
270	if (priv_check(td, PRIV_VFS_GENERATION)) {
271		bcopy(sp, &sb, sizeof(sb));
272		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
273		prison_enforce_statfs(td->td_ucred, mp, &sb);
274		sp = &sb;
275	}
276	*buf = *sp;
277out:
278	vfs_rel(mp);
279	VFS_UNLOCK_GIANT(vfslocked);
280	if (mtx_owned(&Giant))
281		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
282	return (error);
283}
284
285/*
286 * Get filesystem statistics.
287 */
288#ifndef _SYS_SYSPROTO_H_
289struct fstatfs_args {
290	int fd;
291	struct statfs *buf;
292};
293#endif
294int
295fstatfs(td, uap)
296	struct thread *td;
297	register struct fstatfs_args /* {
298		int fd;
299		struct statfs *buf;
300	} */ *uap;
301{
302	struct statfs sf;
303	int error;
304
305	error = kern_fstatfs(td, uap->fd, &sf);
306	if (error == 0)
307		error = copyout(&sf, uap->buf, sizeof(sf));
308	return (error);
309}
310
311int
312kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
313{
314	struct file *fp;
315	struct mount *mp;
316	struct statfs *sp, sb;
317	int vfslocked;
318	struct vnode *vp;
319	int error;
320
321	AUDIT_ARG(fd, fd);
322	error = getvnode(td->td_proc->p_fd, fd, &fp);
323	if (error)
324		return (error);
325	vp = fp->f_vnode;
326	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
327	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
328#ifdef AUDIT
329	AUDIT_ARG(vnode, vp, ARG_VNODE1);
330#endif
331	mp = vp->v_mount;
332	if (mp)
333		vfs_ref(mp);
334	VOP_UNLOCK(vp, 0, td);
335	fdrop(fp, td);
336	if (vp->v_iflag & VI_DOOMED) {
337		error = EBADF;
338		goto out;
339	}
340#ifdef MAC
341	error = mac_check_mount_stat(td->td_ucred, mp);
342	if (error)
343		goto out;
344#endif
345	/*
346	 * Set these in case the underlying filesystem fails to do so.
347	 */
348	sp = &mp->mnt_stat;
349	sp->f_version = STATFS_VERSION;
350	sp->f_namemax = NAME_MAX;
351	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
352	error = VFS_STATFS(mp, sp, td);
353	if (error)
354		goto out;
355	if (priv_check(td, PRIV_VFS_GENERATION)) {
356		bcopy(sp, &sb, sizeof(sb));
357		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
358		prison_enforce_statfs(td->td_ucred, mp, &sb);
359		sp = &sb;
360	}
361	*buf = *sp;
362out:
363	if (mp)
364		vfs_rel(mp);
365	VFS_UNLOCK_GIANT(vfslocked);
366	return (error);
367}
368
369/*
370 * Get statistics on all filesystems.
371 */
372#ifndef _SYS_SYSPROTO_H_
373struct getfsstat_args {
374	struct statfs *buf;
375	long bufsize;
376	int flags;
377};
378#endif
379int
380getfsstat(td, uap)
381	struct thread *td;
382	register struct getfsstat_args /* {
383		struct statfs *buf;
384		long bufsize;
385		int flags;
386	} */ *uap;
387{
388
389	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
390	    uap->flags));
391}
392
393/*
 * Collect statistics for the mounted filesystems the caller may see.
 *
 * buf      With UIO_USERSPACE, *buf is the user buffer that receives the
 *          entries.  With UIO_SYSSPACE and bufsize != 0, *buf is set to
 *          an M_TEMP allocation made here; the caller is responsible for
 *          freeing it.
 * bufsize  Size of the output area in bytes; 0 means "count only".
 * bufseg   Address space of the output buffer.
 * flags    Controls whether VFS_STATFS() is called to refresh the cached
 *          statistics (see the test inside the loop).
 *
 * The number of entries is left in td->td_retval[0].  Returns 0, or the
 * copyout() error when writing to a user buffer fails.
 */
398int
399kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
400    enum uio_seg bufseg, int flags)
401{
402	struct mount *mp, *nmp;
403	struct statfs *sfsp, *sp, sb;
404	size_t count, maxcount;
405	int vfslocked;
406	int error;
407
408	maxcount = bufsize / sizeof(struct statfs);
409	if (bufsize == 0)
410		sfsp = NULL;
411	else if (bufseg == UIO_USERSPACE)
412		sfsp = *buf;
413	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer from the current length of the
		 * mount list, capped by what the caller asked for.
		 */
414		count = 0;
415		mtx_lock(&mountlist_mtx);
416		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
417			count++;
418		}
419		mtx_unlock(&mountlist_mtx);
420		if (maxcount > count)
421			maxcount = count;
422		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
423		    M_WAITOK);
424	}
425	count = 0;
426	mtx_lock(&mountlist_mtx);
427	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Mounts the caller's prison may not see are not counted. */
428		if (prison_canseemount(td->td_ucred, mp) != 0) {
429			nmp = TAILQ_NEXT(mp, mnt_list);
430			continue;
431		}
432#ifdef MAC
433		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
434			nmp = TAILQ_NEXT(mp, mnt_list);
435			continue;
436		}
437#endif
		/* vfs_busy() is asked not to wait; on failure skip the mount. */
438		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
439			nmp = TAILQ_NEXT(mp, mnt_list);
440			continue;
441		}
442		vfslocked = VFS_LOCK_GIANT(mp);
		/* Entries beyond maxcount are counted but not copied. */
443		if (sfsp && count < maxcount) {
444			sp = &mp->mnt_stat;
445			/*
446			 * Set these in case the underlying filesystem
447			 * fails to do so.
448			 */
449			sp->f_version = STATFS_VERSION;
450			sp->f_namemax = NAME_MAX;
451			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
452			/*
453			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
454			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
455			 * overrides MNT_WAIT.
456			 */
			/*
			 * NOTE(review): as written, VFS_STATFS() is called
			 * whenever (flags & MNT_WAIT) is non-zero, even if
			 * MNT_LAZY/MNT_NOWAIT bits are also set -- confirm
			 * against the flag encodings that this matches the
			 * comment above.  A mount whose VFS_STATFS() fails
			 * is skipped and not counted.
			 */
457			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
458			    (flags & MNT_WAIT)) &&
459			    (error = VFS_STATFS(mp, sp, td))) {
460				VFS_UNLOCK_GIANT(vfslocked);
461				mtx_lock(&mountlist_mtx);
462				nmp = TAILQ_NEXT(mp, mnt_list);
463				vfs_unbusy(mp, td);
464				continue;
465			}
			/* Unprivileged caller: emit a sanitized private copy. */
466			if (priv_check(td, PRIV_VFS_GENERATION)) {
467				bcopy(sp, &sb, sizeof(sb));
468				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
469				prison_enforce_statfs(td->td_ucred, mp, &sb);
470				sp = &sb;
471			}
472			if (bufseg == UIO_SYSSPACE)
473				bcopy(sp, sfsp, sizeof(*sp));
474			else /* if (bufseg == UIO_USERSPACE) */ {
475				error = copyout(sp, sfsp, sizeof(*sp));
476				if (error) {
477					vfs_unbusy(mp, td);
478					VFS_UNLOCK_GIANT(vfslocked);
479					return (error);
480				}
481			}
482			sfsp++;
483		}
484		VFS_UNLOCK_GIANT(vfslocked);
485		count++;
		/* Take the list lock again before stepping to the next entry. */
486		mtx_lock(&mountlist_mtx);
487		nmp = TAILQ_NEXT(mp, mnt_list);
488		vfs_unbusy(mp, td);
489	}
490	mtx_unlock(&mountlist_mtx);
	/* With a buffer, report at most maxcount; otherwise the full count. */
491	if (sfsp && count > maxcount)
492		td->td_retval[0] = maxcount;
493	else
494		td->td_retval[0] = count;
495	return (0);
496}
497
498#ifdef COMPAT_FREEBSD4
499/*
500 * Get old format filesystem statistics.
501 */
502static void cvtstatfs(struct statfs *, struct ostatfs *);
503
504#ifndef _SYS_SYSPROTO_H_
505struct freebsd4_statfs_args {
506	char *path;
507	struct ostatfs *buf;
508};
509#endif
510int
511freebsd4_statfs(td, uap)
512	struct thread *td;
513	struct freebsd4_statfs_args /* {
514		char *path;
515		struct ostatfs *buf;
516	} */ *uap;
517{
518	struct ostatfs osb;
519	struct statfs sf;
520	int error;
521
522	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
523	if (error)
524		return (error);
525	cvtstatfs(&sf, &osb);
526	return (copyout(&osb, uap->buf, sizeof(osb)));
527}
528
529/*
530 * Get filesystem statistics.
531 */
532#ifndef _SYS_SYSPROTO_H_
533struct freebsd4_fstatfs_args {
534	int fd;
535	struct ostatfs *buf;
536};
537#endif
538int
539freebsd4_fstatfs(td, uap)
540	struct thread *td;
541	struct freebsd4_fstatfs_args /* {
542		int fd;
543		struct ostatfs *buf;
544	} */ *uap;
545{
546	struct ostatfs osb;
547	struct statfs sf;
548	int error;
549
550	error = kern_fstatfs(td, uap->fd, &sf);
551	if (error)
552		return (error);
553	cvtstatfs(&sf, &osb);
554	return (copyout(&osb, uap->buf, sizeof(osb)));
555}
556
557/*
558 * Get statistics on all filesystems.
559 */
560#ifndef _SYS_SYSPROTO_H_
561struct freebsd4_getfsstat_args {
562	struct ostatfs *buf;
563	long bufsize;
564	int flags;
565};
566#endif
567int
568freebsd4_getfsstat(td, uap)
569	struct thread *td;
570	register struct freebsd4_getfsstat_args /* {
571		struct ostatfs *buf;
572		long bufsize;
573		int flags;
574	} */ *uap;
575{
576	struct statfs *buf, *sp;
577	struct ostatfs osb;
578	size_t count, size;
579	int error;
580
581	count = uap->bufsize / sizeof(struct ostatfs);
582	size = count * sizeof(struct statfs);
583	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
584	if (size > 0) {
585		count = td->td_retval[0];
586		sp = buf;
587		while (count > 0 && error == 0) {
588			cvtstatfs(sp, &osb);
589			error = copyout(&osb, uap->buf, sizeof(osb));
590			sp++;
591			uap->buf++;
592			count--;
593		}
594		free(buf, M_TEMP);
595	}
596	return (error);
597}
598
599/*
600 * Implement fstatfs() for (NFS) file handles.
601 */
602#ifndef _SYS_SYSPROTO_H_
603struct freebsd4_fhstatfs_args {
604	struct fhandle *u_fhp;
605	struct ostatfs *buf;
606};
607#endif
608int
609freebsd4_fhstatfs(td, uap)
610	struct thread *td;
611	struct freebsd4_fhstatfs_args /* {
612		struct fhandle *u_fhp;
613		struct ostatfs *buf;
614	} */ *uap;
615{
616	struct ostatfs osb;
617	struct statfs sf;
618	fhandle_t fh;
619	int error;
620
621	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
622	if (error)
623		return (error);
624	error = kern_fhstatfs(td, fh, &sf);
625	if (error)
626		return (error);
627	cvtstatfs(&sf, &osb);
628	return (copyout(&osb, uap->buf, sizeof(osb)));
629}
630
631/*
632 * Convert a new format statfs structure to an old format statfs structure.
633 */
634static void
635cvtstatfs(nsp, osp)
636	struct statfs *nsp;
637	struct ostatfs *osp;
638{
639
640	bzero(osp, sizeof(*osp));
641	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
642	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
643	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
644	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
645	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
646	osp->f_files = MIN(nsp->f_files, LONG_MAX);
647	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
648	osp->f_owner = nsp->f_owner;
649	osp->f_type = nsp->f_type;
650	osp->f_flags = nsp->f_flags;
651	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
652	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
653	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
654	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
655	strlcpy(osp->f_fstypename, nsp->f_fstypename,
656	    MIN(MFSNAMELEN, OMFSNAMELEN));
657	strlcpy(osp->f_mntonname, nsp->f_mntonname,
658	    MIN(MNAMELEN, OMNAMELEN));
659	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
660	    MIN(MNAMELEN, OMNAMELEN));
661	osp->f_fsid = nsp->f_fsid;
662}
663#endif /* COMPAT_FREEBSD4 */
664
665/*
666 * Change current working directory to a given file descriptor.
667 */
668#ifndef _SYS_SYSPROTO_H_
669struct fchdir_args {
670	int	fd;
671};
672#endif
673int
674fchdir(td, uap)
675	struct thread *td;
676	struct fchdir_args /* {
677		int fd;
678	} */ *uap;
679{
680	register struct filedesc *fdp = td->td_proc->p_fd;
681	struct vnode *vp, *tdp, *vpold;
682	struct mount *mp;
683	struct file *fp;
684	int vfslocked;
685	int error;
686
687	AUDIT_ARG(fd, uap->fd);
688	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
689		return (error);
690	vp = fp->f_vnode;
691	VREF(vp);
692	fdrop(fp, td);
693	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
694	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
695	AUDIT_ARG(vnode, vp, ARG_VNODE1);
696	error = change_dir(vp, td);
697	while (!error && (mp = vp->v_mountedhere) != NULL) {
698		int tvfslocked;
699		if (vfs_busy(mp, 0, 0, td))
700			continue;
701		tvfslocked = VFS_LOCK_GIANT(mp);
702		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
703		vfs_unbusy(mp, td);
704		if (error) {
705			VFS_UNLOCK_GIANT(tvfslocked);
706			break;
707		}
708		vput(vp);
709		VFS_UNLOCK_GIANT(vfslocked);
710		vp = tdp;
711		vfslocked = tvfslocked;
712	}
713	if (error) {
714		vput(vp);
715		VFS_UNLOCK_GIANT(vfslocked);
716		return (error);
717	}
718	VOP_UNLOCK(vp, 0, td);
719	VFS_UNLOCK_GIANT(vfslocked);
720	FILEDESC_LOCK_FAST(fdp);
721	vpold = fdp->fd_cdir;
722	fdp->fd_cdir = vp;
723	FILEDESC_UNLOCK_FAST(fdp);
724	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
725	vrele(vpold);
726	VFS_UNLOCK_GIANT(vfslocked);
727	return (0);
728}
729
730/*
731 * Change current working directory (``.'').
732 */
733#ifndef _SYS_SYSPROTO_H_
734struct chdir_args {
735	char	*path;
736};
737#endif
738int
739chdir(td, uap)
740	struct thread *td;
741	struct chdir_args /* {
742		char *path;
743	} */ *uap;
744{
745
746	return (kern_chdir(td, uap->path, UIO_USERSPACE));
747}
748
749int
750kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
751{
752	register struct filedesc *fdp = td->td_proc->p_fd;
753	int error;
754	struct nameidata nd;
755	struct vnode *vp;
756	int vfslocked;
757
758	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
759	    pathseg, path, td);
760	if ((error = namei(&nd)) != 0)
761		return (error);
762	vfslocked = NDHASGIANT(&nd);
763	if ((error = change_dir(nd.ni_vp, td)) != 0) {
764		vput(nd.ni_vp);
765		VFS_UNLOCK_GIANT(vfslocked);
766		NDFREE(&nd, NDF_ONLY_PNBUF);
767		return (error);
768	}
769	VOP_UNLOCK(nd.ni_vp, 0, td);
770	VFS_UNLOCK_GIANT(vfslocked);
771	NDFREE(&nd, NDF_ONLY_PNBUF);
772	FILEDESC_LOCK_FAST(fdp);
773	vp = fdp->fd_cdir;
774	fdp->fd_cdir = nd.ni_vp;
775	FILEDESC_UNLOCK_FAST(fdp);
776	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
777	vrele(vp);
778	VFS_UNLOCK_GIANT(vfslocked);
779	return (0);
780}
781
782/*
783 * Helper function for raised chroot(2) security function:  Refuse if
784 * any filedescriptors are open directories.
785 */
786static int
787chroot_refuse_vdir_fds(fdp)
788	struct filedesc *fdp;
789{
790	struct vnode *vp;
791	struct file *fp;
792	int fd;
793
794	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
795	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
796		fp = fget_locked(fdp, fd);
797		if (fp == NULL)
798			continue;
799		if (fp->f_type == DTYPE_VNODE) {
800			vp = fp->f_vnode;
801			if (vp->v_type == VDIR)
802				return (EPERM);
803		}
804	}
805	return (0);
806}
807
808/*
809 * This sysctl determines if we will allow a process to chroot(2) if it
810 * has a directory open:
811 *	0: disallowed for all processes.
812 *	1: allowed for processes that were not already chroot(2)'ed.
813 *	2: allowed for all processes.
814 */
815
816static int chroot_allow_open_directories = 1;
817
818SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
819     &chroot_allow_open_directories, 0, "");
820
821/*
822 * Change notion of root (``/'') directory.
823 */
824#ifndef _SYS_SYSPROTO_H_
825struct chroot_args {
826	char	*path;
827};
828#endif
829int
830chroot(td, uap)
831	struct thread *td;
832	struct chroot_args /* {
833		char *path;
834	} */ *uap;
835{
836	int error;
837	struct nameidata nd;
838	int vfslocked;
839
840	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
841	    SUSER_ALLOWJAIL);
842	if (error)
843		return (error);
844	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
845	    UIO_USERSPACE, uap->path, td);
846	error = namei(&nd);
847	if (error)
848		goto error;
849	vfslocked = NDHASGIANT(&nd);
850	if ((error = change_dir(nd.ni_vp, td)) != 0)
851		goto e_vunlock;
852#ifdef MAC
853	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
854		goto e_vunlock;
855#endif
856	VOP_UNLOCK(nd.ni_vp, 0, td);
857	error = change_root(nd.ni_vp, td);
858	vrele(nd.ni_vp);
859	VFS_UNLOCK_GIANT(vfslocked);
860	NDFREE(&nd, NDF_ONLY_PNBUF);
861	return (error);
862e_vunlock:
863	vput(nd.ni_vp);
864	VFS_UNLOCK_GIANT(vfslocked);
865error:
866	NDFREE(&nd, NDF_ONLY_PNBUF);
867	return (error);
868}
869
870/*
871 * Common routine for chroot and chdir.  Callers must provide a locked vnode
872 * instance.
873 */
874int
875change_dir(vp, td)
876	struct vnode *vp;
877	struct thread *td;
878{
879	int error;
880
881	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
882	if (vp->v_type != VDIR)
883		return (ENOTDIR);
884#ifdef MAC
885	error = mac_check_vnode_chdir(td->td_ucred, vp);
886	if (error)
887		return (error);
888#endif
889	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
890	return (error);
891}
892
893/*
894 * Common routine for kern_chroot() and jail_attach().  The caller is
895 * responsible for invoking priv_check() and mac_check_chroot() to authorize
896 * this operation.
897 */
898int
899change_root(vp, td)
900	struct vnode *vp;
901	struct thread *td;
902{
903	struct filedesc *fdp;
904	struct vnode *oldvp;
905	int vfslocked;
906	int error;
907
908	VFS_ASSERT_GIANT(vp->v_mount);
909	fdp = td->td_proc->p_fd;
910	FILEDESC_LOCK(fdp);
911	if (chroot_allow_open_directories == 0 ||
912	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
913		error = chroot_refuse_vdir_fds(fdp);
914		if (error) {
915			FILEDESC_UNLOCK(fdp);
916			return (error);
917		}
918	}
919	oldvp = fdp->fd_rdir;
920	fdp->fd_rdir = vp;
921	VREF(fdp->fd_rdir);
922	if (!fdp->fd_jdir) {
923		fdp->fd_jdir = vp;
924		VREF(fdp->fd_jdir);
925	}
926	FILEDESC_UNLOCK(fdp);
927	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
928	vrele(oldvp);
929	VFS_UNLOCK_GIANT(vfslocked);
930	return (0);
931}
932
933/*
934 * Check permissions, allocate an open file structure,
935 * and call the device open routine if any.
936 *
937 * MP SAFE
938 */
939#ifndef _SYS_SYSPROTO_H_
940struct open_args {
941	char	*path;
942	int	flags;
943	int	mode;
944};
945#endif
946int
947open(td, uap)
948	struct thread *td;
949	register struct open_args /* {
950		char *path;
951		int flags;
952		int mode;
953	} */ *uap;
954{
955
956	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
957}
958
/*
 * Open the file named by `path' (interpreted according to `pathseg')
 * and install it in a newly allocated descriptor.
 *
 * flags  open(2) flags; a value with all O_ACCMODE bits set is rejected
 *        with EINVAL.  The flags are then converted with FFLAGS().
 * mode   creation mode; it is masked by the process fd_cmask, limited
 *        to ALLPERMS, and has S_ISTXT removed before vn_open().
 *
 * On success the descriptor index is stored in td->td_retval[0] and 0
 * is returned.  On failure the descriptor is cleaned up and the error is
 * returned; an ERESTART from vn_open() is reported as EINTR.
 */
959int
960kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
961    int mode)
962{
963	struct proc *p = td->td_proc;
964	struct filedesc *fdp = p->p_fd;
965	struct file *fp;
966	struct vnode *vp;
967	struct vattr vat;
968	struct mount *mp;
969	int cmode;
970	struct file *nfp;
971	int type, indx, error;
972	struct flock lf;
973	struct nameidata nd;
974	int vfslocked;
975
976	AUDIT_ARG(fflags, flags);
977	AUDIT_ARG(mode, mode);
978	if ((flags & O_ACCMODE) == O_ACCMODE)
979		return (EINVAL);
980	flags = FFLAGS(flags);
981	error = falloc(td, &nfp, &indx);
982	if (error)
983		return (error);
984	/* An extra reference on `nfp' has been held for us by falloc(). */
985	fp = nfp;
986	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
987	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
988	td->td_dupfd = -1;		/* XXX check for fdopen */
989	error = vn_open(&nd, &flags, cmode, indx);
990	if (error) {
991		/*
992		 * If the vn_open replaced the method vector, something
993		 * wonderous happened deep below and we just pass it up
994		 * pretending we know what we do.
995		 */
996		if (error == ENXIO && fp->f_ops != &badfileops) {
997			fdrop(fp, td);
998			td->td_retval[0] = indx;
999			return (0);
1000		}
1001
1002		/*
1003		 * release our own reference
1004		 */
1005		fdrop(fp, td);
1006
1007		/*
1008		 * handle special fdopen() case.  bleh.  dupfdopen() is
1009		 * responsible for dropping the old contents of ofiles[indx]
1010		 * if it succeeds.
1011		 */
1012		if ((error == ENODEV || error == ENXIO) &&
1013		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1014		    (error =
1015			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1016			td->td_retval[0] = indx;
1017			return (0);
1018		}
1019		/*
1020		 * Clean up the descriptor, but only if another thread hadn't
1021		 * replaced or closed it.
1022		 */
1023		fdclose(fdp, fp, indx, td);
1024
1025		if (error == ERESTART)
1026			error = EINTR;
1027		return (error);
1028	}
1029	td->td_dupfd = 0;
1030	vfslocked = NDHASGIANT(&nd);
1031	NDFREE(&nd, NDF_ONLY_PNBUF);
1032	vp = nd.ni_vp;
1033
1034	/*
1035	 * There should be 2 references on the file, one from the descriptor
1036	 * table, and one for us.
1037	 *
1038	 * Handle the case where someone closed the file (via its file
1039	 * descriptor) while we were blocked.  The end result should look
1040	 * like opening the file succeeded but it was immediately closed.
1041	 * We call vn_close() manually because we haven't yet hooked up
1042	 * the various 'struct file' fields.
1043	 */
1044	FILEDESC_LOCK(fdp);
1045	FILE_LOCK(fp);
1046	if (fp->f_count == 1) {
1047		mp = vp->v_mount;
1048		KASSERT(fdp->fd_ofiles[indx] != fp,
1049		    ("Open file descriptor lost all refs"));
1050		FILE_UNLOCK(fp);
1051		FILEDESC_UNLOCK(fdp);
1052		VOP_UNLOCK(vp, 0, td);
1053		vn_close(vp, flags & FMASK, fp->f_cred, td);
1054		VFS_UNLOCK_GIANT(vfslocked);
1055		fdrop(fp, td);
1056		td->td_retval[0] = indx;
1057		return (0);
1058	}
	/*
	 * Hook the vnode up to the file.  f_data and f_ops are filled in
	 * only if they still hold their initial values (NULL, badfileops).
	 */
1059	fp->f_vnode = vp;
1060	if (fp->f_data == NULL)
1061		fp->f_data = vp;
1062	fp->f_flag = flags & FMASK;
1063	if (fp->f_ops == &badfileops)
1064		fp->f_ops = &vnops;
1065	fp->f_seqcount = 1;
1066	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1067	FILE_UNLOCK(fp);
1068	FILEDESC_UNLOCK(fdp);
1069
1070	VOP_UNLOCK(vp, 0, td);
	/*
	 * O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock, write
	 * for O_EXLOCK and read for O_SHLOCK.  F_WAIT is requested unless
	 * FNONBLOCK is set.
	 */
1071	if (flags & (O_EXLOCK | O_SHLOCK)) {
1072		lf.l_whence = SEEK_SET;
1073		lf.l_start = 0;
1074		lf.l_len = 0;
1075		if (flags & O_EXLOCK)
1076			lf.l_type = F_WRLCK;
1077		else
1078			lf.l_type = F_RDLCK;
1079		type = F_FLOCK;
1080		if ((flags & FNONBLOCK) == 0)
1081			type |= F_WAIT;
1082		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1083			    type)) != 0)
1084			goto bad;
1085		fp->f_flag |= FHASLOCK;
1086	}
	/* O_TRUNC: set the file size to zero through VOP_SETATTR(). */
1087	if (flags & O_TRUNC) {
1088		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1089			goto bad;
1090		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1091		VATTR_NULL(&vat);
1092		vat.va_size = 0;
1093		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1094#ifdef MAC
1095		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1096		if (error == 0)
1097#endif
1098			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1099		VOP_UNLOCK(vp, 0, td);
1100		vn_finished_write(mp);
1101		if (error)
1102			goto bad;
1103	}
1104	VFS_UNLOCK_GIANT(vfslocked);
1105	/*
1106	 * Release our private reference, leaving the one associated with
1107	 * the descriptor table intact.
1108	 */
1109	fdrop(fp, td);
1110	td->td_retval[0] = indx;
1111	return (0);
1112bad:
	/* Failure after the open itself: close the descriptor, drop our ref. */
1113	VFS_UNLOCK_GIANT(vfslocked);
1114	fdclose(fdp, fp, indx, td);
1115	fdrop(fp, td);
1116	return (error);
1117}
1118
#ifdef COMPAT_43
/*
 * Old creat(2): create a file by calling kern_open() with
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's mode.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1144
1145/*
1146 * Create a special file.
1147 */
1148#ifndef _SYS_SYSPROTO_H_
1149struct mknod_args {
1150	char	*path;
1151	int	mode;
1152	int	dev;
1153};
1154#endif
1155int
1156mknod(td, uap)
1157	struct thread *td;
1158	register struct mknod_args /* {
1159		char *path;
1160		int mode;
1161		int dev;
1162	} */ *uap;
1163{
1164
1165	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1166}
1167
/*
 * Create a special file, bad-sector marker or whiteout named by `path'
 * (interpreted according to `pathseg').
 *
 * mode  File type bits select what is created and which privilege is
 *       checked: S_IFCHR/S_IFBLK (PRIV_VFS_MKNOD_DEV), S_IFMT
 *       (PRIV_VFS_MKNOD_BAD) or S_IFWHT (PRIV_VFS_MKNOD_WHT).  Any other
 *       type yields EINVAL.  The permission bits are masked by the
 *       process fd_cmask.
 * dev   Stored in va_rdev of the new node's attributes.
 *
 * Returns 0 on success, EEXIST if the name already exists, otherwise
 * the error from the privilege check, namei(), vn_start_write(), the MAC
 * check (when MAC is configured) or the VOP.
 */
1168int
1169kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1170    int dev)
1171{
1172	struct vnode *vp;
1173	struct mount *mp;
1174	struct vattr vattr;
1175	int error;
1176	int whiteout = 0;
1177	struct nameidata nd;
1178	int vfslocked;
1179
1180	AUDIT_ARG(mode, mode);
1181	AUDIT_ARG(dev, dev);
	/* Each creatable type has its own privilege. */
1182	switch (mode & S_IFMT) {
1183	case S_IFCHR:
1184	case S_IFBLK:
1185		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1186		break;
1187	case S_IFMT:
1188		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1189		break;
1190	case S_IFWHT:
1191		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1192		break;
1193	default:
1194		error = EINVAL;
1195		break;
1196	}
1197	if (error)
1198		return (error);
1199restart:
1200	bwillwrite();
1201	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1202	    pathseg, path, td);
1203	if ((error = namei(&nd)) != 0)
1204		return (error);
1205	vfslocked = NDHASGIANT(&nd);
1206	vp = nd.ni_vp;
1207	if (vp != NULL) {
		/* The name already exists: release everything and fail. */
1208		NDFREE(&nd, NDF_ONLY_PNBUF);
1209		if (vp == nd.ni_dvp)
1210			vrele(nd.ni_dvp);
1211		else
1212			vput(nd.ni_dvp);
1213		vrele(vp);
1214		VFS_UNLOCK_GIANT(vfslocked);
1215		return (EEXIST);
1216	} else {
1217		VATTR_NULL(&vattr);
1218		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1219		vattr.va_mode = (mode & ALLPERMS) &
1220		    ~td->td_proc->p_fd->fd_cmask;
1221		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1222		vattr.va_rdev = dev;
1223		whiteout = 0;
1224
		/*
		 * Map the type bits to a vnode type.  The default case is
		 * unreachable for modes accepted by the switch above.
		 */
1225		switch (mode & S_IFMT) {
1226		case S_IFMT:	/* used by badsect to flag bad sectors */
1227			vattr.va_type = VBAD;
1228			break;
1229		case S_IFCHR:
1230			vattr.va_type = VCHR;
1231			break;
1232		case S_IFBLK:
1233			vattr.va_type = VBLK;
1234			break;
1235		case S_IFWHT:
1236			whiteout = 1;
1237			break;
1238		default:
1239			panic("kern_mknod: invalid mode");
1240		}
1241	}
	/*
	 * If write access cannot be started without waiting, release the
	 * lookup results, sleep in vn_start_write() (V_XSLEEP) and redo the
	 * lookup from scratch.
	 */
1242	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1243		NDFREE(&nd, NDF_ONLY_PNBUF);
1244		vput(nd.ni_dvp);
1245		VFS_UNLOCK_GIANT(vfslocked);
1246		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1247			return (error);
1248		goto restart;
1249	}
1250#ifdef MAC
1251	if (error == 0 && !whiteout)
1252		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1253		    &nd.ni_cnd, &vattr);
1254#endif
1255	if (!error) {
1256		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1257		if (whiteout)
1258			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1259		else {
1260			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1261						&nd.ni_cnd, &vattr);
			/* The new vnode is not needed by this function. */
1262			if (error == 0)
1263				vput(nd.ni_vp);
1264		}
1265	}
1266	NDFREE(&nd, NDF_ONLY_PNBUF);
1267	vput(nd.ni_dvp);
1268	vn_finished_write(mp);
1269	VFS_UNLOCK_GIANT(vfslocked);
1270	return (error);
1271}
1272
1273/*
1274 * Create a named pipe.
1275 */
1276#ifndef _SYS_SYSPROTO_H_
1277struct mkfifo_args {
1278	char	*path;
1279	int	mode;
1280};
1281#endif
1282int
1283mkfifo(td, uap)
1284	struct thread *td;
1285	register struct mkfifo_args /* {
1286		char *path;
1287		int mode;
1288	} */ *uap;
1289{
1290
1291	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1292}
1293
1294int
1295kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1296{
1297	struct mount *mp;
1298	struct vattr vattr;
1299	int error;
1300	struct nameidata nd;
1301	int vfslocked;
1302
1303	AUDIT_ARG(mode, mode);
1304restart:
1305	bwillwrite();
1306	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1307	    pathseg, path, td);
1308	if ((error = namei(&nd)) != 0)
1309		return (error);
1310	vfslocked = NDHASGIANT(&nd);
1311	if (nd.ni_vp != NULL) {
1312		NDFREE(&nd, NDF_ONLY_PNBUF);
1313		if (nd.ni_vp == nd.ni_dvp)
1314			vrele(nd.ni_dvp);
1315		else
1316			vput(nd.ni_dvp);
1317		vrele(nd.ni_vp);
1318		VFS_UNLOCK_GIANT(vfslocked);
1319		return (EEXIST);
1320	}
1321	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1322		NDFREE(&nd, NDF_ONLY_PNBUF);
1323		vput(nd.ni_dvp);
1324		VFS_UNLOCK_GIANT(vfslocked);
1325		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1326			return (error);
1327		goto restart;
1328	}
1329	VATTR_NULL(&vattr);
1330	vattr.va_type = VFIFO;
1331	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1332	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1333	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1334#ifdef MAC
1335	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1336	    &vattr);
1337	if (error)
1338		goto out;
1339#endif
1340	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1341	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1342	if (error == 0)
1343		vput(nd.ni_vp);
1344#ifdef MAC
1345out:
1346#endif
1347	vput(nd.ni_dvp);
1348	vn_finished_write(mp);
1349	VFS_UNLOCK_GIANT(vfslocked);
1350	NDFREE(&nd, NDF_ONLY_PNBUF);
1351	return (error);
1352}
1353
1354/*
1355 * Make a hard file link.
1356 */
1357#ifndef _SYS_SYSPROTO_H_
1358struct link_args {
1359	char	*path;
1360	char	*link;
1361};
1362#endif
1363int
1364link(td, uap)
1365	struct thread *td;
1366	register struct link_args /* {
1367		char *path;
1368		char *link;
1369	} */ *uap;
1370{
1371	int error;
1372
1373	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1374	return (error);
1375}
1376
1377static int hardlink_check_uid = 0;
1378SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1379    &hardlink_check_uid, 0,
1380    "Unprivileged processes cannot create hard links to files owned by other "
1381    "users");
1382static int hardlink_check_gid = 0;
1383SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1384    &hardlink_check_gid, 0,
1385    "Unprivileged processes cannot create hard links to files owned by other "
1386    "groups");
1387
1388static int
1389can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1390{
1391	struct vattr va;
1392	int error;
1393
1394	if (!hardlink_check_uid && !hardlink_check_gid)
1395		return (0);
1396
1397	error = VOP_GETATTR(vp, &va, cred, td);
1398	if (error != 0)
1399		return (error);
1400
1401	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1402		error = priv_check_cred(cred, PRIV_VFS_LINK,
1403		    SUSER_ALLOWJAIL);
1404		if (error)
1405			return (error);
1406	}
1407
1408	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1409		error = priv_check_cred(cred, PRIV_VFS_LINK,
1410		    SUSER_ALLOWJAIL);
1411		if (error)
1412			return (error);
1413	}
1414
1415	return (0);
1416}
1417
1418int
1419kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1420{
1421	struct vnode *vp;
1422	struct mount *mp;
1423	struct nameidata nd;
1424	int vfslocked;
1425	int lvfslocked;
1426	int error;
1427
1428	bwillwrite();
1429	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1430	if ((error = namei(&nd)) != 0)
1431		return (error);
1432	vfslocked = NDHASGIANT(&nd);
1433	NDFREE(&nd, NDF_ONLY_PNBUF);
1434	vp = nd.ni_vp;
1435	if (vp->v_type == VDIR) {
1436		vrele(vp);
1437		VFS_UNLOCK_GIANT(vfslocked);
1438		return (EPERM);		/* POSIX */
1439	}
1440	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1441		vrele(vp);
1442		VFS_UNLOCK_GIANT(vfslocked);
1443		return (error);
1444	}
1445	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1446	    segflg, link, td);
1447	if ((error = namei(&nd)) == 0) {
1448		lvfslocked = NDHASGIANT(&nd);
1449		if (nd.ni_vp != NULL) {
1450			if (nd.ni_dvp == nd.ni_vp)
1451				vrele(nd.ni_dvp);
1452			else
1453				vput(nd.ni_dvp);
1454			vrele(nd.ni_vp);
1455			error = EEXIST;
1456		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1457		    == 0) {
1458			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1459			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1460			error = can_hardlink(vp, td, td->td_ucred);
1461			if (error == 0)
1462#ifdef MAC
1463				error = mac_check_vnode_link(td->td_ucred,
1464				    nd.ni_dvp, vp, &nd.ni_cnd);
1465			if (error == 0)
1466#endif
1467				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1468			VOP_UNLOCK(vp, 0, td);
1469			vput(nd.ni_dvp);
1470		}
1471		NDFREE(&nd, NDF_ONLY_PNBUF);
1472		VFS_UNLOCK_GIANT(lvfslocked);
1473	}
1474	vrele(vp);
1475	vn_finished_write(mp);
1476	VFS_UNLOCK_GIANT(vfslocked);
1477	return (error);
1478}
1479
1480/*
1481 * Make a symbolic link.
1482 */
1483#ifndef _SYS_SYSPROTO_H_
1484struct symlink_args {
1485	char	*path;
1486	char	*link;
1487};
1488#endif
1489int
1490symlink(td, uap)
1491	struct thread *td;
1492	register struct symlink_args /* {
1493		char *path;
1494		char *link;
1495	} */ *uap;
1496{
1497
1498	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1499}
1500
1501int
1502kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1503{
1504	struct mount *mp;
1505	struct vattr vattr;
1506	char *syspath;
1507	int error;
1508	struct nameidata nd;
1509	int vfslocked;
1510
1511	if (segflg == UIO_SYSSPACE) {
1512		syspath = path;
1513	} else {
1514		syspath = uma_zalloc(namei_zone, M_WAITOK);
1515		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1516			goto out;
1517	}
1518	AUDIT_ARG(text, syspath);
1519restart:
1520	bwillwrite();
1521	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1522	    segflg, link, td);
1523	if ((error = namei(&nd)) != 0)
1524		goto out;
1525	vfslocked = NDHASGIANT(&nd);
1526	if (nd.ni_vp) {
1527		NDFREE(&nd, NDF_ONLY_PNBUF);
1528		if (nd.ni_vp == nd.ni_dvp)
1529			vrele(nd.ni_dvp);
1530		else
1531			vput(nd.ni_dvp);
1532		vrele(nd.ni_vp);
1533		VFS_UNLOCK_GIANT(vfslocked);
1534		error = EEXIST;
1535		goto out;
1536	}
1537	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1538		NDFREE(&nd, NDF_ONLY_PNBUF);
1539		vput(nd.ni_dvp);
1540		VFS_UNLOCK_GIANT(vfslocked);
1541		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1542			goto out;
1543		goto restart;
1544	}
1545	VATTR_NULL(&vattr);
1546	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1547	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1548	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1549#ifdef MAC
1550	vattr.va_type = VLNK;
1551	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1552	    &vattr);
1553	if (error)
1554		goto out2;
1555#endif
1556	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1557	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1558	if (error == 0)
1559		vput(nd.ni_vp);
1560#ifdef MAC
1561out2:
1562#endif
1563	NDFREE(&nd, NDF_ONLY_PNBUF);
1564	vput(nd.ni_dvp);
1565	vn_finished_write(mp);
1566	VFS_UNLOCK_GIANT(vfslocked);
1567out:
1568	if (segflg != UIO_SYSSPACE)
1569		uma_zfree(namei_zone, syspath);
1570	return (error);
1571}
1572
1573/*
1574 * Delete a whiteout from the filesystem.
1575 */
1576int
1577undelete(td, uap)
1578	struct thread *td;
1579	register struct undelete_args /* {
1580		char *path;
1581	} */ *uap;
1582{
1583	int error;
1584	struct mount *mp;
1585	struct nameidata nd;
1586	int vfslocked;
1587
1588restart:
1589	bwillwrite();
1590	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1591	    UIO_USERSPACE, uap->path, td);
1592	error = namei(&nd);
1593	if (error)
1594		return (error);
1595	vfslocked = NDHASGIANT(&nd);
1596
1597	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1598		NDFREE(&nd, NDF_ONLY_PNBUF);
1599		if (nd.ni_vp == nd.ni_dvp)
1600			vrele(nd.ni_dvp);
1601		else
1602			vput(nd.ni_dvp);
1603		if (nd.ni_vp)
1604			vrele(nd.ni_vp);
1605		VFS_UNLOCK_GIANT(vfslocked);
1606		return (EEXIST);
1607	}
1608	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1609		NDFREE(&nd, NDF_ONLY_PNBUF);
1610		vput(nd.ni_dvp);
1611		VFS_UNLOCK_GIANT(vfslocked);
1612		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1613			return (error);
1614		goto restart;
1615	}
1616	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1617	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1618	NDFREE(&nd, NDF_ONLY_PNBUF);
1619	vput(nd.ni_dvp);
1620	vn_finished_write(mp);
1621	VFS_UNLOCK_GIANT(vfslocked);
1622	return (error);
1623}
1624
1625/*
1626 * Delete a name from the filesystem.
1627 */
1628#ifndef _SYS_SYSPROTO_H_
1629struct unlink_args {
1630	char	*path;
1631};
1632#endif
1633int
1634unlink(td, uap)
1635	struct thread *td;
1636	struct unlink_args /* {
1637		char *path;
1638	} */ *uap;
1639{
1640	int error;
1641
1642	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1643	return (error);
1644}
1645
1646int
1647kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1648{
1649	struct mount *mp;
1650	struct vnode *vp;
1651	int error;
1652	struct nameidata nd;
1653	int vfslocked;
1654
1655restart:
1656	bwillwrite();
1657	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1658	    pathseg, path, td);
1659	if ((error = namei(&nd)) != 0)
1660		return (error == EINVAL ? EPERM : error);
1661	vfslocked = NDHASGIANT(&nd);
1662	vp = nd.ni_vp;
1663	if (vp->v_type == VDIR)
1664		error = EPERM;		/* POSIX */
1665	else {
1666		/*
1667		 * The root of a mounted filesystem cannot be deleted.
1668		 *
1669		 * XXX: can this only be a VDIR case?
1670		 */
1671		if (vp->v_vflag & VV_ROOT)
1672			error = EBUSY;
1673	}
1674	if (error == 0) {
1675		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1676			NDFREE(&nd, NDF_ONLY_PNBUF);
1677			vput(nd.ni_dvp);
1678			if (vp == nd.ni_dvp)
1679				vrele(vp);
1680			else
1681				vput(vp);
1682			VFS_UNLOCK_GIANT(vfslocked);
1683			if ((error = vn_start_write(NULL, &mp,
1684			    V_XSLEEP | PCATCH)) != 0)
1685				return (error);
1686			goto restart;
1687		}
1688#ifdef MAC
1689		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1690		    &nd.ni_cnd);
1691		if (error)
1692			goto out;
1693#endif
1694		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1695		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1696#ifdef MAC
1697out:
1698#endif
1699		vn_finished_write(mp);
1700	}
1701	NDFREE(&nd, NDF_ONLY_PNBUF);
1702	vput(nd.ni_dvp);
1703	if (vp == nd.ni_dvp)
1704		vrele(vp);
1705	else
1706		vput(vp);
1707	VFS_UNLOCK_GIANT(vfslocked);
1708	return (error);
1709}
1710
1711/*
1712 * Reposition read/write file offset.
1713 */
1714#ifndef _SYS_SYSPROTO_H_
1715struct lseek_args {
1716	int	fd;
1717	int	pad;
1718	off_t	offset;
1719	int	whence;
1720};
1721#endif
1722int
1723lseek(td, uap)
1724	struct thread *td;
1725	register struct lseek_args /* {
1726		int fd;
1727		int pad;
1728		off_t offset;
1729		int whence;
1730	} */ *uap;
1731{
1732	struct ucred *cred = td->td_ucred;
1733	struct file *fp;
1734	struct vnode *vp;
1735	struct vattr vattr;
1736	off_t offset;
1737	int error, noneg;
1738	int vfslocked;
1739
1740	if ((error = fget(td, uap->fd, &fp)) != 0)
1741		return (error);
1742	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1743		fdrop(fp, td);
1744		return (ESPIPE);
1745	}
1746	vp = fp->f_vnode;
1747	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1748	noneg = (vp->v_type != VCHR);
1749	offset = uap->offset;
1750	switch (uap->whence) {
1751	case L_INCR:
1752		if (noneg &&
1753		    (fp->f_offset < 0 ||
1754		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1755			error = EOVERFLOW;
1756			break;
1757		}
1758		offset += fp->f_offset;
1759		break;
1760	case L_XTND:
1761		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1762		error = VOP_GETATTR(vp, &vattr, cred, td);
1763		VOP_UNLOCK(vp, 0, td);
1764		if (error)
1765			break;
1766		if (noneg &&
1767		    (vattr.va_size > OFF_MAX ||
1768		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1769			error = EOVERFLOW;
1770			break;
1771		}
1772		offset += vattr.va_size;
1773		break;
1774	case L_SET:
1775		break;
1776	default:
1777		error = EINVAL;
1778	}
1779	if (error == 0 && noneg && offset < 0)
1780		error = EINVAL;
1781	if (error != 0)
1782		goto drop;
1783	fp->f_offset = offset;
1784	*(off_t *)(td->td_retval) = fp->f_offset;
1785drop:
1786	fdrop(fp, td);
1787	VFS_UNLOCK_GIANT(vfslocked);
1788	return (error);
1789}
1790
#if defined(COMPAT_43)
/*
 * Old lseek system call: reposition the read/write file offset using a
 * `long' offset.  The arguments are repacked into a struct lseek_args
 * and passed on to lseek().
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args nuap;

	nuap.fd = uap->fd;
	nuap.offset = uap->offset;	/* widened from long to off_t */
	nuap.whence = uap->whence;
	return (lseek(td, &nuap));
}
#endif /* COMPAT_43 */
1826
1827/*
1828 * Check access permissions using passed credentials.
1829 */
1830static int
1831vn_access(vp, user_flags, cred, td)
1832	struct vnode	*vp;
1833	int		user_flags;
1834	struct ucred	*cred;
1835	struct thread	*td;
1836{
1837	int error, flags;
1838
1839	/* Flags == 0 means only check for existence. */
1840	error = 0;
1841	if (user_flags) {
1842		flags = 0;
1843		if (user_flags & R_OK)
1844			flags |= VREAD;
1845		if (user_flags & W_OK)
1846			flags |= VWRITE;
1847		if (user_flags & X_OK)
1848			flags |= VEXEC;
1849#ifdef MAC
1850		error = mac_check_vnode_access(cred, vp, flags);
1851		if (error)
1852			return (error);
1853#endif
1854		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1855			error = VOP_ACCESS(vp, flags, cred, td);
1856	}
1857	return (error);
1858}
1859
1860/*
1861 * Check access permissions using "real" credentials.
1862 */
1863#ifndef _SYS_SYSPROTO_H_
1864struct access_args {
1865	char	*path;
1866	int	flags;
1867};
1868#endif
1869int
1870access(td, uap)
1871	struct thread *td;
1872	register struct access_args /* {
1873		char *path;
1874		int flags;
1875	} */ *uap;
1876{
1877
1878	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1879}
1880
1881int
1882kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1883{
1884	struct ucred *cred, *tmpcred;
1885	register struct vnode *vp;
1886	struct nameidata nd;
1887	int vfslocked;
1888	int error;
1889
1890	/*
1891	 * Create and modify a temporary credential instead of one that
1892	 * is potentially shared.  This could also mess up socket
1893	 * buffer accounting which can run in an interrupt context.
1894	 */
1895	cred = td->td_ucred;
1896	tmpcred = crdup(cred);
1897	tmpcred->cr_uid = cred->cr_ruid;
1898	tmpcred->cr_groups[0] = cred->cr_rgid;
1899	td->td_ucred = tmpcred;
1900	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1901	    pathseg, path, td);
1902	if ((error = namei(&nd)) != 0)
1903		goto out1;
1904	vfslocked = NDHASGIANT(&nd);
1905	vp = nd.ni_vp;
1906
1907	error = vn_access(vp, flags, tmpcred, td);
1908	NDFREE(&nd, NDF_ONLY_PNBUF);
1909	vput(vp);
1910	VFS_UNLOCK_GIANT(vfslocked);
1911out1:
1912	td->td_ucred = cred;
1913	crfree(tmpcred);
1914	return (error);
1915}
1916
1917/*
1918 * Check access permissions using "effective" credentials.
1919 */
1920#ifndef _SYS_SYSPROTO_H_
1921struct eaccess_args {
1922	char	*path;
1923	int	flags;
1924};
1925#endif
1926int
1927eaccess(td, uap)
1928	struct thread *td;
1929	register struct eaccess_args /* {
1930		char *path;
1931		int flags;
1932	} */ *uap;
1933{
1934
1935	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1936}
1937
1938int
1939kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1940{
1941	struct nameidata nd;
1942	struct vnode *vp;
1943	int vfslocked;
1944	int error;
1945
1946	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1947	    pathseg, path, td);
1948	if ((error = namei(&nd)) != 0)
1949		return (error);
1950	vp = nd.ni_vp;
1951	vfslocked = NDHASGIANT(&nd);
1952	error = vn_access(vp, flags, td->td_ucred, td);
1953	NDFREE(&nd, NDF_ONLY_PNBUF);
1954	vput(vp);
1955	VFS_UNLOCK_GIANT(vfslocked);
1956	return (error);
1957}
1958
1959#if defined(COMPAT_43)
1960/*
1961 * Get file status; this version follows links.
1962 */
1963#ifndef _SYS_SYSPROTO_H_
1964struct ostat_args {
1965	char	*path;
1966	struct ostat *ub;
1967};
1968#endif
1969int
1970ostat(td, uap)
1971	struct thread *td;
1972	register struct ostat_args /* {
1973		char *path;
1974		struct ostat *ub;
1975	} */ *uap;
1976{
1977	struct stat sb;
1978	struct ostat osb;
1979	int error;
1980
1981	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1982	if (error)
1983		return (error);
1984	cvtstat(&sb, &osb);
1985	error = copyout(&osb, uap->ub, sizeof (osb));
1986	return (error);
1987}
1988
1989/*
1990 * Get file status; this version does not follow links.
1991 */
1992#ifndef _SYS_SYSPROTO_H_
1993struct olstat_args {
1994	char	*path;
1995	struct ostat *ub;
1996};
1997#endif
1998int
1999olstat(td, uap)
2000	struct thread *td;
2001	register struct olstat_args /* {
2002		char *path;
2003		struct ostat *ub;
2004	} */ *uap;
2005{
2006	struct stat sb;
2007	struct ostat osb;
2008	int error;
2009
2010	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2011	if (error)
2012		return (error);
2013	cvtstat(&sb, &osb);
2014	error = copyout(&osb, uap->ub, sizeof (osb));
2015	return (error);
2016}
2017
2018/*
2019 * Convert from an old to a new stat structure.
2020 */
2021void
2022cvtstat(st, ost)
2023	struct stat *st;
2024	struct ostat *ost;
2025{
2026
2027	ost->st_dev = st->st_dev;
2028	ost->st_ino = st->st_ino;
2029	ost->st_mode = st->st_mode;
2030	ost->st_nlink = st->st_nlink;
2031	ost->st_uid = st->st_uid;
2032	ost->st_gid = st->st_gid;
2033	ost->st_rdev = st->st_rdev;
2034	if (st->st_size < (quad_t)1 << 32)
2035		ost->st_size = st->st_size;
2036	else
2037		ost->st_size = -2;
2038	ost->st_atime = st->st_atime;
2039	ost->st_mtime = st->st_mtime;
2040	ost->st_ctime = st->st_ctime;
2041	ost->st_blksize = st->st_blksize;
2042	ost->st_blocks = st->st_blocks;
2043	ost->st_flags = st->st_flags;
2044	ost->st_gen = st->st_gen;
2045}
2046#endif /* COMPAT_43 */
2047
2048/*
2049 * Get file status; this version follows links.
2050 */
2051#ifndef _SYS_SYSPROTO_H_
2052struct stat_args {
2053	char	*path;
2054	struct stat *ub;
2055};
2056#endif
2057int
2058stat(td, uap)
2059	struct thread *td;
2060	register struct stat_args /* {
2061		char *path;
2062		struct stat *ub;
2063	} */ *uap;
2064{
2065	struct stat sb;
2066	int error;
2067
2068	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2069	if (error == 0)
2070		error = copyout(&sb, uap->ub, sizeof (sb));
2071	return (error);
2072}
2073
2074int
2075kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2076{
2077	struct nameidata nd;
2078	struct stat sb;
2079	int error, vfslocked;
2080
2081	NDINIT(&nd, LOOKUP,
2082	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2083	    pathseg, path, td);
2084	if ((error = namei(&nd)) != 0)
2085		return (error);
2086	vfslocked = NDHASGIANT(&nd);
2087	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2088	NDFREE(&nd, NDF_ONLY_PNBUF);
2089	vput(nd.ni_vp);
2090	VFS_UNLOCK_GIANT(vfslocked);
2091	if (mtx_owned(&Giant))
2092		printf("stat(%d): %s\n", vfslocked, path);
2093	if (error)
2094		return (error);
2095	*sbp = sb;
2096	return (0);
2097}
2098
2099/*
2100 * Get file status; this version does not follow links.
2101 */
2102#ifndef _SYS_SYSPROTO_H_
2103struct lstat_args {
2104	char	*path;
2105	struct stat *ub;
2106};
2107#endif
2108int
2109lstat(td, uap)
2110	struct thread *td;
2111	register struct lstat_args /* {
2112		char *path;
2113		struct stat *ub;
2114	} */ *uap;
2115{
2116	struct stat sb;
2117	int error;
2118
2119	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2120	if (error == 0)
2121		error = copyout(&sb, uap->ub, sizeof (sb));
2122	return (error);
2123}
2124
2125int
2126kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2127{
2128	struct vnode *vp;
2129	struct stat sb;
2130	struct nameidata nd;
2131	int error, vfslocked;
2132
2133	NDINIT(&nd, LOOKUP,
2134	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2135	    pathseg, path, td);
2136	if ((error = namei(&nd)) != 0)
2137		return (error);
2138	vfslocked = NDHASGIANT(&nd);
2139	vp = nd.ni_vp;
2140	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2141	NDFREE(&nd, NDF_ONLY_PNBUF);
2142	vput(vp);
2143	VFS_UNLOCK_GIANT(vfslocked);
2144	if (error)
2145		return (error);
2146	*sbp = sb;
2147	return (0);
2148}
2149
2150/*
2151 * Implementation of the NetBSD [l]stat() functions.
2152 */
2153void
2154cvtnstat(sb, nsb)
2155	struct stat *sb;
2156	struct nstat *nsb;
2157{
2158	bzero(nsb, sizeof *nsb);
2159	nsb->st_dev = sb->st_dev;
2160	nsb->st_ino = sb->st_ino;
2161	nsb->st_mode = sb->st_mode;
2162	nsb->st_nlink = sb->st_nlink;
2163	nsb->st_uid = sb->st_uid;
2164	nsb->st_gid = sb->st_gid;
2165	nsb->st_rdev = sb->st_rdev;
2166	nsb->st_atimespec = sb->st_atimespec;
2167	nsb->st_mtimespec = sb->st_mtimespec;
2168	nsb->st_ctimespec = sb->st_ctimespec;
2169	nsb->st_size = sb->st_size;
2170	nsb->st_blocks = sb->st_blocks;
2171	nsb->st_blksize = sb->st_blksize;
2172	nsb->st_flags = sb->st_flags;
2173	nsb->st_gen = sb->st_gen;
2174	nsb->st_birthtimespec = sb->st_birthtimespec;
2175}
2176
2177#ifndef _SYS_SYSPROTO_H_
2178struct nstat_args {
2179	char	*path;
2180	struct nstat *ub;
2181};
2182#endif
2183int
2184nstat(td, uap)
2185	struct thread *td;
2186	register struct nstat_args /* {
2187		char *path;
2188		struct nstat *ub;
2189	} */ *uap;
2190{
2191	struct stat sb;
2192	struct nstat nsb;
2193	int error;
2194
2195	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2196	if (error)
2197		return (error);
2198	cvtnstat(&sb, &nsb);
2199	error = copyout(&nsb, uap->ub, sizeof (nsb));
2200	return (error);
2201}
2202
2203/*
2204 * NetBSD lstat.  Get file status; this version does not follow links.
2205 */
2206#ifndef _SYS_SYSPROTO_H_
2207struct lstat_args {
2208	char	*path;
2209	struct stat *ub;
2210};
2211#endif
2212int
2213nlstat(td, uap)
2214	struct thread *td;
2215	register struct nlstat_args /* {
2216		char *path;
2217		struct nstat *ub;
2218	} */ *uap;
2219{
2220	struct stat sb;
2221	struct nstat nsb;
2222	int error;
2223
2224	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2225	if (error)
2226		return (error);
2227	cvtnstat(&sb, &nsb);
2228	error = copyout(&nsb, uap->ub, sizeof (nsb));
2229	return (error);
2230}
2231
2232/*
2233 * Get configurable pathname variables.
2234 */
2235#ifndef _SYS_SYSPROTO_H_
2236struct pathconf_args {
2237	char	*path;
2238	int	name;
2239};
2240#endif
2241int
2242pathconf(td, uap)
2243	struct thread *td;
2244	register struct pathconf_args /* {
2245		char *path;
2246		int name;
2247	} */ *uap;
2248{
2249
2250	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2251}
2252
2253int
2254kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2255{
2256	struct nameidata nd;
2257	int error, vfslocked;
2258
2259	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2260	    pathseg, path, td);
2261	if ((error = namei(&nd)) != 0)
2262		return (error);
2263	vfslocked = NDHASGIANT(&nd);
2264	NDFREE(&nd, NDF_ONLY_PNBUF);
2265
2266	/* If asynchronous I/O is available, it works for all files. */
2267	if (name == _PC_ASYNC_IO)
2268		td->td_retval[0] = async_io_version;
2269	else
2270		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2271	vput(nd.ni_vp);
2272	VFS_UNLOCK_GIANT(vfslocked);
2273	return (error);
2274}
2275
2276/*
2277 * Return target name of a symbolic link.
2278 */
2279#ifndef _SYS_SYSPROTO_H_
2280struct readlink_args {
2281	char	*path;
2282	char	*buf;
2283	int	count;
2284};
2285#endif
2286int
2287readlink(td, uap)
2288	struct thread *td;
2289	register struct readlink_args /* {
2290		char *path;
2291		char *buf;
2292		int count;
2293	} */ *uap;
2294{
2295
2296	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2297	    UIO_USERSPACE, uap->count));
2298}
2299
2300int
2301kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2302    enum uio_seg bufseg, int count)
2303{
2304	register struct vnode *vp;
2305	struct iovec aiov;
2306	struct uio auio;
2307	int error;
2308	struct nameidata nd;
2309	int vfslocked;
2310
2311	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2312	    pathseg, path, td);
2313	if ((error = namei(&nd)) != 0)
2314		return (error);
2315	NDFREE(&nd, NDF_ONLY_PNBUF);
2316	vfslocked = NDHASGIANT(&nd);
2317	vp = nd.ni_vp;
2318#ifdef MAC
2319	error = mac_check_vnode_readlink(td->td_ucred, vp);
2320	if (error) {
2321		vput(vp);
2322		VFS_UNLOCK_GIANT(vfslocked);
2323		return (error);
2324	}
2325#endif
2326	if (vp->v_type != VLNK)
2327		error = EINVAL;
2328	else {
2329		aiov.iov_base = buf;
2330		aiov.iov_len = count;
2331		auio.uio_iov = &aiov;
2332		auio.uio_iovcnt = 1;
2333		auio.uio_offset = 0;
2334		auio.uio_rw = UIO_READ;
2335		auio.uio_segflg = bufseg;
2336		auio.uio_td = td;
2337		auio.uio_resid = count;
2338		error = VOP_READLINK(vp, &auio, td->td_ucred);
2339	}
2340	vput(vp);
2341	VFS_UNLOCK_GIANT(vfslocked);
2342	td->td_retval[0] = count - auio.uio_resid;
2343	return (error);
2344}
2345
2346/*
2347 * Common implementation code for chflags() and fchflags().
2348 */
2349static int
2350setfflags(td, vp, flags)
2351	struct thread *td;
2352	struct vnode *vp;
2353	int flags;
2354{
2355	int error;
2356	struct mount *mp;
2357	struct vattr vattr;
2358
2359	/*
2360	 * Prevent non-root users from setting flags on devices.  When
2361	 * a device is reused, users can retain ownership of the device
2362	 * if they are allowed to set flags and programs assume that
2363	 * chown can't fail when done as root.
2364	 */
2365	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2366		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2367		    SUSER_ALLOWJAIL);
2368		if (error)
2369			return (error);
2370	}
2371
2372	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2373		return (error);
2374	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2375	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2376	VATTR_NULL(&vattr);
2377	vattr.va_flags = flags;
2378#ifdef MAC
2379	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2380	if (error == 0)
2381#endif
2382		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2383	VOP_UNLOCK(vp, 0, td);
2384	vn_finished_write(mp);
2385	return (error);
2386}
2387
2388/*
2389 * Change flags of a file given a path name.
2390 */
2391#ifndef _SYS_SYSPROTO_H_
2392struct chflags_args {
2393	char	*path;
2394	int	flags;
2395};
2396#endif
2397int
2398chflags(td, uap)
2399	struct thread *td;
2400	register struct chflags_args /* {
2401		char *path;
2402		int flags;
2403	} */ *uap;
2404{
2405	int error;
2406	struct nameidata nd;
2407	int vfslocked;
2408
2409	AUDIT_ARG(fflags, uap->flags);
2410	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2411	    uap->path, td);
2412	if ((error = namei(&nd)) != 0)
2413		return (error);
2414	NDFREE(&nd, NDF_ONLY_PNBUF);
2415	vfslocked = NDHASGIANT(&nd);
2416	error = setfflags(td, nd.ni_vp, uap->flags);
2417	vrele(nd.ni_vp);
2418	VFS_UNLOCK_GIANT(vfslocked);
2419	return (error);
2420}
2421
2422/*
2423 * Same as chflags() but doesn't follow symlinks.
2424 */
2425int
2426lchflags(td, uap)
2427	struct thread *td;
2428	register struct lchflags_args /* {
2429		char *path;
2430		int flags;
2431	} */ *uap;
2432{
2433	int error;
2434	struct nameidata nd;
2435	int vfslocked;
2436
2437	AUDIT_ARG(fflags, uap->flags);
2438	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2439	    uap->path, td);
2440	if ((error = namei(&nd)) != 0)
2441		return (error);
2442	vfslocked = NDHASGIANT(&nd);
2443	NDFREE(&nd, NDF_ONLY_PNBUF);
2444	error = setfflags(td, nd.ni_vp, uap->flags);
2445	vrele(nd.ni_vp);
2446	VFS_UNLOCK_GIANT(vfslocked);
2447	return (error);
2448}
2449
2450/*
2451 * Change flags of a file given a file descriptor.
2452 */
2453#ifndef _SYS_SYSPROTO_H_
2454struct fchflags_args {
2455	int	fd;
2456	int	flags;
2457};
2458#endif
2459int
2460fchflags(td, uap)
2461	struct thread *td;
2462	register struct fchflags_args /* {
2463		int fd;
2464		int flags;
2465	} */ *uap;
2466{
2467	struct file *fp;
2468	int vfslocked;
2469	int error;
2470
2471	AUDIT_ARG(fd, uap->fd);
2472	AUDIT_ARG(fflags, uap->flags);
2473	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2474		return (error);
2475	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2476#ifdef AUDIT
2477	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2478	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2479	VOP_UNLOCK(fp->f_vnode, 0, td);
2480#endif
2481	error = setfflags(td, fp->f_vnode, uap->flags);
2482	VFS_UNLOCK_GIANT(vfslocked);
2483	fdrop(fp, td);
2484	return (error);
2485}
2486
2487/*
2488 * Common implementation code for chmod(), lchmod() and fchmod().
2489 */
2490static int
2491setfmode(td, vp, mode)
2492	struct thread *td;
2493	struct vnode *vp;
2494	int mode;
2495{
2496	int error;
2497	struct mount *mp;
2498	struct vattr vattr;
2499
2500	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2501		return (error);
2502	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2503	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2504	VATTR_NULL(&vattr);
2505	vattr.va_mode = mode & ALLPERMS;
2506#ifdef MAC
2507	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2508	if (error == 0)
2509#endif
2510		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2511	VOP_UNLOCK(vp, 0, td);
2512	vn_finished_write(mp);
2513	return (error);
2514}
2515
2516/*
2517 * Change mode of a file given path name.
2518 */
2519#ifndef _SYS_SYSPROTO_H_
2520struct chmod_args {
2521	char	*path;
2522	int	mode;
2523};
2524#endif
2525int
2526chmod(td, uap)
2527	struct thread *td;
2528	register struct chmod_args /* {
2529		char *path;
2530		int mode;
2531	} */ *uap;
2532{
2533
2534	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2535}
2536
2537int
2538kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2539{
2540	int error;
2541	struct nameidata nd;
2542	int vfslocked;
2543
2544	AUDIT_ARG(mode, mode);
2545	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2546	if ((error = namei(&nd)) != 0)
2547		return (error);
2548	vfslocked = NDHASGIANT(&nd);
2549	NDFREE(&nd, NDF_ONLY_PNBUF);
2550	error = setfmode(td, nd.ni_vp, mode);
2551	vrele(nd.ni_vp);
2552	VFS_UNLOCK_GIANT(vfslocked);
2553	return (error);
2554}
2555
2556/*
2557 * Change mode of a file given path name (don't follow links.)
2558 */
2559#ifndef _SYS_SYSPROTO_H_
2560struct lchmod_args {
2561	char	*path;
2562	int	mode;
2563};
2564#endif
2565int
2566lchmod(td, uap)
2567	struct thread *td;
2568	register struct lchmod_args /* {
2569		char *path;
2570		int mode;
2571	} */ *uap;
2572{
2573	int error;
2574	struct nameidata nd;
2575	int vfslocked;
2576
2577	AUDIT_ARG(mode, (mode_t)uap->mode);
2578	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2579	    uap->path, td);
2580	if ((error = namei(&nd)) != 0)
2581		return (error);
2582	vfslocked = NDHASGIANT(&nd);
2583	NDFREE(&nd, NDF_ONLY_PNBUF);
2584	error = setfmode(td, nd.ni_vp, uap->mode);
2585	vrele(nd.ni_vp);
2586	VFS_UNLOCK_GIANT(vfslocked);
2587	return (error);
2588}
2589
2590/*
2591 * Change mode of a file given a file descriptor.
2592 */
2593#ifndef _SYS_SYSPROTO_H_
2594struct fchmod_args {
2595	int	fd;
2596	int	mode;
2597};
2598#endif
2599int
2600fchmod(td, uap)
2601	struct thread *td;
2602	register struct fchmod_args /* {
2603		int fd;
2604		int mode;
2605	} */ *uap;
2606{
2607	struct file *fp;
2608	int vfslocked;
2609	int error;
2610
2611	AUDIT_ARG(fd, uap->fd);
2612	AUDIT_ARG(mode, uap->mode);
2613	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2614		return (error);
2615	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2616#ifdef AUDIT
2617	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2618	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2619	VOP_UNLOCK(fp->f_vnode, 0, td);
2620#endif
2621	error = setfmode(td, fp->f_vnode, uap->mode);
2622	VFS_UNLOCK_GIANT(vfslocked);
2623	fdrop(fp, td);
2624	return (error);
2625}
2626
2627/*
2628 * Common implementation for chown(), lchown(), and fchown()
2629 */
2630static int
2631setfown(td, vp, uid, gid)
2632	struct thread *td;
2633	struct vnode *vp;
2634	uid_t uid;
2635	gid_t gid;
2636{
2637	int error;
2638	struct mount *mp;
2639	struct vattr vattr;
2640
2641	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2642		return (error);
2643	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2644	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2645	VATTR_NULL(&vattr);
2646	vattr.va_uid = uid;
2647	vattr.va_gid = gid;
2648#ifdef MAC
2649	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2650	    vattr.va_gid);
2651	if (error == 0)
2652#endif
2653		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2654	VOP_UNLOCK(vp, 0, td);
2655	vn_finished_write(mp);
2656	return (error);
2657}
2658
2659/*
2660 * Set ownership given a path name.
2661 */
2662#ifndef _SYS_SYSPROTO_H_
2663struct chown_args {
2664	char	*path;
2665	int	uid;
2666	int	gid;
2667};
2668#endif
2669int
2670chown(td, uap)
2671	struct thread *td;
2672	register struct chown_args /* {
2673		char *path;
2674		int uid;
2675		int gid;
2676	} */ *uap;
2677{
2678
2679	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2680}
2681
2682int
2683kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2684    int gid)
2685{
2686	int error;
2687	struct nameidata nd;
2688	int vfslocked;
2689
2690	AUDIT_ARG(owner, uid, gid);
2691	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2692	if ((error = namei(&nd)) != 0)
2693		return (error);
2694	vfslocked = NDHASGIANT(&nd);
2695	NDFREE(&nd, NDF_ONLY_PNBUF);
2696	error = setfown(td, nd.ni_vp, uid, gid);
2697	vrele(nd.ni_vp);
2698	VFS_UNLOCK_GIANT(vfslocked);
2699	return (error);
2700}
2701
2702/*
2703 * Set ownership given a path name, do not cross symlinks.
2704 */
2705#ifndef _SYS_SYSPROTO_H_
2706struct lchown_args {
2707	char	*path;
2708	int	uid;
2709	int	gid;
2710};
2711#endif
2712int
2713lchown(td, uap)
2714	struct thread *td;
2715	register struct lchown_args /* {
2716		char *path;
2717		int uid;
2718		int gid;
2719	} */ *uap;
2720{
2721
2722	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2723}
2724
2725int
2726kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2727    int gid)
2728{
2729	int error;
2730	struct nameidata nd;
2731	int vfslocked;
2732
2733	AUDIT_ARG(owner, uid, gid);
2734	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2735	if ((error = namei(&nd)) != 0)
2736		return (error);
2737	vfslocked = NDHASGIANT(&nd);
2738	NDFREE(&nd, NDF_ONLY_PNBUF);
2739	error = setfown(td, nd.ni_vp, uid, gid);
2740	vrele(nd.ni_vp);
2741	VFS_UNLOCK_GIANT(vfslocked);
2742	return (error);
2743}
2744
2745/*
2746 * Set ownership given a file descriptor.
2747 */
2748#ifndef _SYS_SYSPROTO_H_
2749struct fchown_args {
2750	int	fd;
2751	int	uid;
2752	int	gid;
2753};
2754#endif
2755int
2756fchown(td, uap)
2757	struct thread *td;
2758	register struct fchown_args /* {
2759		int fd;
2760		int uid;
2761		int gid;
2762	} */ *uap;
2763{
2764	struct file *fp;
2765	int vfslocked;
2766	int error;
2767
2768	AUDIT_ARG(fd, uap->fd);
2769	AUDIT_ARG(owner, uap->uid, uap->gid);
2770	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2771		return (error);
2772	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2773#ifdef AUDIT
2774	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2775	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2776	VOP_UNLOCK(fp->f_vnode, 0, td);
2777#endif
2778	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2779	VFS_UNLOCK_GIANT(vfslocked);
2780	fdrop(fp, td);
2781	return (error);
2782}
2783
2784/*
2785 * Common implementation code for utimes(), lutimes(), and futimes().
2786 */
2787static int
2788getutimes(usrtvp, tvpseg, tsp)
2789	const struct timeval *usrtvp;
2790	enum uio_seg tvpseg;
2791	struct timespec *tsp;
2792{
2793	struct timeval tv[2];
2794	const struct timeval *tvp;
2795	int error;
2796
2797	if (usrtvp == NULL) {
2798		microtime(&tv[0]);
2799		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2800		tsp[1] = tsp[0];
2801	} else {
2802		if (tvpseg == UIO_SYSSPACE) {
2803			tvp = usrtvp;
2804		} else {
2805			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2806				return (error);
2807			tvp = tv;
2808		}
2809
2810		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2811		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2812			return (EINVAL);
2813		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2814		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2815	}
2816	return (0);
2817}
2818
2819/*
2820 * Common implementation code for utimes(), lutimes(), and futimes().
2821 */
2822static int
2823setutimes(td, vp, ts, numtimes, nullflag)
2824	struct thread *td;
2825	struct vnode *vp;
2826	const struct timespec *ts;
2827	int numtimes;
2828	int nullflag;
2829{
2830	int error, setbirthtime;
2831	struct mount *mp;
2832	struct vattr vattr;
2833
2834	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2835		return (error);
2836	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2837	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2838	setbirthtime = 0;
2839	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2840	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2841		setbirthtime = 1;
2842	VATTR_NULL(&vattr);
2843	vattr.va_atime = ts[0];
2844	vattr.va_mtime = ts[1];
2845	if (setbirthtime)
2846		vattr.va_birthtime = ts[1];
2847	if (numtimes > 2)
2848		vattr.va_birthtime = ts[2];
2849	if (nullflag)
2850		vattr.va_vaflags |= VA_UTIMES_NULL;
2851#ifdef MAC
2852	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2853	    vattr.va_mtime);
2854#endif
2855	if (error == 0)
2856		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2857	VOP_UNLOCK(vp, 0, td);
2858	vn_finished_write(mp);
2859	return (error);
2860}
2861
2862/*
2863 * Set the access and modification times of a file.
2864 */
2865#ifndef _SYS_SYSPROTO_H_
2866struct utimes_args {
2867	char	*path;
2868	struct	timeval *tptr;
2869};
2870#endif
2871int
2872utimes(td, uap)
2873	struct thread *td;
2874	register struct utimes_args /* {
2875		char *path;
2876		struct timeval *tptr;
2877	} */ *uap;
2878{
2879
2880	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2881	    UIO_USERSPACE));
2882}
2883
2884int
2885kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2886    struct timeval *tptr, enum uio_seg tptrseg)
2887{
2888	struct timespec ts[2];
2889	int error;
2890	struct nameidata nd;
2891	int vfslocked;
2892
2893	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2894		return (error);
2895	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2896	if ((error = namei(&nd)) != 0)
2897		return (error);
2898	vfslocked = NDHASGIANT(&nd);
2899	NDFREE(&nd, NDF_ONLY_PNBUF);
2900	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2901	vrele(nd.ni_vp);
2902	VFS_UNLOCK_GIANT(vfslocked);
2903	return (error);
2904}
2905
2906/*
2907 * Set the access and modification times of a file.
2908 */
2909#ifndef _SYS_SYSPROTO_H_
2910struct lutimes_args {
2911	char	*path;
2912	struct	timeval *tptr;
2913};
2914#endif
2915int
2916lutimes(td, uap)
2917	struct thread *td;
2918	register struct lutimes_args /* {
2919		char *path;
2920		struct timeval *tptr;
2921	} */ *uap;
2922{
2923
2924	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2925	    UIO_USERSPACE));
2926}
2927
2928int
2929kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2930    struct timeval *tptr, enum uio_seg tptrseg)
2931{
2932	struct timespec ts[2];
2933	int error;
2934	struct nameidata nd;
2935	int vfslocked;
2936
2937	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2938		return (error);
2939	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2940	if ((error = namei(&nd)) != 0)
2941		return (error);
2942	vfslocked = NDHASGIANT(&nd);
2943	NDFREE(&nd, NDF_ONLY_PNBUF);
2944	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2945	vrele(nd.ni_vp);
2946	VFS_UNLOCK_GIANT(vfslocked);
2947	return (error);
2948}
2949
2950/*
2951 * Set the access and modification times of a file.
2952 */
2953#ifndef _SYS_SYSPROTO_H_
2954struct futimes_args {
2955	int	fd;
2956	struct	timeval *tptr;
2957};
2958#endif
2959int
2960futimes(td, uap)
2961	struct thread *td;
2962	register struct futimes_args /* {
2963		int  fd;
2964		struct timeval *tptr;
2965	} */ *uap;
2966{
2967
2968	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2969}
2970
2971int
2972kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2973    enum uio_seg tptrseg)
2974{
2975	struct timespec ts[2];
2976	struct file *fp;
2977	int vfslocked;
2978	int error;
2979
2980	AUDIT_ARG(fd, fd);
2981	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2982		return (error);
2983	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2984		return (error);
2985	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2986#ifdef AUDIT
2987	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2988	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2989	VOP_UNLOCK(fp->f_vnode, 0, td);
2990#endif
2991	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2992	VFS_UNLOCK_GIANT(vfslocked);
2993	fdrop(fp, td);
2994	return (error);
2995}
2996
2997/*
2998 * Truncate a file given its path name.
2999 */
3000#ifndef _SYS_SYSPROTO_H_
3001struct truncate_args {
3002	char	*path;
3003	int	pad;
3004	off_t	length;
3005};
3006#endif
3007int
3008truncate(td, uap)
3009	struct thread *td;
3010	register struct truncate_args /* {
3011		char *path;
3012		int pad;
3013		off_t length;
3014	} */ *uap;
3015{
3016
3017	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3018}
3019
3020int
3021kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3022{
3023	struct mount *mp;
3024	struct vnode *vp;
3025	struct vattr vattr;
3026	int error;
3027	struct nameidata nd;
3028	int vfslocked;
3029
3030	if (length < 0)
3031		return(EINVAL);
3032	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3033	if ((error = namei(&nd)) != 0)
3034		return (error);
3035	vfslocked = NDHASGIANT(&nd);
3036	vp = nd.ni_vp;
3037	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3038		vrele(vp);
3039		VFS_UNLOCK_GIANT(vfslocked);
3040		return (error);
3041	}
3042	NDFREE(&nd, NDF_ONLY_PNBUF);
3043	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3044	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3045	if (vp->v_type == VDIR)
3046		error = EISDIR;
3047#ifdef MAC
3048	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3049	}
3050#endif
3051	else if ((error = vn_writechk(vp)) == 0 &&
3052	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3053		VATTR_NULL(&vattr);
3054		vattr.va_size = length;
3055		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3056	}
3057	vput(vp);
3058	vn_finished_write(mp);
3059	VFS_UNLOCK_GIANT(vfslocked);
3060	return (error);
3061}
3062
3063/*
3064 * Truncate a file given a file descriptor.
3065 */
3066#ifndef _SYS_SYSPROTO_H_
3067struct ftruncate_args {
3068	int	fd;
3069	int	pad;
3070	off_t	length;
3071};
3072#endif
3073int
3074ftruncate(td, uap)
3075	struct thread *td;
3076	register struct ftruncate_args /* {
3077		int fd;
3078		int pad;
3079		off_t length;
3080	} */ *uap;
3081{
3082	struct mount *mp;
3083	struct vattr vattr;
3084	struct vnode *vp;
3085	struct file *fp;
3086	int vfslocked;
3087	int error;
3088
3089	AUDIT_ARG(fd, uap->fd);
3090	if (uap->length < 0)
3091		return(EINVAL);
3092	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3093		return (error);
3094	if ((fp->f_flag & FWRITE) == 0) {
3095		fdrop(fp, td);
3096		return (EINVAL);
3097	}
3098	vp = fp->f_vnode;
3099	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3100	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3101		goto drop;
3102	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3103	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3104	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3105	if (vp->v_type == VDIR)
3106		error = EISDIR;
3107#ifdef MAC
3108	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3109	    vp))) {
3110	}
3111#endif
3112	else if ((error = vn_writechk(vp)) == 0) {
3113		VATTR_NULL(&vattr);
3114		vattr.va_size = uap->length;
3115		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3116	}
3117	VOP_UNLOCK(vp, 0, td);
3118	vn_finished_write(mp);
3119drop:
3120	VFS_UNLOCK_GIANT(vfslocked);
3121	fdrop(fp, td);
3122	return (error);
3123}
3124
3125#if defined(COMPAT_43)
3126/*
3127 * Truncate a file given its path name.
3128 */
3129#ifndef _SYS_SYSPROTO_H_
3130struct otruncate_args {
3131	char	*path;
3132	long	length;
3133};
3134#endif
3135int
3136otruncate(td, uap)
3137	struct thread *td;
3138	register struct otruncate_args /* {
3139		char *path;
3140		long length;
3141	} */ *uap;
3142{
3143	struct truncate_args /* {
3144		char *path;
3145		int pad;
3146		off_t length;
3147	} */ nuap;
3148
3149	nuap.path = uap->path;
3150	nuap.length = uap->length;
3151	return (truncate(td, &nuap));
3152}
3153
3154/*
3155 * Truncate a file given a file descriptor.
3156 */
3157#ifndef _SYS_SYSPROTO_H_
3158struct oftruncate_args {
3159	int	fd;
3160	long	length;
3161};
3162#endif
3163int
3164oftruncate(td, uap)
3165	struct thread *td;
3166	register struct oftruncate_args /* {
3167		int fd;
3168		long length;
3169	} */ *uap;
3170{
3171	struct ftruncate_args /* {
3172		int fd;
3173		int pad;
3174		off_t length;
3175	} */ nuap;
3176
3177	nuap.fd = uap->fd;
3178	nuap.length = uap->length;
3179	return (ftruncate(td, &nuap));
3180}
3181#endif /* COMPAT_43 */
3182
3183/*
3184 * Sync an open file.
3185 */
3186#ifndef _SYS_SYSPROTO_H_
3187struct fsync_args {
3188	int	fd;
3189};
3190#endif
3191int
3192fsync(td, uap)
3193	struct thread *td;
3194	struct fsync_args /* {
3195		int fd;
3196	} */ *uap;
3197{
3198	struct vnode *vp;
3199	struct mount *mp;
3200	struct file *fp;
3201	int vfslocked;
3202	int error;
3203
3204	AUDIT_ARG(fd, uap->fd);
3205	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3206		return (error);
3207	vp = fp->f_vnode;
3208	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3209	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3210		goto drop;
3211	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3212	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3213	if (vp->v_object != NULL) {
3214		VM_OBJECT_LOCK(vp->v_object);
3215		vm_object_page_clean(vp->v_object, 0, 0, 0);
3216		VM_OBJECT_UNLOCK(vp->v_object);
3217	}
3218	error = VOP_FSYNC(vp, MNT_WAIT, td);
3219
3220	VOP_UNLOCK(vp, 0, td);
3221	vn_finished_write(mp);
3222drop:
3223	VFS_UNLOCK_GIANT(vfslocked);
3224	fdrop(fp, td);
3225	return (error);
3226}
3227
3228/*
3229 * Rename files.  Source and destination must either both be directories,
3230 * or both not be directories.  If target is a directory, it must be empty.
3231 */
3232#ifndef _SYS_SYSPROTO_H_
3233struct rename_args {
3234	char	*from;
3235	char	*to;
3236};
3237#endif
3238int
3239rename(td, uap)
3240	struct thread *td;
3241	register struct rename_args /* {
3242		char *from;
3243		char *to;
3244	} */ *uap;
3245{
3246
3247	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3248}
3249
3250int
3251kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3252{
3253	struct mount *mp = NULL;
3254	struct vnode *tvp, *fvp, *tdvp;
3255	struct nameidata fromnd, tond;
3256	int tvfslocked;
3257	int fvfslocked;
3258	int error;
3259
3260	bwillwrite();
3261#ifdef MAC
3262	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3263	    AUDITVNODE1, pathseg, from, td);
3264#else
3265	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3266	    AUDITVNODE1, pathseg, from, td);
3267#endif
3268	if ((error = namei(&fromnd)) != 0)
3269		return (error);
3270	fvfslocked = NDHASGIANT(&fromnd);
3271	tvfslocked = 0;
3272#ifdef MAC
3273	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3274	    fromnd.ni_vp, &fromnd.ni_cnd);
3275	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3276	if (fromnd.ni_dvp != fromnd.ni_vp)
3277		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3278#endif
3279	fvp = fromnd.ni_vp;
3280	if (error == 0)
3281		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3282	if (error != 0) {
3283		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3284		vrele(fromnd.ni_dvp);
3285		vrele(fvp);
3286		goto out1;
3287	}
3288	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3289	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3290	if (fromnd.ni_vp->v_type == VDIR)
3291		tond.ni_cnd.cn_flags |= WILLBEDIR;
3292	if ((error = namei(&tond)) != 0) {
3293		/* Translate error code for rename("dir1", "dir2/."). */
3294		if (error == EISDIR && fvp->v_type == VDIR)
3295			error = EINVAL;
3296		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3297		vrele(fromnd.ni_dvp);
3298		vrele(fvp);
3299		vn_finished_write(mp);
3300		goto out1;
3301	}
3302	tvfslocked = NDHASGIANT(&tond);
3303	tdvp = tond.ni_dvp;
3304	tvp = tond.ni_vp;
3305	if (tvp != NULL) {
3306		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3307			error = ENOTDIR;
3308			goto out;
3309		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3310			error = EISDIR;
3311			goto out;
3312		}
3313	}
3314	if (fvp == tdvp)
3315		error = EINVAL;
3316	/*
3317	 * If the source is the same as the destination (that is, if they
3318	 * are links to the same vnode), then there is nothing to do.
3319	 */
3320	if (fvp == tvp)
3321		error = -1;
3322#ifdef MAC
3323	else
3324		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3325		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3326#endif
3327out:
3328	if (!error) {
3329		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3330		if (fromnd.ni_dvp != tdvp) {
3331			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3332		}
3333		if (tvp) {
3334			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3335		}
3336		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3337				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3338		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3339		NDFREE(&tond, NDF_ONLY_PNBUF);
3340	} else {
3341		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3342		NDFREE(&tond, NDF_ONLY_PNBUF);
3343		if (tvp)
3344			vput(tvp);
3345		if (tdvp == tvp)
3346			vrele(tdvp);
3347		else
3348			vput(tdvp);
3349		vrele(fromnd.ni_dvp);
3350		vrele(fvp);
3351	}
3352	vrele(tond.ni_startdir);
3353	vn_finished_write(mp);
3354out1:
3355	if (fromnd.ni_startdir)
3356		vrele(fromnd.ni_startdir);
3357	VFS_UNLOCK_GIANT(fvfslocked);
3358	VFS_UNLOCK_GIANT(tvfslocked);
3359	if (error == -1)
3360		return (0);
3361	return (error);
3362}
3363
3364/*
3365 * Make a directory file.
3366 */
3367#ifndef _SYS_SYSPROTO_H_
3368struct mkdir_args {
3369	char	*path;
3370	int	mode;
3371};
3372#endif
3373int
3374mkdir(td, uap)
3375	struct thread *td;
3376	register struct mkdir_args /* {
3377		char *path;
3378		int mode;
3379	} */ *uap;
3380{
3381
3382	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3383}
3384
3385int
3386kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3387{
3388	struct mount *mp;
3389	struct vnode *vp;
3390	struct vattr vattr;
3391	int error;
3392	struct nameidata nd;
3393	int vfslocked;
3394
3395	AUDIT_ARG(mode, mode);
3396restart:
3397	bwillwrite();
3398	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3399	    segflg, path, td);
3400	nd.ni_cnd.cn_flags |= WILLBEDIR;
3401	if ((error = namei(&nd)) != 0)
3402		return (error);
3403	vfslocked = NDHASGIANT(&nd);
3404	vp = nd.ni_vp;
3405	if (vp != NULL) {
3406		NDFREE(&nd, NDF_ONLY_PNBUF);
3407		/*
3408		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3409		 * the strange behaviour of leaving the vnode unlocked
3410		 * if the target is the same vnode as the parent.
3411		 */
3412		if (vp == nd.ni_dvp)
3413			vrele(nd.ni_dvp);
3414		else
3415			vput(nd.ni_dvp);
3416		vrele(vp);
3417		VFS_UNLOCK_GIANT(vfslocked);
3418		return (EEXIST);
3419	}
3420	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3421		NDFREE(&nd, NDF_ONLY_PNBUF);
3422		vput(nd.ni_dvp);
3423		VFS_UNLOCK_GIANT(vfslocked);
3424		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3425			return (error);
3426		goto restart;
3427	}
3428	VATTR_NULL(&vattr);
3429	vattr.va_type = VDIR;
3430	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3431	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3432	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3433#ifdef MAC
3434	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3435	    &vattr);
3436	if (error)
3437		goto out;
3438#endif
3439	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3440	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3441#ifdef MAC
3442out:
3443#endif
3444	NDFREE(&nd, NDF_ONLY_PNBUF);
3445	vput(nd.ni_dvp);
3446	if (!error)
3447		vput(nd.ni_vp);
3448	vn_finished_write(mp);
3449	VFS_UNLOCK_GIANT(vfslocked);
3450	return (error);
3451}
3452
3453/*
3454 * Remove a directory file.
3455 */
3456#ifndef _SYS_SYSPROTO_H_
3457struct rmdir_args {
3458	char	*path;
3459};
3460#endif
3461int
3462rmdir(td, uap)
3463	struct thread *td;
3464	struct rmdir_args /* {
3465		char *path;
3466	} */ *uap;
3467{
3468
3469	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3470}
3471
3472int
3473kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3474{
3475	struct mount *mp;
3476	struct vnode *vp;
3477	int error;
3478	struct nameidata nd;
3479	int vfslocked;
3480
3481restart:
3482	bwillwrite();
3483	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3484	    pathseg, path, td);
3485	if ((error = namei(&nd)) != 0)
3486		return (error);
3487	vfslocked = NDHASGIANT(&nd);
3488	vp = nd.ni_vp;
3489	if (vp->v_type != VDIR) {
3490		error = ENOTDIR;
3491		goto out;
3492	}
3493	/*
3494	 * No rmdir "." please.
3495	 */
3496	if (nd.ni_dvp == vp) {
3497		error = EINVAL;
3498		goto out;
3499	}
3500	/*
3501	 * The root of a mounted filesystem cannot be deleted.
3502	 */
3503	if (vp->v_vflag & VV_ROOT) {
3504		error = EBUSY;
3505		goto out;
3506	}
3507#ifdef MAC
3508	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3509	    &nd.ni_cnd);
3510	if (error)
3511		goto out;
3512#endif
3513	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3514		NDFREE(&nd, NDF_ONLY_PNBUF);
3515		vput(vp);
3516		if (nd.ni_dvp == vp)
3517			vrele(nd.ni_dvp);
3518		else
3519			vput(nd.ni_dvp);
3520		VFS_UNLOCK_GIANT(vfslocked);
3521		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3522			return (error);
3523		goto restart;
3524	}
3525	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3526	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3527	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3528	vn_finished_write(mp);
3529out:
3530	NDFREE(&nd, NDF_ONLY_PNBUF);
3531	vput(vp);
3532	if (nd.ni_dvp == vp)
3533		vrele(nd.ni_dvp);
3534	else
3535		vput(nd.ni_dvp);
3536	VFS_UNLOCK_GIANT(vfslocked);
3537	return (error);
3538}
3539
3540#ifdef COMPAT_43
3541/*
3542 * Read a block of directory entries in a filesystem independent format.
3543 */
3544#ifndef _SYS_SYSPROTO_H_
3545struct ogetdirentries_args {
3546	int	fd;
3547	char	*buf;
3548	u_int	count;
3549	long	*basep;
3550};
3551#endif
3552int
3553ogetdirentries(td, uap)
3554	struct thread *td;
3555	register struct ogetdirentries_args /* {
3556		int fd;
3557		char *buf;
3558		u_int count;
3559		long *basep;
3560	} */ *uap;
3561{
3562	struct vnode *vp;
3563	struct file *fp;
3564	struct uio auio, kuio;
3565	struct iovec aiov, kiov;
3566	struct dirent *dp, *edp;
3567	caddr_t dirbuf;
3568	int error, eofflag, readcnt, vfslocked;
3569	long loff;
3570
3571	/* XXX arbitrary sanity limit on `count'. */
3572	if (uap->count > 64 * 1024)
3573		return (EINVAL);
3574	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3575		return (error);
3576	if ((fp->f_flag & FREAD) == 0) {
3577		fdrop(fp, td);
3578		return (EBADF);
3579	}
3580	vp = fp->f_vnode;
3581unionread:
3582	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3583	if (vp->v_type != VDIR) {
3584		VFS_UNLOCK_GIANT(vfslocked);
3585		fdrop(fp, td);
3586		return (EINVAL);
3587	}
3588	aiov.iov_base = uap->buf;
3589	aiov.iov_len = uap->count;
3590	auio.uio_iov = &aiov;
3591	auio.uio_iovcnt = 1;
3592	auio.uio_rw = UIO_READ;
3593	auio.uio_segflg = UIO_USERSPACE;
3594	auio.uio_td = td;
3595	auio.uio_resid = uap->count;
3596	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3597	loff = auio.uio_offset = fp->f_offset;
3598#ifdef MAC
3599	error = mac_check_vnode_readdir(td->td_ucred, vp);
3600	if (error) {
3601		VOP_UNLOCK(vp, 0, td);
3602		VFS_UNLOCK_GIANT(vfslocked);
3603		fdrop(fp, td);
3604		return (error);
3605	}
3606#endif
3607#	if (BYTE_ORDER != LITTLE_ENDIAN)
3608		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3609			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3610			    NULL, NULL);
3611			fp->f_offset = auio.uio_offset;
3612		} else
3613#	endif
3614	{
3615		kuio = auio;
3616		kuio.uio_iov = &kiov;
3617		kuio.uio_segflg = UIO_SYSSPACE;
3618		kiov.iov_len = uap->count;
3619		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3620		kiov.iov_base = dirbuf;
3621		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3622			    NULL, NULL);
3623		fp->f_offset = kuio.uio_offset;
3624		if (error == 0) {
3625			readcnt = uap->count - kuio.uio_resid;
3626			edp = (struct dirent *)&dirbuf[readcnt];
3627			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3628#				if (BYTE_ORDER == LITTLE_ENDIAN)
3629					/*
3630					 * The expected low byte of
3631					 * dp->d_namlen is our dp->d_type.
3632					 * The high MBZ byte of dp->d_namlen
3633					 * is our dp->d_namlen.
3634					 */
3635					dp->d_type = dp->d_namlen;
3636					dp->d_namlen = 0;
3637#				else
3638					/*
3639					 * The dp->d_type is the high byte
3640					 * of the expected dp->d_namlen,
3641					 * so must be zero'ed.
3642					 */
3643					dp->d_type = 0;
3644#				endif
3645				if (dp->d_reclen > 0) {
3646					dp = (struct dirent *)
3647					    ((char *)dp + dp->d_reclen);
3648				} else {
3649					error = EIO;
3650					break;
3651				}
3652			}
3653			if (dp >= edp)
3654				error = uiomove(dirbuf, readcnt, &auio);
3655		}
3656		FREE(dirbuf, M_TEMP);
3657	}
3658	if (error) {
3659		VOP_UNLOCK(vp, 0, td);
3660		VFS_UNLOCK_GIANT(vfslocked);
3661		fdrop(fp, td);
3662		return (error);
3663	}
3664	if (uap->count == auio.uio_resid &&
3665	    (vp->v_vflag & VV_ROOT) &&
3666	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3667		struct vnode *tvp = vp;
3668		vp = vp->v_mount->mnt_vnodecovered;
3669		VREF(vp);
3670		fp->f_vnode = vp;
3671		fp->f_data = vp;
3672		fp->f_offset = 0;
3673		vput(tvp);
3674		VFS_UNLOCK_GIANT(vfslocked);
3675		goto unionread;
3676	}
3677	VOP_UNLOCK(vp, 0, td);
3678	VFS_UNLOCK_GIANT(vfslocked);
3679	error = copyout(&loff, uap->basep, sizeof(long));
3680	fdrop(fp, td);
3681	td->td_retval[0] = uap->count - auio.uio_resid;
3682	return (error);
3683}
3684#endif /* COMPAT_43 */
3685
3686/*
3687 * Read a block of directory entries in a filesystem independent format.
3688 */
3689#ifndef _SYS_SYSPROTO_H_
3690struct getdirentries_args {
3691	int	fd;
3692	char	*buf;
3693	u_int	count;
3694	long	*basep;
3695};
3696#endif
3697int
3698getdirentries(td, uap)
3699	struct thread *td;
3700	register struct getdirentries_args /* {
3701		int fd;
3702		char *buf;
3703		u_int count;
3704		long *basep;
3705	} */ *uap;
3706{
3707	struct vnode *vp;
3708	struct file *fp;
3709	struct uio auio;
3710	struct iovec aiov;
3711	int vfslocked;
3712	long loff;
3713	int error, eofflag;
3714
3715	AUDIT_ARG(fd, uap->fd);
3716	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3717		return (error);
3718	if ((fp->f_flag & FREAD) == 0) {
3719		fdrop(fp, td);
3720		return (EBADF);
3721	}
3722	vp = fp->f_vnode;
3723unionread:
3724	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3725	if (vp->v_type != VDIR) {
3726		VFS_UNLOCK_GIANT(vfslocked);
3727		error = EINVAL;
3728		goto fail;
3729	}
3730	aiov.iov_base = uap->buf;
3731	aiov.iov_len = uap->count;
3732	auio.uio_iov = &aiov;
3733	auio.uio_iovcnt = 1;
3734	auio.uio_rw = UIO_READ;
3735	auio.uio_segflg = UIO_USERSPACE;
3736	auio.uio_td = td;
3737	auio.uio_resid = uap->count;
3738	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3739	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3740	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3741	loff = auio.uio_offset = fp->f_offset;
3742#ifdef MAC
3743	error = mac_check_vnode_readdir(td->td_ucred, vp);
3744	if (error == 0)
3745#endif
3746		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3747		    NULL);
3748	fp->f_offset = auio.uio_offset;
3749	if (error) {
3750		VOP_UNLOCK(vp, 0, td);
3751		VFS_UNLOCK_GIANT(vfslocked);
3752		goto fail;
3753	}
3754	if (uap->count == auio.uio_resid &&
3755	    (vp->v_vflag & VV_ROOT) &&
3756	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3757		struct vnode *tvp = vp;
3758		vp = vp->v_mount->mnt_vnodecovered;
3759		VREF(vp);
3760		fp->f_vnode = vp;
3761		fp->f_data = vp;
3762		fp->f_offset = 0;
3763		vput(tvp);
3764		VFS_UNLOCK_GIANT(vfslocked);
3765		goto unionread;
3766	}
3767	VOP_UNLOCK(vp, 0, td);
3768	VFS_UNLOCK_GIANT(vfslocked);
3769	if (uap->basep != NULL) {
3770		error = copyout(&loff, uap->basep, sizeof(long));
3771	}
3772	td->td_retval[0] = uap->count - auio.uio_resid;
3773fail:
3774	fdrop(fp, td);
3775	return (error);
3776}
3777#ifndef _SYS_SYSPROTO_H_
3778struct getdents_args {
3779	int fd;
3780	char *buf;
3781	size_t count;
3782};
3783#endif
3784int
3785getdents(td, uap)
3786	struct thread *td;
3787	register struct getdents_args /* {
3788		int fd;
3789		char *buf;
3790		u_int count;
3791	} */ *uap;
3792{
3793	struct getdirentries_args ap;
3794	ap.fd = uap->fd;
3795	ap.buf = uap->buf;
3796	ap.count = uap->count;
3797	ap.basep = NULL;
3798	return (getdirentries(td, &ap));
3799}
3800
3801/*
3802 * Set the mode mask for creation of filesystem nodes.
3803 *
3804 * MP SAFE
3805 */
3806#ifndef _SYS_SYSPROTO_H_
3807struct umask_args {
3808	int	newmask;
3809};
3810#endif
3811int
3812umask(td, uap)
3813	struct thread *td;
3814	struct umask_args /* {
3815		int newmask;
3816	} */ *uap;
3817{
3818	register struct filedesc *fdp;
3819
3820	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3821	fdp = td->td_proc->p_fd;
3822	td->td_retval[0] = fdp->fd_cmask;
3823	fdp->fd_cmask = uap->newmask & ALLPERMS;
3824	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3825	return (0);
3826}
3827
3828/*
3829 * Void all references to file by ripping underlying filesystem
3830 * away from vnode.
3831 */
3832#ifndef _SYS_SYSPROTO_H_
3833struct revoke_args {
3834	char	*path;
3835};
3836#endif
3837int
3838revoke(td, uap)
3839	struct thread *td;
3840	register struct revoke_args /* {
3841		char *path;
3842	} */ *uap;
3843{
3844	struct vnode *vp;
3845	struct vattr vattr;
3846	int error;
3847	struct nameidata nd;
3848	int vfslocked;
3849
3850	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3851	    UIO_USERSPACE, uap->path, td);
3852	if ((error = namei(&nd)) != 0)
3853		return (error);
3854	vfslocked = NDHASGIANT(&nd);
3855	vp = nd.ni_vp;
3856	NDFREE(&nd, NDF_ONLY_PNBUF);
3857	if (vp->v_type != VCHR) {
3858		error = EINVAL;
3859		goto out;
3860	}
3861#ifdef MAC
3862	error = mac_check_vnode_revoke(td->td_ucred, vp);
3863	if (error)
3864		goto out;
3865#endif
3866	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3867	if (error)
3868		goto out;
3869	if (td->td_ucred->cr_uid != vattr.va_uid) {
3870		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3871		    SUSER_ALLOWJAIL);
3872		if (error)
3873			goto out;
3874	}
3875	if (vcount(vp) > 1)
3876		VOP_REVOKE(vp, REVOKEALL);
3877out:
3878	vput(vp);
3879	VFS_UNLOCK_GIANT(vfslocked);
3880	return (error);
3881}
3882
3883/*
3884 * Convert a user file descriptor to a kernel file entry.
3885 * A reference on the file entry is held upon returning.
3886 */
3887int
3888getvnode(fdp, fd, fpp)
3889	struct filedesc *fdp;
3890	int fd;
3891	struct file **fpp;
3892{
3893	int error;
3894	struct file *fp;
3895
3896	fp = NULL;
3897	if (fdp == NULL)
3898		error = EBADF;
3899	else {
3900		FILEDESC_LOCK(fdp);
3901		if ((u_int)fd >= fdp->fd_nfiles ||
3902		    (fp = fdp->fd_ofiles[fd]) == NULL)
3903			error = EBADF;
3904		else if (fp->f_vnode == NULL) {
3905			fp = NULL;
3906			error = EINVAL;
3907		} else {
3908			fhold(fp);
3909			error = 0;
3910		}
3911		FILEDESC_UNLOCK(fdp);
3912	}
3913	*fpp = fp;
3914	return (error);
3915}
3916
3917/*
3918 * Get (NFS) file handle
3919 */
3920#ifndef _SYS_SYSPROTO_H_
3921struct lgetfh_args {
3922	char	*fname;
3923	fhandle_t *fhp;
3924};
3925#endif
3926int
3927lgetfh(td, uap)
3928	struct thread *td;
3929	register struct lgetfh_args *uap;
3930{
3931	struct nameidata nd;
3932	fhandle_t fh;
3933	register struct vnode *vp;
3934	int vfslocked;
3935	int error;
3936
3937	error = priv_check(td, PRIV_VFS_GETFH);
3938	if (error)
3939		return (error);
3940	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3941	    UIO_USERSPACE, uap->fname, td);
3942	error = namei(&nd);
3943	if (error)
3944		return (error);
3945	vfslocked = NDHASGIANT(&nd);
3946	NDFREE(&nd, NDF_ONLY_PNBUF);
3947	vp = nd.ni_vp;
3948	bzero(&fh, sizeof(fh));
3949	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3950	error = VOP_VPTOFH(vp, &fh.fh_fid);
3951	vput(vp);
3952	VFS_UNLOCK_GIANT(vfslocked);
3953	if (error)
3954		return (error);
3955	error = copyout(&fh, uap->fhp, sizeof (fh));
3956	return (error);
3957}
3958
3959#ifndef _SYS_SYSPROTO_H_
3960struct getfh_args {
3961	char	*fname;
3962	fhandle_t *fhp;
3963};
3964#endif
3965int
3966getfh(td, uap)
3967	struct thread *td;
3968	register struct getfh_args *uap;
3969{
3970	struct nameidata nd;
3971	fhandle_t fh;
3972	register struct vnode *vp;
3973	int vfslocked;
3974	int error;
3975
3976	error = priv_check(td, PRIV_VFS_GETFH);
3977	if (error)
3978		return (error);
3979	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3980	    UIO_USERSPACE, uap->fname, td);
3981	error = namei(&nd);
3982	if (error)
3983		return (error);
3984	vfslocked = NDHASGIANT(&nd);
3985	NDFREE(&nd, NDF_ONLY_PNBUF);
3986	vp = nd.ni_vp;
3987	bzero(&fh, sizeof(fh));
3988	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3989	error = VOP_VPTOFH(vp, &fh.fh_fid);
3990	vput(vp);
3991	VFS_UNLOCK_GIANT(vfslocked);
3992	if (error)
3993		return (error);
3994	error = copyout(&fh, uap->fhp, sizeof (fh));
3995	return (error);
3996}
3997
3998/*
3999 * syscall for the rpc.lockd to use to translate a NFS file handle into an
4000 * open descriptor.
4001 *
4002 * warning: do not remove the priv_check() call or this becomes one giant
4003 * security hole.
4004 *
4005 * MP SAFE
4006 */
4007#ifndef _SYS_SYSPROTO_H_
4008struct fhopen_args {
4009	const struct fhandle *u_fhp;
4010	int flags;
4011};
4012#endif
4013int
4014fhopen(td, uap)
4015	struct thread *td;
4016	struct fhopen_args /* {
4017		const struct fhandle *u_fhp;
4018		int flags;
4019	} */ *uap;
4020{
4021	struct proc *p = td->td_proc;
4022	struct mount *mp;
4023	struct vnode *vp;
4024	struct fhandle fhp;
4025	struct vattr vat;
4026	struct vattr *vap = &vat;
4027	struct flock lf;
4028	struct file *fp;
4029	register struct filedesc *fdp = p->p_fd;
4030	int fmode, mode, error, type;
4031	struct file *nfp;
4032	int vfslocked;
4033	int indx;
4034
4035	error = priv_check(td, PRIV_VFS_FHOPEN);
4036	if (error)
4037		return (error);
4038	fmode = FFLAGS(uap->flags);
4039	/* why not allow a non-read/write open for our lockd? */
4040	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4041		return (EINVAL);
4042	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4043	if (error)
4044		return(error);
4045	/* find the mount point */
4046	mp = vfs_getvfs(&fhp.fh_fsid);
4047	if (mp == NULL)
4048		return (ESTALE);
4049	vfslocked = VFS_LOCK_GIANT(mp);
4050	/* now give me my vnode, it gets returned to me locked */
4051	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4052	if (error)
4053		goto out;
4054	/*
4055	 * from now on we have to make sure not
4056	 * to forget about the vnode
4057	 * any error that causes an abort must vput(vp)
4058	 * just set error = err and 'goto bad;'.
4059	 */
4060
4061	/*
4062	 * from vn_open
4063	 */
4064	if (vp->v_type == VLNK) {
4065		error = EMLINK;
4066		goto bad;
4067	}
4068	if (vp->v_type == VSOCK) {
4069		error = EOPNOTSUPP;
4070		goto bad;
4071	}
4072	mode = 0;
4073	if (fmode & (FWRITE | O_TRUNC)) {
4074		if (vp->v_type == VDIR) {
4075			error = EISDIR;
4076			goto bad;
4077		}
4078		error = vn_writechk(vp);
4079		if (error)
4080			goto bad;
4081		mode |= VWRITE;
4082	}
4083	if (fmode & FREAD)
4084		mode |= VREAD;
4085	if (fmode & O_APPEND)
4086		mode |= VAPPEND;
4087#ifdef MAC
4088	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4089	if (error)
4090		goto bad;
4091#endif
4092	if (mode) {
4093		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4094		if (error)
4095			goto bad;
4096	}
4097	if (fmode & O_TRUNC) {
4098		VOP_UNLOCK(vp, 0, td);				/* XXX */
4099		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4100			vrele(vp);
4101			goto out;
4102		}
4103		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4104		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4105#ifdef MAC
4106		/*
4107		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4108		 * should be right.
4109		 */
4110		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4111		if (error == 0) {
4112#endif
4113			VATTR_NULL(vap);
4114			vap->va_size = 0;
4115			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4116#ifdef MAC
4117		}
4118#endif
4119		vn_finished_write(mp);
4120		if (error)
4121			goto bad;
4122	}
4123	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4124	if (error)
4125		goto bad;
4126
4127	if (fmode & FWRITE)
4128		vp->v_writecount++;
4129
4130	/*
4131	 * end of vn_open code
4132	 */
4133
4134	if ((error = falloc(td, &nfp, &indx)) != 0) {
4135		if (fmode & FWRITE)
4136			vp->v_writecount--;
4137		goto bad;
4138	}
4139	/* An extra reference on `nfp' has been held for us by falloc(). */
4140	fp = nfp;
4141
4142	nfp->f_vnode = vp;
4143	nfp->f_data = vp;
4144	nfp->f_flag = fmode & FMASK;
4145	nfp->f_ops = &vnops;
4146	nfp->f_type = DTYPE_VNODE;
4147	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4148		lf.l_whence = SEEK_SET;
4149		lf.l_start = 0;
4150		lf.l_len = 0;
4151		if (fmode & O_EXLOCK)
4152			lf.l_type = F_WRLCK;
4153		else
4154			lf.l_type = F_RDLCK;
4155		type = F_FLOCK;
4156		if ((fmode & FNONBLOCK) == 0)
4157			type |= F_WAIT;
4158		VOP_UNLOCK(vp, 0, td);
4159		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4160			    type)) != 0) {
4161			/*
4162			 * The lock request failed.  Normally close the
4163			 * descriptor but handle the case where someone might
4164			 * have dup()d or close()d it when we weren't looking.
4165			 */
4166			fdclose(fdp, fp, indx, td);
4167
4168			/*
4169			 * release our private reference
4170			 */
4171			fdrop(fp, td);
4172			goto out;
4173		}
4174		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4175		fp->f_flag |= FHASLOCK;
4176	}
4177
4178	VOP_UNLOCK(vp, 0, td);
4179	fdrop(fp, td);
4180	vfs_rel(mp);
4181	VFS_UNLOCK_GIANT(vfslocked);
4182	td->td_retval[0] = indx;
4183	return (0);
4184
4185bad:
4186	vput(vp);
4187out:
4188	vfs_rel(mp);
4189	VFS_UNLOCK_GIANT(vfslocked);
4190	return (error);
4191}
4192
4193/*
4194 * Stat an (NFS) file handle.
4195 *
4196 * MP SAFE
4197 */
4198#ifndef _SYS_SYSPROTO_H_
4199struct fhstat_args {
4200	struct fhandle *u_fhp;
4201	struct stat *sb;
4202};
4203#endif
4204int
4205fhstat(td, uap)
4206	struct thread *td;
4207	register struct fhstat_args /* {
4208		struct fhandle *u_fhp;
4209		struct stat *sb;
4210	} */ *uap;
4211{
4212	struct stat sb;
4213	fhandle_t fh;
4214	struct mount *mp;
4215	struct vnode *vp;
4216	int vfslocked;
4217	int error;
4218
4219	error = priv_check(td, PRIV_VFS_FHSTAT);
4220	if (error)
4221		return (error);
4222	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4223	if (error)
4224		return (error);
4225	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4226		return (ESTALE);
4227	vfslocked = VFS_LOCK_GIANT(mp);
4228	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4229		vfs_rel(mp);
4230		VFS_UNLOCK_GIANT(vfslocked);
4231		return (error);
4232	}
4233	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4234	vput(vp);
4235	vfs_rel(mp);
4236	VFS_UNLOCK_GIANT(vfslocked);
4237	if (error)
4238		return (error);
4239	error = copyout(&sb, uap->sb, sizeof(sb));
4240	return (error);
4241}
4242
4243/*
4244 * Implement fstatfs() for (NFS) file handles.
4245 *
4246 * MP SAFE
4247 */
4248#ifndef _SYS_SYSPROTO_H_
4249struct fhstatfs_args {
4250	struct fhandle *u_fhp;
4251	struct statfs *buf;
4252};
4253#endif
4254int
4255fhstatfs(td, uap)
4256	struct thread *td;
4257	struct fhstatfs_args /* {
4258		struct fhandle *u_fhp;
4259		struct statfs *buf;
4260	} */ *uap;
4261{
4262	struct statfs sf;
4263	fhandle_t fh;
4264	int error;
4265
4266	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4267	if (error)
4268		return (error);
4269	error = kern_fhstatfs(td, fh, &sf);
4270	if (error)
4271		return (error);
4272	return (copyout(&sf, uap->buf, sizeof(sf)));
4273}
4274
4275int
4276kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4277{
4278	struct statfs *sp;
4279	struct mount *mp;
4280	struct vnode *vp;
4281	int vfslocked;
4282	int error;
4283
4284	error = priv_check(td, PRIV_VFS_FHSTATFS);
4285	if (error)
4286		return (error);
4287	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4288		return (ESTALE);
4289	vfslocked = VFS_LOCK_GIANT(mp);
4290	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4291	if (error) {
4292		VFS_UNLOCK_GIANT(vfslocked);
4293		vfs_rel(mp);
4294		return (error);
4295	}
4296	vput(vp);
4297	error = prison_canseemount(td->td_ucred, mp);
4298	if (error)
4299		goto out;
4300#ifdef MAC
4301	error = mac_check_mount_stat(td->td_ucred, mp);
4302	if (error)
4303		goto out;
4304#endif
4305	/*
4306	 * Set these in case the underlying filesystem fails to do so.
4307	 */
4308	sp = &mp->mnt_stat;
4309	sp->f_version = STATFS_VERSION;
4310	sp->f_namemax = NAME_MAX;
4311	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4312	error = VFS_STATFS(mp, sp, td);
4313	if (error == 0)
4314		*buf = *sp;
4315out:
4316	vfs_rel(mp);
4317	VFS_UNLOCK_GIANT(vfslocked);
4318	return (error);
4319}
4320