vfs_syscalls.c revision 169250
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 169250 2007-05-04 14:23:29Z kib $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/mutex.h>
51#include <sys/sysproto.h>
52#include <sys/namei.h>
53#include <sys/filedesc.h>
54#include <sys/kernel.h>
55#include <sys/fcntl.h>
56#include <sys/file.h>
57#include <sys/filio.h>
58#include <sys/limits.h>
59#include <sys/linker.h>
60#include <sys/stat.h>
61#include <sys/sx.h>
62#include <sys/unistd.h>
63#include <sys/vnode.h>
64#include <sys/priv.h>
65#include <sys/proc.h>
66#include <sys/dirent.h>
67#include <sys/jail.h>
68#include <sys/syscallsubr.h>
69#include <sys/sysctl.h>
70
71#include <machine/stdarg.h>
72
73#include <security/audit/audit.h>
74#include <security/mac/mac_framework.h>
75
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_page.h>
79#include <vm/uma.h>
80
81static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84static int setfmode(struct thread *td, struct vnode *, int);
85static int setfflags(struct thread *td, struct vnode *, int);
86static int setutimes(struct thread *td, struct vnode *,
87    const struct timespec *, int, int);
88static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89    struct thread *td);
90
91/*
92 * The module initialization routine for POSIX asynchronous I/O will
93 * set this to the version of AIO that it implements.  (Zero means
94 * that it is not implemented.)  This value is used here by pathconf()
95 * and in kern_descrip.c by fpathconf().
96 */
97int async_io_version;
98
99#ifdef DEBUG
100static int syncprt = 0;
101SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
102#endif
103
104/*
105 * Sync each mounted filesystem.
106 */
107#ifndef _SYS_SYSPROTO_H_
108struct sync_args {
109	int     dummy;
110};
111#endif
112/* ARGSUSED */
113int
114sync(td, uap)
115	struct thread *td;
116	struct sync_args *uap;
117{
118	struct mount *mp, *nmp;
119	int vfslocked;
120
121	mtx_lock(&mountlist_mtx);
122	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124			nmp = TAILQ_NEXT(mp, mnt_list);
125			continue;
126		}
127		vfslocked = VFS_LOCK_GIANT(mp);
128		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130			MNT_ILOCK(mp);
131			mp->mnt_noasync++;
132			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133			MNT_IUNLOCK(mp);
134			vfs_msync(mp, MNT_NOWAIT);
135			VFS_SYNC(mp, MNT_NOWAIT, td);
136			MNT_ILOCK(mp);
137			mp->mnt_noasync--;
138			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139			    mp->mnt_noasync == 0)
140				mp->mnt_kern_flag |= MNTK_ASYNC;
141			MNT_IUNLOCK(mp);
142			vn_finished_write(mp);
143		}
144		VFS_UNLOCK_GIANT(vfslocked);
145		mtx_lock(&mountlist_mtx);
146		nmp = TAILQ_NEXT(mp, mnt_list);
147		vfs_unbusy(mp, td);
148	}
149	mtx_unlock(&mountlist_mtx);
150	return (0);
151}
152
153/* XXX PRISON: could be per prison flag */
154static int prison_quotas;
155#if 0
156SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
157#endif
158
159/*
160 * Change filesystem quotas.
161 */
162#ifndef _SYS_SYSPROTO_H_
163struct quotactl_args {
164	char *path;
165	int cmd;
166	int uid;
167	caddr_t arg;
168};
169#endif
170int
171quotactl(td, uap)
172	struct thread *td;
173	register struct quotactl_args /* {
174		char *path;
175		int cmd;
176		int uid;
177		caddr_t arg;
178	} */ *uap;
179{
180	struct mount *mp;
181	int vfslocked;
182	int error;
183	struct nameidata nd;
184
185	AUDIT_ARG(cmd, uap->cmd);
186	AUDIT_ARG(uid, uap->uid);
187	if (jailed(td->td_ucred) && !prison_quotas)
188		return (EPERM);
189	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
190	   UIO_USERSPACE, uap->path, td);
191	if ((error = namei(&nd)) != 0)
192		return (error);
193	vfslocked = NDHASGIANT(&nd);
194	NDFREE(&nd, NDF_ONLY_PNBUF);
195	mp = nd.ni_vp->v_mount;
196	if ((error = vfs_busy(mp, 0, NULL, td))) {
197		vrele(nd.ni_vp);
198		VFS_UNLOCK_GIANT(vfslocked);
199		return (error);
200	}
201	vrele(nd.ni_vp);
202	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203	vfs_unbusy(mp, td);
204	VFS_UNLOCK_GIANT(vfslocked);
205	return (error);
206}
207
208/*
209 * Get filesystem statistics.
210 */
211#ifndef _SYS_SYSPROTO_H_
212struct statfs_args {
213	char *path;
214	struct statfs *buf;
215};
216#endif
217int
218statfs(td, uap)
219	struct thread *td;
220	register struct statfs_args /* {
221		char *path;
222		struct statfs *buf;
223	} */ *uap;
224{
225	struct statfs sf;
226	int error;
227
228	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229	if (error == 0)
230		error = copyout(&sf, uap->buf, sizeof(sf));
231	return (error);
232}
233
234int
235kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236    struct statfs *buf)
237{
238	struct mount *mp;
239	struct statfs *sp, sb;
240	int vfslocked;
241	int error;
242	struct nameidata nd;
243
244	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245	    pathseg, path, td);
246	error = namei(&nd);
247	if (error)
248		return (error);
249	vfslocked = NDHASGIANT(&nd);
250	mp = nd.ni_vp->v_mount;
251	vfs_ref(mp);
252	NDFREE(&nd, NDF_ONLY_PNBUF);
253	vput(nd.ni_vp);
254#ifdef MAC
255	error = mac_check_mount_stat(td->td_ucred, mp);
256	if (error)
257		goto out;
258#endif
259	/*
260	 * Set these in case the underlying filesystem fails to do so.
261	 */
262	sp = &mp->mnt_stat;
263	sp->f_version = STATFS_VERSION;
264	sp->f_namemax = NAME_MAX;
265	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
266	error = VFS_STATFS(mp, sp, td);
267	if (error)
268		goto out;
269	if (priv_check(td, PRIV_VFS_GENERATION)) {
270		bcopy(sp, &sb, sizeof(sb));
271		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
272		prison_enforce_statfs(td->td_ucred, mp, &sb);
273		sp = &sb;
274	}
275	*buf = *sp;
276out:
277	vfs_rel(mp);
278	VFS_UNLOCK_GIANT(vfslocked);
279	if (mtx_owned(&Giant))
280		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
281	return (error);
282}
283
284/*
285 * Get filesystem statistics.
286 */
287#ifndef _SYS_SYSPROTO_H_
288struct fstatfs_args {
289	int fd;
290	struct statfs *buf;
291};
292#endif
293int
294fstatfs(td, uap)
295	struct thread *td;
296	register struct fstatfs_args /* {
297		int fd;
298		struct statfs *buf;
299	} */ *uap;
300{
301	struct statfs sf;
302	int error;
303
304	error = kern_fstatfs(td, uap->fd, &sf);
305	if (error == 0)
306		error = copyout(&sf, uap->buf, sizeof(sf));
307	return (error);
308}
309
310int
311kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
312{
313	struct file *fp;
314	struct mount *mp;
315	struct statfs *sp, sb;
316	int vfslocked;
317	struct vnode *vp;
318	int error;
319
320	AUDIT_ARG(fd, fd);
321	error = getvnode(td->td_proc->p_fd, fd, &fp);
322	if (error)
323		return (error);
324	vp = fp->f_vnode;
325	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
326	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
327#ifdef AUDIT
328	AUDIT_ARG(vnode, vp, ARG_VNODE1);
329#endif
330	mp = vp->v_mount;
331	if (mp)
332		vfs_ref(mp);
333	VOP_UNLOCK(vp, 0, td);
334	fdrop(fp, td);
335	if (vp->v_iflag & VI_DOOMED) {
336		error = EBADF;
337		goto out;
338	}
339#ifdef MAC
340	error = mac_check_mount_stat(td->td_ucred, mp);
341	if (error)
342		goto out;
343#endif
344	/*
345	 * Set these in case the underlying filesystem fails to do so.
346	 */
347	sp = &mp->mnt_stat;
348	sp->f_version = STATFS_VERSION;
349	sp->f_namemax = NAME_MAX;
350	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
351	error = VFS_STATFS(mp, sp, td);
352	if (error)
353		goto out;
354	if (priv_check(td, PRIV_VFS_GENERATION)) {
355		bcopy(sp, &sb, sizeof(sb));
356		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
357		prison_enforce_statfs(td->td_ucred, mp, &sb);
358		sp = &sb;
359	}
360	*buf = *sp;
361out:
362	if (mp)
363		vfs_rel(mp);
364	VFS_UNLOCK_GIANT(vfslocked);
365	return (error);
366}
367
368/*
369 * Get statistics on all filesystems.
370 */
371#ifndef _SYS_SYSPROTO_H_
372struct getfsstat_args {
373	struct statfs *buf;
374	long bufsize;
375	int flags;
376};
377#endif
378int
379getfsstat(td, uap)
380	struct thread *td;
381	register struct getfsstat_args /* {
382		struct statfs *buf;
383		long bufsize;
384		int flags;
385	} */ *uap;
386{
387
388	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
389	    uap->flags));
390}
391
392/*
393 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
394 * 	The caller is responsible for freeing memory which will be allocated
395 *	in '*buf'.
396 */
397int
398kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
399    enum uio_seg bufseg, int flags)
400{
401	struct mount *mp, *nmp;
402	struct statfs *sfsp, *sp, sb;
403	size_t count, maxcount;
404	int vfslocked;
405	int error;
406
407	maxcount = bufsize / sizeof(struct statfs);
408	if (bufsize == 0)
409		sfsp = NULL;
410	else if (bufseg == UIO_USERSPACE)
411		sfsp = *buf;
412	else /* if (bufseg == UIO_SYSSPACE) */ {
413		count = 0;
414		mtx_lock(&mountlist_mtx);
415		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
416			count++;
417		}
418		mtx_unlock(&mountlist_mtx);
419		if (maxcount > count)
420			maxcount = count;
421		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
422		    M_WAITOK);
423	}
424	count = 0;
425	mtx_lock(&mountlist_mtx);
426	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
427		if (prison_canseemount(td->td_ucred, mp) != 0) {
428			nmp = TAILQ_NEXT(mp, mnt_list);
429			continue;
430		}
431#ifdef MAC
432		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
433			nmp = TAILQ_NEXT(mp, mnt_list);
434			continue;
435		}
436#endif
437		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
438			nmp = TAILQ_NEXT(mp, mnt_list);
439			continue;
440		}
441		vfslocked = VFS_LOCK_GIANT(mp);
442		if (sfsp && count < maxcount) {
443			sp = &mp->mnt_stat;
444			/*
445			 * Set these in case the underlying filesystem
446			 * fails to do so.
447			 */
448			sp->f_version = STATFS_VERSION;
449			sp->f_namemax = NAME_MAX;
450			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
451			/*
452			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
453			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
454			 * overrides MNT_WAIT.
455			 */
456			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
457			    (flags & MNT_WAIT)) &&
458			    (error = VFS_STATFS(mp, sp, td))) {
459				VFS_UNLOCK_GIANT(vfslocked);
460				mtx_lock(&mountlist_mtx);
461				nmp = TAILQ_NEXT(mp, mnt_list);
462				vfs_unbusy(mp, td);
463				continue;
464			}
465			if (priv_check(td, PRIV_VFS_GENERATION)) {
466				bcopy(sp, &sb, sizeof(sb));
467				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
468				prison_enforce_statfs(td->td_ucred, mp, &sb);
469				sp = &sb;
470			}
471			if (bufseg == UIO_SYSSPACE)
472				bcopy(sp, sfsp, sizeof(*sp));
473			else /* if (bufseg == UIO_USERSPACE) */ {
474				error = copyout(sp, sfsp, sizeof(*sp));
475				if (error) {
476					vfs_unbusy(mp, td);
477					VFS_UNLOCK_GIANT(vfslocked);
478					return (error);
479				}
480			}
481			sfsp++;
482		}
483		VFS_UNLOCK_GIANT(vfslocked);
484		count++;
485		mtx_lock(&mountlist_mtx);
486		nmp = TAILQ_NEXT(mp, mnt_list);
487		vfs_unbusy(mp, td);
488	}
489	mtx_unlock(&mountlist_mtx);
490	if (sfsp && count > maxcount)
491		td->td_retval[0] = maxcount;
492	else
493		td->td_retval[0] = count;
494	return (0);
495}
496
497#ifdef COMPAT_FREEBSD4
498/*
499 * Get old format filesystem statistics.
500 */
501static void cvtstatfs(struct statfs *, struct ostatfs *);
502
503#ifndef _SYS_SYSPROTO_H_
504struct freebsd4_statfs_args {
505	char *path;
506	struct ostatfs *buf;
507};
508#endif
509int
510freebsd4_statfs(td, uap)
511	struct thread *td;
512	struct freebsd4_statfs_args /* {
513		char *path;
514		struct ostatfs *buf;
515	} */ *uap;
516{
517	struct ostatfs osb;
518	struct statfs sf;
519	int error;
520
521	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
522	if (error)
523		return (error);
524	cvtstatfs(&sf, &osb);
525	return (copyout(&osb, uap->buf, sizeof(osb)));
526}
527
528/*
529 * Get filesystem statistics.
530 */
531#ifndef _SYS_SYSPROTO_H_
532struct freebsd4_fstatfs_args {
533	int fd;
534	struct ostatfs *buf;
535};
536#endif
537int
538freebsd4_fstatfs(td, uap)
539	struct thread *td;
540	struct freebsd4_fstatfs_args /* {
541		int fd;
542		struct ostatfs *buf;
543	} */ *uap;
544{
545	struct ostatfs osb;
546	struct statfs sf;
547	int error;
548
549	error = kern_fstatfs(td, uap->fd, &sf);
550	if (error)
551		return (error);
552	cvtstatfs(&sf, &osb);
553	return (copyout(&osb, uap->buf, sizeof(osb)));
554}
555
556/*
557 * Get statistics on all filesystems.
558 */
559#ifndef _SYS_SYSPROTO_H_
560struct freebsd4_getfsstat_args {
561	struct ostatfs *buf;
562	long bufsize;
563	int flags;
564};
565#endif
566int
567freebsd4_getfsstat(td, uap)
568	struct thread *td;
569	register struct freebsd4_getfsstat_args /* {
570		struct ostatfs *buf;
571		long bufsize;
572		int flags;
573	} */ *uap;
574{
575	struct statfs *buf, *sp;
576	struct ostatfs osb;
577	size_t count, size;
578	int error;
579
580	count = uap->bufsize / sizeof(struct ostatfs);
581	size = count * sizeof(struct statfs);
582	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
583	if (size > 0) {
584		count = td->td_retval[0];
585		sp = buf;
586		while (count > 0 && error == 0) {
587			cvtstatfs(sp, &osb);
588			error = copyout(&osb, uap->buf, sizeof(osb));
589			sp++;
590			uap->buf++;
591			count--;
592		}
593		free(buf, M_TEMP);
594	}
595	return (error);
596}
597
598/*
599 * Implement fstatfs() for (NFS) file handles.
600 */
601#ifndef _SYS_SYSPROTO_H_
602struct freebsd4_fhstatfs_args {
603	struct fhandle *u_fhp;
604	struct ostatfs *buf;
605};
606#endif
607int
608freebsd4_fhstatfs(td, uap)
609	struct thread *td;
610	struct freebsd4_fhstatfs_args /* {
611		struct fhandle *u_fhp;
612		struct ostatfs *buf;
613	} */ *uap;
614{
615	struct ostatfs osb;
616	struct statfs sf;
617	fhandle_t fh;
618	int error;
619
620	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
621	if (error)
622		return (error);
623	error = kern_fhstatfs(td, fh, &sf);
624	if (error)
625		return (error);
626	cvtstatfs(&sf, &osb);
627	return (copyout(&osb, uap->buf, sizeof(osb)));
628}
629
630/*
631 * Convert a new format statfs structure to an old format statfs structure.
632 */
633static void
634cvtstatfs(nsp, osp)
635	struct statfs *nsp;
636	struct ostatfs *osp;
637{
638
639	bzero(osp, sizeof(*osp));
640	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
641	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
642	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
643	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
644	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
645	osp->f_files = MIN(nsp->f_files, LONG_MAX);
646	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
647	osp->f_owner = nsp->f_owner;
648	osp->f_type = nsp->f_type;
649	osp->f_flags = nsp->f_flags;
650	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
651	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
652	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
653	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
654	strlcpy(osp->f_fstypename, nsp->f_fstypename,
655	    MIN(MFSNAMELEN, OMFSNAMELEN));
656	strlcpy(osp->f_mntonname, nsp->f_mntonname,
657	    MIN(MNAMELEN, OMNAMELEN));
658	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
659	    MIN(MNAMELEN, OMNAMELEN));
660	osp->f_fsid = nsp->f_fsid;
661}
662#endif /* COMPAT_FREEBSD4 */
663
664/*
665 * Change current working directory to a given file descriptor.
666 */
667#ifndef _SYS_SYSPROTO_H_
668struct fchdir_args {
669	int	fd;
670};
671#endif
672int
673fchdir(td, uap)
674	struct thread *td;
675	struct fchdir_args /* {
676		int fd;
677	} */ *uap;
678{
679	register struct filedesc *fdp = td->td_proc->p_fd;
680	struct vnode *vp, *tdp, *vpold;
681	struct mount *mp;
682	struct file *fp;
683	int vfslocked;
684	int error;
685
686	AUDIT_ARG(fd, uap->fd);
687	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
688		return (error);
689	vp = fp->f_vnode;
690	VREF(vp);
691	fdrop(fp, td);
692	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
693	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
694	AUDIT_ARG(vnode, vp, ARG_VNODE1);
695	error = change_dir(vp, td);
696	while (!error && (mp = vp->v_mountedhere) != NULL) {
697		int tvfslocked;
698		if (vfs_busy(mp, 0, 0, td))
699			continue;
700		tvfslocked = VFS_LOCK_GIANT(mp);
701		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
702		vfs_unbusy(mp, td);
703		if (error) {
704			VFS_UNLOCK_GIANT(tvfslocked);
705			break;
706		}
707		vput(vp);
708		VFS_UNLOCK_GIANT(vfslocked);
709		vp = tdp;
710		vfslocked = tvfslocked;
711	}
712	if (error) {
713		vput(vp);
714		VFS_UNLOCK_GIANT(vfslocked);
715		return (error);
716	}
717	VOP_UNLOCK(vp, 0, td);
718	VFS_UNLOCK_GIANT(vfslocked);
719	FILEDESC_XLOCK(fdp);
720	vpold = fdp->fd_cdir;
721	fdp->fd_cdir = vp;
722	FILEDESC_XUNLOCK(fdp);
723	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
724	vrele(vpold);
725	VFS_UNLOCK_GIANT(vfslocked);
726	return (0);
727}
728
729/*
730 * Change current working directory (``.'').
731 */
732#ifndef _SYS_SYSPROTO_H_
733struct chdir_args {
734	char	*path;
735};
736#endif
737int
738chdir(td, uap)
739	struct thread *td;
740	struct chdir_args /* {
741		char *path;
742	} */ *uap;
743{
744
745	return (kern_chdir(td, uap->path, UIO_USERSPACE));
746}
747
748int
749kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
750{
751	register struct filedesc *fdp = td->td_proc->p_fd;
752	int error;
753	struct nameidata nd;
754	struct vnode *vp;
755	int vfslocked;
756
757	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
758	    pathseg, path, td);
759	if ((error = namei(&nd)) != 0)
760		return (error);
761	vfslocked = NDHASGIANT(&nd);
762	if ((error = change_dir(nd.ni_vp, td)) != 0) {
763		vput(nd.ni_vp);
764		VFS_UNLOCK_GIANT(vfslocked);
765		NDFREE(&nd, NDF_ONLY_PNBUF);
766		return (error);
767	}
768	VOP_UNLOCK(nd.ni_vp, 0, td);
769	VFS_UNLOCK_GIANT(vfslocked);
770	NDFREE(&nd, NDF_ONLY_PNBUF);
771	FILEDESC_XLOCK(fdp);
772	vp = fdp->fd_cdir;
773	fdp->fd_cdir = nd.ni_vp;
774	FILEDESC_XUNLOCK(fdp);
775	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
776	vrele(vp);
777	VFS_UNLOCK_GIANT(vfslocked);
778	return (0);
779}
780
781/*
782 * Helper function for raised chroot(2) security function:  Refuse if
783 * any filedescriptors are open directories.
784 */
785static int
786chroot_refuse_vdir_fds(fdp)
787	struct filedesc *fdp;
788{
789	struct vnode *vp;
790	struct file *fp;
791	int fd;
792
793	FILEDESC_LOCK_ASSERT(fdp);
794
795	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
796		fp = fget_locked(fdp, fd);
797		if (fp == NULL)
798			continue;
799		if (fp->f_type == DTYPE_VNODE) {
800			vp = fp->f_vnode;
801			if (vp->v_type == VDIR)
802				return (EPERM);
803		}
804	}
805	return (0);
806}
807
808/*
809 * This sysctl determines if we will allow a process to chroot(2) if it
810 * has a directory open:
811 *	0: disallowed for all processes.
812 *	1: allowed for processes that were not already chroot(2)'ed.
813 *	2: allowed for all processes.
814 */
815
816static int chroot_allow_open_directories = 1;
817
818SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
819     &chroot_allow_open_directories, 0, "");
820
821/*
822 * Change notion of root (``/'') directory.
823 */
824#ifndef _SYS_SYSPROTO_H_
825struct chroot_args {
826	char	*path;
827};
828#endif
829int
830chroot(td, uap)
831	struct thread *td;
832	struct chroot_args /* {
833		char *path;
834	} */ *uap;
835{
836	int error;
837	struct nameidata nd;
838	int vfslocked;
839
840	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
841	    SUSER_ALLOWJAIL);
842	if (error)
843		return (error);
844	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
845	    UIO_USERSPACE, uap->path, td);
846	error = namei(&nd);
847	if (error)
848		goto error;
849	vfslocked = NDHASGIANT(&nd);
850	if ((error = change_dir(nd.ni_vp, td)) != 0)
851		goto e_vunlock;
852#ifdef MAC
853	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
854		goto e_vunlock;
855#endif
856	VOP_UNLOCK(nd.ni_vp, 0, td);
857	error = change_root(nd.ni_vp, td);
858	vrele(nd.ni_vp);
859	VFS_UNLOCK_GIANT(vfslocked);
860	NDFREE(&nd, NDF_ONLY_PNBUF);
861	return (error);
862e_vunlock:
863	vput(nd.ni_vp);
864	VFS_UNLOCK_GIANT(vfslocked);
865error:
866	NDFREE(&nd, NDF_ONLY_PNBUF);
867	return (error);
868}
869
870/*
871 * Common routine for chroot and chdir.  Callers must provide a locked vnode
872 * instance.
873 */
874int
875change_dir(vp, td)
876	struct vnode *vp;
877	struct thread *td;
878{
879	int error;
880
881	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
882	if (vp->v_type != VDIR)
883		return (ENOTDIR);
884#ifdef MAC
885	error = mac_check_vnode_chdir(td->td_ucred, vp);
886	if (error)
887		return (error);
888#endif
889	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
890	return (error);
891}
892
893/*
894 * Common routine for kern_chroot() and jail_attach().  The caller is
895 * responsible for invoking priv_check() and mac_check_chroot() to authorize
896 * this operation.
897 */
898int
899change_root(vp, td)
900	struct vnode *vp;
901	struct thread *td;
902{
903	struct filedesc *fdp;
904	struct vnode *oldvp;
905	int vfslocked;
906	int error;
907
908	VFS_ASSERT_GIANT(vp->v_mount);
909	fdp = td->td_proc->p_fd;
910	FILEDESC_XLOCK(fdp);
911	if (chroot_allow_open_directories == 0 ||
912	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
913		error = chroot_refuse_vdir_fds(fdp);
914		if (error) {
915			FILEDESC_XUNLOCK(fdp);
916			return (error);
917		}
918	}
919	oldvp = fdp->fd_rdir;
920	fdp->fd_rdir = vp;
921	VREF(fdp->fd_rdir);
922	if (!fdp->fd_jdir) {
923		fdp->fd_jdir = vp;
924		VREF(fdp->fd_jdir);
925	}
926	FILEDESC_XUNLOCK(fdp);
927	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
928	vrele(oldvp);
929	VFS_UNLOCK_GIANT(vfslocked);
930	return (0);
931}
932
933/*
934 * Check permissions, allocate an open file structure, and call the device
935 * open routine if any.
936 */
937#ifndef _SYS_SYSPROTO_H_
938struct open_args {
939	char	*path;
940	int	flags;
941	int	mode;
942};
943#endif
944int
945open(td, uap)
946	struct thread *td;
947	register struct open_args /* {
948		char *path;
949		int flags;
950		int mode;
951	} */ *uap;
952{
953
954	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
955}
956
957int
958kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
959    int mode)
960{
961	struct proc *p = td->td_proc;
962	struct filedesc *fdp = p->p_fd;
963	struct file *fp;
964	struct vnode *vp;
965	struct vattr vat;
966	struct mount *mp;
967	int cmode;
968	struct file *nfp;
969	int type, indx, error;
970	struct flock lf;
971	struct nameidata nd;
972	int vfslocked;
973
974	AUDIT_ARG(fflags, flags);
975	AUDIT_ARG(mode, mode);
976	if ((flags & O_ACCMODE) == O_ACCMODE)
977		return (EINVAL);
978	flags = FFLAGS(flags);
979	error = falloc(td, &nfp, &indx);
980	if (error)
981		return (error);
982	/* An extra reference on `nfp' has been held for us by falloc(). */
983	fp = nfp;
984	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
985	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
986	td->td_dupfd = -1;		/* XXX check for fdopen */
987	FILEDESC_XLOCK(fdp);
988	if (fp != fdp->fd_ofiles[indx]) {
989		FILEDESC_XUNLOCK(fdp);
990		fdrop(fp, td);
991		td->td_retval[0] = indx;
992		return (0);
993	}
994	fdp->fd_ofileflags[indx] |= UF_OPENING;
995	FILEDESC_XUNLOCK(fdp);
996	error = vn_open(&nd, &flags, cmode, indx);
997	FILEDESC_XLOCK(fdp);
998	fdp->fd_ofileflags[indx] &= ~UF_OPENING;
999	FILEDESC_XUNLOCK(fdp);
1000	if (error) {
1001		/*
1002		 * If the vn_open replaced the method vector, something
1003		 * wonderous happened deep below and we just pass it up
1004		 * pretending we know what we do.
1005		 */
1006		if (error == ENXIO && fp->f_ops != &badfileops) {
1007			fdrop(fp, td);
1008			td->td_retval[0] = indx;
1009			return (0);
1010		}
1011
1012		/*
1013		 * handle special fdopen() case.  bleh.  dupfdopen() is
1014		 * responsible for dropping the old contents of ofiles[indx]
1015		 * if it succeeds.
1016		 */
1017		if ((error == ENODEV || error == ENXIO) &&
1018		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1019		    (error =
1020			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1021			td->td_retval[0] = indx;
1022			fdrop(fp, td);
1023			return (0);
1024		}
1025		/*
1026		 * Clean up the descriptor, but only if another thread hadn't
1027		 * replaced or closed it.
1028		 */
1029		fdclose(fdp, fp, indx, td);
1030		fdrop(fp, td);
1031
1032		if (error == ERESTART)
1033			error = EINTR;
1034		return (error);
1035	}
1036	td->td_dupfd = 0;
1037	vfslocked = NDHASGIANT(&nd);
1038	NDFREE(&nd, NDF_ONLY_PNBUF);
1039	vp = nd.ni_vp;
1040
1041	FILE_LOCK(fp);
1042	fp->f_vnode = vp;
1043	if (fp->f_data == NULL)
1044		fp->f_data = vp;
1045	fp->f_flag = flags & FMASK;
1046	fp->f_seqcount = 1;
1047	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1048	if (fp->f_ops == &badfileops)
1049		fp->f_ops = &vnops;
1050	FILE_UNLOCK(fp);
1051
1052	VOP_UNLOCK(vp, 0, td);
1053	if (flags & (O_EXLOCK | O_SHLOCK)) {
1054		lf.l_whence = SEEK_SET;
1055		lf.l_start = 0;
1056		lf.l_len = 0;
1057		if (flags & O_EXLOCK)
1058			lf.l_type = F_WRLCK;
1059		else
1060			lf.l_type = F_RDLCK;
1061		type = F_FLOCK;
1062		if ((flags & FNONBLOCK) == 0)
1063			type |= F_WAIT;
1064		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1065			    type)) != 0)
1066			goto bad;
1067		fp->f_flag |= FHASLOCK;
1068	}
1069	if (flags & O_TRUNC) {
1070		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1071			goto bad;
1072		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1073		VATTR_NULL(&vat);
1074		vat.va_size = 0;
1075		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1076#ifdef MAC
1077		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1078		if (error == 0)
1079#endif
1080			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1081		VOP_UNLOCK(vp, 0, td);
1082		vn_finished_write(mp);
1083		if (error)
1084			goto bad;
1085	}
1086	VFS_UNLOCK_GIANT(vfslocked);
1087	/*
1088	 * Release our private reference, leaving the one associated with
1089	 * the descriptor table intact.
1090	 */
1091	fdrop(fp, td);
1092	td->td_retval[0] = indx;
1093	return (0);
1094bad:
1095	VFS_UNLOCK_GIANT(vfslocked);
1096	fdclose(fdp, fp, indx, td);
1097	fdrop(fp, td);
1098	return (error);
1099}
1100
#ifdef COMPAT_43
/*
 * Old creat(2) entry point: create a file by opening `path' write-only
 * with O_CREAT and O_TRUNC through kern_open().
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1124
1125/*
1126 * Create a special file.
1127 */
1128#ifndef _SYS_SYSPROTO_H_
1129struct mknod_args {
1130	char	*path;
1131	int	mode;
1132	int	dev;
1133};
1134#endif
1135int
1136mknod(td, uap)
1137	struct thread *td;
1138	register struct mknod_args /* {
1139		char *path;
1140		int mode;
1141		int dev;
1142	} */ *uap;
1143{
1144
1145	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1146}
1147
1148int
1149kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1150    int dev)
1151{
1152	struct vnode *vp;
1153	struct mount *mp;
1154	struct vattr vattr;
1155	int error;
1156	int whiteout = 0;
1157	struct nameidata nd;
1158	int vfslocked;
1159
1160	AUDIT_ARG(mode, mode);
1161	AUDIT_ARG(dev, dev);
1162	switch (mode & S_IFMT) {
1163	case S_IFCHR:
1164	case S_IFBLK:
1165		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1166		break;
1167	case S_IFMT:
1168		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1169		break;
1170	case S_IFWHT:
1171		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1172		break;
1173	default:
1174		error = EINVAL;
1175		break;
1176	}
1177	if (error)
1178		return (error);
1179restart:
1180	bwillwrite();
1181	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1182	    pathseg, path, td);
1183	if ((error = namei(&nd)) != 0)
1184		return (error);
1185	vfslocked = NDHASGIANT(&nd);
1186	vp = nd.ni_vp;
1187	if (vp != NULL) {
1188		NDFREE(&nd, NDF_ONLY_PNBUF);
1189		if (vp == nd.ni_dvp)
1190			vrele(nd.ni_dvp);
1191		else
1192			vput(nd.ni_dvp);
1193		vrele(vp);
1194		VFS_UNLOCK_GIANT(vfslocked);
1195		return (EEXIST);
1196	} else {
1197		VATTR_NULL(&vattr);
1198		FILEDESC_SLOCK(td->td_proc->p_fd);
1199		vattr.va_mode = (mode & ALLPERMS) &
1200		    ~td->td_proc->p_fd->fd_cmask;
1201		FILEDESC_SUNLOCK(td->td_proc->p_fd);
1202		vattr.va_rdev = dev;
1203		whiteout = 0;
1204
1205		switch (mode & S_IFMT) {
1206		case S_IFMT:	/* used by badsect to flag bad sectors */
1207			vattr.va_type = VBAD;
1208			break;
1209		case S_IFCHR:
1210			vattr.va_type = VCHR;
1211			break;
1212		case S_IFBLK:
1213			vattr.va_type = VBLK;
1214			break;
1215		case S_IFWHT:
1216			whiteout = 1;
1217			break;
1218		default:
1219			panic("kern_mknod: invalid mode");
1220		}
1221	}
1222	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1223		NDFREE(&nd, NDF_ONLY_PNBUF);
1224		vput(nd.ni_dvp);
1225		VFS_UNLOCK_GIANT(vfslocked);
1226		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1227			return (error);
1228		goto restart;
1229	}
1230#ifdef MAC
1231	if (error == 0 && !whiteout)
1232		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1233		    &nd.ni_cnd, &vattr);
1234#endif
1235	if (!error) {
1236		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1237		if (whiteout)
1238			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1239		else {
1240			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1241						&nd.ni_cnd, &vattr);
1242			if (error == 0)
1243				vput(nd.ni_vp);
1244		}
1245	}
1246	NDFREE(&nd, NDF_ONLY_PNBUF);
1247	vput(nd.ni_dvp);
1248	vn_finished_write(mp);
1249	VFS_UNLOCK_GIANT(vfslocked);
1250	return (error);
1251}
1252
1253/*
1254 * Create a named pipe.
1255 */
1256#ifndef _SYS_SYSPROTO_H_
1257struct mkfifo_args {
1258	char	*path;
1259	int	mode;
1260};
1261#endif
1262int
1263mkfifo(td, uap)
1264	struct thread *td;
1265	register struct mkfifo_args /* {
1266		char *path;
1267		int mode;
1268	} */ *uap;
1269{
1270
1271	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1272}
1273
1274int
1275kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1276{
1277	struct mount *mp;
1278	struct vattr vattr;
1279	int error;
1280	struct nameidata nd;
1281	int vfslocked;
1282
1283	AUDIT_ARG(mode, mode);
1284restart:
1285	bwillwrite();
1286	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1287	    pathseg, path, td);
1288	if ((error = namei(&nd)) != 0)
1289		return (error);
1290	vfslocked = NDHASGIANT(&nd);
1291	if (nd.ni_vp != NULL) {
1292		NDFREE(&nd, NDF_ONLY_PNBUF);
1293		if (nd.ni_vp == nd.ni_dvp)
1294			vrele(nd.ni_dvp);
1295		else
1296			vput(nd.ni_dvp);
1297		vrele(nd.ni_vp);
1298		VFS_UNLOCK_GIANT(vfslocked);
1299		return (EEXIST);
1300	}
1301	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1302		NDFREE(&nd, NDF_ONLY_PNBUF);
1303		vput(nd.ni_dvp);
1304		VFS_UNLOCK_GIANT(vfslocked);
1305		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1306			return (error);
1307		goto restart;
1308	}
1309	VATTR_NULL(&vattr);
1310	vattr.va_type = VFIFO;
1311	FILEDESC_SLOCK(td->td_proc->p_fd);
1312	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1313	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1314#ifdef MAC
1315	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1316	    &vattr);
1317	if (error)
1318		goto out;
1319#endif
1320	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1321	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1322	if (error == 0)
1323		vput(nd.ni_vp);
1324#ifdef MAC
1325out:
1326#endif
1327	vput(nd.ni_dvp);
1328	vn_finished_write(mp);
1329	VFS_UNLOCK_GIANT(vfslocked);
1330	NDFREE(&nd, NDF_ONLY_PNBUF);
1331	return (error);
1332}
1333
1334/*
1335 * Make a hard file link.
1336 */
1337#ifndef _SYS_SYSPROTO_H_
1338struct link_args {
1339	char	*path;
1340	char	*link;
1341};
1342#endif
1343int
1344link(td, uap)
1345	struct thread *td;
1346	register struct link_args /* {
1347		char *path;
1348		char *link;
1349	} */ *uap;
1350{
1351	int error;
1352
1353	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1354	return (error);
1355}
1356
1357static int hardlink_check_uid = 0;
1358SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1359    &hardlink_check_uid, 0,
1360    "Unprivileged processes cannot create hard links to files owned by other "
1361    "users");
1362static int hardlink_check_gid = 0;
1363SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1364    &hardlink_check_gid, 0,
1365    "Unprivileged processes cannot create hard links to files owned by other "
1366    "groups");
1367
1368static int
1369can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1370{
1371	struct vattr va;
1372	int error;
1373
1374	if (!hardlink_check_uid && !hardlink_check_gid)
1375		return (0);
1376
1377	error = VOP_GETATTR(vp, &va, cred, td);
1378	if (error != 0)
1379		return (error);
1380
1381	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1382		error = priv_check_cred(cred, PRIV_VFS_LINK,
1383		    SUSER_ALLOWJAIL);
1384		if (error)
1385			return (error);
1386	}
1387
1388	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1389		error = priv_check_cred(cred, PRIV_VFS_LINK,
1390		    SUSER_ALLOWJAIL);
1391		if (error)
1392			return (error);
1393	}
1394
1395	return (0);
1396}
1397
1398int
1399kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1400{
1401	struct vnode *vp;
1402	struct mount *mp;
1403	struct nameidata nd;
1404	int vfslocked;
1405	int lvfslocked;
1406	int error;
1407
1408	bwillwrite();
1409	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1410	if ((error = namei(&nd)) != 0)
1411		return (error);
1412	vfslocked = NDHASGIANT(&nd);
1413	NDFREE(&nd, NDF_ONLY_PNBUF);
1414	vp = nd.ni_vp;
1415	if (vp->v_type == VDIR) {
1416		vrele(vp);
1417		VFS_UNLOCK_GIANT(vfslocked);
1418		return (EPERM);		/* POSIX */
1419	}
1420	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1421		vrele(vp);
1422		VFS_UNLOCK_GIANT(vfslocked);
1423		return (error);
1424	}
1425	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1426	    segflg, link, td);
1427	if ((error = namei(&nd)) == 0) {
1428		lvfslocked = NDHASGIANT(&nd);
1429		if (nd.ni_vp != NULL) {
1430			if (nd.ni_dvp == nd.ni_vp)
1431				vrele(nd.ni_dvp);
1432			else
1433				vput(nd.ni_dvp);
1434			vrele(nd.ni_vp);
1435			error = EEXIST;
1436		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1437		    == 0) {
1438			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1439			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1440			error = can_hardlink(vp, td, td->td_ucred);
1441			if (error == 0)
1442#ifdef MAC
1443				error = mac_check_vnode_link(td->td_ucred,
1444				    nd.ni_dvp, vp, &nd.ni_cnd);
1445			if (error == 0)
1446#endif
1447				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1448			VOP_UNLOCK(vp, 0, td);
1449			vput(nd.ni_dvp);
1450		}
1451		NDFREE(&nd, NDF_ONLY_PNBUF);
1452		VFS_UNLOCK_GIANT(lvfslocked);
1453	}
1454	vrele(vp);
1455	vn_finished_write(mp);
1456	VFS_UNLOCK_GIANT(vfslocked);
1457	return (error);
1458}
1459
1460/*
1461 * Make a symbolic link.
1462 */
1463#ifndef _SYS_SYSPROTO_H_
1464struct symlink_args {
1465	char	*path;
1466	char	*link;
1467};
1468#endif
1469int
1470symlink(td, uap)
1471	struct thread *td;
1472	register struct symlink_args /* {
1473		char *path;
1474		char *link;
1475	} */ *uap;
1476{
1477
1478	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1479}
1480
1481int
1482kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1483{
1484	struct mount *mp;
1485	struct vattr vattr;
1486	char *syspath;
1487	int error;
1488	struct nameidata nd;
1489	int vfslocked;
1490
1491	if (segflg == UIO_SYSSPACE) {
1492		syspath = path;
1493	} else {
1494		syspath = uma_zalloc(namei_zone, M_WAITOK);
1495		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1496			goto out;
1497	}
1498	AUDIT_ARG(text, syspath);
1499restart:
1500	bwillwrite();
1501	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1502	    segflg, link, td);
1503	if ((error = namei(&nd)) != 0)
1504		goto out;
1505	vfslocked = NDHASGIANT(&nd);
1506	if (nd.ni_vp) {
1507		NDFREE(&nd, NDF_ONLY_PNBUF);
1508		if (nd.ni_vp == nd.ni_dvp)
1509			vrele(nd.ni_dvp);
1510		else
1511			vput(nd.ni_dvp);
1512		vrele(nd.ni_vp);
1513		VFS_UNLOCK_GIANT(vfslocked);
1514		error = EEXIST;
1515		goto out;
1516	}
1517	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1518		NDFREE(&nd, NDF_ONLY_PNBUF);
1519		vput(nd.ni_dvp);
1520		VFS_UNLOCK_GIANT(vfslocked);
1521		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1522			goto out;
1523		goto restart;
1524	}
1525	VATTR_NULL(&vattr);
1526	FILEDESC_SLOCK(td->td_proc->p_fd);
1527	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1528	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1529#ifdef MAC
1530	vattr.va_type = VLNK;
1531	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1532	    &vattr);
1533	if (error)
1534		goto out2;
1535#endif
1536	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1537	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1538	if (error == 0)
1539		vput(nd.ni_vp);
1540#ifdef MAC
1541out2:
1542#endif
1543	NDFREE(&nd, NDF_ONLY_PNBUF);
1544	vput(nd.ni_dvp);
1545	vn_finished_write(mp);
1546	VFS_UNLOCK_GIANT(vfslocked);
1547out:
1548	if (segflg != UIO_SYSSPACE)
1549		uma_zfree(namei_zone, syspath);
1550	return (error);
1551}
1552
1553/*
1554 * Delete a whiteout from the filesystem.
1555 */
1556int
1557undelete(td, uap)
1558	struct thread *td;
1559	register struct undelete_args /* {
1560		char *path;
1561	} */ *uap;
1562{
1563	int error;
1564	struct mount *mp;
1565	struct nameidata nd;
1566	int vfslocked;
1567
1568restart:
1569	bwillwrite();
1570	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1571	    UIO_USERSPACE, uap->path, td);
1572	error = namei(&nd);
1573	if (error)
1574		return (error);
1575	vfslocked = NDHASGIANT(&nd);
1576
1577	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1578		NDFREE(&nd, NDF_ONLY_PNBUF);
1579		if (nd.ni_vp == nd.ni_dvp)
1580			vrele(nd.ni_dvp);
1581		else
1582			vput(nd.ni_dvp);
1583		if (nd.ni_vp)
1584			vrele(nd.ni_vp);
1585		VFS_UNLOCK_GIANT(vfslocked);
1586		return (EEXIST);
1587	}
1588	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1589		NDFREE(&nd, NDF_ONLY_PNBUF);
1590		vput(nd.ni_dvp);
1591		VFS_UNLOCK_GIANT(vfslocked);
1592		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1593			return (error);
1594		goto restart;
1595	}
1596	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1597	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1598	NDFREE(&nd, NDF_ONLY_PNBUF);
1599	vput(nd.ni_dvp);
1600	vn_finished_write(mp);
1601	VFS_UNLOCK_GIANT(vfslocked);
1602	return (error);
1603}
1604
1605/*
1606 * Delete a name from the filesystem.
1607 */
1608#ifndef _SYS_SYSPROTO_H_
1609struct unlink_args {
1610	char	*path;
1611};
1612#endif
1613int
1614unlink(td, uap)
1615	struct thread *td;
1616	struct unlink_args /* {
1617		char *path;
1618	} */ *uap;
1619{
1620	int error;
1621
1622	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1623	return (error);
1624}
1625
1626int
1627kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1628{
1629	struct mount *mp;
1630	struct vnode *vp;
1631	int error;
1632	struct nameidata nd;
1633	int vfslocked;
1634
1635restart:
1636	bwillwrite();
1637	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1638	    pathseg, path, td);
1639	if ((error = namei(&nd)) != 0)
1640		return (error == EINVAL ? EPERM : error);
1641	vfslocked = NDHASGIANT(&nd);
1642	vp = nd.ni_vp;
1643	if (vp->v_type == VDIR)
1644		error = EPERM;		/* POSIX */
1645	else {
1646		/*
1647		 * The root of a mounted filesystem cannot be deleted.
1648		 *
1649		 * XXX: can this only be a VDIR case?
1650		 */
1651		if (vp->v_vflag & VV_ROOT)
1652			error = EBUSY;
1653	}
1654	if (error == 0) {
1655		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1656			NDFREE(&nd, NDF_ONLY_PNBUF);
1657			vput(nd.ni_dvp);
1658			if (vp == nd.ni_dvp)
1659				vrele(vp);
1660			else
1661				vput(vp);
1662			VFS_UNLOCK_GIANT(vfslocked);
1663			if ((error = vn_start_write(NULL, &mp,
1664			    V_XSLEEP | PCATCH)) != 0)
1665				return (error);
1666			goto restart;
1667		}
1668#ifdef MAC
1669		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1670		    &nd.ni_cnd);
1671		if (error)
1672			goto out;
1673#endif
1674		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1675		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1676#ifdef MAC
1677out:
1678#endif
1679		vn_finished_write(mp);
1680	}
1681	NDFREE(&nd, NDF_ONLY_PNBUF);
1682	vput(nd.ni_dvp);
1683	if (vp == nd.ni_dvp)
1684		vrele(vp);
1685	else
1686		vput(vp);
1687	VFS_UNLOCK_GIANT(vfslocked);
1688	return (error);
1689}
1690
1691/*
1692 * Reposition read/write file offset.
1693 */
1694#ifndef _SYS_SYSPROTO_H_
1695struct lseek_args {
1696	int	fd;
1697	int	pad;
1698	off_t	offset;
1699	int	whence;
1700};
1701#endif
1702int
1703lseek(td, uap)
1704	struct thread *td;
1705	register struct lseek_args /* {
1706		int fd;
1707		int pad;
1708		off_t offset;
1709		int whence;
1710	} */ *uap;
1711{
1712	struct ucred *cred = td->td_ucred;
1713	struct file *fp;
1714	struct vnode *vp;
1715	struct vattr vattr;
1716	off_t offset;
1717	int error, noneg;
1718	int vfslocked;
1719
1720	if ((error = fget(td, uap->fd, &fp)) != 0)
1721		return (error);
1722	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1723		fdrop(fp, td);
1724		return (ESPIPE);
1725	}
1726	vp = fp->f_vnode;
1727	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1728	noneg = (vp->v_type != VCHR);
1729	offset = uap->offset;
1730	switch (uap->whence) {
1731	case L_INCR:
1732		if (noneg &&
1733		    (fp->f_offset < 0 ||
1734		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1735			error = EOVERFLOW;
1736			break;
1737		}
1738		offset += fp->f_offset;
1739		break;
1740	case L_XTND:
1741		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1742		error = VOP_GETATTR(vp, &vattr, cred, td);
1743		VOP_UNLOCK(vp, 0, td);
1744		if (error)
1745			break;
1746		if (noneg &&
1747		    (vattr.va_size > OFF_MAX ||
1748		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1749			error = EOVERFLOW;
1750			break;
1751		}
1752		offset += vattr.va_size;
1753		break;
1754	case L_SET:
1755		break;
1756	case SEEK_DATA:
1757		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1758		break;
1759	case SEEK_HOLE:
1760		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1761		break;
1762	default:
1763		error = EINVAL;
1764	}
1765	if (error == 0 && noneg && offset < 0)
1766		error = EINVAL;
1767	if (error != 0)
1768		goto drop;
1769	fp->f_offset = offset;
1770	*(off_t *)(td->td_retval) = fp->f_offset;
1771drop:
1772	fdrop(fp, td);
1773	VFS_UNLOCK_GIANT(vfslocked);
1774	return (error);
1775}
1776
1777#if defined(COMPAT_43)
1778/*
1779 * Reposition read/write file offset.
1780 */
1781#ifndef _SYS_SYSPROTO_H_
1782struct olseek_args {
1783	int	fd;
1784	long	offset;
1785	int	whence;
1786};
1787#endif
1788int
1789olseek(td, uap)
1790	struct thread *td;
1791	register struct olseek_args /* {
1792		int fd;
1793		long offset;
1794		int whence;
1795	} */ *uap;
1796{
1797	struct lseek_args /* {
1798		int fd;
1799		int pad;
1800		off_t offset;
1801		int whence;
1802	} */ nuap;
1803	int error;
1804
1805	nuap.fd = uap->fd;
1806	nuap.offset = uap->offset;
1807	nuap.whence = uap->whence;
1808	error = lseek(td, &nuap);
1809	return (error);
1810}
1811#endif /* COMPAT_43 */
1812
1813/*
1814 * Check access permissions using passed credentials.
1815 */
1816static int
1817vn_access(vp, user_flags, cred, td)
1818	struct vnode	*vp;
1819	int		user_flags;
1820	struct ucred	*cred;
1821	struct thread	*td;
1822{
1823	int error, flags;
1824
1825	/* Flags == 0 means only check for existence. */
1826	error = 0;
1827	if (user_flags) {
1828		flags = 0;
1829		if (user_flags & R_OK)
1830			flags |= VREAD;
1831		if (user_flags & W_OK)
1832			flags |= VWRITE;
1833		if (user_flags & X_OK)
1834			flags |= VEXEC;
1835#ifdef MAC
1836		error = mac_check_vnode_access(cred, vp, flags);
1837		if (error)
1838			return (error);
1839#endif
1840		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1841			error = VOP_ACCESS(vp, flags, cred, td);
1842	}
1843	return (error);
1844}
1845
1846/*
1847 * Check access permissions using "real" credentials.
1848 */
1849#ifndef _SYS_SYSPROTO_H_
1850struct access_args {
1851	char	*path;
1852	int	flags;
1853};
1854#endif
1855int
1856access(td, uap)
1857	struct thread *td;
1858	register struct access_args /* {
1859		char *path;
1860		int flags;
1861	} */ *uap;
1862{
1863
1864	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1865}
1866
1867int
1868kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1869{
1870	struct ucred *cred, *tmpcred;
1871	register struct vnode *vp;
1872	struct nameidata nd;
1873	int vfslocked;
1874	int error;
1875
1876	/*
1877	 * Create and modify a temporary credential instead of one that
1878	 * is potentially shared.  This could also mess up socket
1879	 * buffer accounting which can run in an interrupt context.
1880	 */
1881	cred = td->td_ucred;
1882	tmpcred = crdup(cred);
1883	tmpcred->cr_uid = cred->cr_ruid;
1884	tmpcred->cr_groups[0] = cred->cr_rgid;
1885	td->td_ucred = tmpcred;
1886	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1887	    pathseg, path, td);
1888	if ((error = namei(&nd)) != 0)
1889		goto out1;
1890	vfslocked = NDHASGIANT(&nd);
1891	vp = nd.ni_vp;
1892
1893	error = vn_access(vp, flags, tmpcred, td);
1894	NDFREE(&nd, NDF_ONLY_PNBUF);
1895	vput(vp);
1896	VFS_UNLOCK_GIANT(vfslocked);
1897out1:
1898	td->td_ucred = cred;
1899	crfree(tmpcred);
1900	return (error);
1901}
1902
1903/*
1904 * Check access permissions using "effective" credentials.
1905 */
1906#ifndef _SYS_SYSPROTO_H_
1907struct eaccess_args {
1908	char	*path;
1909	int	flags;
1910};
1911#endif
1912int
1913eaccess(td, uap)
1914	struct thread *td;
1915	register struct eaccess_args /* {
1916		char *path;
1917		int flags;
1918	} */ *uap;
1919{
1920
1921	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1922}
1923
1924int
1925kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1926{
1927	struct nameidata nd;
1928	struct vnode *vp;
1929	int vfslocked;
1930	int error;
1931
1932	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1933	    pathseg, path, td);
1934	if ((error = namei(&nd)) != 0)
1935		return (error);
1936	vp = nd.ni_vp;
1937	vfslocked = NDHASGIANT(&nd);
1938	error = vn_access(vp, flags, td->td_ucred, td);
1939	NDFREE(&nd, NDF_ONLY_PNBUF);
1940	vput(vp);
1941	VFS_UNLOCK_GIANT(vfslocked);
1942	return (error);
1943}
1944
1945#if defined(COMPAT_43)
1946/*
1947 * Get file status; this version follows links.
1948 */
1949#ifndef _SYS_SYSPROTO_H_
1950struct ostat_args {
1951	char	*path;
1952	struct ostat *ub;
1953};
1954#endif
1955int
1956ostat(td, uap)
1957	struct thread *td;
1958	register struct ostat_args /* {
1959		char *path;
1960		struct ostat *ub;
1961	} */ *uap;
1962{
1963	struct stat sb;
1964	struct ostat osb;
1965	int error;
1966
1967	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1968	if (error)
1969		return (error);
1970	cvtstat(&sb, &osb);
1971	error = copyout(&osb, uap->ub, sizeof (osb));
1972	return (error);
1973}
1974
1975/*
1976 * Get file status; this version does not follow links.
1977 */
1978#ifndef _SYS_SYSPROTO_H_
1979struct olstat_args {
1980	char	*path;
1981	struct ostat *ub;
1982};
1983#endif
1984int
1985olstat(td, uap)
1986	struct thread *td;
1987	register struct olstat_args /* {
1988		char *path;
1989		struct ostat *ub;
1990	} */ *uap;
1991{
1992	struct stat sb;
1993	struct ostat osb;
1994	int error;
1995
1996	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1997	if (error)
1998		return (error);
1999	cvtstat(&sb, &osb);
2000	error = copyout(&osb, uap->ub, sizeof (osb));
2001	return (error);
2002}
2003
2004/*
2005 * Convert from an old to a new stat structure.
2006 */
2007void
2008cvtstat(st, ost)
2009	struct stat *st;
2010	struct ostat *ost;
2011{
2012
2013	ost->st_dev = st->st_dev;
2014	ost->st_ino = st->st_ino;
2015	ost->st_mode = st->st_mode;
2016	ost->st_nlink = st->st_nlink;
2017	ost->st_uid = st->st_uid;
2018	ost->st_gid = st->st_gid;
2019	ost->st_rdev = st->st_rdev;
2020	if (st->st_size < (quad_t)1 << 32)
2021		ost->st_size = st->st_size;
2022	else
2023		ost->st_size = -2;
2024	ost->st_atime = st->st_atime;
2025	ost->st_mtime = st->st_mtime;
2026	ost->st_ctime = st->st_ctime;
2027	ost->st_blksize = st->st_blksize;
2028	ost->st_blocks = st->st_blocks;
2029	ost->st_flags = st->st_flags;
2030	ost->st_gen = st->st_gen;
2031}
2032#endif /* COMPAT_43 */
2033
2034/*
2035 * Get file status; this version follows links.
2036 */
2037#ifndef _SYS_SYSPROTO_H_
2038struct stat_args {
2039	char	*path;
2040	struct stat *ub;
2041};
2042#endif
2043int
2044stat(td, uap)
2045	struct thread *td;
2046	register struct stat_args /* {
2047		char *path;
2048		struct stat *ub;
2049	} */ *uap;
2050{
2051	struct stat sb;
2052	int error;
2053
2054	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2055	if (error == 0)
2056		error = copyout(&sb, uap->ub, sizeof (sb));
2057	return (error);
2058}
2059
2060int
2061kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2062{
2063	struct nameidata nd;
2064	struct stat sb;
2065	int error, vfslocked;
2066
2067	NDINIT(&nd, LOOKUP,
2068	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2069	    pathseg, path, td);
2070	if ((error = namei(&nd)) != 0)
2071		return (error);
2072	vfslocked = NDHASGIANT(&nd);
2073	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2074	NDFREE(&nd, NDF_ONLY_PNBUF);
2075	vput(nd.ni_vp);
2076	VFS_UNLOCK_GIANT(vfslocked);
2077	if (mtx_owned(&Giant))
2078		printf("stat(%d): %s\n", vfslocked, path);
2079	if (error)
2080		return (error);
2081	*sbp = sb;
2082	return (0);
2083}
2084
2085/*
2086 * Get file status; this version does not follow links.
2087 */
2088#ifndef _SYS_SYSPROTO_H_
2089struct lstat_args {
2090	char	*path;
2091	struct stat *ub;
2092};
2093#endif
2094int
2095lstat(td, uap)
2096	struct thread *td;
2097	register struct lstat_args /* {
2098		char *path;
2099		struct stat *ub;
2100	} */ *uap;
2101{
2102	struct stat sb;
2103	int error;
2104
2105	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2106	if (error == 0)
2107		error = copyout(&sb, uap->ub, sizeof (sb));
2108	return (error);
2109}
2110
2111int
2112kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2113{
2114	struct vnode *vp;
2115	struct stat sb;
2116	struct nameidata nd;
2117	int error, vfslocked;
2118
2119	NDINIT(&nd, LOOKUP,
2120	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2121	    pathseg, path, td);
2122	if ((error = namei(&nd)) != 0)
2123		return (error);
2124	vfslocked = NDHASGIANT(&nd);
2125	vp = nd.ni_vp;
2126	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2127	NDFREE(&nd, NDF_ONLY_PNBUF);
2128	vput(vp);
2129	VFS_UNLOCK_GIANT(vfslocked);
2130	if (error)
2131		return (error);
2132	*sbp = sb;
2133	return (0);
2134}
2135
2136/*
2137 * Implementation of the NetBSD [l]stat() functions.
2138 */
2139void
2140cvtnstat(sb, nsb)
2141	struct stat *sb;
2142	struct nstat *nsb;
2143{
2144	bzero(nsb, sizeof *nsb);
2145	nsb->st_dev = sb->st_dev;
2146	nsb->st_ino = sb->st_ino;
2147	nsb->st_mode = sb->st_mode;
2148	nsb->st_nlink = sb->st_nlink;
2149	nsb->st_uid = sb->st_uid;
2150	nsb->st_gid = sb->st_gid;
2151	nsb->st_rdev = sb->st_rdev;
2152	nsb->st_atimespec = sb->st_atimespec;
2153	nsb->st_mtimespec = sb->st_mtimespec;
2154	nsb->st_ctimespec = sb->st_ctimespec;
2155	nsb->st_size = sb->st_size;
2156	nsb->st_blocks = sb->st_blocks;
2157	nsb->st_blksize = sb->st_blksize;
2158	nsb->st_flags = sb->st_flags;
2159	nsb->st_gen = sb->st_gen;
2160	nsb->st_birthtimespec = sb->st_birthtimespec;
2161}
2162
2163#ifndef _SYS_SYSPROTO_H_
2164struct nstat_args {
2165	char	*path;
2166	struct nstat *ub;
2167};
2168#endif
2169int
2170nstat(td, uap)
2171	struct thread *td;
2172	register struct nstat_args /* {
2173		char *path;
2174		struct nstat *ub;
2175	} */ *uap;
2176{
2177	struct stat sb;
2178	struct nstat nsb;
2179	int error;
2180
2181	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2182	if (error)
2183		return (error);
2184	cvtnstat(&sb, &nsb);
2185	error = copyout(&nsb, uap->ub, sizeof (nsb));
2186	return (error);
2187}
2188
2189/*
2190 * NetBSD lstat.  Get file status; this version does not follow links.
2191 */
2192#ifndef _SYS_SYSPROTO_H_
2193struct lstat_args {
2194	char	*path;
2195	struct stat *ub;
2196};
2197#endif
2198int
2199nlstat(td, uap)
2200	struct thread *td;
2201	register struct nlstat_args /* {
2202		char *path;
2203		struct nstat *ub;
2204	} */ *uap;
2205{
2206	struct stat sb;
2207	struct nstat nsb;
2208	int error;
2209
2210	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2211	if (error)
2212		return (error);
2213	cvtnstat(&sb, &nsb);
2214	error = copyout(&nsb, uap->ub, sizeof (nsb));
2215	return (error);
2216}
2217
2218/*
2219 * Get configurable pathname variables.
2220 */
2221#ifndef _SYS_SYSPROTO_H_
2222struct pathconf_args {
2223	char	*path;
2224	int	name;
2225};
2226#endif
2227int
2228pathconf(td, uap)
2229	struct thread *td;
2230	register struct pathconf_args /* {
2231		char *path;
2232		int name;
2233	} */ *uap;
2234{
2235
2236	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2237}
2238
2239int
2240kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2241{
2242	struct nameidata nd;
2243	int error, vfslocked;
2244
2245	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2246	    pathseg, path, td);
2247	if ((error = namei(&nd)) != 0)
2248		return (error);
2249	vfslocked = NDHASGIANT(&nd);
2250	NDFREE(&nd, NDF_ONLY_PNBUF);
2251
2252	/* If asynchronous I/O is available, it works for all files. */
2253	if (name == _PC_ASYNC_IO)
2254		td->td_retval[0] = async_io_version;
2255	else
2256		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2257	vput(nd.ni_vp);
2258	VFS_UNLOCK_GIANT(vfslocked);
2259	return (error);
2260}
2261
2262/*
2263 * Return target name of a symbolic link.
2264 */
2265#ifndef _SYS_SYSPROTO_H_
2266struct readlink_args {
2267	char	*path;
2268	char	*buf;
2269	int	count;
2270};
2271#endif
2272int
2273readlink(td, uap)
2274	struct thread *td;
2275	register struct readlink_args /* {
2276		char *path;
2277		char *buf;
2278		int count;
2279	} */ *uap;
2280{
2281
2282	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2283	    UIO_USERSPACE, uap->count));
2284}
2285
2286int
2287kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2288    enum uio_seg bufseg, int count)
2289{
2290	register struct vnode *vp;
2291	struct iovec aiov;
2292	struct uio auio;
2293	int error;
2294	struct nameidata nd;
2295	int vfslocked;
2296
2297	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2298	    pathseg, path, td);
2299	if ((error = namei(&nd)) != 0)
2300		return (error);
2301	NDFREE(&nd, NDF_ONLY_PNBUF);
2302	vfslocked = NDHASGIANT(&nd);
2303	vp = nd.ni_vp;
2304#ifdef MAC
2305	error = mac_check_vnode_readlink(td->td_ucred, vp);
2306	if (error) {
2307		vput(vp);
2308		VFS_UNLOCK_GIANT(vfslocked);
2309		return (error);
2310	}
2311#endif
2312	if (vp->v_type != VLNK)
2313		error = EINVAL;
2314	else {
2315		aiov.iov_base = buf;
2316		aiov.iov_len = count;
2317		auio.uio_iov = &aiov;
2318		auio.uio_iovcnt = 1;
2319		auio.uio_offset = 0;
2320		auio.uio_rw = UIO_READ;
2321		auio.uio_segflg = bufseg;
2322		auio.uio_td = td;
2323		auio.uio_resid = count;
2324		error = VOP_READLINK(vp, &auio, td->td_ucred);
2325	}
2326	vput(vp);
2327	VFS_UNLOCK_GIANT(vfslocked);
2328	td->td_retval[0] = count - auio.uio_resid;
2329	return (error);
2330}
2331
2332/*
2333 * Common implementation code for chflags() and fchflags().
2334 */
2335static int
2336setfflags(td, vp, flags)
2337	struct thread *td;
2338	struct vnode *vp;
2339	int flags;
2340{
2341	int error;
2342	struct mount *mp;
2343	struct vattr vattr;
2344
2345	/*
2346	 * Prevent non-root users from setting flags on devices.  When
2347	 * a device is reused, users can retain ownership of the device
2348	 * if they are allowed to set flags and programs assume that
2349	 * chown can't fail when done as root.
2350	 */
2351	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2352		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2353		    SUSER_ALLOWJAIL);
2354		if (error)
2355			return (error);
2356	}
2357
2358	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2359		return (error);
2360	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2361	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2362	VATTR_NULL(&vattr);
2363	vattr.va_flags = flags;
2364#ifdef MAC
2365	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2366	if (error == 0)
2367#endif
2368		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2369	VOP_UNLOCK(vp, 0, td);
2370	vn_finished_write(mp);
2371	return (error);
2372}
2373
2374/*
2375 * Change flags of a file given a path name.
2376 */
2377#ifndef _SYS_SYSPROTO_H_
2378struct chflags_args {
2379	char	*path;
2380	int	flags;
2381};
2382#endif
2383int
2384chflags(td, uap)
2385	struct thread *td;
2386	register struct chflags_args /* {
2387		char *path;
2388		int flags;
2389	} */ *uap;
2390{
2391	int error;
2392	struct nameidata nd;
2393	int vfslocked;
2394
2395	AUDIT_ARG(fflags, uap->flags);
2396	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2397	    uap->path, td);
2398	if ((error = namei(&nd)) != 0)
2399		return (error);
2400	NDFREE(&nd, NDF_ONLY_PNBUF);
2401	vfslocked = NDHASGIANT(&nd);
2402	error = setfflags(td, nd.ni_vp, uap->flags);
2403	vrele(nd.ni_vp);
2404	VFS_UNLOCK_GIANT(vfslocked);
2405	return (error);
2406}
2407
2408/*
2409 * Same as chflags() but doesn't follow symlinks.
2410 */
2411int
2412lchflags(td, uap)
2413	struct thread *td;
2414	register struct lchflags_args /* {
2415		char *path;
2416		int flags;
2417	} */ *uap;
2418{
2419	int error;
2420	struct nameidata nd;
2421	int vfslocked;
2422
2423	AUDIT_ARG(fflags, uap->flags);
2424	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2425	    uap->path, td);
2426	if ((error = namei(&nd)) != 0)
2427		return (error);
2428	vfslocked = NDHASGIANT(&nd);
2429	NDFREE(&nd, NDF_ONLY_PNBUF);
2430	error = setfflags(td, nd.ni_vp, uap->flags);
2431	vrele(nd.ni_vp);
2432	VFS_UNLOCK_GIANT(vfslocked);
2433	return (error);
2434}
2435
2436/*
2437 * Change flags of a file given a file descriptor.
2438 */
2439#ifndef _SYS_SYSPROTO_H_
2440struct fchflags_args {
2441	int	fd;
2442	int	flags;
2443};
2444#endif
2445int
2446fchflags(td, uap)
2447	struct thread *td;
2448	register struct fchflags_args /* {
2449		int fd;
2450		int flags;
2451	} */ *uap;
2452{
2453	struct file *fp;
2454	int vfslocked;
2455	int error;
2456
2457	AUDIT_ARG(fd, uap->fd);
2458	AUDIT_ARG(fflags, uap->flags);
2459	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2460		return (error);
2461	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2462#ifdef AUDIT
2463	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2464	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2465	VOP_UNLOCK(fp->f_vnode, 0, td);
2466#endif
2467	error = setfflags(td, fp->f_vnode, uap->flags);
2468	VFS_UNLOCK_GIANT(vfslocked);
2469	fdrop(fp, td);
2470	return (error);
2471}
2472
2473/*
2474 * Common implementation code for chmod(), lchmod() and fchmod().
2475 */
2476static int
2477setfmode(td, vp, mode)
2478	struct thread *td;
2479	struct vnode *vp;
2480	int mode;
2481{
2482	int error;
2483	struct mount *mp;
2484	struct vattr vattr;
2485
2486	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2487		return (error);
2488	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2489	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2490	VATTR_NULL(&vattr);
2491	vattr.va_mode = mode & ALLPERMS;
2492#ifdef MAC
2493	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2494	if (error == 0)
2495#endif
2496		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2497	VOP_UNLOCK(vp, 0, td);
2498	vn_finished_write(mp);
2499	return (error);
2500}
2501
2502/*
2503 * Change mode of a file given path name.
2504 */
2505#ifndef _SYS_SYSPROTO_H_
2506struct chmod_args {
2507	char	*path;
2508	int	mode;
2509};
2510#endif
2511int
2512chmod(td, uap)
2513	struct thread *td;
2514	register struct chmod_args /* {
2515		char *path;
2516		int mode;
2517	} */ *uap;
2518{
2519
2520	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2521}
2522
2523int
2524kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2525{
2526	int error;
2527	struct nameidata nd;
2528	int vfslocked;
2529
2530	AUDIT_ARG(mode, mode);
2531	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2532	if ((error = namei(&nd)) != 0)
2533		return (error);
2534	vfslocked = NDHASGIANT(&nd);
2535	NDFREE(&nd, NDF_ONLY_PNBUF);
2536	error = setfmode(td, nd.ni_vp, mode);
2537	vrele(nd.ni_vp);
2538	VFS_UNLOCK_GIANT(vfslocked);
2539	return (error);
2540}
2541
2542/*
2543 * Change mode of a file given path name (don't follow links.)
2544 */
2545#ifndef _SYS_SYSPROTO_H_
2546struct lchmod_args {
2547	char	*path;
2548	int	mode;
2549};
2550#endif
2551int
2552lchmod(td, uap)
2553	struct thread *td;
2554	register struct lchmod_args /* {
2555		char *path;
2556		int mode;
2557	} */ *uap;
2558{
2559	int error;
2560	struct nameidata nd;
2561	int vfslocked;
2562
2563	AUDIT_ARG(mode, (mode_t)uap->mode);
2564	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2565	    uap->path, td);
2566	if ((error = namei(&nd)) != 0)
2567		return (error);
2568	vfslocked = NDHASGIANT(&nd);
2569	NDFREE(&nd, NDF_ONLY_PNBUF);
2570	error = setfmode(td, nd.ni_vp, uap->mode);
2571	vrele(nd.ni_vp);
2572	VFS_UNLOCK_GIANT(vfslocked);
2573	return (error);
2574}
2575
2576/*
2577 * Change mode of a file given a file descriptor.
2578 */
2579#ifndef _SYS_SYSPROTO_H_
2580struct fchmod_args {
2581	int	fd;
2582	int	mode;
2583};
2584#endif
2585int
2586fchmod(td, uap)
2587	struct thread *td;
2588	register struct fchmod_args /* {
2589		int fd;
2590		int mode;
2591	} */ *uap;
2592{
2593	struct file *fp;
2594	int vfslocked;
2595	int error;
2596
2597	AUDIT_ARG(fd, uap->fd);
2598	AUDIT_ARG(mode, uap->mode);
2599	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2600		return (error);
2601	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2602#ifdef AUDIT
2603	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2604	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2605	VOP_UNLOCK(fp->f_vnode, 0, td);
2606#endif
2607	error = setfmode(td, fp->f_vnode, uap->mode);
2608	VFS_UNLOCK_GIANT(vfslocked);
2609	fdrop(fp, td);
2610	return (error);
2611}
2612
2613/*
2614 * Common implementation for chown(), lchown(), and fchown()
2615 */
2616static int
2617setfown(td, vp, uid, gid)
2618	struct thread *td;
2619	struct vnode *vp;
2620	uid_t uid;
2621	gid_t gid;
2622{
2623	int error;
2624	struct mount *mp;
2625	struct vattr vattr;
2626
2627	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2628		return (error);
2629	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2630	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2631	VATTR_NULL(&vattr);
2632	vattr.va_uid = uid;
2633	vattr.va_gid = gid;
2634#ifdef MAC
2635	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2636	    vattr.va_gid);
2637	if (error == 0)
2638#endif
2639		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2640	VOP_UNLOCK(vp, 0, td);
2641	vn_finished_write(mp);
2642	return (error);
2643}
2644
2645/*
2646 * Set ownership given a path name.
2647 */
2648#ifndef _SYS_SYSPROTO_H_
2649struct chown_args {
2650	char	*path;
2651	int	uid;
2652	int	gid;
2653};
2654#endif
2655int
2656chown(td, uap)
2657	struct thread *td;
2658	register struct chown_args /* {
2659		char *path;
2660		int uid;
2661		int gid;
2662	} */ *uap;
2663{
2664
2665	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2666}
2667
2668int
2669kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2670    int gid)
2671{
2672	int error;
2673	struct nameidata nd;
2674	int vfslocked;
2675
2676	AUDIT_ARG(owner, uid, gid);
2677	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2678	if ((error = namei(&nd)) != 0)
2679		return (error);
2680	vfslocked = NDHASGIANT(&nd);
2681	NDFREE(&nd, NDF_ONLY_PNBUF);
2682	error = setfown(td, nd.ni_vp, uid, gid);
2683	vrele(nd.ni_vp);
2684	VFS_UNLOCK_GIANT(vfslocked);
2685	return (error);
2686}
2687
2688/*
2689 * Set ownership given a path name, do not cross symlinks.
2690 */
2691#ifndef _SYS_SYSPROTO_H_
2692struct lchown_args {
2693	char	*path;
2694	int	uid;
2695	int	gid;
2696};
2697#endif
2698int
2699lchown(td, uap)
2700	struct thread *td;
2701	register struct lchown_args /* {
2702		char *path;
2703		int uid;
2704		int gid;
2705	} */ *uap;
2706{
2707
2708	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2709}
2710
2711int
2712kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2713    int gid)
2714{
2715	int error;
2716	struct nameidata nd;
2717	int vfslocked;
2718
2719	AUDIT_ARG(owner, uid, gid);
2720	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2721	if ((error = namei(&nd)) != 0)
2722		return (error);
2723	vfslocked = NDHASGIANT(&nd);
2724	NDFREE(&nd, NDF_ONLY_PNBUF);
2725	error = setfown(td, nd.ni_vp, uid, gid);
2726	vrele(nd.ni_vp);
2727	VFS_UNLOCK_GIANT(vfslocked);
2728	return (error);
2729}
2730
2731/*
2732 * Set ownership given a file descriptor.
2733 */
2734#ifndef _SYS_SYSPROTO_H_
2735struct fchown_args {
2736	int	fd;
2737	int	uid;
2738	int	gid;
2739};
2740#endif
2741int
2742fchown(td, uap)
2743	struct thread *td;
2744	register struct fchown_args /* {
2745		int fd;
2746		int uid;
2747		int gid;
2748	} */ *uap;
2749{
2750	struct file *fp;
2751	int vfslocked;
2752	int error;
2753
2754	AUDIT_ARG(fd, uap->fd);
2755	AUDIT_ARG(owner, uap->uid, uap->gid);
2756	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2757		return (error);
2758	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2759#ifdef AUDIT
2760	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2761	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2762	VOP_UNLOCK(fp->f_vnode, 0, td);
2763#endif
2764	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2765	VFS_UNLOCK_GIANT(vfslocked);
2766	fdrop(fp, td);
2767	return (error);
2768}
2769
2770/*
2771 * Common implementation code for utimes(), lutimes(), and futimes().
2772 */
2773static int
2774getutimes(usrtvp, tvpseg, tsp)
2775	const struct timeval *usrtvp;
2776	enum uio_seg tvpseg;
2777	struct timespec *tsp;
2778{
2779	struct timeval tv[2];
2780	const struct timeval *tvp;
2781	int error;
2782
2783	if (usrtvp == NULL) {
2784		microtime(&tv[0]);
2785		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2786		tsp[1] = tsp[0];
2787	} else {
2788		if (tvpseg == UIO_SYSSPACE) {
2789			tvp = usrtvp;
2790		} else {
2791			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2792				return (error);
2793			tvp = tv;
2794		}
2795
2796		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2797		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2798			return (EINVAL);
2799		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2800		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2801	}
2802	return (0);
2803}
2804
2805/*
2806 * Common implementation code for utimes(), lutimes(), and futimes().
2807 */
2808static int
2809setutimes(td, vp, ts, numtimes, nullflag)
2810	struct thread *td;
2811	struct vnode *vp;
2812	const struct timespec *ts;
2813	int numtimes;
2814	int nullflag;
2815{
2816	int error, setbirthtime;
2817	struct mount *mp;
2818	struct vattr vattr;
2819
2820	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2821		return (error);
2822	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2823	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2824	setbirthtime = 0;
2825	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2826	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2827		setbirthtime = 1;
2828	VATTR_NULL(&vattr);
2829	vattr.va_atime = ts[0];
2830	vattr.va_mtime = ts[1];
2831	if (setbirthtime)
2832		vattr.va_birthtime = ts[1];
2833	if (numtimes > 2)
2834		vattr.va_birthtime = ts[2];
2835	if (nullflag)
2836		vattr.va_vaflags |= VA_UTIMES_NULL;
2837#ifdef MAC
2838	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2839	    vattr.va_mtime);
2840#endif
2841	if (error == 0)
2842		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2843	VOP_UNLOCK(vp, 0, td);
2844	vn_finished_write(mp);
2845	return (error);
2846}
2847
2848/*
2849 * Set the access and modification times of a file.
2850 */
2851#ifndef _SYS_SYSPROTO_H_
2852struct utimes_args {
2853	char	*path;
2854	struct	timeval *tptr;
2855};
2856#endif
2857int
2858utimes(td, uap)
2859	struct thread *td;
2860	register struct utimes_args /* {
2861		char *path;
2862		struct timeval *tptr;
2863	} */ *uap;
2864{
2865
2866	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2867	    UIO_USERSPACE));
2868}
2869
2870int
2871kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2872    struct timeval *tptr, enum uio_seg tptrseg)
2873{
2874	struct timespec ts[2];
2875	int error;
2876	struct nameidata nd;
2877	int vfslocked;
2878
2879	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2880		return (error);
2881	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2882	if ((error = namei(&nd)) != 0)
2883		return (error);
2884	vfslocked = NDHASGIANT(&nd);
2885	NDFREE(&nd, NDF_ONLY_PNBUF);
2886	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2887	vrele(nd.ni_vp);
2888	VFS_UNLOCK_GIANT(vfslocked);
2889	return (error);
2890}
2891
2892/*
2893 * Set the access and modification times of a file.
2894 */
2895#ifndef _SYS_SYSPROTO_H_
2896struct lutimes_args {
2897	char	*path;
2898	struct	timeval *tptr;
2899};
2900#endif
2901int
2902lutimes(td, uap)
2903	struct thread *td;
2904	register struct lutimes_args /* {
2905		char *path;
2906		struct timeval *tptr;
2907	} */ *uap;
2908{
2909
2910	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2911	    UIO_USERSPACE));
2912}
2913
2914int
2915kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2916    struct timeval *tptr, enum uio_seg tptrseg)
2917{
2918	struct timespec ts[2];
2919	int error;
2920	struct nameidata nd;
2921	int vfslocked;
2922
2923	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2924		return (error);
2925	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2926	if ((error = namei(&nd)) != 0)
2927		return (error);
2928	vfslocked = NDHASGIANT(&nd);
2929	NDFREE(&nd, NDF_ONLY_PNBUF);
2930	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2931	vrele(nd.ni_vp);
2932	VFS_UNLOCK_GIANT(vfslocked);
2933	return (error);
2934}
2935
2936/*
2937 * Set the access and modification times of a file.
2938 */
2939#ifndef _SYS_SYSPROTO_H_
2940struct futimes_args {
2941	int	fd;
2942	struct	timeval *tptr;
2943};
2944#endif
2945int
2946futimes(td, uap)
2947	struct thread *td;
2948	register struct futimes_args /* {
2949		int  fd;
2950		struct timeval *tptr;
2951	} */ *uap;
2952{
2953
2954	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2955}
2956
2957int
2958kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2959    enum uio_seg tptrseg)
2960{
2961	struct timespec ts[2];
2962	struct file *fp;
2963	int vfslocked;
2964	int error;
2965
2966	AUDIT_ARG(fd, fd);
2967	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2968		return (error);
2969	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2970		return (error);
2971	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2972#ifdef AUDIT
2973	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2974	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2975	VOP_UNLOCK(fp->f_vnode, 0, td);
2976#endif
2977	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2978	VFS_UNLOCK_GIANT(vfslocked);
2979	fdrop(fp, td);
2980	return (error);
2981}
2982
2983/*
2984 * Truncate a file given its path name.
2985 */
2986#ifndef _SYS_SYSPROTO_H_
2987struct truncate_args {
2988	char	*path;
2989	int	pad;
2990	off_t	length;
2991};
2992#endif
2993int
2994truncate(td, uap)
2995	struct thread *td;
2996	register struct truncate_args /* {
2997		char *path;
2998		int pad;
2999		off_t length;
3000	} */ *uap;
3001{
3002
3003	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3004}
3005
3006int
3007kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3008{
3009	struct mount *mp;
3010	struct vnode *vp;
3011	struct vattr vattr;
3012	int error;
3013	struct nameidata nd;
3014	int vfslocked;
3015
3016	if (length < 0)
3017		return(EINVAL);
3018	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3019	if ((error = namei(&nd)) != 0)
3020		return (error);
3021	vfslocked = NDHASGIANT(&nd);
3022	vp = nd.ni_vp;
3023	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3024		vrele(vp);
3025		VFS_UNLOCK_GIANT(vfslocked);
3026		return (error);
3027	}
3028	NDFREE(&nd, NDF_ONLY_PNBUF);
3029	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3030	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3031	if (vp->v_type == VDIR)
3032		error = EISDIR;
3033#ifdef MAC
3034	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3035	}
3036#endif
3037	else if ((error = vn_writechk(vp)) == 0 &&
3038	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3039		VATTR_NULL(&vattr);
3040		vattr.va_size = length;
3041		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3042	}
3043	vput(vp);
3044	vn_finished_write(mp);
3045	VFS_UNLOCK_GIANT(vfslocked);
3046	return (error);
3047}
3048
3049/*
3050 * Truncate a file given a file descriptor.
3051 */
3052#ifndef _SYS_SYSPROTO_H_
3053struct ftruncate_args {
3054	int	fd;
3055	int	pad;
3056	off_t	length;
3057};
3058#endif
3059int
3060ftruncate(td, uap)
3061	struct thread *td;
3062	register struct ftruncate_args /* {
3063		int fd;
3064		int pad;
3065		off_t length;
3066	} */ *uap;
3067{
3068	struct mount *mp;
3069	struct vattr vattr;
3070	struct vnode *vp;
3071	struct file *fp;
3072	int vfslocked;
3073	int error;
3074
3075	AUDIT_ARG(fd, uap->fd);
3076	if (uap->length < 0)
3077		return(EINVAL);
3078	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3079		return (error);
3080	if ((fp->f_flag & FWRITE) == 0) {
3081		fdrop(fp, td);
3082		return (EINVAL);
3083	}
3084	vp = fp->f_vnode;
3085	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3086	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3087		goto drop;
3088	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3089	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3090	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3091	if (vp->v_type == VDIR)
3092		error = EISDIR;
3093#ifdef MAC
3094	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3095	    vp))) {
3096	}
3097#endif
3098	else if ((error = vn_writechk(vp)) == 0) {
3099		VATTR_NULL(&vattr);
3100		vattr.va_size = uap->length;
3101		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3102	}
3103	VOP_UNLOCK(vp, 0, td);
3104	vn_finished_write(mp);
3105drop:
3106	VFS_UNLOCK_GIANT(vfslocked);
3107	fdrop(fp, td);
3108	return (error);
3109}
3110
3111#if defined(COMPAT_43)
3112/*
3113 * Truncate a file given its path name.
3114 */
3115#ifndef _SYS_SYSPROTO_H_
3116struct otruncate_args {
3117	char	*path;
3118	long	length;
3119};
3120#endif
3121int
3122otruncate(td, uap)
3123	struct thread *td;
3124	register struct otruncate_args /* {
3125		char *path;
3126		long length;
3127	} */ *uap;
3128{
3129	struct truncate_args /* {
3130		char *path;
3131		int pad;
3132		off_t length;
3133	} */ nuap;
3134
3135	nuap.path = uap->path;
3136	nuap.length = uap->length;
3137	return (truncate(td, &nuap));
3138}
3139
3140/*
3141 * Truncate a file given a file descriptor.
3142 */
3143#ifndef _SYS_SYSPROTO_H_
3144struct oftruncate_args {
3145	int	fd;
3146	long	length;
3147};
3148#endif
3149int
3150oftruncate(td, uap)
3151	struct thread *td;
3152	register struct oftruncate_args /* {
3153		int fd;
3154		long length;
3155	} */ *uap;
3156{
3157	struct ftruncate_args /* {
3158		int fd;
3159		int pad;
3160		off_t length;
3161	} */ nuap;
3162
3163	nuap.fd = uap->fd;
3164	nuap.length = uap->length;
3165	return (ftruncate(td, &nuap));
3166}
3167#endif /* COMPAT_43 */
3168
3169/*
3170 * Sync an open file.
3171 */
3172#ifndef _SYS_SYSPROTO_H_
3173struct fsync_args {
3174	int	fd;
3175};
3176#endif
3177int
3178fsync(td, uap)
3179	struct thread *td;
3180	struct fsync_args /* {
3181		int fd;
3182	} */ *uap;
3183{
3184	struct vnode *vp;
3185	struct mount *mp;
3186	struct file *fp;
3187	int vfslocked;
3188	int error;
3189
3190	AUDIT_ARG(fd, uap->fd);
3191	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3192		return (error);
3193	vp = fp->f_vnode;
3194	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3195	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3196		goto drop;
3197	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3198	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3199	if (vp->v_object != NULL) {
3200		VM_OBJECT_LOCK(vp->v_object);
3201		vm_object_page_clean(vp->v_object, 0, 0, 0);
3202		VM_OBJECT_UNLOCK(vp->v_object);
3203	}
3204	error = VOP_FSYNC(vp, MNT_WAIT, td);
3205
3206	VOP_UNLOCK(vp, 0, td);
3207	vn_finished_write(mp);
3208drop:
3209	VFS_UNLOCK_GIANT(vfslocked);
3210	fdrop(fp, td);
3211	return (error);
3212}
3213
3214/*
3215 * Rename files.  Source and destination must either both be directories, or
3216 * both not be directories.  If target is a directory, it must be empty.
3217 */
3218#ifndef _SYS_SYSPROTO_H_
3219struct rename_args {
3220	char	*from;
3221	char	*to;
3222};
3223#endif
3224int
3225rename(td, uap)
3226	struct thread *td;
3227	register struct rename_args /* {
3228		char *from;
3229		char *to;
3230	} */ *uap;
3231{
3232
3233	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3234}
3235
3236int
3237kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3238{
3239	struct mount *mp = NULL;
3240	struct vnode *tvp, *fvp, *tdvp;
3241	struct nameidata fromnd, tond;
3242	int tvfslocked;
3243	int fvfslocked;
3244	int error;
3245
3246	bwillwrite();
3247#ifdef MAC
3248	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3249	    AUDITVNODE1, pathseg, from, td);
3250#else
3251	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3252	    AUDITVNODE1, pathseg, from, td);
3253#endif
3254	if ((error = namei(&fromnd)) != 0)
3255		return (error);
3256	fvfslocked = NDHASGIANT(&fromnd);
3257	tvfslocked = 0;
3258#ifdef MAC
3259	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3260	    fromnd.ni_vp, &fromnd.ni_cnd);
3261	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3262	if (fromnd.ni_dvp != fromnd.ni_vp)
3263		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3264#endif
3265	fvp = fromnd.ni_vp;
3266	if (error == 0)
3267		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3268	if (error != 0) {
3269		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3270		vrele(fromnd.ni_dvp);
3271		vrele(fvp);
3272		goto out1;
3273	}
3274	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3275	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3276	if (fromnd.ni_vp->v_type == VDIR)
3277		tond.ni_cnd.cn_flags |= WILLBEDIR;
3278	if ((error = namei(&tond)) != 0) {
3279		/* Translate error code for rename("dir1", "dir2/."). */
3280		if (error == EISDIR && fvp->v_type == VDIR)
3281			error = EINVAL;
3282		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3283		vrele(fromnd.ni_dvp);
3284		vrele(fvp);
3285		vn_finished_write(mp);
3286		goto out1;
3287	}
3288	tvfslocked = NDHASGIANT(&tond);
3289	tdvp = tond.ni_dvp;
3290	tvp = tond.ni_vp;
3291	if (tvp != NULL) {
3292		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3293			error = ENOTDIR;
3294			goto out;
3295		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3296			error = EISDIR;
3297			goto out;
3298		}
3299	}
3300	if (fvp == tdvp)
3301		error = EINVAL;
3302	/*
3303	 * If the source is the same as the destination (that is, if they
3304	 * are links to the same vnode), then there is nothing to do.
3305	 */
3306	if (fvp == tvp)
3307		error = -1;
3308#ifdef MAC
3309	else
3310		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3311		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3312#endif
3313out:
3314	if (!error) {
3315		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3316		if (fromnd.ni_dvp != tdvp) {
3317			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3318		}
3319		if (tvp) {
3320			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3321		}
3322		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3323				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3324		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3325		NDFREE(&tond, NDF_ONLY_PNBUF);
3326	} else {
3327		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3328		NDFREE(&tond, NDF_ONLY_PNBUF);
3329		if (tvp)
3330			vput(tvp);
3331		if (tdvp == tvp)
3332			vrele(tdvp);
3333		else
3334			vput(tdvp);
3335		vrele(fromnd.ni_dvp);
3336		vrele(fvp);
3337	}
3338	vrele(tond.ni_startdir);
3339	vn_finished_write(mp);
3340out1:
3341	if (fromnd.ni_startdir)
3342		vrele(fromnd.ni_startdir);
3343	VFS_UNLOCK_GIANT(fvfslocked);
3344	VFS_UNLOCK_GIANT(tvfslocked);
3345	if (error == -1)
3346		return (0);
3347	return (error);
3348}
3349
3350/*
3351 * Make a directory file.
3352 */
3353#ifndef _SYS_SYSPROTO_H_
3354struct mkdir_args {
3355	char	*path;
3356	int	mode;
3357};
3358#endif
3359int
3360mkdir(td, uap)
3361	struct thread *td;
3362	register struct mkdir_args /* {
3363		char *path;
3364		int mode;
3365	} */ *uap;
3366{
3367
3368	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3369}
3370
3371int
3372kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3373{
3374	struct mount *mp;
3375	struct vnode *vp;
3376	struct vattr vattr;
3377	int error;
3378	struct nameidata nd;
3379	int vfslocked;
3380
3381	AUDIT_ARG(mode, mode);
3382restart:
3383	bwillwrite();
3384	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3385	    segflg, path, td);
3386	nd.ni_cnd.cn_flags |= WILLBEDIR;
3387	if ((error = namei(&nd)) != 0)
3388		return (error);
3389	vfslocked = NDHASGIANT(&nd);
3390	vp = nd.ni_vp;
3391	if (vp != NULL) {
3392		NDFREE(&nd, NDF_ONLY_PNBUF);
3393		/*
3394		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3395		 * the strange behaviour of leaving the vnode unlocked
3396		 * if the target is the same vnode as the parent.
3397		 */
3398		if (vp == nd.ni_dvp)
3399			vrele(nd.ni_dvp);
3400		else
3401			vput(nd.ni_dvp);
3402		vrele(vp);
3403		VFS_UNLOCK_GIANT(vfslocked);
3404		return (EEXIST);
3405	}
3406	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3407		NDFREE(&nd, NDF_ONLY_PNBUF);
3408		vput(nd.ni_dvp);
3409		VFS_UNLOCK_GIANT(vfslocked);
3410		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3411			return (error);
3412		goto restart;
3413	}
3414	VATTR_NULL(&vattr);
3415	vattr.va_type = VDIR;
3416	FILEDESC_SLOCK(td->td_proc->p_fd);
3417	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3418	FILEDESC_SUNLOCK(td->td_proc->p_fd);
3419#ifdef MAC
3420	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3421	    &vattr);
3422	if (error)
3423		goto out;
3424#endif
3425	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3426	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3427#ifdef MAC
3428out:
3429#endif
3430	NDFREE(&nd, NDF_ONLY_PNBUF);
3431	vput(nd.ni_dvp);
3432	if (!error)
3433		vput(nd.ni_vp);
3434	vn_finished_write(mp);
3435	VFS_UNLOCK_GIANT(vfslocked);
3436	return (error);
3437}
3438
3439/*
3440 * Remove a directory file.
3441 */
3442#ifndef _SYS_SYSPROTO_H_
3443struct rmdir_args {
3444	char	*path;
3445};
3446#endif
3447int
3448rmdir(td, uap)
3449	struct thread *td;
3450	struct rmdir_args /* {
3451		char *path;
3452	} */ *uap;
3453{
3454
3455	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3456}
3457
3458int
3459kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3460{
3461	struct mount *mp;
3462	struct vnode *vp;
3463	int error;
3464	struct nameidata nd;
3465	int vfslocked;
3466
3467restart:
3468	bwillwrite();
3469	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3470	    pathseg, path, td);
3471	if ((error = namei(&nd)) != 0)
3472		return (error);
3473	vfslocked = NDHASGIANT(&nd);
3474	vp = nd.ni_vp;
3475	if (vp->v_type != VDIR) {
3476		error = ENOTDIR;
3477		goto out;
3478	}
3479	/*
3480	 * No rmdir "." please.
3481	 */
3482	if (nd.ni_dvp == vp) {
3483		error = EINVAL;
3484		goto out;
3485	}
3486	/*
3487	 * The root of a mounted filesystem cannot be deleted.
3488	 */
3489	if (vp->v_vflag & VV_ROOT) {
3490		error = EBUSY;
3491		goto out;
3492	}
3493#ifdef MAC
3494	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3495	    &nd.ni_cnd);
3496	if (error)
3497		goto out;
3498#endif
3499	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3500		NDFREE(&nd, NDF_ONLY_PNBUF);
3501		vput(vp);
3502		if (nd.ni_dvp == vp)
3503			vrele(nd.ni_dvp);
3504		else
3505			vput(nd.ni_dvp);
3506		VFS_UNLOCK_GIANT(vfslocked);
3507		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3508			return (error);
3509		goto restart;
3510	}
3511	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3512	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3513	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3514	vn_finished_write(mp);
3515out:
3516	NDFREE(&nd, NDF_ONLY_PNBUF);
3517	vput(vp);
3518	if (nd.ni_dvp == vp)
3519		vrele(nd.ni_dvp);
3520	else
3521		vput(nd.ni_dvp);
3522	VFS_UNLOCK_GIANT(vfslocked);
3523	return (error);
3524}
3525
3526#ifdef COMPAT_43
3527/*
3528 * Read a block of directory entries in a filesystem independent format.
3529 */
3530#ifndef _SYS_SYSPROTO_H_
3531struct ogetdirentries_args {
3532	int	fd;
3533	char	*buf;
3534	u_int	count;
3535	long	*basep;
3536};
3537#endif
3538int
3539ogetdirentries(td, uap)
3540	struct thread *td;
3541	register struct ogetdirentries_args /* {
3542		int fd;
3543		char *buf;
3544		u_int count;
3545		long *basep;
3546	} */ *uap;
3547{
3548	struct vnode *vp;
3549	struct file *fp;
3550	struct uio auio, kuio;
3551	struct iovec aiov, kiov;
3552	struct dirent *dp, *edp;
3553	caddr_t dirbuf;
3554	int error, eofflag, readcnt, vfslocked;
3555	long loff;
3556
3557	/* XXX arbitrary sanity limit on `count'. */
3558	if (uap->count > 64 * 1024)
3559		return (EINVAL);
3560	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3561		return (error);
3562	if ((fp->f_flag & FREAD) == 0) {
3563		fdrop(fp, td);
3564		return (EBADF);
3565	}
3566	vp = fp->f_vnode;
3567unionread:
3568	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3569	if (vp->v_type != VDIR) {
3570		VFS_UNLOCK_GIANT(vfslocked);
3571		fdrop(fp, td);
3572		return (EINVAL);
3573	}
3574	aiov.iov_base = uap->buf;
3575	aiov.iov_len = uap->count;
3576	auio.uio_iov = &aiov;
3577	auio.uio_iovcnt = 1;
3578	auio.uio_rw = UIO_READ;
3579	auio.uio_segflg = UIO_USERSPACE;
3580	auio.uio_td = td;
3581	auio.uio_resid = uap->count;
3582	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3583	loff = auio.uio_offset = fp->f_offset;
3584#ifdef MAC
3585	error = mac_check_vnode_readdir(td->td_ucred, vp);
3586	if (error) {
3587		VOP_UNLOCK(vp, 0, td);
3588		VFS_UNLOCK_GIANT(vfslocked);
3589		fdrop(fp, td);
3590		return (error);
3591	}
3592#endif
3593#	if (BYTE_ORDER != LITTLE_ENDIAN)
3594		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3595			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3596			    NULL, NULL);
3597			fp->f_offset = auio.uio_offset;
3598		} else
3599#	endif
3600	{
3601		kuio = auio;
3602		kuio.uio_iov = &kiov;
3603		kuio.uio_segflg = UIO_SYSSPACE;
3604		kiov.iov_len = uap->count;
3605		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3606		kiov.iov_base = dirbuf;
3607		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3608			    NULL, NULL);
3609		fp->f_offset = kuio.uio_offset;
3610		if (error == 0) {
3611			readcnt = uap->count - kuio.uio_resid;
3612			edp = (struct dirent *)&dirbuf[readcnt];
3613			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3614#				if (BYTE_ORDER == LITTLE_ENDIAN)
3615					/*
3616					 * The expected low byte of
3617					 * dp->d_namlen is our dp->d_type.
3618					 * The high MBZ byte of dp->d_namlen
3619					 * is our dp->d_namlen.
3620					 */
3621					dp->d_type = dp->d_namlen;
3622					dp->d_namlen = 0;
3623#				else
3624					/*
3625					 * The dp->d_type is the high byte
3626					 * of the expected dp->d_namlen,
3627					 * so must be zero'ed.
3628					 */
3629					dp->d_type = 0;
3630#				endif
3631				if (dp->d_reclen > 0) {
3632					dp = (struct dirent *)
3633					    ((char *)dp + dp->d_reclen);
3634				} else {
3635					error = EIO;
3636					break;
3637				}
3638			}
3639			if (dp >= edp)
3640				error = uiomove(dirbuf, readcnt, &auio);
3641		}
3642		FREE(dirbuf, M_TEMP);
3643	}
3644	if (error) {
3645		VOP_UNLOCK(vp, 0, td);
3646		VFS_UNLOCK_GIANT(vfslocked);
3647		fdrop(fp, td);
3648		return (error);
3649	}
3650	if (uap->count == auio.uio_resid &&
3651	    (vp->v_vflag & VV_ROOT) &&
3652	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3653		struct vnode *tvp = vp;
3654		vp = vp->v_mount->mnt_vnodecovered;
3655		VREF(vp);
3656		fp->f_vnode = vp;
3657		fp->f_data = vp;
3658		fp->f_offset = 0;
3659		vput(tvp);
3660		VFS_UNLOCK_GIANT(vfslocked);
3661		goto unionread;
3662	}
3663	VOP_UNLOCK(vp, 0, td);
3664	VFS_UNLOCK_GIANT(vfslocked);
3665	error = copyout(&loff, uap->basep, sizeof(long));
3666	fdrop(fp, td);
3667	td->td_retval[0] = uap->count - auio.uio_resid;
3668	return (error);
3669}
3670#endif /* COMPAT_43 */
3671
3672/*
3673 * Read a block of directory entries in a filesystem independent format.
3674 */
3675#ifndef _SYS_SYSPROTO_H_
3676struct getdirentries_args {
3677	int	fd;
3678	char	*buf;
3679	u_int	count;
3680	long	*basep;
3681};
3682#endif
3683int
3684getdirentries(td, uap)
3685	struct thread *td;
3686	register struct getdirentries_args /* {
3687		int fd;
3688		char *buf;
3689		u_int count;
3690		long *basep;
3691	} */ *uap;
3692{
3693	struct vnode *vp;
3694	struct file *fp;
3695	struct uio auio;
3696	struct iovec aiov;
3697	int vfslocked;
3698	long loff;
3699	int error, eofflag;
3700
3701	AUDIT_ARG(fd, uap->fd);
3702	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3703		return (error);
3704	if ((fp->f_flag & FREAD) == 0) {
3705		fdrop(fp, td);
3706		return (EBADF);
3707	}
3708	vp = fp->f_vnode;
3709unionread:
3710	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3711	if (vp->v_type != VDIR) {
3712		VFS_UNLOCK_GIANT(vfslocked);
3713		error = EINVAL;
3714		goto fail;
3715	}
3716	aiov.iov_base = uap->buf;
3717	aiov.iov_len = uap->count;
3718	auio.uio_iov = &aiov;
3719	auio.uio_iovcnt = 1;
3720	auio.uio_rw = UIO_READ;
3721	auio.uio_segflg = UIO_USERSPACE;
3722	auio.uio_td = td;
3723	auio.uio_resid = uap->count;
3724	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3725	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3726	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3727	loff = auio.uio_offset = fp->f_offset;
3728#ifdef MAC
3729	error = mac_check_vnode_readdir(td->td_ucred, vp);
3730	if (error == 0)
3731#endif
3732		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3733		    NULL);
3734	fp->f_offset = auio.uio_offset;
3735	if (error) {
3736		VOP_UNLOCK(vp, 0, td);
3737		VFS_UNLOCK_GIANT(vfslocked);
3738		goto fail;
3739	}
3740	if (uap->count == auio.uio_resid &&
3741	    (vp->v_vflag & VV_ROOT) &&
3742	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3743		struct vnode *tvp = vp;
3744		vp = vp->v_mount->mnt_vnodecovered;
3745		VREF(vp);
3746		fp->f_vnode = vp;
3747		fp->f_data = vp;
3748		fp->f_offset = 0;
3749		vput(tvp);
3750		VFS_UNLOCK_GIANT(vfslocked);
3751		goto unionread;
3752	}
3753	VOP_UNLOCK(vp, 0, td);
3754	VFS_UNLOCK_GIANT(vfslocked);
3755	if (uap->basep != NULL) {
3756		error = copyout(&loff, uap->basep, sizeof(long));
3757	}
3758	td->td_retval[0] = uap->count - auio.uio_resid;
3759fail:
3760	fdrop(fp, td);
3761	return (error);
3762}
3763
3764#ifndef _SYS_SYSPROTO_H_
3765struct getdents_args {
3766	int fd;
3767	char *buf;
3768	size_t count;
3769};
3770#endif
3771int
3772getdents(td, uap)
3773	struct thread *td;
3774	register struct getdents_args /* {
3775		int fd;
3776		char *buf;
3777		u_int count;
3778	} */ *uap;
3779{
3780	struct getdirentries_args ap;
3781	ap.fd = uap->fd;
3782	ap.buf = uap->buf;
3783	ap.count = uap->count;
3784	ap.basep = NULL;
3785	return (getdirentries(td, &ap));
3786}
3787
3788/*
3789 * Set the mode mask for creation of filesystem nodes.
3790 */
3791#ifndef _SYS_SYSPROTO_H_
3792struct umask_args {
3793	int	newmask;
3794};
3795#endif
3796int
3797umask(td, uap)
3798	struct thread *td;
3799	struct umask_args /* {
3800		int newmask;
3801	} */ *uap;
3802{
3803	register struct filedesc *fdp;
3804
3805	FILEDESC_XLOCK(td->td_proc->p_fd);
3806	fdp = td->td_proc->p_fd;
3807	td->td_retval[0] = fdp->fd_cmask;
3808	fdp->fd_cmask = uap->newmask & ALLPERMS;
3809	FILEDESC_XUNLOCK(td->td_proc->p_fd);
3810	return (0);
3811}
3812
3813/*
3814 * Void all references to file by ripping underlying filesystem away from
3815 * vnode.
3816 */
3817#ifndef _SYS_SYSPROTO_H_
3818struct revoke_args {
3819	char	*path;
3820};
3821#endif
3822int
3823revoke(td, uap)
3824	struct thread *td;
3825	register struct revoke_args /* {
3826		char *path;
3827	} */ *uap;
3828{
3829	struct vnode *vp;
3830	struct vattr vattr;
3831	int error;
3832	struct nameidata nd;
3833	int vfslocked;
3834
3835	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3836	    UIO_USERSPACE, uap->path, td);
3837	if ((error = namei(&nd)) != 0)
3838		return (error);
3839	vfslocked = NDHASGIANT(&nd);
3840	vp = nd.ni_vp;
3841	NDFREE(&nd, NDF_ONLY_PNBUF);
3842	if (vp->v_type != VCHR) {
3843		error = EINVAL;
3844		goto out;
3845	}
3846#ifdef MAC
3847	error = mac_check_vnode_revoke(td->td_ucred, vp);
3848	if (error)
3849		goto out;
3850#endif
3851	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3852	if (error)
3853		goto out;
3854	if (td->td_ucred->cr_uid != vattr.va_uid) {
3855		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3856		    SUSER_ALLOWJAIL);
3857		if (error)
3858			goto out;
3859	}
3860	if (vcount(vp) > 1)
3861		VOP_REVOKE(vp, REVOKEALL);
3862out:
3863	vput(vp);
3864	VFS_UNLOCK_GIANT(vfslocked);
3865	return (error);
3866}
3867
3868/*
3869 * Convert a user file descriptor to a kernel file entry.
3870 * A reference on the file entry is held upon returning.
3871 */
3872int
3873getvnode(fdp, fd, fpp)
3874	struct filedesc *fdp;
3875	int fd;
3876	struct file **fpp;
3877{
3878	int error;
3879	struct file *fp;
3880
3881	fp = NULL;
3882	if (fdp == NULL)
3883		error = EBADF;
3884	else {
3885		FILEDESC_SLOCK(fdp);
3886		if ((u_int)fd >= fdp->fd_nfiles ||
3887		    (fp = fdp->fd_ofiles[fd]) == NULL)
3888			error = EBADF;
3889		else if (fp->f_vnode == NULL) {
3890			fp = NULL;
3891			error = EINVAL;
3892		} else {
3893			fhold(fp);
3894			error = 0;
3895		}
3896		FILEDESC_SUNLOCK(fdp);
3897	}
3898	*fpp = fp;
3899	return (error);
3900}
3901
3902/*
3903 * Get an (NFS) file handle.
3904 */
3905#ifndef _SYS_SYSPROTO_H_
3906struct lgetfh_args {
3907	char	*fname;
3908	fhandle_t *fhp;
3909};
3910#endif
3911int
3912lgetfh(td, uap)
3913	struct thread *td;
3914	register struct lgetfh_args *uap;
3915{
3916	struct nameidata nd;
3917	fhandle_t fh;
3918	register struct vnode *vp;
3919	int vfslocked;
3920	int error;
3921
3922	error = priv_check(td, PRIV_VFS_GETFH);
3923	if (error)
3924		return (error);
3925	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3926	    UIO_USERSPACE, uap->fname, td);
3927	error = namei(&nd);
3928	if (error)
3929		return (error);
3930	vfslocked = NDHASGIANT(&nd);
3931	NDFREE(&nd, NDF_ONLY_PNBUF);
3932	vp = nd.ni_vp;
3933	bzero(&fh, sizeof(fh));
3934	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3935	error = VOP_VPTOFH(vp, &fh.fh_fid);
3936	vput(vp);
3937	VFS_UNLOCK_GIANT(vfslocked);
3938	if (error)
3939		return (error);
3940	error = copyout(&fh, uap->fhp, sizeof (fh));
3941	return (error);
3942}
3943
3944#ifndef _SYS_SYSPROTO_H_
3945struct getfh_args {
3946	char	*fname;
3947	fhandle_t *fhp;
3948};
3949#endif
3950int
3951getfh(td, uap)
3952	struct thread *td;
3953	register struct getfh_args *uap;
3954{
3955	struct nameidata nd;
3956	fhandle_t fh;
3957	register struct vnode *vp;
3958	int vfslocked;
3959	int error;
3960
3961	error = priv_check(td, PRIV_VFS_GETFH);
3962	if (error)
3963		return (error);
3964	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3965	    UIO_USERSPACE, uap->fname, td);
3966	error = namei(&nd);
3967	if (error)
3968		return (error);
3969	vfslocked = NDHASGIANT(&nd);
3970	NDFREE(&nd, NDF_ONLY_PNBUF);
3971	vp = nd.ni_vp;
3972	bzero(&fh, sizeof(fh));
3973	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3974	error = VOP_VPTOFH(vp, &fh.fh_fid);
3975	vput(vp);
3976	VFS_UNLOCK_GIANT(vfslocked);
3977	if (error)
3978		return (error);
3979	error = copyout(&fh, uap->fhp, sizeof (fh));
3980	return (error);
3981}
3982
3983/*
3984 * syscall for the rpc.lockd to use to translate a NFS file handle into an
3985 * open descriptor.
3986 *
3987 * warning: do not remove the priv_check() call or this becomes one giant
3988 * security hole.
3989 */
3990#ifndef _SYS_SYSPROTO_H_
3991struct fhopen_args {
3992	const struct fhandle *u_fhp;
3993	int flags;
3994};
3995#endif
3996int
3997fhopen(td, uap)
3998	struct thread *td;
3999	struct fhopen_args /* {
4000		const struct fhandle *u_fhp;
4001		int flags;
4002	} */ *uap;
4003{
4004	struct proc *p = td->td_proc;
4005	struct mount *mp;
4006	struct vnode *vp;
4007	struct fhandle fhp;
4008	struct vattr vat;
4009	struct vattr *vap = &vat;
4010	struct flock lf;
4011	struct file *fp;
4012	register struct filedesc *fdp = p->p_fd;
4013	int fmode, mode, error, type;
4014	struct file *nfp;
4015	int vfslocked;
4016	int indx;
4017
4018	error = priv_check(td, PRIV_VFS_FHOPEN);
4019	if (error)
4020		return (error);
4021	fmode = FFLAGS(uap->flags);
4022	/* why not allow a non-read/write open for our lockd? */
4023	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4024		return (EINVAL);
4025	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4026	if (error)
4027		return(error);
4028	/* find the mount point */
4029	mp = vfs_getvfs(&fhp.fh_fsid);
4030	if (mp == NULL)
4031		return (ESTALE);
4032	vfslocked = VFS_LOCK_GIANT(mp);
4033	/* now give me my vnode, it gets returned to me locked */
4034	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4035	if (error)
4036		goto out;
4037	/*
4038	 * from now on we have to make sure not
4039	 * to forget about the vnode
4040	 * any error that causes an abort must vput(vp)
4041	 * just set error = err and 'goto bad;'.
4042	 */
4043
4044	/*
4045	 * from vn_open
4046	 */
4047	if (vp->v_type == VLNK) {
4048		error = EMLINK;
4049		goto bad;
4050	}
4051	if (vp->v_type == VSOCK) {
4052		error = EOPNOTSUPP;
4053		goto bad;
4054	}
4055	mode = 0;
4056	if (fmode & (FWRITE | O_TRUNC)) {
4057		if (vp->v_type == VDIR) {
4058			error = EISDIR;
4059			goto bad;
4060		}
4061		error = vn_writechk(vp);
4062		if (error)
4063			goto bad;
4064		mode |= VWRITE;
4065	}
4066	if (fmode & FREAD)
4067		mode |= VREAD;
4068	if (fmode & O_APPEND)
4069		mode |= VAPPEND;
4070#ifdef MAC
4071	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4072	if (error)
4073		goto bad;
4074#endif
4075	if (mode) {
4076		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4077		if (error)
4078			goto bad;
4079	}
4080	if (fmode & O_TRUNC) {
4081		VOP_UNLOCK(vp, 0, td);				/* XXX */
4082		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4083			vrele(vp);
4084			goto out;
4085		}
4086		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4087		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4088#ifdef MAC
4089		/*
4090		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4091		 * should be right.
4092		 */
4093		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4094		if (error == 0) {
4095#endif
4096			VATTR_NULL(vap);
4097			vap->va_size = 0;
4098			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4099#ifdef MAC
4100		}
4101#endif
4102		vn_finished_write(mp);
4103		if (error)
4104			goto bad;
4105	}
4106	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4107	if (error)
4108		goto bad;
4109
4110	if (fmode & FWRITE)
4111		vp->v_writecount++;
4112
4113	/*
4114	 * end of vn_open code
4115	 */
4116
4117	if ((error = falloc(td, &nfp, &indx)) != 0) {
4118		if (fmode & FWRITE)
4119			vp->v_writecount--;
4120		goto bad;
4121	}
4122	/* An extra reference on `nfp' has been held for us by falloc(). */
4123	fp = nfp;
4124
4125	FILE_LOCK(nfp);
4126	nfp->f_vnode = vp;
4127	nfp->f_data = vp;
4128	nfp->f_flag = fmode & FMASK;
4129	nfp->f_type = DTYPE_VNODE;
4130	nfp->f_ops = &vnops;
4131	FILE_UNLOCK(nfp);
4132	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4133		lf.l_whence = SEEK_SET;
4134		lf.l_start = 0;
4135		lf.l_len = 0;
4136		if (fmode & O_EXLOCK)
4137			lf.l_type = F_WRLCK;
4138		else
4139			lf.l_type = F_RDLCK;
4140		type = F_FLOCK;
4141		if ((fmode & FNONBLOCK) == 0)
4142			type |= F_WAIT;
4143		VOP_UNLOCK(vp, 0, td);
4144		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4145			    type)) != 0) {
4146			/*
4147			 * The lock request failed.  Normally close the
4148			 * descriptor but handle the case where someone might
4149			 * have dup()d or close()d it when we weren't looking.
4150			 */
4151			fdclose(fdp, fp, indx, td);
4152
4153			/*
4154			 * release our private reference
4155			 */
4156			fdrop(fp, td);
4157			goto out;
4158		}
4159		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4160		fp->f_flag |= FHASLOCK;
4161	}
4162
4163	VOP_UNLOCK(vp, 0, td);
4164	fdrop(fp, td);
4165	vfs_rel(mp);
4166	VFS_UNLOCK_GIANT(vfslocked);
4167	td->td_retval[0] = indx;
4168	return (0);
4169
4170bad:
4171	vput(vp);
4172out:
4173	vfs_rel(mp);
4174	VFS_UNLOCK_GIANT(vfslocked);
4175	return (error);
4176}
4177
4178/*
4179 * Stat an (NFS) file handle.
4180 */
4181#ifndef _SYS_SYSPROTO_H_
4182struct fhstat_args {
4183	struct fhandle *u_fhp;
4184	struct stat *sb;
4185};
4186#endif
4187int
4188fhstat(td, uap)
4189	struct thread *td;
4190	register struct fhstat_args /* {
4191		struct fhandle *u_fhp;
4192		struct stat *sb;
4193	} */ *uap;
4194{
4195	struct stat sb;
4196	fhandle_t fh;
4197	struct mount *mp;
4198	struct vnode *vp;
4199	int vfslocked;
4200	int error;
4201
4202	error = priv_check(td, PRIV_VFS_FHSTAT);
4203	if (error)
4204		return (error);
4205	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4206	if (error)
4207		return (error);
4208	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4209		return (ESTALE);
4210	vfslocked = VFS_LOCK_GIANT(mp);
4211	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4212		vfs_rel(mp);
4213		VFS_UNLOCK_GIANT(vfslocked);
4214		return (error);
4215	}
4216	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4217	vput(vp);
4218	vfs_rel(mp);
4219	VFS_UNLOCK_GIANT(vfslocked);
4220	if (error)
4221		return (error);
4222	error = copyout(&sb, uap->sb, sizeof(sb));
4223	return (error);
4224}
4225
4226/*
4227 * Implement fstatfs() for (NFS) file handles.
4228 */
4229#ifndef _SYS_SYSPROTO_H_
4230struct fhstatfs_args {
4231	struct fhandle *u_fhp;
4232	struct statfs *buf;
4233};
4234#endif
4235int
4236fhstatfs(td, uap)
4237	struct thread *td;
4238	struct fhstatfs_args /* {
4239		struct fhandle *u_fhp;
4240		struct statfs *buf;
4241	} */ *uap;
4242{
4243	struct statfs sf;
4244	fhandle_t fh;
4245	int error;
4246
4247	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4248	if (error)
4249		return (error);
4250	error = kern_fhstatfs(td, fh, &sf);
4251	if (error)
4252		return (error);
4253	return (copyout(&sf, uap->buf, sizeof(sf)));
4254}
4255
4256int
4257kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4258{
4259	struct statfs *sp;
4260	struct mount *mp;
4261	struct vnode *vp;
4262	int vfslocked;
4263	int error;
4264
4265	error = priv_check(td, PRIV_VFS_FHSTATFS);
4266	if (error)
4267		return (error);
4268	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4269		return (ESTALE);
4270	vfslocked = VFS_LOCK_GIANT(mp);
4271	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4272	if (error) {
4273		VFS_UNLOCK_GIANT(vfslocked);
4274		vfs_rel(mp);
4275		return (error);
4276	}
4277	vput(vp);
4278	error = prison_canseemount(td->td_ucred, mp);
4279	if (error)
4280		goto out;
4281#ifdef MAC
4282	error = mac_check_mount_stat(td->td_ucred, mp);
4283	if (error)
4284		goto out;
4285#endif
4286	/*
4287	 * Set these in case the underlying filesystem fails to do so.
4288	 */
4289	sp = &mp->mnt_stat;
4290	sp->f_version = STATFS_VERSION;
4291	sp->f_namemax = NAME_MAX;
4292	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4293	error = VFS_STATFS(mp, sp, td);
4294	if (error == 0)
4295		*buf = *sp;
4296out:
4297	vfs_rel(mp);
4298	VFS_UNLOCK_GIANT(vfslocked);
4299	return (error);
4300}
4301