vfs_syscalls.c revision 301049
1139825Simp/*-
222521Sdyson * Copyright (c) 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 4. Neither the name of the University nor the names of its contributors
191541Srgrimes *    may be used to endorse or promote products derived from this software
201541Srgrimes *    without specific prior written permission.
211541Srgrimes *
221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
251541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
321541Srgrimes * SUCH DAMAGE.
331541Srgrimes *
3422521Sdyson *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
351541Srgrimes */
361541Srgrimes
37116192Sobrien#include <sys/cdefs.h>
38116192Sobrien__FBSDID("$FreeBSD: releng/9.3/sys/kern/vfs_syscalls.c 301049 2016-05-31 16:55:37Z glebius $");
39116192Sobrien
4013260Swollman#include "opt_capsicum.h"
4131749Seivind#include "opt_compat.h"
4274822Srwatson#include "opt_kdtrace.h"
43136969Sphk#include "opt_ktrace.h"
4413260Swollman
451541Srgrimes#include <sys/param.h>
461541Srgrimes#include <sys/systm.h>
47101073Srwatson#include <sys/bio.h>
481541Srgrimes#include <sys/buf.h>
491541Srgrimes#include <sys/capability.h>
5018020Sbde#include <sys/disk.h>
51236044Skib#include <sys/sysent.h>
521541Srgrimes#include <sys/malloc.h>
5360041Sphk#include <sys/mount.h>
541541Srgrimes#include <sys/mutex.h>
551541Srgrimes#include <sys/sysproto.h>
56164033Srwatson#include <sys/namei.h>
57150634Sjhb#include <sys/filedesc.h>
5831699Sbde#include <sys/kernel.h>
591541Srgrimes#include <sys/fcntl.h>
601541Srgrimes#include <sys/file.h>
611960Sdg#include <sys/filio.h>
6249535Sphk#include <sys/limits.h>
6374822Srwatson#include <sys/linker.h>
641541Srgrimes#include <sys/sdt.h>
65163606Srwatson#include <sys/stat.h>
66163606Srwatson#include <sys/sx.h>
6772521Sjlemon#include <sys/unistd.h>
6872521Sjlemon#include <sys/vnode.h>
6934266Sjulian#include <sys/priv.h>
7034266Sjulian#include <sys/proc.h>
7132011Sbde#include <sys/dirent.h>
7274822Srwatson#include <sys/jail.h>
7359241Srwatson#include <sys/syscallsubr.h>
741541Srgrimes#include <sys/sysctl.h>
751541Srgrimes#ifdef KTRACE
761541Srgrimes#include <sys/ktrace.h>
771541Srgrimes#endif
781541Srgrimes
7982364Siedowse#include <machine/stdarg.h>
8082364Siedowse
8182364Siedowse#include <security/audit/audit.h>
82163841Spjd#include <security/mac/mac_framework.h>
83163841Spjd
84218485Snetchild#include <vm/vm.h>
85163841Spjd#include <vm/vm_object.h>
861541Srgrimes#include <vm/vm_page.h>
87218485Snetchild#include <vm/uma.h>
88218485Snetchild
89218485Snetchild#include <ufs/ufs/quota.h>
90218485Snetchild
91218485SnetchildMALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
92218485Snetchild
93218513SnetchildSDT_PROVIDER_DEFINE(vfs);
94218513SnetchildSDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
95218485SnetchildSDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
96218485Snetchild
97218485Snetchildstatic int chroot_refuse_vdir_fds(struct filedesc *fdp);
98136969Sphkstatic int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
99136969Sphkstatic int setfflags(struct thread *td, struct vnode *, int);
100200796Straszstatic int setutimes(struct thread *td, struct vnode *,
10192728Salfred    const struct timespec *, int, int);
10292728Salfredstatic int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
103138270Sphk    struct thread *td);
104138270Sphk
105138270Sphk/*
106236044Skib * The module initialization routine for POSIX asynchronous I/O will
107138270Sphk * set this to the version of AIO that it implements.  (Zero means
108308946Smckusick * that it is not implemented.)  This value is used here by pathconf()
109187526Sjhb * and in kern_descrip.c by fpathconf().
110138270Sphk */
111138270Sphkint async_io_version;
112138270Sphk
113138270Sphk#ifdef DEBUG
114138270Sphkstatic int syncprt = 0;
115138270SphkSYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
116138270Sphk#endif
117138270Sphk
118138270Sphk/*
119138270Sphk * Sync each mounted filesystem.
120138270Sphk */
121138270Sphk#ifndef _SYS_SYSPROTO_H_
122138270Sphkstruct sync_args {
123138270Sphk	int     dummy;
124138270Sphk};
125195296Strasz#endif
1261541Srgrimes/* ARGSUSED */
127207141Sjeffint
128207141Sjeffsys_sync(td, uap)
1291541Srgrimes	struct thread *td;
13034266Sjulian	struct sync_args *uap;
13134266Sjulian{
13234266Sjulian	struct mount *mp, *nmp;
13334266Sjulian	int save, vfslocked;
13434266Sjulian
13534266Sjulian	mtx_lock(&mountlist_mtx);
13634266Sjulian	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
13734266Sjulian		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
13834266Sjulian			nmp = TAILQ_NEXT(mp, mnt_list);
13934266Sjulian			continue;
14034266Sjulian		}
141169898Spjd		vfslocked = VFS_LOCK_GIANT(mp);
142169898Spjd		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
14336721Sbde		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
14436721Sbde			save = curthread_pflags_set(TDP_SYNCIO);
14550137Sjdp			vfs_msync(mp, MNT_NOWAIT);
14636721Sbde			VFS_SYNC(mp, MNT_NOWAIT);
147169898Spjd			curthread_pflags_restore(save);
148169898Spjd			vn_finished_write(mp);
14936721Sbde		}
150183070Skib		VFS_UNLOCK_GIANT(vfslocked);
151163194Skib		mtx_lock(&mountlist_mtx);
152169898Spjd		nmp = TAILQ_NEXT(mp, mnt_list);
153166564Skib		vfs_unbusy(mp);
154163194Skib	}
15589384Smckusick	mtx_unlock(&mountlist_mtx);
15689384Smckusick	return (0);
157163194Skib}
158163194Skib
159163194Skib/*
16089384Smckusick * Change filesystem quotas.
161163194Skib */
162163194Skib#ifndef _SYS_SYSPROTO_H_
163163194Skibstruct quotactl_args {
164163194Skib	char *path;
165163194Skib	int cmd;
166163194Skib	int uid;
16736721Sbde	caddr_t arg;
168163194Skib};
169163194Skib#endif
170163194Skibint
171163194Skibsys_quotactl(td, uap)
172163194Skib	struct thread *td;
173163194Skib	register struct quotactl_args /* {
174163194Skib		char *path;
175191564Srmacklem		int cmd;
176163194Skib		int uid;
177163194Skib		caddr_t arg;
178163194Skib	} */ *uap;
17936721Sbde{
180169898Spjd	struct mount *mp;
181169898Spjd	int vfslocked;
182169898Spjd	int error;
183169898Spjd	struct nameidata nd;
184169898Spjd
185169898Spjd	AUDIT_ARG_CMD(uap->cmd);
186169898Spjd	AUDIT_ARG_UID(uap->uid);
187169898Spjd	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
188163194Skib		return (EPERM);
18936721Sbde	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
19036721Sbde	   UIO_USERSPACE, uap->path, td);
19134266Sjulian	if ((error = namei(&nd)) != 0)
1921541Srgrimes		return (error);
1931541Srgrimes	vfslocked = NDHASGIANT(&nd);
194104094Sphk	NDFREE(&nd, NDF_ONLY_PNBUF);
1951541Srgrimes	mp = nd.ni_vp->v_mount;
1961541Srgrimes	vfs_ref(mp);
1971541Srgrimes	vput(nd.ni_vp);
1981541Srgrimes	error = vfs_busy(mp, 0);
1991541Srgrimes	vfs_rel(mp);
2001541Srgrimes	if (error) {
2011541Srgrimes		VFS_UNLOCK_GIANT(vfslocked);
2021541Srgrimes		return (error);
2031541Srgrimes	}
2041541Srgrimes	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
2053420Sphk
2061541Srgrimes	/*
207308946Smckusick	 * Since quota on operation typically needs to open quota
208276648Skib	 * file, the Q_QUOTAON handler needs to unbusy the mount point
2091541Srgrimes	 * before calling into namei.  Otherwise, unmount might be
210276648Skib	 * started between two vfs_busy() invocations (first is our,
211276648Skib	 * second is from mount point cross-walk code in lookup()),
2121541Srgrimes	 * causing deadlock.
2131541Srgrimes	 *
2141541Srgrimes	 * Require that Q_QUOTAON handles the vfs_busy() reference on
2151541Srgrimes	 * its own, always returning with ubusied mount point.
2161541Srgrimes	 */
2171541Srgrimes	if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
2181541Srgrimes		vfs_unbusy(mp);
219104094Sphk	VFS_UNLOCK_GIANT(vfslocked);
2201541Srgrimes	return (error);
2211541Srgrimes}
2221541Srgrimes
2231541Srgrimes/*
2241541Srgrimes * Used by statfs conversion routines to scale the block size up if
2251541Srgrimes * necessary so that all of the block counts are <= 'max_size'.  Note
2261541Srgrimes * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
2271541Srgrimes * value of 'n'.
22811644Sdg */
22911644Sdgvoid
23011644Sdgstatfs_scale_blocks(struct statfs *sf, long max_size)
23168307Sbde{
2321541Srgrimes	uint64_t count;
2331541Srgrimes	int shift;
2343420Sphk
235308946Smckusick	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
2363420Sphk
2371541Srgrimes	/*
2381541Srgrimes	 * Attempt to scale the block counts to give a more accurate
2391541Srgrimes	 * overview to userland of the ratio of free space to used
2401541Srgrimes	 * space.  To do this, find the largest block count and compute
2411541Srgrimes	 * a divisor that lets it fit into a signed integer <= max_size.
2421541Srgrimes	 */
2431541Srgrimes	if (sf->f_bavail < 0)
2441541Srgrimes		count = -sf->f_bavail;
245132775Skan	else
2461541Srgrimes		count = sf->f_bavail;
2471541Srgrimes	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
24853101Seivind	if (count <= max_size)
2491541Srgrimes		return;
250143500Sjeff
2511541Srgrimes	count >>= flsl(max_size);
2521541Srgrimes	shift = 0;
25368307Sbde	while (count > 0) {
2541541Srgrimes		shift++;
255143500Sjeff		count >>=1;
25692462Smckusick	}
25753101Seivind
25853101Seivind	sf->f_bsize <<= shift;
25953101Seivind	sf->f_blocks >>= shift;
26053101Seivind	sf->f_bfree >>= shift;
2611541Srgrimes	sf->f_bavail >>= shift;
2621541Srgrimes}
2631541Srgrimes
2641541Srgrimes/*
2651541Srgrimes * Get filesystem statistics.
2661541Srgrimes */
2671541Srgrimes#ifndef _SYS_SYSPROTO_H_
268104094Sphkstruct statfs_args {
269140729Sphk	char *path;
2701541Srgrimes	struct statfs *buf;
271140729Sphk};
272140962Speadar#endif
2731541Srgrimesint
274140729Sphksys_statfs(td, uap)
275135877Sphk	struct thread *td;
276135877Sphk	register struct statfs_args /* {
277140962Speadar		char *path;
2781541Srgrimes		struct statfs *buf;
2791541Srgrimes	} */ *uap;
2801541Srgrimes{
281140962Speadar	struct statfs sf;
2821541Srgrimes	int error;
2831541Srgrimes
284140962Speadar	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
2851541Srgrimes	if (error == 0)
2861541Srgrimes		error = copyout(&sf, uap->buf, sizeof(sf));
2871541Srgrimes	return (error);
2881541Srgrimes}
2891541Srgrimes
2901541Srgrimesint
2911541Srgrimeskern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
2921541Srgrimes    struct statfs *buf)
2931541Srgrimes{
294104094Sphk	struct mount *mp;
2951541Srgrimes	struct statfs *sp, sb;
2961541Srgrimes	int vfslocked;
2971541Srgrimes	int error;
2981541Srgrimes	struct nameidata nd;
2991541Srgrimes
30083366Sjulian	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
3011541Srgrimes	    AUDITVNODE1, pathseg, path, td);
3021541Srgrimes	error = namei(&nd);
30375943Smckusick	if (error)
304163194Skib		return (error);
3051541Srgrimes	vfslocked = NDHASGIANT(&nd);
306103944Sjeff	mp = nd.ni_vp->v_mount;
307163194Skib	vfs_ref(mp);
308169898Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
309169898Spjd	vput(nd.ni_vp);
310163194Skib	error = vfs_busy(mp, 0);
3111541Srgrimes	vfs_rel(mp);
3121541Srgrimes	if (error) {
3131541Srgrimes		VFS_UNLOCK_GIANT(vfslocked);
314104094Sphk		return (error);
315200796Strasz	}
316200796Strasz#ifdef MAC
3171541Srgrimes	error = mac_mount_check_stat(td->td_ucred, mp);
318184413Strasz	if (error)
3191541Srgrimes		goto out;
32083366Sjulian#endif
3211541Srgrimes	/*
3221541Srgrimes	 * Set these in case the underlying filesystem fails to do so.
32311644Sdg	 */
32411644Sdg	sp = &mp->mnt_stat;
325184413Strasz	sp->f_version = STATFS_VERSION;
326182115Skib	sp->f_namemax = NAME_MAX;
327182115Skib	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
328182115Skib	error = VFS_STATFS(mp, sp);
329182115Skib	if (error)
33074822Srwatson		goto out;
33174822Srwatson	if (priv_check(td, PRIV_VFS_GENERATION)) {
332200796Strasz		bcopy(sp, &sb, sizeof(sb));
33317040Swollman		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3341541Srgrimes		prison_enforce_statfs(td->td_ucred, mp, &sb);
33511644Sdg		sp = &sb;
33696755Strhodes	}
33711644Sdg	*buf = *sp;
33896755Strhodesout:
33911644Sdg	vfs_unbusy(mp);
340200796Strasz	VFS_UNLOCK_GIANT(vfslocked);
3411541Srgrimes	return (error);
3421541Srgrimes}
3431541Srgrimes
3441541Srgrimes/*
34511644Sdg * Get filesystem statistics.
34611644Sdg */
347182111Skib#ifndef _SYS_SYSPROTO_H_
348184408Skibstruct fstatfs_args {
349184408Skib	int fd;
350184408Skib	struct statfs *buf;
351184408Skib};
352184408Skib#endif
353184408Skibint
354182111Skibsys_fstatfs(td, uap)
355184408Skib	struct thread *td;
356184408Skib	register struct fstatfs_args /* {
357184408Skib		int fd;
358184408Skib		struct statfs *buf;
359184408Skib	} */ *uap;
360182111Skib{
361182111Skib	struct statfs sf;
362182111Skib	int error;
363182111Skib
364182111Skib	error = kern_fstatfs(td, uap->fd, &sf);
365183078Sjhb	if (error == 0)
366182111Skib		error = copyout(&sf, uap->buf, sizeof(sf));
367182111Skib	return (error);
368182111Skib}
369183078Sjhb
370182111Skibint
371182111Skibkern_fstatfs(struct thread *td, int fd, struct statfs *buf)
372182111Skib{
373182111Skib	struct file *fp;
374182111Skib	struct mount *mp;
375182111Skib	struct statfs *sp, sb;
376182111Skib	int vfslocked;
377182111Skib	struct vnode *vp;
378182111Skib	int error;
3791541Srgrimes
38034266Sjulian	AUDIT_ARG_FD(fd);
38134266Sjulian	error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp);
3821541Srgrimes	if (error)
38311644Sdg		return (error);
3841541Srgrimes	vp = fp->f_vnode;
385200796Strasz	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
386200796Strasz	vn_lock(vp, LK_SHARED | LK_RETRY);
387200796Strasz#ifdef AUDIT
388200796Strasz	AUDIT_ARG_VNODE1(vp);
389200796Strasz#endif
390200796Strasz	mp = vp->v_mount;
391200796Strasz	if (mp)
3921541Srgrimes		vfs_ref(mp);
3931541Srgrimes	VOP_UNLOCK(vp, 0);
39474822Srwatson	fdrop(fp, td);
395200796Strasz	if (mp == NULL) {
396200796Strasz		error = EBADF;
397200796Strasz		goto out;
398200796Strasz	}
399200796Strasz	error = vfs_busy(mp, 0);
400200796Strasz	vfs_rel(mp);
401191249Strasz	if (error) {
402200796Strasz		VFS_UNLOCK_GIANT(vfslocked);
403200796Strasz		return (error);
404200796Strasz	}
405200796Strasz#ifdef MAC
406105179Srwatson	error = mac_mount_check_stat(td->td_ucred, mp);
407105179Srwatson	if (error)
408200796Strasz		goto out;
409200796Strasz#endif
410200796Strasz	/*
411200796Strasz	 * Set these in case the underlying filesystem fails to do so.
412200796Strasz	 */
413200796Strasz	sp = &mp->mnt_stat;
414200796Strasz	sp->f_version = STATFS_VERSION;
415200796Strasz	sp->f_namemax = NAME_MAX;
416200796Strasz	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
417105179Srwatson	error = VFS_STATFS(mp, sp);
418105179Srwatson	if (error)
419200796Strasz		goto out;
420200796Strasz	if (priv_check(td, PRIV_VFS_GENERATION)) {
421200796Strasz		bcopy(sp, &sb, sizeof(sb));
422200796Strasz		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
423105179Srwatson		prison_enforce_statfs(td->td_ucred, mp, &sb);
424105179Srwatson		sp = &sb;
425105179Srwatson	}
426105179Srwatson	*buf = *sp;
427105179Srwatsonout:
428200796Strasz	if (mp)
429200796Strasz		vfs_unbusy(mp);
430200796Strasz	VFS_UNLOCK_GIANT(vfslocked);
431200796Strasz	return (error);
432105179Srwatson}
433191249Strasz
434200796Strasz/*
435200796Strasz * Get statistics on all filesystems.
436200796Strasz */
437105179Srwatson#ifndef _SYS_SYSPROTO_H_
438200796Straszstruct getfsstat_args {
439200796Strasz	struct statfs *buf;
44074822Srwatson	long bufsize;
441200796Strasz	int flags;
44274822Srwatson};
4431541Srgrimes#endif
4441541Srgrimesint
4451541Srgrimessys_getfsstat(td, uap)
446104094Sphk	struct thread *td;
4471541Srgrimes	register struct getfsstat_args /* {
4481541Srgrimes		struct statfs *buf;
4491541Srgrimes		long bufsize;
4501541Srgrimes		int flags;
4511541Srgrimes	} */ *uap;
4521541Srgrimes{
4531541Srgrimes
45496506Sphk	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
45596506Sphk	    uap->flags));
45696506Sphk}
4571541Srgrimes
458169898Spjd/*
459169898Spjd * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
460169898Spjd * 	The caller is responsible for freeing memory which will be allocated
461169898Spjd *	in '*buf'.
462169898Spjd */
463169898Spjdint
464169898Spjdkern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
465169898Spjd    enum uio_seg bufseg, int flags)
466169898Spjd{
467169898Spjd	struct mount *mp, *nmp;
4681541Srgrimes	struct statfs *sfsp, *sp, sb;
4691541Srgrimes	size_t count, maxcount;
4701541Srgrimes	int vfslocked;
47147028Sphk	int error;
4721541Srgrimes
4731541Srgrimes	maxcount = bufsize / sizeof(struct statfs);
47496873Siedowse	if (bufsize == 0)
4751541Srgrimes		sfsp = NULL;
4761541Srgrimes	else if (bufseg == UIO_USERSPACE)
47798542Smckusick		sfsp = *buf;
47898542Smckusick	else /* if (bufseg == UIO_SYSSPACE) */ {
47998542Smckusick		count = 0;
48098542Smckusick		mtx_lock(&mountlist_mtx);
48198542Smckusick		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
48298542Smckusick			count++;
48398542Smckusick		}
48498542Smckusick		mtx_unlock(&mountlist_mtx);
485191564Srmacklem		if (maxcount > count)
48698542Smckusick			maxcount = count;
48798542Smckusick		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
48898542Smckusick		    M_WAITOK);
48998542Smckusick	}
49098542Smckusick	count = 0;
49198542Smckusick	mtx_lock(&mountlist_mtx);
49298542Smckusick	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
493100201Smckusick		if (prison_canseemount(td->td_ucred, mp) != 0) {
494100201Smckusick			nmp = TAILQ_NEXT(mp, mnt_list);
49598542Smckusick			continue;
496191564Srmacklem		}
49798542Smckusick#ifdef MAC
4981541Srgrimes		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
4991541Srgrimes			nmp = TAILQ_NEXT(mp, mnt_list);
50049682Sphk			continue;
50137364Sbde		}
5021541Srgrimes#endif
5031541Srgrimes		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
5041541Srgrimes			nmp = TAILQ_NEXT(mp, mnt_list);
5051541Srgrimes			continue;
5061541Srgrimes		}
5071541Srgrimes		vfslocked = VFS_LOCK_GIANT(mp);
508104094Sphk		if (sfsp && count < maxcount) {
5091541Srgrimes			sp = &mp->mnt_stat;
5101541Srgrimes			/*
5111541Srgrimes			 * Set these in case the underlying filesystem
5121541Srgrimes			 * fails to do so.
5131541Srgrimes			 */
5141541Srgrimes			sp->f_version = STATFS_VERSION;
5151541Srgrimes			sp->f_namemax = NAME_MAX;
51611644Sdg			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
51711644Sdg			/*
51811644Sdg			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
51911644Sdg			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
520182371Sattilio			 * overrides MNT_WAIT.
5211541Srgrimes			 */
5221541Srgrimes			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
5231541Srgrimes			    (flags & MNT_WAIT)) &&
5241541Srgrimes			    (error = VFS_STATFS(mp, sp))) {
5251541Srgrimes				VFS_UNLOCK_GIANT(vfslocked);
5261541Srgrimes				mtx_lock(&mountlist_mtx);
5271541Srgrimes				nmp = TAILQ_NEXT(mp, mnt_list);
5281541Srgrimes				vfs_unbusy(mp);
5291541Srgrimes				continue;
5301541Srgrimes			}
5311541Srgrimes			if (priv_check(td, PRIV_VFS_GENERATION)) {
5321541Srgrimes				bcopy(sp, &sb, sizeof(sb));
533254627Sken				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
534254627Sken				prison_enforce_statfs(td->td_ucred, mp, &sb);
535254627Sken				sp = &sb;
536254627Sken			}
537254627Sken			if (bufseg == UIO_SYSSPACE)
538233875Sjh				bcopy(sp, sfsp, sizeof(*sp));
53911644Sdg			else /* if (bufseg == UIO_USERSPACE) */ {
54011644Sdg				error = copyout(sp, sfsp, sizeof(*sp));
54166039Srwatson				if (error) {
54267309Srwatson					vfs_unbusy(mp);
54367309Srwatson					VFS_UNLOCK_GIANT(vfslocked);
54466039Srwatson					return (error);
54583366Sjulian				}
5461541Srgrimes			}
54767309Srwatson			sfsp++;
548141543Scperciva		}
549141543Scperciva		VFS_UNLOCK_GIANT(vfslocked);
55083924Srwatson		count++;
55183924Srwatson		mtx_lock(&mountlist_mtx);
552141543Scperciva		nmp = TAILQ_NEXT(mp, mnt_list);
553141543Scperciva		vfs_unbusy(mp);
554141543Scperciva	}
555141543Scperciva	mtx_unlock(&mountlist_mtx);
55667309Srwatson	if (sfsp && count > maxcount)
557170587Srwatson		td->td_retval[0] = maxcount;
558233787Sjh	else
559233787Sjh		td->td_retval[0] = count;
56083987Srwatson	return (0);
56183987Srwatson}
56283987Srwatson
56383987Srwatson#ifdef COMPAT_FREEBSD4
564234103Sjh/*
565234103Sjh * Get old format filesystem statistics.
56663897Smckusick */
5671541Srgrimesstatic void cvtstatfs(struct statfs *, struct ostatfs *);
568233787Sjh
569233787Sjh#ifndef _SYS_SYSPROTO_H_
570234103Sjhstruct freebsd4_statfs_args {
5711541Srgrimes	char *path;
5721541Srgrimes	struct ostatfs *buf;
573234103Sjh};
574234103Sjh#endif
5751541Srgrimesint
576231122Skibfreebsd4_statfs(td, uap)
577233787Sjh	struct thread *td;
578231122Skib	struct freebsd4_statfs_args /* {
5791541Srgrimes		char *path;
580233787Sjh		struct ostatfs *buf;
581233787Sjh	} */ *uap;
582234421Sjh{
583234421Sjh	struct ostatfs osb;
584233787Sjh	struct statfs sf;
5851541Srgrimes	int error;
5861541Srgrimes
5871541Srgrimes	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
5881541Srgrimes	if (error)
5891541Srgrimes		return (error);
5903420Sphk	cvtstatfs(&sf, &osb);
59111644Sdg	return (copyout(&osb, uap->buf, sizeof(osb)));
59211644Sdg}
59367309Srwatson
59483366Sjulian/*
5951541Srgrimes * Get filesystem statistics.
5963420Sphk */
5971541Srgrimes#ifndef _SYS_SYSPROTO_H_
59811644Sdgstruct freebsd4_fstatfs_args {
599159109Smaxim	int fd;
600159109Smaxim	struct ostatfs *buf;
601159109Smaxim};
60211644Sdg#endif
60311644Sdgint
60411644Sdgfreebsd4_fstatfs(td, uap)
6051541Srgrimes	struct thread *td;
60611644Sdg	struct freebsd4_fstatfs_args /* {
60711644Sdg		int fd;
608159102Smaxim		struct ostatfs *buf;
609159102Smaxim	} */ *uap;
610159102Smaxim{
611159102Smaxim	struct ostatfs osb;
612159102Smaxim	struct statfs sf;
61311644Sdg	int error;
61411644Sdg
61563897Smckusick	error = kern_fstatfs(td, uap->fd, &sf);
61663897Smckusick	if (error)
61711644Sdg		return (error);
61834266Sjulian	cvtstatfs(&sf, &osb);
619159102Smaxim	return (copyout(&osb, uap->buf, sizeof(osb)));
620159102Smaxim}
621159102Smaxim
622159102Smaxim/*
623159102Smaxim * Get statistics on all filesystems.
624159102Smaxim */
625159102Smaxim#ifndef _SYS_SYSPROTO_H_
626159102Smaximstruct freebsd4_getfsstat_args {
627159102Smaxim	struct ostatfs *buf;
62811644Sdg	long bufsize;
629301100Skib	int flags;
630301100Skib};
631234605Strasz#endif
6321541Srgrimesint
6331541Srgrimesfreebsd4_getfsstat(td, uap)
634100207Smckusick	struct thread *td;
635100207Smckusick	register struct freebsd4_getfsstat_args /* {
636100207Smckusick		struct ostatfs *buf;
63711644Sdg		long bufsize;
63811644Sdg		int flags;
63963897Smckusick	} */ *uap;
64063897Smckusick{
641267816Skib	struct statfs *buf, *sp;
642267816Skib	struct ostatfs osb;
6431541Srgrimes	size_t count, size;
644292540Skib	int error;
64550137Sjdp
646292540Skib	count = uap->bufsize / sizeof(struct ostatfs);
647132775Skan	size = count * sizeof(struct statfs);
648132775Skan	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
64950137Sjdp	if (size > 0) {
65050137Sjdp		count = td->td_retval[0];
651292540Skib		sp = buf;
652132775Skan		while (count > 0 && error == 0) {
653132775Skan			cvtstatfs(sp, &osb);
65450137Sjdp			error = copyout(&osb, uap->buf, sizeof(osb));
655100207Smckusick			sp++;
656100207Smckusick			uap->buf++;
657100207Smckusick			count--;
658100207Smckusick		}
659100207Smckusick		free(buf, M_TEMP);
66042374Sbde	}
6613420Sphk	return (error);
6621541Srgrimes}
6631541Srgrimes
6641541Srgrimes/*
66511644Sdg * Implement fstatfs() for (NFS) file handles.
66611644Sdg */
66711644Sdg#ifndef _SYS_SYSPROTO_H_
66863897Smckusickstruct freebsd4_fhstatfs_args {
66963897Smckusick	struct fhandle *u_fhp;
67063897Smckusick	struct ostatfs *buf;
67183366Sjulian};
67211644Sdg#endif
6731541Srgrimesint
6741541Srgrimesfreebsd4_fhstatfs(td, uap)
6751541Srgrimes	struct thread *td;
676200796Strasz	struct freebsd4_fhstatfs_args /* {
677200796Strasz		struct fhandle *u_fhp;
678200796Strasz		struct ostatfs *buf;
679200796Strasz	} */ *uap;
680200796Strasz{
681200796Strasz	struct ostatfs osb;
682200796Strasz	struct statfs sf;
683200796Strasz	fhandle_t fh;
684200796Strasz	int error;
685200796Strasz
686200796Strasz	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
687200796Strasz	if (error)
688200796Strasz		return (error);
689200796Strasz	error = kern_fhstatfs(td, fh, &sf);
690200796Strasz	if (error)
691200796Strasz		return (error);
692200796Strasz	cvtstatfs(&sf, &osb);
693200796Strasz	return (copyout(&osb, uap->buf, sizeof(osb)));
694200796Strasz}
695200796Strasz
696200796Strasz/*
697200796Strasz * Convert a new format statfs structure to an old format statfs structure.
698200796Strasz */
699200796Straszstatic void
700200796Straszcvtstatfs(nsp, osp)
701200796Strasz	struct statfs *nsp;
7021541Srgrimes	struct ostatfs *osp;
703187526Sjhb{
704187526Sjhb
705187526Sjhb	statfs_scale_blocks(nsp, LONG_MAX);
706187526Sjhb	bzero(osp, sizeof(*osp));
707187526Sjhb	osp->f_bsize = nsp->f_bsize;
708187526Sjhb	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
709187526Sjhb	osp->f_blocks = nsp->f_blocks;
710187526Sjhb	osp->f_bfree = nsp->f_bfree;
711187526Sjhb	osp->f_bavail = nsp->f_bavail;
712187526Sjhb	osp->f_files = MIN(nsp->f_files, LONG_MAX);
713187526Sjhb	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
714187526Sjhb	osp->f_owner = nsp->f_owner;
715187526Sjhb	osp->f_type = nsp->f_type;
716187526Sjhb	osp->f_flags = nsp->f_flags;
717187526Sjhb	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
718231122Skib	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
719231122Skib	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
720231122Skib	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
721187526Sjhb	strlcpy(osp->f_fstypename, nsp->f_fstypename,
722187526Sjhb	    MIN(MFSNAMELEN, OMFSNAMELEN));
723187526Sjhb	strlcpy(osp->f_mntonname, nsp->f_mntonname,
724187526Sjhb	    MIN(MNAMELEN, OMNAMELEN));
7251541Srgrimes	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
7261541Srgrimes	    MIN(MNAMELEN, OMNAMELEN));
7271541Srgrimes	osp->f_fsid = nsp->f_fsid;
7281541Srgrimes}
72983366Sjulian#endif /* COMPAT_FREEBSD4 */
73096506Sphk
73196506Sphk/*
73296506Sphk * Change current working directory to a given file descriptor.
73383366Sjulian */
7341541Srgrimes#ifndef _SYS_SYSPROTO_H_
73596506Sphkstruct fchdir_args {
7361541Srgrimes	int	fd;
7371541Srgrimes};
73867309Srwatson#endif
73967309Srwatsonint
74067309Srwatsonsys_fchdir(td, uap)
74167309Srwatson	struct thread *td;
742200796Strasz	struct fchdir_args /* {
7431541Srgrimes		int fd;
74467309Srwatson	} */ *uap;
74567309Srwatson{
74667309Srwatson	register struct filedesc *fdp = td->td_proc->p_fd;
747118404Srwatson	struct vnode *vp, *tdp, *vpold;
748118404Srwatson	struct mount *mp;
74967309Srwatson	struct file *fp;
750118404Srwatson	int vfslocked;
751170587Srwatson	int error;
7521541Srgrimes
7531541Srgrimes	AUDIT_ARG_FD(uap->fd);
754118404Srwatson	if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
755170587Srwatson		return (error);
756118404Srwatson	vp = fp->f_vnode;
757118404Srwatson	VREF(vp);
758118404Srwatson	fdrop(fp, td);
759200796Strasz	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
760200796Strasz	vn_lock(vp, LK_SHARED | LK_RETRY);
761200796Strasz	AUDIT_ARG_VNODE1(vp);
762200796Strasz	error = change_dir(vp, td);
763200796Strasz	while (!error && (mp = vp->v_mountedhere) != NULL) {
764200796Strasz		int tvfslocked;
765200796Strasz		if (vfs_busy(mp, 0))
766200796Strasz			continue;
767200796Strasz		tvfslocked = VFS_LOCK_GIANT(mp);
768200796Strasz		error = VFS_ROOT(mp, LK_SHARED, &tdp);
7691541Srgrimes		vfs_unbusy(mp);
7701541Srgrimes		if (error) {
771132775Skan			VFS_UNLOCK_GIANT(tvfslocked);
7721541Srgrimes			break;
773200796Strasz		}
774200796Strasz		vput(vp);
775200796Strasz		VFS_UNLOCK_GIANT(vfslocked);
776200796Strasz		vp = tdp;
777231122Skib		vfslocked = tvfslocked;
778231122Skib	}
779231122Skib	if (error) {
780200796Strasz		vput(vp);
7811541Srgrimes		VFS_UNLOCK_GIANT(vfslocked);
7821541Srgrimes		return (error);
7831541Srgrimes	}
7841541Srgrimes	VOP_UNLOCK(vp, 0);
7851541Srgrimes	VFS_UNLOCK_GIANT(vfslocked);
7861541Srgrimes	FILEDESC_XLOCK(fdp);
7871541Srgrimes	vpold = fdp->fd_cdir;
78883366Sjulian	fdp->fd_cdir = vp;
78996506Sphk	FILEDESC_XUNLOCK(fdp);
7901541Srgrimes	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
7911541Srgrimes	vrele(vpold);
7921541Srgrimes	VFS_UNLOCK_GIANT(vfslocked);
79383366Sjulian	return (0);
7941541Srgrimes}
79596506Sphk
7961541Srgrimes/*
7971541Srgrimes * Change current working directory (``.'').
7981541Srgrimes */
7991541Srgrimes#ifndef _SYS_SYSPROTO_H_
80096506Sphkstruct chdir_args {
80198542Smckusick	char	*path;
8021541Srgrimes};
8031541Srgrimes#endif
8041541Srgrimesint
8051541Srgrimessys_chdir(td, uap)
8061541Srgrimes	struct thread *td;
8071541Srgrimes	struct chdir_args /* {
8081541Srgrimes		char *path;
809164033Srwatson	} */ *uap;
810164033Srwatson{
8111541Srgrimes
812200796Strasz	return (kern_chdir(td, uap->path, UIO_USERSPACE));
81367309Srwatson}
81467309Srwatson
815164033Srwatsonint
816164033Srwatsonkern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
817164033Srwatson{
81867309Srwatson	register struct filedesc *fdp = td->td_proc->p_fd;
819200796Strasz	int error;
82065928Sphk	struct nameidata nd;
821170587Srwatson	struct vnode *vp;
8221541Srgrimes	int vfslocked;
8231541Srgrimes
8241541Srgrimes	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
8251541Srgrimes	    MPSAFE, pathseg, path, td);
82643311Sdillon	if ((error = namei(&nd)) != 0)
8271541Srgrimes		return (error);
8281541Srgrimes	vfslocked = NDHASGIANT(&nd);
8291541Srgrimes	if ((error = change_dir(nd.ni_vp, td)) != 0) {
8301541Srgrimes		vput(nd.ni_vp);
8311541Srgrimes		VFS_UNLOCK_GIANT(vfslocked);
8321541Srgrimes		NDFREE(&nd, NDF_ONLY_PNBUF);
8331541Srgrimes		return (error);
8341541Srgrimes	}
8351541Srgrimes	VOP_UNLOCK(nd.ni_vp, 0);
83698542Smckusick	VFS_UNLOCK_GIANT(vfslocked);
8371541Srgrimes	NDFREE(&nd, NDF_ONLY_PNBUF);
8381541Srgrimes	FILEDESC_XLOCK(fdp);
8391541Srgrimes	vp = fdp->fd_cdir;
8401541Srgrimes	fdp->fd_cdir = nd.ni_vp;
8411541Srgrimes	FILEDESC_XUNLOCK(fdp);
8421541Srgrimes	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
8431541Srgrimes	vrele(vp);
8441541Srgrimes	VFS_UNLOCK_GIANT(vfslocked);
845132775Skan	return (0);
8461541Srgrimes}
847132775Skan
8481541Srgrimes/*
8491541Srgrimes * Helper function for raised chroot(2) security function:  Refuse if
8501541Srgrimes * any filedescriptors are open directories.
8511541Srgrimes */
8521541Srgrimesstatic int
8531541Srgrimeschroot_refuse_vdir_fds(fdp)
8541541Srgrimes	struct filedesc *fdp;
8551541Srgrimes{
8561541Srgrimes	struct vnode *vp;
8571541Srgrimes	struct file *fp;
8581541Srgrimes	int fd;
8591541Srgrimes
8601541Srgrimes	FILEDESC_LOCK_ASSERT(fdp);
8611541Srgrimes
8621541Srgrimes	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
8631541Srgrimes		fp = fget_locked(fdp, fd);
8641541Srgrimes		if (fp == NULL)
8651541Srgrimes			continue;
8661541Srgrimes		if (fp->f_type == DTYPE_VNODE) {
8671541Srgrimes			vp = fp->f_vnode;
8681541Srgrimes			if (vp->v_type == VDIR)
8691541Srgrimes				return (EPERM);
870132775Skan		}
8711541Srgrimes	}
872132775Skan	return (0);
8731541Srgrimes}
8741541Srgrimes
8751541Srgrimes/*
8761541Srgrimes * This sysctl determines if we will allow a process to chroot(2) if it
8771541Srgrimes * has a directory open:
8781541Srgrimes *	0: disallowed for all processes.
8791541Srgrimes *	1: allowed for processes that were not already chroot(2)'ed.
8801541Srgrimes *	2: allowed for all processes.
8811541Srgrimes */
8821541Srgrimes
8831541Srgrimesstatic int chroot_allow_open_directories = 1;
8841541Srgrimes
8851541SrgrimesSYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
8861541Srgrimes     &chroot_allow_open_directories, 0, "");
8871541Srgrimes
8881541Srgrimes/*
88923562Smpp * Change notion of root (``/'') directory.
8901541Srgrimes */
89127378Sbde#ifndef _SYS_SYSPROTO_H_
892167151Spjdstruct chroot_args {
893170587Srwatson	char	*path;
894167151Spjd};
895167151Spjd#endif
896167151Spjdint
89798542Smckusicksys_chroot(td, uap)
898231122Skib	struct thread *td;
899231122Skib	struct chroot_args /* {
9001541Srgrimes		char *path;
9011541Srgrimes	} */ *uap;
902104094Sphk{
9031541Srgrimes	int error;
9041541Srgrimes	struct nameidata nd;
9051541Srgrimes	int vfslocked;
9061541Srgrimes
9071541Srgrimes	error = priv_check(td, PRIV_VFS_CHROOT);
9081541Srgrimes	if (error)
9091541Srgrimes		return (error);
91011644Sdg	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
91111644Sdg	    AUDITVNODE1, UIO_USERSPACE, uap->path, td);
91211644Sdg	error = namei(&nd);
9131541Srgrimes	if (error)
914156897Stegge		goto error;
9151541Srgrimes	vfslocked = NDHASGIANT(&nd);
916156897Stegge	if ((error = change_dir(nd.ni_vp, td)) != 0)
9171541Srgrimes		goto e_vunlock;
91826360Sjulian#ifdef MAC
9191541Srgrimes	if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
9201541Srgrimes		goto e_vunlock;
9211541Srgrimes#endif
9221541Srgrimes	VOP_UNLOCK(nd.ni_vp, 0);
923163841Spjd	error = change_root(nd.ni_vp, td);
924163841Spjd	vrele(nd.ni_vp);
925163841Spjd	VFS_UNLOCK_GIANT(vfslocked);
92634266Sjulian	NDFREE(&nd, NDF_ONLY_PNBUF);
92782395Speter	return (error);
928101308Sjeffe_vunlock:
929156897Stegge	vput(nd.ni_vp);
930156897Stegge	VFS_UNLOCK_GIANT(vfslocked);
931156897Steggeerror:
932156897Stegge	NDFREE(&nd, NDF_ONLY_PNBUF);
933156897Stegge	return (error);
934156897Stegge}
935156897Stegge
936156897Stegge/*
937156897Stegge * Common routine for chroot and chdir.  Callers must provide a locked vnode
938156897Stegge * instance.
939175294Sattilio */
940156897Steggeint
941175202Sattiliochange_dir(vp, td)
942156897Stegge	struct vnode *vp;
9431541Srgrimes	struct thread *td;
9441541Srgrimes{
9451541Srgrimes	int error;
9461541Srgrimes
947308946Smckusick	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
948308946Smckusick	if (vp->v_type != VDIR)
949308946Smckusick		return (ENOTDIR);
950308946Smckusick#ifdef MAC
951308946Smckusick	error = mac_vnode_check_chdir(td->td_ucred, vp);
952308946Smckusick	if (error)
953308946Smckusick		return (error);
954308946Smckusick#endif
955308946Smckusick	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
956308946Smckusick	return (error);
957308946Smckusick}
9581541Srgrimes
9591541Srgrimes/*
9601541Srgrimes * Common routine for kern_chroot() and jail_attach().  The caller is
961104094Sphk * responsible for invoking priv_check() and mac_vnode_check_chroot() to
9621541Srgrimes * authorize this operation.
9631541Srgrimes */
9649842Sdgint
9651541Srgrimeschange_root(vp, td)
9661541Srgrimes	struct vnode *vp;
9671541Srgrimes	struct thread *td;
9681541Srgrimes{
96911644Sdg	struct filedesc *fdp;
97011644Sdg	struct vnode *oldvp;
97111644Sdg	int vfslocked;
97211644Sdg	int error;
97334266Sjulian
9741541Srgrimes	VFS_ASSERT_GIANT(vp->v_mount);
9751541Srgrimes	fdp = td->td_proc->p_fd;
976173464Sobrien	FILEDESC_XLOCK(fdp);
9771541Srgrimes	if (chroot_allow_open_directories == 0 ||
9781541Srgrimes	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
9791541Srgrimes		error = chroot_refuse_vdir_fds(fdp);
980308946Smckusick		if (error) {
981308946Smckusick			FILEDESC_XUNLOCK(fdp);
982308946Smckusick			return (error);
983308946Smckusick		}
984308946Smckusick	}
9859354Sdg	oldvp = fdp->fd_rdir;
9861541Srgrimes	fdp->fd_rdir = vp;
9871541Srgrimes	VREF(fdp->fd_rdir);
988103636Struckman	if (!fdp->fd_jdir) {
9891541Srgrimes		fdp->fd_jdir = vp;
990233817Smckusick		VREF(fdp->fd_jdir);
991233817Smckusick	}
992233817Smckusick	FILEDESC_XUNLOCK(fdp);
993233817Smckusick	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
994233817Smckusick	vrele(oldvp);
995233817Smckusick	VFS_UNLOCK_GIANT(vfslocked);
996233817Smckusick	return (0);
997233817Smckusick}
9981541Srgrimes
9991541Srgrimesstatic __inline cap_rights_t
1000103636Struckmanflags_to_rights(int flags)
10011541Srgrimes{
100234266Sjulian	cap_rights_t rights = 0;
10031541Srgrimes
1004132775Skan	switch ((flags & O_ACCMODE)) {
10051541Srgrimes	case O_RDONLY:
100634266Sjulian		rights |= CAP_READ;
1007207141Sjeff		break;
100848801Smckusick
100912117Sdyson	case O_RDWR:
101034266Sjulian		rights |= CAP_READ;
1011207141Sjeff		/* fall through */
101212117Sdyson
101312117Sdyson	case O_WRONLY:
10141541Srgrimes		rights |= CAP_WRITE;
101534266Sjulian		break;
10161541Srgrimes
1017132775Skan	case O_EXEC:
10181541Srgrimes		rights |= CAP_FEXECVE;
101955697Smckusick		break;
1020207141Sjeff	}
10211541Srgrimes
1022103636Struckman	if (flags & O_CREAT)
10231541Srgrimes		rights |= CAP_CREATE;
10241541Srgrimes
10251541Srgrimes	if (flags & O_TRUNC)
102622521Sdyson		rights |= CAP_FTRUNCATE;
102722521Sdyson
102822521Sdyson	if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
1029104094Sphk		rights |= CAP_FLOCK;
103022521Sdyson
103122521Sdyson	return (rights);
103222521Sdyson}
103322521Sdyson
103422521Sdyson/*
103522521Sdyson * Check permissions, allocate an open file structure, and call the device
103622521Sdyson * open routine if any.
103722521Sdyson */
103822521Sdyson#ifndef _SYS_SYSPROTO_H_
103922521Sdysonstruct open_args {
104022521Sdyson	char	*path;
10411541Srgrimes	int	flags;
104222521Sdyson	int	mode;
104322521Sdyson};
104422521Sdyson#endif
104522521Sdysonint
104622521Sdysonsys_open(td, uap)
104722521Sdyson	struct thread *td;
104822521Sdyson	register struct open_args /* {
104922521Sdyson		char *path;
105022521Sdyson		int flags;
1051173464Sobrien		int mode;
105222521Sdyson	} */ *uap;
105322521Sdyson{
105422521Sdyson
105522521Sdyson	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
105622521Sdyson}
105722521Sdyson
105822521Sdyson#ifndef _SYS_SYSPROTO_H_
105922521Sdysonstruct openat_args {
106022521Sdyson	int	fd;
106122521Sdyson	char	*path;
1062207141Sjeff	int	flag;
106322521Sdyson	int	mode;
106422521Sdyson};
106522521Sdyson#endif
106622521Sdysonint
1067173464Sobriensys_openat(struct thread *td, struct openat_args *uap)
106822521Sdyson{
106922521Sdyson
107022521Sdyson	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
107122521Sdyson	    uap->mode));
107222521Sdyson}
107334266Sjulian
107422521Sdysonint
107534266Sjuliankern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
107634266Sjulian    int mode)
107722521Sdyson{
107822521Sdyson
107922521Sdyson	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
108022521Sdyson}
1081207141Sjeff
1082207141Sjeffint
1083207141Sjeffkern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1084207141Sjeff    int flags, int mode)
1085207141Sjeff{
10861541Srgrimes	struct proc *p = td->td_proc;
10871541Srgrimes	struct filedesc *fdp = p->p_fd;
10881541Srgrimes	struct file *fp;
10891541Srgrimes	struct vnode *vp;
10901541Srgrimes	int cmode;
10911541Srgrimes	struct file *nfp;
10921541Srgrimes	int type, indx = -1, error, error_open;
10931541Srgrimes	struct flock lf;
10941541Srgrimes	struct nameidata nd;
10951541Srgrimes	int vfslocked;
10961541Srgrimes	cap_rights_t rights_needed = CAP_LOOKUP;
10971541Srgrimes
10981541Srgrimes	AUDIT_ARG_FFLAGS(flags);
10991541Srgrimes	AUDIT_ARG_MODE(mode);
11001541Srgrimes	/* XXX: audit dirfd */
11011541Srgrimes	rights_needed |= flags_to_rights(flags);
11021541Srgrimes	/*
11031541Srgrimes	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
11041541Srgrimes	 * may be specified.
11051541Srgrimes	 */
11061541Srgrimes	if (flags & O_EXEC) {
11071541Srgrimes		if (flags & O_ACCMODE)
11081541Srgrimes			return (EINVAL);
11091541Srgrimes	} else if ((flags & O_ACCMODE) == O_ACCMODE)
1110104094Sphk		return (EINVAL);
11111541Srgrimes	else
11121541Srgrimes		flags = FFLAGS(flags);
11131541Srgrimes
11141541Srgrimes	/*
11151541Srgrimes	 * allocate the file descriptor, but don't install a descriptor yet
11161541Srgrimes	 */
11171541Srgrimes	error = falloc_noinstall(td, &nfp);
11181541Srgrimes	if (error)
11191541Srgrimes		return (error);
11201541Srgrimes	/* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
11211541Srgrimes	fp = nfp;
112296506Sphk	/* Set the flags early so the finit in devfs can pick them up. */
11231541Srgrimes	fp->f_flag = flags & FMASK;
112411644Sdg	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1125207141Sjeff	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
112611644Sdg	    path, fd, rights_needed, td);
112711644Sdg	td->td_dupfd = -1;		/* XXX check for fdopen */
112883366Sjulian	error = vn_open(&nd, &flags, cmode, fp);
1129207141Sjeff	if (error) {
113034266Sjulian		/*
1131207141Sjeff		 * If the vn_open replaced the method vector, something
1132207141Sjeff		 * wonderous happened deep below and we just pass it up
1133209717Sjeff		 * pretending we know what we do.
1134207141Sjeff		 */
1135207141Sjeff		if (error == ENXIO && fp->f_ops != &badfileops)
11361541Srgrimes			goto success;
1137173464Sobrien
11381541Srgrimes		/*
11391541Srgrimes		 * handle special fdopen() case.  bleh.  dupfdopen() is
11401541Srgrimes		 * responsible for dropping the old contents of ofiles[indx]
11411541Srgrimes		 * if it succeeds.
1142207141Sjeff		 *
1143207141Sjeff		 * Don't do this for relative (capability) lookups; we don't
1144207141Sjeff		 * understand exactly what would happen, and we don't think
1145207141Sjeff		 * that it ever should.
1146207141Sjeff		 */
11471541Srgrimes		if ((nd.ni_strictrelative == 0) &&
11481541Srgrimes		    (error == ENODEV || error == ENXIO) &&
11491541Srgrimes		    (td->td_dupfd >= 0)) {
11501541Srgrimes			/* XXX from fdopen */
11511541Srgrimes			error_open = error;
11521541Srgrimes			if ((error = finstall(td, fp, &indx, flags)) != 0)
1153207141Sjeff				goto bad_unlocked;
1154207141Sjeff			if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
1155207141Sjeff			    flags, error_open)) == 0)
1156207141Sjeff				goto success;
1157207141Sjeff		}
1158207141Sjeff		/*
1159207141Sjeff		 * Clean up the descriptor, but only if another thread hadn't
1160207141Sjeff		 * replaced or closed it.
1161207141Sjeff		 */
1162207141Sjeff		if (indx != -1)
1163207141Sjeff			fdclose(fdp, fp, indx, td);
1164207141Sjeff		fdrop(fp, td);
1165207141Sjeff
1166207141Sjeff		return (error);
1167207141Sjeff	}
1168207141Sjeff	td->td_dupfd = 0;
1169207141Sjeff	vfslocked = NDHASGIANT(&nd);
1170207141Sjeff	NDFREE(&nd, NDF_ONLY_PNBUF);
1171207141Sjeff	vp = nd.ni_vp;
1172207141Sjeff
1173207141Sjeff	/*
1174207141Sjeff	 * Store the vnode, for any f_type. Typically, the vnode use
1175207141Sjeff	 * count is decremented by direct call to vn_closefile() for
1176207141Sjeff	 * files that switched type in the cdevsw fdopen() method.
1177207141Sjeff	 */
1178207141Sjeff	fp->f_vnode = vp;
1179207141Sjeff	/*
1180207141Sjeff	 * If the file wasn't claimed by devfs bind it to the normal
1181207141Sjeff	 * vnode operations here.
1182207141Sjeff	 */
1183207141Sjeff	if (fp->f_ops == &badfileops) {
1184207141Sjeff		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1185207141Sjeff		fp->f_seqcount = 1;
1186207141Sjeff		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
1187207141Sjeff	}
1188207141Sjeff
1189207141Sjeff	VOP_UNLOCK(vp, 0);
1190207141Sjeff	if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
1191207141Sjeff		lf.l_whence = SEEK_SET;
1192207141Sjeff		lf.l_start = 0;
1193207141Sjeff		lf.l_len = 0;
1194207141Sjeff		if (flags & O_EXLOCK)
1195207141Sjeff			lf.l_type = F_WRLCK;
1196207141Sjeff		else
1197207141Sjeff			lf.l_type = F_RDLCK;
1198207141Sjeff		type = F_FLOCK;
1199207141Sjeff		if ((flags & FNONBLOCK) == 0)
12001541Srgrimes			type |= F_WAIT;
1201207141Sjeff		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1202207141Sjeff			    type)) != 0)
1203207141Sjeff			goto bad;
12041541Srgrimes		atomic_set_int(&fp->f_flag, FHASLOCK);
1205207141Sjeff	}
1206207141Sjeff	if (flags & O_TRUNC) {
1207207141Sjeff		error = fo_truncate(fp, 0, td->td_ucred, td);
1208207141Sjeff		if (error)
1209207141Sjeff			goto bad;
1210207141Sjeff	}
1211207141Sjeff	VFS_UNLOCK_GIANT(vfslocked);
1212207141Sjeffsuccess:
1213207141Sjeff	/*
1214207141Sjeff	 * If we haven't already installed the FD (for dupfdopen), do so now.
1215207141Sjeff	 */
1216207141Sjeff	if (indx == -1) {
1217207141Sjeff#ifdef CAPABILITIES
1218207141Sjeff		if (nd.ni_strictrelative == 1) {
1219207141Sjeff			/*
1220207141Sjeff			 * We are doing a strict relative lookup; wrap the
1221207141Sjeff			 * result in a capability.
1222207141Sjeff			 */
1223207141Sjeff			if ((error = kern_capwrap(td, fp, nd.ni_baserights,
1224207141Sjeff			    &indx)) != 0)
1225207141Sjeff				goto bad_unlocked;
1226207141Sjeff		} else
1227207141Sjeff#endif
1228207141Sjeff			if ((error = finstall(td, fp, &indx, flags)) != 0)
1229207141Sjeff				goto bad_unlocked;
1230207141Sjeff
1231207141Sjeff	}
1232207141Sjeff
1233207141Sjeff	/*
1234207141Sjeff	 * Release our private reference, leaving the one associated with
1235207141Sjeff	 * the descriptor table intact.
1236207141Sjeff	 */
1237207141Sjeff	fdrop(fp, td);
1238207141Sjeff	td->td_retval[0] = indx;
1239207141Sjeff	return (0);
1240207141Sjeffbad:
1241207141Sjeff	VFS_UNLOCK_GIANT(vfslocked);
1242207141Sjeffbad_unlocked:
1243253998Smckusick	if (indx != -1)
1244207141Sjeff		fdclose(fdp, fp, indx, td);
1245207141Sjeff	fdrop(fp, td);
1246207141Sjeff	td->td_retval[0] = -1;
1247207141Sjeff	return (error);
1248207141Sjeff}
1249207141Sjeff
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point: opens the path write-only with O_CREAT and
 * O_TRUNC through kern_open().
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */
1273207141Sjeff
127444291Simp/*
1275207141Sjeff * Create a special file.
1276207141Sjeff */
12771541Srgrimes#ifndef _SYS_SYSPROTO_H_
1278207141Sjeffstruct mknod_args {
12791541Srgrimes	char	*path;
1280207141Sjeff	int	mode;
12811541Srgrimes	int	dev;
12821541Srgrimes};
12831541Srgrimes#endif
12841541Srgrimesint
1285207141Sjeffsys_mknod(td, uap)
1286207141Sjeff	struct thread *td;
12871541Srgrimes	register struct mknod_args /* {
1288207141Sjeff		char *path;
12891541Srgrimes		int mode;
1290207141Sjeff		int dev;
1291207141Sjeff	} */ *uap;
129238291Sjulian{
12931541Srgrimes
1294218838Skib	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1295218838Skib}
1296218838Skib
1297207141Sjeff#ifndef _SYS_SYSPROTO_H_
1298207141Sjeffstruct mknodat_args {
12991541Srgrimes	int	fd;
13001541Srgrimes	char	*path;
13011541Srgrimes	mode_t	mode;
13021541Srgrimes	dev_t	dev;
13031541Srgrimes};
1304166052Smpp#endif
13051541Srgrimesint
13061541Srgrimessys_mknodat(struct thread *td, struct mknodat_args *uap)
1307207141Sjeff{
13081541Srgrimes
13091541Srgrimes	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
1310207141Sjeff	    uap->dev));
13113420Sphk}
1312207141Sjeff
1313207141Sjeffint
1314207141Sjeffkern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1315207141Sjeff    int dev)
1316207141Sjeff{
1317207141Sjeff
1318207141Sjeff	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
1319207141Sjeff}
1320207141Sjeff
1321207141Sjeffint
1322207141Sjeffkern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1323207141Sjeff    int mode, int dev)
1324207141Sjeff{
1325207141Sjeff	struct vnode *vp;
1326207141Sjeff	struct mount *mp;
1327207141Sjeff	struct vattr vattr;
1328207141Sjeff	int error;
1329207141Sjeff	int whiteout = 0;
1330207141Sjeff	struct nameidata nd;
1331207141Sjeff	int vfslocked;
1332207141Sjeff
13331541Srgrimes	AUDIT_ARG_MODE(mode);
13341541Srgrimes	AUDIT_ARG_DEV(dev);
13351541Srgrimes	switch (mode & S_IFMT) {
1336207141Sjeff	case S_IFCHR:
1337207141Sjeff	case S_IFBLK:
1338207141Sjeff		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1339207141Sjeff		break;
13401541Srgrimes	case S_IFMT:
1341207141Sjeff		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1342207141Sjeff		break;
1343207141Sjeff	case S_IFWHT:
1344207141Sjeff		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1345207141Sjeff		break;
1346207141Sjeff	case S_IFIFO:
1347207141Sjeff		if (dev == 0)
1348207141Sjeff			return (kern_mkfifoat(td, fd, path, pathseg, mode));
1349207141Sjeff		/* FALLTHROUGH */
1350207141Sjeff	default:
1351207141Sjeff		error = EINVAL;
1352207141Sjeff		break;
1353207141Sjeff	}
1354207141Sjeff	if (error)
1355207141Sjeff		return (error);
1356207141Sjeffrestart:
13571541Srgrimes	bwillwrite();
13588876Srgrimes	NDINIT_ATRIGHTS(&nd, CREATE,
13591541Srgrimes	    LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
13601541Srgrimes	    CAP_MKFIFO, td);
13611541Srgrimes	if ((error = namei(&nd)) != 0)
13621541Srgrimes		return (error);
1363207141Sjeff	vfslocked = NDHASGIANT(&nd);
1364207141Sjeff	vp = nd.ni_vp;
136523562Smpp	if (vp != NULL) {
13661541Srgrimes		NDFREE(&nd, NDF_ONLY_PNBUF);
1367207141Sjeff		if (vp == nd.ni_dvp)
1368207141Sjeff			vrele(nd.ni_dvp);
1369207141Sjeff		else
1370207141Sjeff			vput(nd.ni_dvp);
1371207141Sjeff		vrele(vp);
1372207141Sjeff		VFS_UNLOCK_GIANT(vfslocked);
1373207141Sjeff		return (EEXIST);
1374207141Sjeff	} else {
13751541Srgrimes		VATTR_NULL(&vattr);
13761541Srgrimes		vattr.va_mode = (mode & ALLPERMS) &
13771541Srgrimes		    ~td->td_proc->p_fd->fd_cmask;
13781541Srgrimes		vattr.va_rdev = dev;
1379207141Sjeff		whiteout = 0;
1380207141Sjeff
1381207141Sjeff		switch (mode & S_IFMT) {
13821541Srgrimes		case S_IFMT:	/* used by badsect to flag bad sectors */
1383207141Sjeff			vattr.va_type = VBAD;
1384207141Sjeff			break;
1385207141Sjeff		case S_IFCHR:
1386207141Sjeff			vattr.va_type = VCHR;
13871541Srgrimes			break;
1388207141Sjeff		case S_IFBLK:
138923562Smpp			vattr.va_type = VBLK;
13901541Srgrimes			break;
13911541Srgrimes		case S_IFWHT:
13921541Srgrimes			whiteout = 1;
1393207141Sjeff			break;
139423562Smpp		default:
13951541Srgrimes			panic("kern_mknod: invalid mode");
139667309Srwatson		}
139767309Srwatson	}
139867309Srwatson	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
139967309Srwatson		NDFREE(&nd, NDF_ONLY_PNBUF);
14001541Srgrimes		vput(nd.ni_dvp);
1401207141Sjeff		VFS_UNLOCK_GIANT(vfslocked);
140283366Sjulian		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
140383366Sjulian			return (error);
14041541Srgrimes		goto restart;
14051541Srgrimes	}
14061541Srgrimes#ifdef MAC
14071541Srgrimes	if (error == 0 && !whiteout)
14081541Srgrimes		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
14091541Srgrimes		    &nd.ni_cnd, &vattr);
14101541Srgrimes#endif
14111541Srgrimes	if (!error) {
1412207141Sjeff		if (whiteout)
1413207141Sjeff			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1414207141Sjeff		else {
14151541Srgrimes			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
14161541Srgrimes						&nd.ni_cnd, &vattr);
14171541Srgrimes			if (error == 0)
14181541Srgrimes				vput(nd.ni_vp);
14191541Srgrimes		}
14201541Srgrimes	}
14211541Srgrimes	NDFREE(&nd, NDF_ONLY_PNBUF);
14221541Srgrimes	vput(nd.ni_dvp);
14231541Srgrimes	vn_finished_write(mp);
14241541Srgrimes	VFS_UNLOCK_GIANT(vfslocked);
14251541Srgrimes	return (error);
14261541Srgrimes}
142734266Sjulian
142836779Sjulian/*
1429207141Sjeff * Create a named pipe.
143055697Smckusick */
1431207141Sjeff#ifndef _SYS_SYSPROTO_H_
143236779Sjulianstruct mkfifo_args {
1433207141Sjeff	char	*path;
143455697Smckusick	int	mode;
1435207141Sjeff};
14361541Srgrimes#endif
1437207141Sjeffint
1438207141Sjeffsys_mkfifo(td, uap)
1439207141Sjeff	struct thread *td;
1440207141Sjeff	register struct mkfifo_args /* {
1441207141Sjeff		char *path;
1442207141Sjeff		int mode;
1443207141Sjeff	} */ *uap;
1444207141Sjeff{
1445207141Sjeff
1446207141Sjeff	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1447207141Sjeff}
1448207141Sjeff
1449207141Sjeff#ifndef _SYS_SYSPROTO_H_
1450207141Sjeffstruct mkfifoat_args {
1451207141Sjeff	int	fd;
145234266Sjulian	char	*path;
145334266Sjulian	mode_t	mode;
1454209717Sjeff};
1455209717Sjeff#endif
145634266Sjulianint
145734266Sjuliansys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
145834266Sjulian{
145934266Sjulian
146034266Sjulian	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
146134266Sjulian	    uap->mode));
146234266Sjulian}
146334266Sjulian
146455697Smckusickint
1465207141Sjeffkern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1466207141Sjeff{
1467207141Sjeff
146855697Smckusick	return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
1469207141Sjeff}
1470207141Sjeff
1471207141Sjeffint
147234266Sjuliankern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
14731541Srgrimes    int mode)
14741541Srgrimes{
14751541Srgrimes	struct mount *mp;
1476207141Sjeff	struct vattr vattr;
1477207141Sjeff	int error;
14781541Srgrimes	struct nameidata nd;
1479207141Sjeff	int vfslocked;
1480207141Sjeff
1481207141Sjeff	AUDIT_ARG_MODE(mode);
1482207141Sjeffrestart:
1483207141Sjeff	bwillwrite();
1484241011Smdf	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1485241011Smdf	    pathseg, path, fd, td);
14861541Srgrimes	if ((error = namei(&nd)) != 0)
14871541Srgrimes		return (error);
1488207141Sjeff	vfslocked = NDHASGIANT(&nd);
1489207141Sjeff	if (nd.ni_vp != NULL) {
1490207141Sjeff		NDFREE(&nd, NDF_ONLY_PNBUF);
1491207141Sjeff		if (nd.ni_vp == nd.ni_dvp)
14921541Srgrimes			vrele(nd.ni_dvp);
1493207141Sjeff		else
149484642Sdillon			vput(nd.ni_dvp);
1495207141Sjeff		vrele(nd.ni_vp);
1496207141Sjeff		VFS_UNLOCK_GIANT(vfslocked);
149784642Sdillon		return (EEXIST);
1498207141Sjeff	}
1499207141Sjeff	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1500207141Sjeff		NDFREE(&nd, NDF_ONLY_PNBUF);
1501207141Sjeff		vput(nd.ni_dvp);
1502207141Sjeff		VFS_UNLOCK_GIANT(vfslocked);
1503207141Sjeff		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1504207141Sjeff			return (error);
1505207141Sjeff		goto restart;
1506207141Sjeff	}
1507207141Sjeff	VATTR_NULL(&vattr);
1508207141Sjeff	vattr.va_type = VFIFO;
1509207141Sjeff	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1510216818Skib#ifdef MAC
1511216818Skib	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1512216818Skib	    &vattr);
1513207141Sjeff	if (error)
1514207141Sjeff		goto out;
1515207141Sjeff#endif
15161541Srgrimes	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1517207141Sjeff	if (error == 0)
1518226967Spho		vput(nd.ni_vp);
1519226971Spho#ifdef MAC
1520226971Sphoout:
1521226967Spho#endif
1522226967Spho	vput(nd.ni_dvp);
1523226967Spho	vn_finished_write(mp);
1524226967Spho	VFS_UNLOCK_GIANT(vfslocked);
1525232401Sjhb	NDFREE(&nd, NDF_ONLY_PNBUF);
1526232401Sjhb	return (error);
1527232401Sjhb}
1528232401Sjhb
1529232401Sjhb/*
1530226967Spho * Make a hard file link.
1531226967Spho */
1532232401Sjhb#ifndef _SYS_SYSPROTO_H_
1533232401Sjhbstruct link_args {
1534248422Skib	char	*path;
1535207141Sjeff	char	*link;
1536207141Sjeff};
1537207141Sjeff#endif
1538207141Sjeffint
1539207141Sjeffsys_link(td, uap)
1540207141Sjeff	struct thread *td;
1541207141Sjeff	register struct link_args /* {
1542207141Sjeff		char *path;
1543207141Sjeff		char *link;
1544207141Sjeff	} */ *uap;
1545207141Sjeff{
1546306182Skib
1547306182Skib	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1548306182Skib}
1549306182Skib
1550306182Skib#ifndef _SYS_SYSPROTO_H_
1551207141Sjeffstruct linkat_args {
1552306182Skib	int	fd1;
1553207141Sjeff	char	*path1;
1554207141Sjeff	int	fd2;
1555306182Skib	char	*path2;
1556306182Skib	int	flag;
1557306182Skib};
1558306182Skib#endif
1559306182Skibint
1560306182Skibsys_linkat(struct thread *td, struct linkat_args *uap)
1561207141Sjeff{
1562207141Sjeff	int flag;
1563207141Sjeff
1564207141Sjeff	flag = uap->flag;
15651541Srgrimes	if (flag & ~AT_SYMLINK_FOLLOW)
15661541Srgrimes		return (EINVAL);
15671541Srgrimes
1568207141Sjeff	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
1569207141Sjeff	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
1570207141Sjeff}
1571207141Sjeff
1572207141Sjeffint hardlink_check_uid = 0;
1573207141SjeffSYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1574207141Sjeff    &hardlink_check_uid, 0,
1575207141Sjeff    "Unprivileged processes cannot create hard links to files owned by other "
1576207141Sjeff    "users");
1577207141Sjeffstatic int hardlink_check_gid = 0;
1578207141SjeffSYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1579207141Sjeff    &hardlink_check_gid, 0,
1580207141Sjeff    "Unprivileged processes cannot create hard links to files owned by other "
1581207141Sjeff    "groups");
1582207141Sjeff
15831541Srgrimesstatic int
15841541Srgrimescan_hardlink(struct vnode *vp, struct ucred *cred)
15851541Srgrimes{
1586200796Strasz	struct vattr va;
1587200796Strasz	int error;
1588202934Strasz
1589202934Strasz	if (!hardlink_check_uid && !hardlink_check_gid)
1590202934Strasz		return (0);
1591202934Strasz
1592202934Strasz	error = VOP_GETATTR(vp, &va, cred);
1593202934Strasz	if (error != 0)
1594202934Strasz		return (error);
1595202934Strasz
1596202934Strasz	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1597202934Strasz		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1598202934Strasz		if (error)
1599202934Strasz			return (error);
1600202934Strasz	}
1601202934Strasz
1602202934Strasz	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1603202934Strasz		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1604202934Strasz		if (error)
1605202934Strasz			return (error);
1606202934Strasz	}
1607202934Strasz
1608202934Strasz	return (0);
1609202934Strasz}
1610202934Strasz
1611202934Straszint
1612202934Straszkern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1613202934Strasz{
1614202934Strasz
1615202934Strasz	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
1616202934Strasz}
1617202934Strasz
1618202934Straszint
1619202934Straszkern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
1620202934Strasz    enum uio_seg segflg, int follow)
1621202934Strasz{
1622202934Strasz	struct vnode *vp;
1623202934Strasz	struct mount *mp;
1624202934Strasz	struct nameidata nd;
1625202934Strasz	int vfslocked;
1626202934Strasz	int lvfslocked;
1627202934Strasz	int error;
1628202934Strasz
1629202934Strasz	bwillwrite();
1630202934Strasz	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
1631202934Strasz	    fd1, td);
1632202934Strasz
1633202934Strasz	if ((error = namei(&nd)) != 0)
1634202934Strasz		return (error);
1635202934Strasz	vfslocked = NDHASGIANT(&nd);
1636202934Strasz	NDFREE(&nd, NDF_ONLY_PNBUF);
1637202934Strasz	vp = nd.ni_vp;
1638202934Strasz	if (vp->v_type == VDIR) {
1639202934Strasz		vrele(vp);
1640202934Strasz		VFS_UNLOCK_GIANT(vfslocked);
1641202934Strasz		return (EPERM);		/* POSIX */
1642202934Strasz	}
1643202934Strasz	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1644202934Strasz		vrele(vp);
1645202934Strasz		VFS_UNLOCK_GIANT(vfslocked);
1646202934Strasz		return (error);
1647202934Strasz	}
1648202934Strasz	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1649202934Strasz	    segflg, path2, fd2, td);
1650202934Strasz	if ((error = namei(&nd)) == 0) {
1651202934Strasz		lvfslocked = NDHASGIANT(&nd);
1652202934Strasz		if (nd.ni_vp != NULL) {
1653202934Strasz			if (nd.ni_dvp == nd.ni_vp)
1654202934Strasz				vrele(nd.ni_dvp);
1655202934Strasz			else
1656202934Strasz				vput(nd.ni_dvp);
1657202934Strasz			vrele(nd.ni_vp);
1658202934Strasz			error = EEXIST;
1659202934Strasz		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
1660202934Strasz		    == 0) {
1661202934Strasz			error = can_hardlink(vp, td->td_ucred);
1662202934Strasz			if (error == 0)
1663202934Strasz#ifdef MAC
1664202934Strasz				error = mac_vnode_check_link(td->td_ucred,
1665202934Strasz				    nd.ni_dvp, vp, &nd.ni_cnd);
1666202934Strasz			if (error == 0)
1667202934Strasz#endif
1668202934Strasz				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1669202934Strasz			VOP_UNLOCK(vp, 0);
1670202934Strasz			vput(nd.ni_dvp);
1671202934Strasz		}
1672202934Strasz		NDFREE(&nd, NDF_ONLY_PNBUF);
1673202934Strasz		VFS_UNLOCK_GIANT(lvfslocked);
1674202934Strasz	}
1675202934Strasz	vrele(vp);
1676202934Strasz	vn_finished_write(mp);
1677202934Strasz	VFS_UNLOCK_GIANT(vfslocked);
1678202934Strasz	return (error);
1679202934Strasz}
1680202934Strasz
1681202934Strasz/*
1682202934Strasz * Make a symbolic link.
1683202934Strasz */
1684202934Strasz#ifndef _SYS_SYSPROTO_H_
1685202934Straszstruct symlink_args {
1686202934Strasz	char	*path;
1687202934Strasz	char	*link;
1688202934Strasz};
1689202934Strasz#endif
1690202934Straszint
1691202934Straszsys_symlink(td, uap)
1692202934Strasz	struct thread *td;
1693202934Strasz	register struct symlink_args /* {
1694202934Strasz		char *path;
1695202934Strasz		char *link;
1696202934Strasz	} */ *uap;
1697202934Strasz{
1698202934Strasz
1699202934Strasz	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1700202934Strasz}
1701202934Strasz
1702202934Strasz#ifndef _SYS_SYSPROTO_H_
1703202934Straszstruct symlinkat_args {
1704202934Strasz	char	*path;
1705202934Strasz	int	fd;
1706202934Strasz	char	*path2;
1707202934Strasz};
1708202934Strasz#endif
1709202934Straszint
1710202934Straszsys_symlinkat(struct thread *td, struct symlinkat_args *uap)
1711202934Strasz{
1712202934Strasz
1713202934Strasz	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
1714202934Strasz	    UIO_USERSPACE));
1715202934Strasz}
1716202934Strasz
1717202934Straszint
1718202934Straszkern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1719202934Strasz{
1720202934Strasz
1721202934Strasz	return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
1722202934Strasz}
1723202934Strasz
1724202934Straszint
1725202934Straszkern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
1726202934Strasz    enum uio_seg segflg)
1727202934Strasz{
1728308946Smckusick	struct mount *mp;
1729308946Smckusick	struct vattr vattr;
1730308946Smckusick	char *syspath;
1731308946Smckusick	int error;
1732202934Strasz	struct nameidata nd;
1733202934Strasz	int vfslocked;
1734202934Strasz
1735202934Strasz	if (segflg == UIO_SYSSPACE) {
1736202934Strasz		syspath = path1;
1737202934Strasz	} else {
1738202934Strasz		syspath = uma_zalloc(namei_zone, M_WAITOK);
1739202934Strasz		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
1740202934Strasz			goto out;
1741202934Strasz	}
1742202934Strasz	AUDIT_ARG_TEXT(syspath);
1743202934Straszrestart:
1744202934Strasz	bwillwrite();
1745200796Strasz	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1746200796Strasz	    segflg, path2, fd, td);
1747200796Strasz	if ((error = namei(&nd)) != 0)
1748200796Strasz		goto out;
1749200796Strasz	vfslocked = NDHASGIANT(&nd);
1750200796Strasz	if (nd.ni_vp) {
1751200796Strasz		NDFREE(&nd, NDF_ONLY_PNBUF);
1752200796Strasz		if (nd.ni_vp == nd.ni_dvp)
1753200796Strasz			vrele(nd.ni_dvp);
1754200796Strasz		else
1755200796Strasz			vput(nd.ni_dvp);
1756200796Strasz		vrele(nd.ni_vp);
1757200796Strasz		VFS_UNLOCK_GIANT(vfslocked);
1758200796Strasz		error = EEXIST;
1759200796Strasz		goto out;
1760200796Strasz	}
1761200796Strasz	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1762200796Strasz		NDFREE(&nd, NDF_ONLY_PNBUF);
1763200796Strasz		vput(nd.ni_dvp);
1764200796Strasz		VFS_UNLOCK_GIANT(vfslocked);
1765200796Strasz		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1766200796Strasz			goto out;
1767200796Strasz		goto restart;
1768200796Strasz	}
1769200796Strasz	VATTR_NULL(&vattr);
17701541Srgrimes	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
17711541Srgrimes#ifdef MAC
17721541Srgrimes	vattr.va_type = VLNK;
1773104094Sphk	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
17741541Srgrimes	    &vattr);
17751541Srgrimes	if (error)
17761541Srgrimes		goto out2;
17771541Srgrimes#endif
17781541Srgrimes	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
17791541Srgrimes	if (error == 0)
17801541Srgrimes		vput(nd.ni_vp);
17811541Srgrimes#ifdef MAC
178296506Sphkout2:
178396506Sphk#endif
178496506Sphk	NDFREE(&nd, NDF_ONLY_PNBUF);
178596506Sphk	vput(nd.ni_dvp);
17861541Srgrimes	vn_finished_write(mp);
178734266Sjulian	VFS_UNLOCK_GIANT(vfslocked);
17881541Srgrimesout:
178934266Sjulian	if (segflg != UIO_SYSSPACE)
17901541Srgrimes		uma_zfree(namei_zone, syspath);
179137539Sjulian	return (error);
17921541Srgrimes}
1793173464Sobrien
17941541Srgrimes/*
17951541Srgrimes * Delete a whiteout from the filesystem.
17961541Srgrimes */
17971541Srgrimesint
17981541Srgrimessys_undelete(td, uap)
17991541Srgrimes	struct thread *td;
18001541Srgrimes	register struct undelete_args /* {
18011541Srgrimes		char *path;
18021541Srgrimes	} */ *uap;
18031541Srgrimes{
18041541Srgrimes	int error;
18051541Srgrimes	struct mount *mp;
18061541Srgrimes	struct nameidata nd;
18071541Srgrimes	int vfslocked;
18081541Srgrimes
1809308946Smckusickrestart:
1810308946Smckusick	bwillwrite();
1811308946Smckusick	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1812308946Smckusick	    UIO_USERSPACE, uap->path, td);
1813308946Smckusick	error = namei(&nd);
181430474Sphk	if (error)
18153420Sphk		return (error);
18161541Srgrimes	vfslocked = NDHASGIANT(&nd);
18171541Srgrimes
18181541Srgrimes	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1819132775Skan		NDFREE(&nd, NDF_ONLY_PNBUF);
182031144Sjulian		if (nd.ni_vp == nd.ni_dvp)
182131144Sjulian			vrele(nd.ni_dvp);
18221541Srgrimes		else
182331144Sjulian			vput(nd.ni_dvp);
1824197269Sbrooks		if (nd.ni_vp)
182531147Sjulian			vrele(nd.ni_vp);
182659289Sjlemon		VFS_UNLOCK_GIANT(vfslocked);
182731144Sjulian		return (EEXIST);
182831683Speter	}
182959289Sjlemon	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
183031144Sjulian		NDFREE(&nd, NDF_ONLY_PNBUF);
183131683Speter		vput(nd.ni_dvp);
183232944Sjulian		VFS_UNLOCK_GIANT(vfslocked);
183331144Sjulian		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
183431144Sjulian			return (error);
183531683Speter		goto restart;
183631683Speter	}
183731144Sjulian	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
183831144Sjulian	NDFREE(&nd, NDF_ONLY_PNBUF);
1839132775Skan	vput(nd.ni_dvp);
184031144Sjulian	vn_finished_write(mp);
184132944Sjulian	VFS_UNLOCK_GIANT(vfslocked);
184231144Sjulian	return (error);
184331683Speter}
184431144Sjulian
184531144Sjulian/*
184631144Sjulian * Delete a name from the filesystem.
184731144Sjulian */
184831144Sjulian#ifndef _SYS_SYSPROTO_H_
1849150634Sjhbstruct unlink_args {
185031144Sjulian	char	*path;
185131147Sjulian};
1852197269Sbrooks#endif
185331144Sjulianint
185432944Sjuliansys_unlink(td, uap)
185531144Sjulian	struct thread *td;
185631683Speter	struct unlink_args /* {
185798542Smckusick		char *path;
185831144Sjulian	} */ *uap;
1859132775Skan{
186098542Smckusick
186131144Sjulian	return (kern_unlink(td, uap->path, UIO_USERSPACE));
186231144Sjulian}
186331683Speter
1864223020Smckusick#ifndef _SYS_SYSPROTO_H_
1865223020Smckusickstruct unlinkat_args {
186632944Sjulian	int	fd;
186731144Sjulian	char	*path;
186831144Sjulian	int	flag;
186931144Sjulian};
187031144Sjulian#endif
187131144Sjulianint
187231683Spetersys_unlinkat(struct thread *td, struct unlinkat_args *uap)
187331144Sjulian{
1874132775Skan	int flag = uap->flag;
187531144Sjulian	int fd = uap->fd;
18761541Srgrimes	char *path = uap->path;
18771541Srgrimes
1878223020Smckusick	if (flag & ~AT_REMOVEDIR)
1879223020Smckusick		return (EINVAL);
188030474Sphk
18811541Srgrimes	if (flag & AT_REMOVEDIR)
18821541Srgrimes		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
18831541Srgrimes	else
18841541Srgrimes		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
188531683Speter}
18861541Srgrimes
1887202934Straszint
1888202934Straszkern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
18891541Srgrimes{
189034266Sjulian
18911541Srgrimes	return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
1892132775Skan}
1893207141Sjeff
189498542Smckusickint
189522521Sdysonkern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1896132775Skan    ino_t oldinum)
189798542Smckusick{
18981541Srgrimes	struct mount *mp;
18991541Srgrimes	struct vnode *vp;
190034266Sjulian	int error;
190134266Sjulian	struct nameidata nd;
190234266Sjulian	struct stat sb;
19031541Srgrimes	int vfslocked;
190434266Sjulian
19051541Srgrimesrestart:
1906132775Skan	bwillwrite();
19071541Srgrimes	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
190834266Sjulian	    pathseg, path, fd, td);
1909207141Sjeff	if ((error = namei(&nd)) != 0)
1910207141Sjeff		return (error == EINVAL ? EPERM : error);
19113420Sphk	vfslocked = NDHASGIANT(&nd);
19121541Srgrimes	vp = nd.ni_vp;
1913101073Srwatson	if (vp->v_type == VDIR && oldinum == 0) {
1914105988Srwatson		error = EPERM;		/* POSIX */
1915172930Srwatson	} else if (oldinum != 0 &&
1916105988Srwatson		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
1917105988Srwatson		  sb.st_ino != oldinum) {
1918105988Srwatson			error = EIDRM;	/* Identifier removed */
1919105988Srwatson	} else {
1920101073Srwatson		/*
192174822Srwatson		 * The root of a mounted filesystem cannot be deleted.
1922202934Strasz		 *
1923202934Strasz		 * XXX: can this only be a VDIR case?
1924202934Strasz		 */
1925202934Strasz		if (vp->v_vflag & VV_ROOT)
192674822Srwatson			error = EBUSY;
1927202934Strasz	}
1928200796Strasz	if (error == 0) {
1929200796Strasz		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1930200796Strasz			NDFREE(&nd, NDF_ONLY_PNBUF);
1931200796Strasz			vput(nd.ni_dvp);
1932200796Strasz			if (vp == nd.ni_dvp)
193374822Srwatson				vrele(vp);
193474822Srwatson			else
193534266Sjulian				vput(vp);
193634266Sjulian			VFS_UNLOCK_GIANT(vfslocked);
193734266Sjulian			if ((error = vn_start_write(NULL, &mp,
1938145138Spjd			    V_XSLEEP | PCATCH)) != 0)
19391541Srgrimes				return (error);
19401541Srgrimes			goto restart;
19411541Srgrimes		}
19421541Srgrimes#ifdef MAC
19431541Srgrimes		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
19441541Srgrimes		    &nd.ni_cnd);
1945303376Skib		if (error)
194676132Sphk			goto out;
194798658Sdillon#endif
19481541Srgrimes		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
194934266Sjulian		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1950132775Skan#ifdef MAC
195134266Sjulianout:
195234266Sjulian#endif
195337539Sjulian		vn_finished_write(mp);
195437539Sjulian	}
195537539Sjulian	NDFREE(&nd, NDF_ONLY_PNBUF);
195637539Sjulian	vput(nd.ni_dvp);
195737539Sjulian	if (vp == nd.ni_dvp)
195837539Sjulian		vrele(vp);
195937539Sjulian	else
196037539Sjulian		vput(vp);
196137539Sjulian	VFS_UNLOCK_GIANT(vfslocked);
196237539Sjulian	return (error);
196337539Sjulian}
196437539Sjulian
196537539Sjulian/*
196637539Sjulian * Reposition read/write file offset.
196748801Smckusick */
196848801Smckusick#ifndef _SYS_SYSPROTO_H_
1969126853Sphkstruct lseek_args {
197034266Sjulian	int	fd;
19711541Srgrimes	int	pad;
197234266Sjulian	off_t	offset;
197335256Sdes	int	whence;
197434266Sjulian};
197534266Sjulian#endif
197634266Sjulianint
197734266Sjuliansys_lseek(td, uap)
197834266Sjulian	struct thread *td;
197934266Sjulian	register struct lseek_args /* {
198034266Sjulian		int fd;
198134266Sjulian		int pad;
198234266Sjulian		off_t offset;
198348801Smckusick		int whence;
198448801Smckusick	} */ *uap;
1985126853Sphk{
198634266Sjulian	struct ucred *cred = td->td_ucred;
198734266Sjulian	struct file *fp;
1988207141Sjeff	struct vnode *vp;
198934266Sjulian	struct vattr vattr;
199034266Sjulian	off_t foffset, offset, size;
199134266Sjulian	int error, noneg;
199234266Sjulian	int vfslocked;
199334266Sjulian
199434266Sjulian	AUDIT_ARG_FD(uap->fd);
19951541Srgrimes	if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0)
1996132775Skan		return (error);
19971541Srgrimes	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
199834266Sjulian		fdrop(fp, td);
199934266Sjulian		return (ESPIPE);
200034266Sjulian	}
200134266Sjulian	vp = fp->f_vnode;
200234266Sjulian	foffset = foffset_lock(fp, 0);
20031541Srgrimes	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2004132775Skan	noneg = (vp->v_type != VCHR);
20051541Srgrimes	offset = uap->offset;
200655697Smckusick	switch (uap->whence) {
2007207141Sjeff	case L_INCR:
2008207141Sjeff		if (noneg &&
20091541Srgrimes		    (foffset < 0 ||
201034266Sjulian		    (offset > 0 && foffset > OFF_MAX - offset))) {
20111541Srgrimes			error = EOVERFLOW;
20121541Srgrimes			break;
20131541Srgrimes		}
20141541Srgrimes		offset += foffset;
20151541Srgrimes		break;
20161541Srgrimes	case L_XTND:
20171541Srgrimes		vn_lock(vp, LK_SHARED | LK_RETRY);
2018104094Sphk		error = VOP_GETATTR(vp, &vattr, cred);
20191541Srgrimes		VOP_UNLOCK(vp, 0);
20201541Srgrimes		if (error)
20211541Srgrimes			break;
20221541Srgrimes
20231541Srgrimes		/*
20241541Srgrimes		 * If the file references a disk device, then fetch
20251541Srgrimes		 * the media size and use that to determine the ending
202611644Sdg		 * offset.
202711644Sdg		 */
202811644Sdg		if (vattr.va_size == 0 && vp->v_type == VCHR &&
202911644Sdg		    fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
2030209717Sjeff			vattr.va_size = size;
20311541Srgrimes		if (noneg &&
20321541Srgrimes		    (vattr.va_size > OFF_MAX ||
20331541Srgrimes		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
203425877Sphk			error = EOVERFLOW;
20351541Srgrimes			break;
203634266Sjulian		}
203734266Sjulian		offset += vattr.va_size;
203834266Sjulian		break;
203939796Smckusick	case L_SET:
204039796Smckusick		break;
204139796Smckusick	case SEEK_DATA:
20421541Srgrimes		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
20431541Srgrimes		break;
2044308946Smckusick	case SEEK_HOLE:
2045308946Smckusick		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
2046308946Smckusick		break;
204734266Sjulian	default:
204834266Sjulian		error = EINVAL;
204934266Sjulian	}
2050146356Smckusick	if (error == 0 && noneg && offset < 0)
20511541Srgrimes		error = EINVAL;
20521541Srgrimes	if (error != 0)
20531541Srgrimes		goto drop;
205426360Sjulian	VFS_KNOTE_UNLOCKED(vp, 0);
205526360Sjulian	*(off_t *)(td->td_retval) = offset;
20561541Srgrimesdrop:
20571541Srgrimes	fdrop(fp, td);
20581541Srgrimes	VFS_UNLOCK_GIANT(vfslocked);
205939796Smckusick	foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
206039796Smckusick	return (error);
206139796Smckusick}
206239796Smckusick
2063163841Spjd#if defined(COMPAT_43)
2064163841Spjd/*
2065163841Spjd * Reposition read/write file offset.
20661541Srgrimes */
20671541Srgrimes#ifndef _SYS_SYSPROTO_H_
20681541Srgrimesstruct olseek_args {
20691541Srgrimes	int	fd;
20701541Srgrimes	long	offset;
207155697Smckusick	int	whence;
207255697Smckusick};
2073207141Sjeff#endif
2074207141Sjeffint
207534266Sjulianolseek(td, uap)
207655697Smckusick	struct thread *td;
207755697Smckusick	register struct olseek_args /* {
207855697Smckusick		int fd;
2079207141Sjeff		long offset;
2080207141Sjeff		int whence;
20811541Srgrimes	} */ *uap;
208255697Smckusick{
20831541Srgrimes	struct lseek_args /* {
20841541Srgrimes		int fd;
2085209717Sjeff		int pad;
2086209717Sjeff		off_t offset;
2087209717Sjeff		int whence;
2088209717Sjeff	} */ nuap;
208955697Smckusick
20901541Srgrimes	nuap.fd = uap->fd;
209134266Sjulian	nuap.offset = uap->offset;
209234266Sjulian	nuap.whence = uap->whence;
2093132775Skan	return (sys_lseek(td, &nuap));
209455697Smckusick}
2095231122Skib#endif /* COMPAT_43 */
209634266Sjulian
2097132775Skan/* Version with the 'pad' argument */
209855697Smckusickint
209934266Sjulianfreebsd6_lseek(td, uap)
210034266Sjulian	struct thread *td;
210182364Siedowse	register struct freebsd6_lseek_args *uap;
210282364Siedowse{
210382364Siedowse	struct lseek_args ouap;
210482364Siedowse
210582364Siedowse	ouap.fd = uap->fd;
210634266Sjulian	ouap.offset = uap->offset;
21071541Srgrimes	ouap.whence = uap->whence;
21081541Srgrimes	return (sys_lseek(td, &ouap));
21091541Srgrimes}
21101541Srgrimes
21111541Srgrimes/*
21121541Srgrimes * Check access permissions using passed credentials.
2113104094Sphk */
21141541Srgrimesstatic int
21151541Srgrimesvn_access(vp, user_flags, cred, td)
21161541Srgrimes	struct vnode	*vp;
21171541Srgrimes	int		user_flags;
21181541Srgrimes	struct ucred	*cred;
21191541Srgrimes	struct thread	*td;
21201541Srgrimes{
21211541Srgrimes	int error;
21221541Srgrimes	accmode_t accmode;
212396506Sphk
212496506Sphk	/* Flags == 0 means only check for existence. */
21251541Srgrimes	error = 0;
21261541Srgrimes	if (user_flags) {
21273420Sphk		accmode = 0;
2128308946Smckusick		if (user_flags & R_OK)
21293420Sphk			accmode |= VREAD;
21301541Srgrimes		if (user_flags & W_OK)
21311541Srgrimes			accmode |= VWRITE;
21321541Srgrimes		if (user_flags & X_OK)
21331541Srgrimes			accmode |= VEXEC;
21341541Srgrimes#ifdef MAC
213598542Smckusick		error = mac_vnode_check_access(cred, vp, accmode);
21361541Srgrimes		if (error)
2137132775Skan			return (error);
21381541Srgrimes#endif
2139231122Skib		if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
21401541Srgrimes			error = VOP_ACCESS(vp, accmode, cred, td);
21411541Srgrimes	}
2142101744Srwatson	return (error);
2143194296Skib}
214453131Seivind
214553131Seivind/*
21461541Srgrimes * Check access permissions using "real" credentials.
21471541Srgrimes */
21481541Srgrimes#ifndef _SYS_SYSPROTO_H_
21491541Srgrimesstruct access_args {
21501541Srgrimes	char	*path;
21511541Srgrimes	int	flags;
21521541Srgrimes};
21531541Srgrimes#endif
21541541Srgrimesint
21551541Srgrimessys_access(td, uap)
21561541Srgrimes	struct thread *td;
21571541Srgrimes	register struct access_args /* {
215822521Sdyson		char *path;
2159161473Spjd		int flags;
216022521Sdyson	} */ *uap;
21611541Srgrimes{
21621541Srgrimes
2163252438Sgleb	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
216496506Sphk}
2165252438Sgleb
2166209717Sjeff#ifndef _SYS_SYSPROTO_H_
2167252438Sglebstruct faccessat_args {
2168252438Sgleb	int	dirfd;
2169252438Sgleb	char	*path;
2170252438Sgleb	int	mode;
2171252438Sgleb	int	flag;
2172252438Sgleb}
2173252438Sgleb#endif
217422521Sdysonint
21751541Srgrimessys_faccessat(struct thread *td, struct faccessat_args *uap)
2176252438Sgleb{
2177252438Sgleb
2178252438Sgleb	if (uap->flag & ~AT_EACCESS)
2179209717Sjeff		return (EINVAL);
2180209717Sjeff	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
2181252438Sgleb	    uap->mode));
2182332750Spfg}
2183332750Spfg
2184332750Spfgint
2185332750Spfgkern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2186252438Sgleb{
2187252438Sgleb
2188252438Sgleb	return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
2189252438Sgleb}
2190252438Sgleb
2191252438Sglebint
2192252438Sglebkern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2193252438Sgleb    int flags, int mode)
2194252438Sgleb{
2195252438Sgleb	struct ucred *cred, *tmpcred;
2196252438Sgleb	struct vnode *vp;
2197252438Sgleb	struct nameidata nd;
2198252438Sgleb	int vfslocked;
2199252438Sgleb	int error;
2200252438Sgleb
2201252438Sgleb	/*
2202252438Sgleb	 * Create and modify a temporary credential instead of one that
2203252438Sgleb	 * is potentially shared.
2204252438Sgleb	 */
2205252438Sgleb	if (!(flags & AT_EACCESS)) {
2206252438Sgleb		cred = td->td_ucred;
2207252438Sgleb		tmpcred = crdup(cred);
2208252438Sgleb		tmpcred->cr_uid = cred->cr_ruid;
2209252438Sgleb		tmpcred->cr_groups[0] = cred->cr_rgid;
2210252438Sgleb		td->td_ucred = tmpcred;
2211252438Sgleb	} else
2212252438Sgleb		cred = tmpcred = td->td_ucred;
2213252438Sgleb	AUDIT_ARG_VALUE(mode);
2214252438Sgleb	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2215252438Sgleb	    AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
2216252438Sgleb	if ((error = namei(&nd)) != 0)
2217252438Sgleb		goto out1;
2218252438Sgleb	vfslocked = NDHASGIANT(&nd);
2219252438Sgleb	vp = nd.ni_vp;
22201541Srgrimes
2221252438Sgleb	error = vn_access(vp, mode, tmpcred, td);
2222252438Sgleb	NDFREE(&nd, NDF_ONLY_PNBUF);
2223252438Sgleb	vput(vp);
2224252438Sgleb	VFS_UNLOCK_GIANT(vfslocked);
2225252438Sglebout1:
2226252438Sgleb	if (!(flags & AT_EACCESS)) {
2227252438Sgleb		td->td_ucred = cred;
2228252438Sgleb		crfree(tmpcred);
2229252438Sgleb	}
2230252438Sgleb	return (error);
2231252438Sgleb}
2232252438Sgleb
2233252438Sgleb/*
2234252438Sgleb * Check access permissions using "effective" credentials.
2235252438Sgleb */
2236252438Sgleb#ifndef _SYS_SYSPROTO_H_
2237252438Sglebstruct eaccess_args {
2238252438Sgleb	char	*path;
2239252438Sgleb	int	flags;
2240252438Sgleb};
2241252438Sgleb#endif
2242252438Sglebint
2243252438Sglebsys_eaccess(td, uap)
2244252438Sgleb	struct thread *td;
2245252438Sgleb	register struct eaccess_args /* {
2246252438Sgleb		char *path;
2247252438Sgleb		int flags;
2248252438Sgleb	} */ *uap;
2249252438Sgleb{
2250252438Sgleb
2251252438Sgleb	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
2252252438Sgleb}
2253252438Sgleb
2254252438Sglebint
2255252438Sglebkern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
2256252438Sgleb{
2257252438Sgleb
2258252438Sgleb	return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
2259252438Sgleb}
2260252438Sgleb
2261252438Sgleb#if defined(COMPAT_43)
2262252438Sgleb/*
2263252438Sgleb * Get file status; this version follows links.
22641541Srgrimes */
2265252438Sgleb#ifndef _SYS_SYSPROTO_H_
2266252438Sglebstruct ostat_args {
2267252438Sgleb	char	*path;
2268252438Sgleb	struct ostat *ub;
2269252438Sgleb};
2270252438Sgleb#endif
2271252438Sglebint
2272252438Sglebostat(td, uap)
2273252438Sgleb	struct thread *td;
2274252438Sgleb	register struct ostat_args /* {
2275252438Sgleb		char *path;
2276252438Sgleb		struct ostat *ub;
2277252438Sgleb	} */ *uap;
2278252438Sgleb{
22793167Sdfr	struct stat sb;
22803167Sdfr	struct ostat osb;
2281252438Sgleb	int error;
2282252438Sgleb
22831541Srgrimes	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
22841541Srgrimes	if (error)
22851541Srgrimes		return (error);
22861541Srgrimes	cvtstat(&sb, &osb);
22871541Srgrimes	error = copyout(&osb, uap->ub, sizeof (osb));
22881541Srgrimes	return (error);
2289104094Sphk}
22901541Srgrimes
22911541Srgrimes/*
22921541Srgrimes * Get file status; this version does not follow links.
22931541Srgrimes */
22941541Srgrimes#ifndef _SYS_SYSPROTO_H_
22951541Srgrimesstruct olstat_args {
22961541Srgrimes	char	*path;
229796506Sphk	struct ostat *ub;
229896506Sphk};
229998542Smckusick#endif
23001541Srgrimesint
23011541Srgrimesolstat(td, uap)
23021549Srgrimes	struct thread *td;
230398542Smckusick	register struct olstat_args /* {
2304142682Ssam		char *path;
23051541Srgrimes		struct ostat *ub;
23061541Srgrimes	} */ *uap;
23071541Srgrimes{
23081541Srgrimes	struct stat sb;
23091541Srgrimes	struct ostat osb;
23101541Srgrimes	int error;
23111541Srgrimes
231242957Sdillon	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
231376174Sphk	if (error)
231442957Sdillon		return (error);
23151541Srgrimes	cvtstat(&sb, &osb);
2316104094Sphk	error = copyout(&osb, uap->ub, sizeof (osb));
23171541Srgrimes	return (error);
23181541Srgrimes}
231937384Sjulian
23201541Srgrimes/*
23211541Srgrimes * Convert from an old to a new stat structure.
23221541Srgrimes */
232396506Sphkvoid
232496506Sphkcvtstat(st, ost)
2325137035Sphk	struct stat *st;
232696506Sphk	struct ostat *ost;
232798542Smckusick{
23281541Srgrimes
23291541Srgrimes	bzero(ost, sizeof(*ost));
23301541Srgrimes	ost->st_dev = st->st_dev;
23311541Srgrimes	ost->st_ino = st->st_ino;
2332100344Smckusick	ost->st_mode = st->st_mode;
233392363Smckusick	ost->st_nlink = st->st_nlink;
23343420Sphk	ost->st_uid = st->st_uid;
23351541Srgrimes	ost->st_gid = st->st_gid;
233658934Sphk	ost->st_rdev = st->st_rdev;
233759249Sphk	if (st->st_size < (quad_t)1 << 32)
2338186194Strasz		ost->st_size = st->st_size;
23391541Srgrimes	else
23401541Srgrimes		ost->st_size = -2;
23417695Sdg	ost->st_atim = st->st_atim;
23421541Srgrimes	ost->st_mtim = st->st_mtim;
23431541Srgrimes	ost->st_ctim = st->st_ctim;
234459249Sphk	ost->st_blksize = st->st_blksize;
23451541Srgrimes	ost->st_blocks = st->st_blocks;
23461541Srgrimes	ost->st_flags = st->st_flags;
2347121205Sphk	ost->st_gen = st->st_gen;
2348137035Sphk}
2349140051Sphk#endif /* COMPAT_43 */
23501541Srgrimes
23511541Srgrimes/*
23521541Srgrimes * Get file status; this version follows links.
23531541Srgrimes */
23541541Srgrimes#ifndef _SYS_SYSPROTO_H_
23551541Srgrimesstruct stat_args {
2356104094Sphk	char	*path;
23571541Srgrimes	struct stat *ub;
23581541Srgrimes};
23591541Srgrimes#endif
23601541Srgrimesint
23611541Srgrimessys_stat(td, uap)
236296506Sphk	struct thread *td;
236396506Sphk	register struct stat_args /* {
23641541Srgrimes		char *path;
2365142692Sphk		struct stat *ub;
2366142692Sphk	} */ *uap;
23671541Srgrimes{
23681541Srgrimes	struct stat sb;
23691541Srgrimes	int error;
23701541Srgrimes
23711541Srgrimes	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
23721541Srgrimes	if (error == 0)
23731541Srgrimes		error = copyout(&sb, uap->ub, sizeof (sb));
237436721Sbde	return (error);
23751541Srgrimes}
23761541Srgrimes
23771541Srgrimes#ifndef _SYS_SYSPROTO_H_
2378104094Sphkstruct fstatat_args {
23791541Srgrimes	int	fd;
23801541Srgrimes	char	*path;
23811541Srgrimes	struct stat	*buf;
23821541Srgrimes	int	flag;
23831541Srgrimes}
238483366Sjulian#endif
23851541Srgrimesint
23861541Srgrimessys_fstatat(struct thread *td, struct fstatat_args *uap)
238722521Sdyson{
2388163194Skib	struct stat sb;
23891541Srgrimes	int error;
2390103944Sjeff
2391163194Skib	error = kern_statat(td, uap->flag, uap->fd, uap->path,
2392169898Spjd	    UIO_USERSPACE, &sb);
2393169898Spjd	if (error == 0)
2394163194Skib		error = copyout(&sb, uap->buf, sizeof (sb));
2395138700Smarcel	return (error);
23961541Srgrimes}
23971541Srgrimes
23981541Srgrimesint
239977822Sjlemonkern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
240077822Sjlemon{
240177822Sjlemon
240277822Sjlemon	return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
2403104094Sphk}
240477822Sjlemon
240577822Sjlemonint
240677822Sjlemonkern_statat(struct thread *td, int flag, int fd, char *path,
240777822Sjlemon    enum uio_seg pathseg, struct stat *sbp)
240877822Sjlemon{
2409138700Smarcel
241077822Sjlemon	return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
2411147198Sssouhlal}
241277822Sjlemon
241377822Sjlemonint
241477822Sjlemonkern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
241577822Sjlemon    enum uio_seg pathseg, struct stat *sbp,
2416195296Strasz    void (*hook)(struct vnode *vp, struct stat *sbp))
2417195296Strasz{
2418195296Strasz	struct nameidata nd;
2419195296Strasz	struct stat sb;
2420195296Strasz	int error, vfslocked;
2421195296Strasz
2422195296Strasz	if (flag & ~AT_SYMLINK_NOFOLLOW)
2423195296Strasz		return (EINVAL);
2424195296Strasz
2425195296Strasz	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
2426195296Strasz	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
2427195296Strasz	    path, fd, CAP_FSTAT, td);
2428195296Strasz
2429200796Strasz	if ((error = namei(&nd)) != 0)
2430195296Strasz		return (error);
2431195296Strasz	vfslocked = NDHASGIANT(&nd);
2432195296Strasz	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2433195296Strasz	if (!error) {
2434195296Strasz		SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
2435195296Strasz		if (S_ISREG(sb.st_mode))
2436195296Strasz			SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
2437195296Strasz		if (__predict_false(hook != NULL))
2438195296Strasz			hook(nd.ni_vp, &sb);
2439195296Strasz	}
244031699Sbde	NDFREE(&nd, NDF_ONLY_PNBUF);
244131699Sbde	vput(nd.ni_vp);
2442104094Sphk	VFS_UNLOCK_GIANT(vfslocked);
244331699Sbde	if (error)
244431699Sbde		return (error);
244531699Sbde	*sbp = sb;
244631699Sbde#ifdef KTRACE
244731699Sbde	if (KTRPOINT(td, KTR_STRUCT))
244831699Sbde		ktrstat(&sb);
244931699Sbde#endif
2450106058Swollman	return (0);
245131699Sbde}
2452106058Swollman
245331699Sbde/*
245431699Sbde * Get file status; this version does not follow links.
245531699Sbde */
2456106058Swollman#ifndef _SYS_SYSPROTO_H_
245731699Sbdestruct lstat_args {
245831699Sbde	char	*path;
2459106058Swollman	struct stat *ub;
246031699Sbde};
246131699Sbde#endif
2462106058Swollmanint
246331699Sbdesys_lstat(td, uap)
246431699Sbde	struct thread *td;
2465106058Swollman	register struct lstat_args /* {
246631699Sbde		char *path;
246731699Sbde		struct stat *ub;
2468106058Swollman	} */ *uap;
246931699Sbde{
247031699Sbde	struct stat sb;
2471106058Swollman	int error;
2472105572Srwatson
2473105567Srwatson	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2474105567Srwatson	if (error == 0)
2475105567Srwatson		error = copyout(&sb, uap->ub, sizeof (sb));
2476105567Srwatson	return (error);
2477105567Srwatson}
2478105567Srwatson
2479105567Srwatsonint
2480105567Srwatsonkern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2481106058Swollman{
2482200796Strasz
2483200796Strasz	return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
2484200796Strasz	    sbp));
2485200796Strasz}
2486200796Strasz
2487200796Strasz/*
2488200796Strasz * Implementation of the NetBSD [l]stat() functions.
2489200796Strasz */
2490200796Straszvoid
2491200796Straszcvtnstat(sb, nsb)
2492200796Strasz	struct stat *sb;
2493200796Strasz	struct nstat *nsb;
2494105572Srwatson{
2495105571Srwatson	bzero(nsb, sizeof *nsb);
2496200796Strasz	nsb->st_dev = sb->st_dev;
2497105571Srwatson	nsb->st_ino = sb->st_ino;
2498105571Srwatson	nsb->st_mode = sb->st_mode;
2499105571Srwatson	nsb->st_nlink = sb->st_nlink;
2500105571Srwatson	nsb->st_uid = sb->st_uid;
2501105571Srwatson	nsb->st_gid = sb->st_gid;
2502105571Srwatson	nsb->st_rdev = sb->st_rdev;
2503106058Swollman	nsb->st_atim = sb->st_atim;
2504105572Srwatson	nsb->st_mtim = sb->st_mtim;
2505105567Srwatson	nsb->st_ctim = sb->st_ctim;
2506105567Srwatson	nsb->st_size = sb->st_size;
2507105567Srwatson	nsb->st_blocks = sb->st_blocks;
2508105567Srwatson	nsb->st_blksize = sb->st_blksize;
2509105567Srwatson	nsb->st_flags = sb->st_flags;
2510105567Srwatson	nsb->st_gen = sb->st_gen;
2511105567Srwatson	nsb->st_birthtim = sb->st_birthtim;
2512105567Srwatson}
2513106058Swollman
2514236044Skib#ifndef _SYS_SYSPROTO_H_
2515236044Skibstruct nstat_args {
2516236044Skib	char	*path;
2517106058Swollman	struct nstat *ub;
2518106058Swollman};
2519106058Swollman#endif
2520106058Swollmanint
2521106058Swollmansys_nstat(td, uap)
2522106058Swollman	struct thread *td;
2523106058Swollman	register struct nstat_args /* {
2524106058Swollman		char *path;
2525106058Swollman		struct nstat *ub;
2526106058Swollman	} */ *uap;
2527106058Swollman{
2528106058Swollman	struct stat sb;
2529106058Swollman	struct nstat nsb;
2530106058Swollman	int error;
2531106058Swollman
2532106058Swollman	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2533106058Swollman	if (error)
2534106058Swollman		return (error);
2535106058Swollman	cvtnstat(&sb, &nsb);
2536106058Swollman	error = copyout(&nsb, uap->ub, sizeof (nsb));
2537106058Swollman	return (error);
2538106058Swollman}
2539106058Swollman
2540106058Swollman/*
2541106058Swollman * NetBSD lstat.  Get file status; this version does not follow links.
2542106058Swollman */
2543106058Swollman#ifndef _SYS_SYSPROTO_H_
2544106058Swollmanstruct lstat_args {
2545106058Swollman	char	*path;
2546106058Swollman	struct stat *ub;
2547106058Swollman};
2548106058Swollman#endif
2549106058Swollmanint
255031699Sbdesys_nlstat(td, uap)
2551106058Swollman	struct thread *td;
2552106058Swollman	register struct nlstat_args /* {
255331699Sbde		char *path;
2554106058Swollman		struct nstat *ub;
255531699Sbde	} */ *uap;
255631699Sbde{
255731699Sbde	struct stat sb;
25581541Srgrimes	struct nstat nsb;
25591541Srgrimes	int error;
25601541Srgrimes
25611541Srgrimes	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2562135877Sphk	if (error)
25631541Srgrimes		return (error);
2564138290Sphk	cvtnstat(&sb, &nsb);
25651541Srgrimes	error = copyout(&nsb, uap->ub, sizeof (nsb));
25661541Srgrimes	return (error);
25671541Srgrimes}
256850521Sphk
25691541Srgrimes/*
25701541Srgrimes * Get configurable pathname variables.
25711541Srgrimes */
2572136980Sphk#ifndef _SYS_SYSPROTO_H_
2573136980Sphkstruct pathconf_args {
25741541Srgrimes	char	*path;
2575101308Sjeff	int	name;
25761541Srgrimes};
2577101308Sjeff#endif
25781541Srgrimesint
25791541Srgrimessys_pathconf(td, uap)
25801541Srgrimes	struct thread *td;
25811541Srgrimes	register struct pathconf_args /* {
25821541Srgrimes		char *path;
25831541Srgrimes		int name;
258474822Srwatson	} */ *uap;
25851541Srgrimes{
2586104094Sphk
2587308946Smckusick	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
25881541Srgrimes}
25891541Srgrimes
25901541Srgrimes#ifndef _SYS_SYSPROTO_H_
25911541Srgrimesstruct lpathconf_args {
2592308946Smckusick	char	*path;
25931541Srgrimes	int	name;
259496506Sphk};
259534266Sjulian#endif
25961541Srgrimesint
25971541Srgrimessys_lpathconf(td, uap)
25981541Srgrimes	struct thread *td;
25991541Srgrimes	register struct lpathconf_args /* {
2600173464Sobrien		char *path;
26011541Srgrimes		int name;
2602308946Smckusick	} */ *uap;
26031541Srgrimes{
26041541Srgrimes
26051541Srgrimes	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
26061541Srgrimes}
26071541Srgrimes
2608308946Smckusickint
2609308946Smckusickkern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
2610308946Smckusick    u_long flags)
2611308946Smckusick{
261230474Sphk	struct nameidata nd;
261354655Seivind	int error, vfslocked;
26141541Srgrimes
26151541Srgrimes	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
26161541Srgrimes	    flags, pathseg, path, td);
2617132775Skan	if ((error = namei(&nd)) != 0)
261831144Sjulian		return (error);
261931144Sjulian	vfslocked = NDHASGIANT(&nd);
262031144Sjulian	NDFREE(&nd, NDF_ONLY_PNBUF);
262131144Sjulian
2622194498Sbrooks	/* If asynchronous I/O is available, it works for all files. */
262331147Sjulian	if (name == _PC_ASYNC_IO)
262459289Sjlemon		td->td_retval[0] = async_io_version;
262531144Sjulian	else
262631683Speter		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
262731144Sjulian	vput(nd.ni_vp);
262859289Sjlemon	VFS_UNLOCK_GIANT(vfslocked);
262931144Sjulian	return (error);
263031144Sjulian}
263131144Sjulian
263231683Speter/*
263331683Speter * Return target name of a symbolic link.
263431683Speter */
263531144Sjulian#ifndef _SYS_SYSPROTO_H_
2636132775Skanstruct readlink_args {
263731144Sjulian	char	*path;
263831144Sjulian	char	*buf;
263931144Sjulian	size_t	count;
264031683Speter};
264131144Sjulian#endif
264231144Sjulianint
264331144Sjuliansys_readlink(td, uap)
264431144Sjulian	struct thread *td;
264531144Sjulian	register struct readlink_args /* {
2646150634Sjhb		char *path;
264731144Sjulian		char *buf;
264831147Sjulian		size_t count;
2649194498Sbrooks	} */ *uap;
265031144Sjulian{
265132944Sjulian
265231683Speter	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
265398542Smckusick	    UIO_USERSPACE, uap->count));
265431144Sjulian}
2655132775Skan#ifndef _SYS_SYSPROTO_H_
265698542Smckusickstruct readlinkat_args {
265731683Speter	int	fd;
265831144Sjulian	char	*path;
265931144Sjulian	char	*buf;
266031683Speter	size_t	bufsize;
2661223020Smckusick};
2662223020Smckusick#endif
266332944Sjulianint
266431144Sjuliansys_readlinkat(struct thread *td, struct readlinkat_args *uap)
266531144Sjulian{
266631144Sjulian
266731144Sjulian	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
266831144Sjulian	    uap->buf, UIO_USERSPACE, uap->bufsize));
266931683Speter}
267024438Speter
2671132775Skanint
26721541Srgrimeskern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
26731541Srgrimes    enum uio_seg bufseg, size_t count)
26741541Srgrimes{
2675223020Smckusick
2676223020Smckusick	return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
267730474Sphk	    count));
26781541Srgrimes}
26791541Srgrimes
26801541Srgrimesint
26811541Srgrimeskern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
268231683Speter    char *buf, enum uio_seg bufseg, size_t count)
26831541Srgrimes{
2684202934Strasz	struct vnode *vp;
2685202934Strasz	struct iovec aiov;
26861541Srgrimes	struct uio auio;
268734266Sjulian	int error;
26881541Srgrimes	struct nameidata nd;
2689132775Skan	int vfslocked;
269034266Sjulian
2691207141Sjeff	if (count > IOSIZE_MAX)
26921541Srgrimes		return (EINVAL);
2693170587Srwatson
26941541Srgrimes	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2695132775Skan	    AUDITVNODE1, pathseg, path, fd, td);
269698542Smckusick
26971541Srgrimes	if ((error = namei(&nd)) != 0)
269898542Smckusick		return (error);
269922521Sdyson	NDFREE(&nd, NDF_ONLY_PNBUF);
2700132775Skan	vfslocked = NDHASGIANT(&nd);
270198542Smckusick	vp = nd.ni_vp;
270222521Sdyson#ifdef MAC
27031541Srgrimes	error = mac_vnode_check_readlink(td->td_ucred, vp);
27041541Srgrimes	if (error) {
27051541Srgrimes		vput(vp);
270648801Smckusick		VFS_UNLOCK_GIANT(vfslocked);
27073420Sphk		return (error);
27081541Srgrimes	}
2709101073Srwatson#endif
2710105988Srwatson	if (vp->v_type != VLNK)
2711172930Srwatson		error = EINVAL;
2712105988Srwatson	else {
2713105988Srwatson		aiov.iov_base = buf;
2714105988Srwatson		aiov.iov_len = count;
2715105988Srwatson		auio.uio_iov = &aiov;
2716101073Srwatson		auio.uio_iovcnt = 1;
271774822Srwatson		auio.uio_offset = 0;
2718202934Strasz		auio.uio_rw = UIO_READ;
2719202934Strasz		auio.uio_segflg = bufseg;
2720202934Strasz		auio.uio_td = td;
2721202934Strasz		auio.uio_resid = count;
272274822Srwatson		error = VOP_READLINK(vp, &auio, td->td_ucred);
2723202934Strasz		td->td_retval[0] = count - auio.uio_resid;
2724200796Strasz	}
2725200796Strasz	vput(vp);
2726200796Strasz	VFS_UNLOCK_GIANT(vfslocked);
2727200796Strasz	return (error);
2728200796Strasz}
272974822Srwatson
273034266Sjulian/*
2731207141Sjeff * Common implementation code for chflags() and fchflags().
27323420Sphk */
27331541Srgrimesstatic int
27341541Srgrimessetfflags(td, vp, flags)
27351541Srgrimes	struct thread *td;
27361541Srgrimes	struct vnode *vp;
27371541Srgrimes	int flags;
27381541Srgrimes{
27391541Srgrimes	int error;
27401541Srgrimes	struct mount *mp;
27411541Srgrimes	struct vattr vattr;
274234266Sjulian
27431541Srgrimes	/* We can't support the value matching VNOVAL. */
2744132775Skan	if (flags == VNOVAL)
27451541Srgrimes		return (EOPNOTSUPP);
274655697Smckusick
2747207141Sjeff	/*
27481541Srgrimes	 * Prevent non-root users from setting flags on devices.  When
27491541Srgrimes	 * a device is reused, users can retain ownership of the device
27501541Srgrimes	 * if they are allowed to set flags and programs assume that
275130439Sphk	 * chown can't fail when done as root.
2752236044Skib	 */
2753236044Skib	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2754236044Skib		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2755236044Skib		if (error)
2756236044Skib			return (error);
2757236044Skib	}
2758236044Skib
2759236044Skib	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2760236044Skib		return (error);
2761236044Skib	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2762236044Skib	VATTR_NULL(&vattr);
2763236044Skib	vattr.va_flags = flags;
2764236044Skib#ifdef MAC
2765236044Skib	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
276630439Sphk	if (error == 0)
2767138290Sphk#endif
2768138290Sphk		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2769138290Sphk	VOP_UNLOCK(vp, 0);
2770138290Sphk	vn_finished_write(mp);
2771138290Sphk	return (error);
2772138290Sphk}
2773200796Strasz
2774138290Sphk/*
2775138290Sphk * Change flags of a file given a path name.
2776138290Sphk */
2777138290Sphk#ifndef _SYS_SYSPROTO_H_
2778138290Sphkstruct chflags_args {
2779138290Sphk	char	*path;
2780236044Skib	int	flags;
2781138290Sphk};
2782138290Sphk#endif
2783187564Sjhbint
2784138290Sphksys_chflags(td, uap)
2785138290Sphk	struct thread *td;
2786138290Sphk	register struct chflags_args /* {
2787138290Sphk		char *path;
2788138290Sphk		int flags;
2789138290Sphk	} */ *uap;
2790138290Sphk{
2791138290Sphk	int error;
2792138290Sphk	struct nameidata nd;
2793138290Sphk	int vfslocked;
2794138290Sphk
2795138290Sphk	AUDIT_ARG_FFLAGS(uap->flags);
2796138290Sphk	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2797101073Srwatson	    uap->path, td);
2798138290Sphk	if ((error = namei(&nd)) != 0)
2799101073Srwatson		return (error);
2800138290Sphk	NDFREE(&nd, NDF_ONLY_PNBUF);
2801138290Sphk	vfslocked = NDHASGIANT(&nd);
2802138290Sphk	error = setfflags(td, nd.ni_vp, uap->flags);
280395974Sphk	vrele(nd.ni_vp);
2804138868Sphk	VFS_UNLOCK_GIANT(vfslocked);
2805138868Sphk	return (error);
2806138868Sphk}
280795974Sphk
280874822Srwatson/*
2809138290Sphk * Same as chflags() but doesn't follow symlinks.
2810138290Sphk */
2811138290Sphkint
281274822Srwatsonsys_lchflags(td, uap)
281330439Sphk	struct thread *td;
281430439Sphk	register struct lchflags_args /* {
2815138290Sphk		char *path;
2816138290Sphk		int flags;
2817138290Sphk	} */ *uap;
2818200796Strasz{
2819138290Sphk	int error;
2820138290Sphk	struct nameidata nd;
2821138290Sphk	int vfslocked;
2822138290Sphk
2823187564Sjhb	AUDIT_ARG_FFLAGS(uap->flags);
2824195296Strasz	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2825138290Sphk	    uap->path, td);
2826138868Sphk	if ((error = namei(&nd)) != 0)
2827138290Sphk		return (error);
2828138290Sphk	vfslocked = NDHASGIANT(&nd);
2829101073Srwatson	NDFREE(&nd, NDF_ONLY_PNBUF);
2830138290Sphk	error = setfflags(td, nd.ni_vp, uap->flags);
2831101073Srwatson	vrele(nd.ni_vp);
2832138868Sphk	VFS_UNLOCK_GIANT(vfslocked);
283395974Sphk	return (error);
2834138868Sphk}
2835138868Sphk
2836138868Sphk/*
283795974Sphk * Change flags of a file given a file descriptor.
283874822Srwatson */
2839138290Sphk#ifndef _SYS_SYSPROTO_H_
2840138290Sphkstruct fchflags_args {
2841138290Sphk	int	fd;
284274822Srwatson	int	flags;
284330439Sphk};
2844#endif
2845int
2846sys_fchflags(td, uap)
2847	struct thread *td;
2848	register struct fchflags_args /* {
2849		int fd;
2850		int flags;
2851	} */ *uap;
2852{
2853	struct file *fp;
2854	int vfslocked;
2855	int error;
2856
2857	AUDIT_ARG_FD(uap->fd);
2858	AUDIT_ARG_FFLAGS(uap->flags);
2859	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS,
2860	    &fp)) != 0)
2861		return (error);
2862	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2863#ifdef AUDIT
2864	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2865	AUDIT_ARG_VNODE1(fp->f_vnode);
2866	VOP_UNLOCK(fp->f_vnode, 0);
2867#endif
2868	error = setfflags(td, fp->f_vnode, uap->flags);
2869	VFS_UNLOCK_GIANT(vfslocked);
2870	fdrop(fp, td);
2871	return (error);
2872}
2873
2874/*
2875 * Common implementation code for chmod(), lchmod() and fchmod().
2876 */
2877int
2878setfmode(td, cred, vp, mode)
2879	struct thread *td;
2880	struct ucred *cred;
2881	struct vnode *vp;
2882	int mode;
2883{
2884	int error;
2885	struct mount *mp;
2886	struct vattr vattr;
2887
2888	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2889		return (error);
2890	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2891	VATTR_NULL(&vattr);
2892	vattr.va_mode = mode & ALLPERMS;
2893#ifdef MAC
2894	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
2895	if (error == 0)
2896#endif
2897		error = VOP_SETATTR(vp, &vattr, cred);
2898	VOP_UNLOCK(vp, 0);
2899	vn_finished_write(mp);
2900	return (error);
2901}
2902
2903/*
2904 * Change mode of a file given path name.
2905 */
2906#ifndef _SYS_SYSPROTO_H_
2907struct chmod_args {
2908	char	*path;
2909	int	mode;
2910};
2911#endif
2912int
2913sys_chmod(td, uap)
2914	struct thread *td;
2915	register struct chmod_args /* {
2916		char *path;
2917		int mode;
2918	} */ *uap;
2919{
2920
2921	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2922}
2923
2924#ifndef _SYS_SYSPROTO_H_
2925struct fchmodat_args {
2926	int	dirfd;
2927	char	*path;
2928	mode_t	mode;
2929	int	flag;
2930}
2931#endif
2932int
2933sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
2934{
2935	int flag = uap->flag;
2936	int fd = uap->fd;
2937	char *path = uap->path;
2938	mode_t mode = uap->mode;
2939
2940	if (flag & ~AT_SYMLINK_NOFOLLOW)
2941		return (EINVAL);
2942
2943	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
2944}
2945
2946int
2947kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2948{
2949
2950	return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
2951}
2952
2953/*
2954 * Change mode of a file given path name (don't follow links.)
2955 */
2956#ifndef _SYS_SYSPROTO_H_
2957struct lchmod_args {
2958	char	*path;
2959	int	mode;
2960};
2961#endif
2962int
2963sys_lchmod(td, uap)
2964	struct thread *td;
2965	register struct lchmod_args /* {
2966		char *path;
2967		int mode;
2968	} */ *uap;
2969{
2970
2971	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2972	    uap->mode, AT_SYMLINK_NOFOLLOW));
2973}
2974
2975
2976int
2977kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2978    mode_t mode, int flag)
2979{
2980	int error;
2981	struct nameidata nd;
2982	int vfslocked;
2983	int follow;
2984
2985	AUDIT_ARG_MODE(mode);
2986	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2987	NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
2988	    path, fd, CAP_FCHMOD, td);
2989	if ((error = namei(&nd)) != 0)
2990		return (error);
2991	vfslocked = NDHASGIANT(&nd);
2992	NDFREE(&nd, NDF_ONLY_PNBUF);
2993	error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
2994	vrele(nd.ni_vp);
2995	VFS_UNLOCK_GIANT(vfslocked);
2996	return (error);
2997}
2998
2999/*
3000 * Change mode of a file given a file descriptor.
3001 */
3002#ifndef _SYS_SYSPROTO_H_
3003struct fchmod_args {
3004	int	fd;
3005	int	mode;
3006};
3007#endif
3008int
3009sys_fchmod(struct thread *td, struct fchmod_args *uap)
3010{
3011	struct file *fp;
3012	int error;
3013
3014	AUDIT_ARG_FD(uap->fd);
3015	AUDIT_ARG_MODE(uap->mode);
3016
3017	error = fget(td, uap->fd, CAP_FCHMOD, &fp);
3018	if (error != 0)
3019		return (error);
3020	error = fo_chmod(fp, uap->mode, td->td_ucred, td);
3021	fdrop(fp, td);
3022	return (error);
3023}
3024
3025/*
3026 * Common implementation for chown(), lchown(), and fchown()
3027 */
3028int
3029setfown(td, cred, vp, uid, gid)
3030	struct thread *td;
3031	struct ucred *cred;
3032	struct vnode *vp;
3033	uid_t uid;
3034	gid_t gid;
3035{
3036	int error;
3037	struct mount *mp;
3038	struct vattr vattr;
3039
3040	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3041		return (error);
3042	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3043	VATTR_NULL(&vattr);
3044	vattr.va_uid = uid;
3045	vattr.va_gid = gid;
3046#ifdef MAC
3047	error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
3048	    vattr.va_gid);
3049	if (error == 0)
3050#endif
3051		error = VOP_SETATTR(vp, &vattr, cred);
3052	VOP_UNLOCK(vp, 0);
3053	vn_finished_write(mp);
3054	return (error);
3055}
3056
3057/*
3058 * Set ownership given a path name.
3059 */
3060#ifndef _SYS_SYSPROTO_H_
3061struct chown_args {
3062	char	*path;
3063	int	uid;
3064	int	gid;
3065};
3066#endif
3067int
3068sys_chown(td, uap)
3069	struct thread *td;
3070	register struct chown_args /* {
3071		char *path;
3072		int uid;
3073		int gid;
3074	} */ *uap;
3075{
3076
3077	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3078}
3079
3080#ifndef _SYS_SYSPROTO_H_
3081struct fchownat_args {
3082	int fd;
3083	const char * path;
3084	uid_t uid;
3085	gid_t gid;
3086	int flag;
3087};
3088#endif
3089int
3090sys_fchownat(struct thread *td, struct fchownat_args *uap)
3091{
3092	int flag;
3093
3094	flag = uap->flag;
3095	if (flag & ~AT_SYMLINK_NOFOLLOW)
3096		return (EINVAL);
3097
3098	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
3099	    uap->gid, uap->flag));
3100}
3101
3102int
3103kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3104    int gid)
3105{
3106
3107	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
3108}
3109
3110int
3111kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3112    int uid, int gid, int flag)
3113{
3114	struct nameidata nd;
3115	int error, vfslocked, follow;
3116
3117	AUDIT_ARG_OWNER(uid, gid);
3118	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3119	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
3120	    path, fd, CAP_FCHOWN, td);
3121
3122	if ((error = namei(&nd)) != 0)
3123		return (error);
3124	vfslocked = NDHASGIANT(&nd);
3125	NDFREE(&nd, NDF_ONLY_PNBUF);
3126	error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
3127	vrele(nd.ni_vp);
3128	VFS_UNLOCK_GIANT(vfslocked);
3129	return (error);
3130}
3131
3132/*
3133 * Set ownership given a path name, do not cross symlinks.
3134 */
3135#ifndef _SYS_SYSPROTO_H_
3136struct lchown_args {
3137	char	*path;
3138	int	uid;
3139	int	gid;
3140};
3141#endif
3142int
3143sys_lchown(td, uap)
3144	struct thread *td;
3145	register struct lchown_args /* {
3146		char *path;
3147		int uid;
3148		int gid;
3149	} */ *uap;
3150{
3151
3152	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3153}
3154
3155int
3156kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3157    int gid)
3158{
3159
3160	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
3161	    AT_SYMLINK_NOFOLLOW));
3162}
3163
3164/*
3165 * Set ownership given a file descriptor.
3166 */
3167#ifndef _SYS_SYSPROTO_H_
3168struct fchown_args {
3169	int	fd;
3170	int	uid;
3171	int	gid;
3172};
3173#endif
3174int
3175sys_fchown(td, uap)
3176	struct thread *td;
3177	register struct fchown_args /* {
3178		int fd;
3179		int uid;
3180		int gid;
3181	} */ *uap;
3182{
3183	struct file *fp;
3184	int error;
3185
3186	AUDIT_ARG_FD(uap->fd);
3187	AUDIT_ARG_OWNER(uap->uid, uap->gid);
3188	error = fget(td, uap->fd, CAP_FCHOWN, &fp);
3189	if (error != 0)
3190		return (error);
3191	error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
3192	fdrop(fp, td);
3193	return (error);
3194}
3195
3196/*
3197 * Common implementation code for utimes(), lutimes(), and futimes().
3198 */
3199static int
3200getutimes(usrtvp, tvpseg, tsp)
3201	const struct timeval *usrtvp;
3202	enum uio_seg tvpseg;
3203	struct timespec *tsp;
3204{
3205	struct timeval tv[2];
3206	const struct timeval *tvp;
3207	int error;
3208
3209	if (usrtvp == NULL) {
3210		vfs_timestamp(&tsp[0]);
3211		tsp[1] = tsp[0];
3212	} else {
3213		if (tvpseg == UIO_SYSSPACE) {
3214			tvp = usrtvp;
3215		} else {
3216			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
3217				return (error);
3218			tvp = tv;
3219		}
3220
3221		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
3222		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
3223			return (EINVAL);
3224		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3225		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3226	}
3227	return (0);
3228}
3229
3230/*
3231 * Common implementation code for utimes(), lutimes(), and futimes().
3232 */
3233static int
3234setutimes(td, vp, ts, numtimes, nullflag)
3235	struct thread *td;
3236	struct vnode *vp;
3237	const struct timespec *ts;
3238	int numtimes;
3239	int nullflag;
3240{
3241	int error, setbirthtime;
3242	struct mount *mp;
3243	struct vattr vattr;
3244
3245	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3246		return (error);
3247	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3248	setbirthtime = 0;
3249	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
3250	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
3251		setbirthtime = 1;
3252	VATTR_NULL(&vattr);
3253	vattr.va_atime = ts[0];
3254	vattr.va_mtime = ts[1];
3255	if (setbirthtime)
3256		vattr.va_birthtime = ts[1];
3257	if (numtimes > 2)
3258		vattr.va_birthtime = ts[2];
3259	if (nullflag)
3260		vattr.va_vaflags |= VA_UTIMES_NULL;
3261#ifdef MAC
3262	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
3263	    vattr.va_mtime);
3264#endif
3265	if (error == 0)
3266		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3267	VOP_UNLOCK(vp, 0);
3268	vn_finished_write(mp);
3269	return (error);
3270}
3271
3272/*
3273 * Set the access and modification times of a file.
3274 */
3275#ifndef _SYS_SYSPROTO_H_
3276struct utimes_args {
3277	char	*path;
3278	struct	timeval *tptr;
3279};
3280#endif
3281int
3282sys_utimes(td, uap)
3283	struct thread *td;
3284	register struct utimes_args /* {
3285		char *path;
3286		struct timeval *tptr;
3287	} */ *uap;
3288{
3289
3290	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3291	    UIO_USERSPACE));
3292}
3293
3294#ifndef _SYS_SYSPROTO_H_
3295struct futimesat_args {
3296	int fd;
3297	const char * path;
3298	const struct timeval * times;
3299};
3300#endif
3301int
3302sys_futimesat(struct thread *td, struct futimesat_args *uap)
3303{
3304
3305	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
3306	    uap->times, UIO_USERSPACE));
3307}
3308
3309int
3310kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
3311    struct timeval *tptr, enum uio_seg tptrseg)
3312{
3313
3314	return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
3315}
3316
3317int
3318kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3319    struct timeval *tptr, enum uio_seg tptrseg)
3320{
3321	struct nameidata nd;
3322	struct timespec ts[2];
3323	int error, vfslocked;
3324
3325	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3326		return (error);
3327	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
3328	    path, fd, CAP_FUTIMES, td);
3329
3330	if ((error = namei(&nd)) != 0)
3331		return (error);
3332	vfslocked = NDHASGIANT(&nd);
3333	NDFREE(&nd, NDF_ONLY_PNBUF);
3334	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3335	vrele(nd.ni_vp);
3336	VFS_UNLOCK_GIANT(vfslocked);
3337	return (error);
3338}
3339
3340/*
3341 * Set the access and modification times of a file.
3342 */
3343#ifndef _SYS_SYSPROTO_H_
3344struct lutimes_args {
3345	char	*path;
3346	struct	timeval *tptr;
3347};
3348#endif
3349int
3350sys_lutimes(td, uap)
3351	struct thread *td;
3352	register struct lutimes_args /* {
3353		char *path;
3354		struct timeval *tptr;
3355	} */ *uap;
3356{
3357
3358	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3359	    UIO_USERSPACE));
3360}
3361
3362int
3363kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
3364    struct timeval *tptr, enum uio_seg tptrseg)
3365{
3366	struct timespec ts[2];
3367	int error;
3368	struct nameidata nd;
3369	int vfslocked;
3370
3371	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3372		return (error);
3373	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3374	if ((error = namei(&nd)) != 0)
3375		return (error);
3376	vfslocked = NDHASGIANT(&nd);
3377	NDFREE(&nd, NDF_ONLY_PNBUF);
3378	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3379	vrele(nd.ni_vp);
3380	VFS_UNLOCK_GIANT(vfslocked);
3381	return (error);
3382}
3383
3384/*
3385 * Set the access and modification times of a file.
3386 */
3387#ifndef _SYS_SYSPROTO_H_
3388struct futimes_args {
3389	int	fd;
3390	struct	timeval *tptr;
3391};
3392#endif
3393int
3394sys_futimes(td, uap)
3395	struct thread *td;
3396	register struct futimes_args /* {
3397		int  fd;
3398		struct timeval *tptr;
3399	} */ *uap;
3400{
3401
3402	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
3403}
3404
3405int
3406kern_futimes(struct thread *td, int fd, struct timeval *tptr,
3407    enum uio_seg tptrseg)
3408{
3409	struct timespec ts[2];
3410	struct file *fp;
3411	int vfslocked;
3412	int error;
3413
3414	AUDIT_ARG_FD(fd);
3415	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3416		return (error);
3417	if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp))
3418	    != 0)
3419		return (error);
3420	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3421#ifdef AUDIT
3422	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3423	AUDIT_ARG_VNODE1(fp->f_vnode);
3424	VOP_UNLOCK(fp->f_vnode, 0);
3425#endif
3426	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3427	VFS_UNLOCK_GIANT(vfslocked);
3428	fdrop(fp, td);
3429	return (error);
3430}
3431
3432/*
3433 * Truncate a file given its path name.
3434 */
3435#ifndef _SYS_SYSPROTO_H_
3436struct truncate_args {
3437	char	*path;
3438	int	pad;
3439	off_t	length;
3440};
3441#endif
3442int
3443sys_truncate(td, uap)
3444	struct thread *td;
3445	register struct truncate_args /* {
3446		char *path;
3447		int pad;
3448		off_t length;
3449	} */ *uap;
3450{
3451
3452	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3453}
3454
3455int
3456kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3457{
3458	struct mount *mp;
3459	struct vnode *vp;
3460	void *rl_cookie;
3461	struct vattr vattr;
3462	struct nameidata nd;
3463	int error, vfslocked;
3464
3465	if (length < 0)
3466		return(EINVAL);
3467	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3468	if ((error = namei(&nd)) != 0)
3469		return (error);
3470	vfslocked = NDHASGIANT(&nd);
3471	vp = nd.ni_vp;
3472	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
3473	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3474		vn_rangelock_unlock(vp, rl_cookie);
3475		vrele(vp);
3476		VFS_UNLOCK_GIANT(vfslocked);
3477		return (error);
3478	}
3479	NDFREE(&nd, NDF_ONLY_PNBUF);
3480	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3481	if (vp->v_type == VDIR)
3482		error = EISDIR;
3483#ifdef MAC
3484	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
3485	}
3486#endif
3487	else if ((error = vn_writechk(vp)) == 0 &&
3488	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3489		VATTR_NULL(&vattr);
3490		vattr.va_size = length;
3491		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3492	}
3493	VOP_UNLOCK(vp, 0);
3494	vn_finished_write(mp);
3495	vn_rangelock_unlock(vp, rl_cookie);
3496	vrele(vp);
3497	VFS_UNLOCK_GIANT(vfslocked);
3498	return (error);
3499}
3500
#if defined(COMPAT_43)
/*
 * Truncate a file given its path name.
 *
 * COMPAT_43 entry point taking a "long" length.  The arguments are
 * repacked into a struct truncate_args and handed to sys_truncate().
 * The "pad" member of the repacked structure is left unset;
 * sys_truncate() reads only "path" and "length".
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{
	struct truncate_args nuap;

	nuap.length = uap->length;
	nuap.path = uap->path;
	return (sys_truncate(td, &nuap));
}
#endif /* COMPAT_43 */
3530
3531/* Versions with the pad argument */
3532int
3533freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3534{
3535	struct truncate_args ouap;
3536
3537	ouap.path = uap->path;
3538	ouap.length = uap->length;
3539	return (sys_truncate(td, &ouap));
3540}
3541
3542int
3543freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3544{
3545	struct ftruncate_args ouap;
3546
3547	ouap.fd = uap->fd;
3548	ouap.length = uap->length;
3549	return (sys_ftruncate(td, &ouap));
3550}
3551
3552/*
3553 * Sync an open file.
3554 */
3555#ifndef _SYS_SYSPROTO_H_
3556struct fsync_args {
3557	int	fd;
3558};
3559#endif
3560int
3561sys_fsync(td, uap)
3562	struct thread *td;
3563	struct fsync_args /* {
3564		int fd;
3565	} */ *uap;
3566{
3567	struct vnode *vp;
3568	struct mount *mp;
3569	struct file *fp;
3570	int vfslocked;
3571	int error, lock_flags;
3572
3573	AUDIT_ARG_FD(uap->fd);
3574	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC,
3575	    &fp)) != 0)
3576		return (error);
3577	vp = fp->f_vnode;
3578	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3579	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3580		goto drop;
3581	if (MNT_SHARED_WRITES(mp) ||
3582	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
3583		lock_flags = LK_SHARED;
3584	} else {
3585		lock_flags = LK_EXCLUSIVE;
3586	}
3587	vn_lock(vp, lock_flags | LK_RETRY);
3588	AUDIT_ARG_VNODE1(vp);
3589	if (vp->v_object != NULL) {
3590		VM_OBJECT_LOCK(vp->v_object);
3591		vm_object_page_clean(vp->v_object, 0, 0, 0);
3592		VM_OBJECT_UNLOCK(vp->v_object);
3593	}
3594	error = VOP_FSYNC(vp, MNT_WAIT, td);
3595
3596	VOP_UNLOCK(vp, 0);
3597	vn_finished_write(mp);
3598drop:
3599	VFS_UNLOCK_GIANT(vfslocked);
3600	fdrop(fp, td);
3601	return (error);
3602}
3603
3604/*
3605 * Rename files.  Source and destination must either both be directories, or
3606 * both not be directories.  If target is a directory, it must be empty.
3607 */
3608#ifndef _SYS_SYSPROTO_H_
3609struct rename_args {
3610	char	*from;
3611	char	*to;
3612};
3613#endif
3614int
3615sys_rename(td, uap)
3616	struct thread *td;
3617	register struct rename_args /* {
3618		char *from;
3619		char *to;
3620	} */ *uap;
3621{
3622
3623	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3624}
3625
3626#ifndef _SYS_SYSPROTO_H_
3627struct renameat_args {
3628	int	oldfd;
3629	char	*old;
3630	int	newfd;
3631	char	*new;
3632};
3633#endif
3634int
3635sys_renameat(struct thread *td, struct renameat_args *uap)
3636{
3637
3638	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
3639	    UIO_USERSPACE));
3640}
3641
3642int
3643kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3644{
3645
3646	return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
3647}
3648
3649int
3650kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
3651    enum uio_seg pathseg)
3652{
3653	struct mount *mp = NULL;
3654	struct vnode *tvp, *fvp, *tdvp;
3655	struct nameidata fromnd, tond;
3656	int tvfslocked;
3657	int fvfslocked;
3658	int error;
3659
3660	bwillwrite();
3661#ifdef MAC
3662	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
3663	    MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
3664#else
3665	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3666	    AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
3667#endif
3668
3669	if ((error = namei(&fromnd)) != 0)
3670		return (error);
3671	fvfslocked = NDHASGIANT(&fromnd);
3672	tvfslocked = 0;
3673#ifdef MAC
3674	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
3675	    fromnd.ni_vp, &fromnd.ni_cnd);
3676	VOP_UNLOCK(fromnd.ni_dvp, 0);
3677	if (fromnd.ni_dvp != fromnd.ni_vp)
3678		VOP_UNLOCK(fromnd.ni_vp, 0);
3679#endif
3680	fvp = fromnd.ni_vp;
3681	if (error == 0)
3682		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3683	if (error != 0) {
3684		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3685		vrele(fromnd.ni_dvp);
3686		vrele(fvp);
3687		goto out1;
3688	}
3689	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
3690	    SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
3691	    td);
3692	if (fromnd.ni_vp->v_type == VDIR)
3693		tond.ni_cnd.cn_flags |= WILLBEDIR;
3694	if ((error = namei(&tond)) != 0) {
3695		/* Translate error code for rename("dir1", "dir2/."). */
3696		if (error == EISDIR && fvp->v_type == VDIR)
3697			error = EINVAL;
3698		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3699		vrele(fromnd.ni_dvp);
3700		vrele(fvp);
3701		vn_finished_write(mp);
3702		goto out1;
3703	}
3704	tvfslocked = NDHASGIANT(&tond);
3705	tdvp = tond.ni_dvp;
3706	tvp = tond.ni_vp;
3707	if (tvp != NULL) {
3708		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3709			error = ENOTDIR;
3710			goto out;
3711		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3712			error = EISDIR;
3713			goto out;
3714		}
3715	}
3716	if (fvp == tdvp) {
3717		error = EINVAL;
3718		goto out;
3719	}
3720	/*
3721	 * If the source is the same as the destination (that is, if they
3722	 * are links to the same vnode), then there is nothing to do.
3723	 */
3724	if (fvp == tvp)
3725		error = -1;
3726#ifdef MAC
3727	else
3728		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
3729		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3730#endif
3731out:
3732	if (!error) {
3733		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3734				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3735		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3736		NDFREE(&tond, NDF_ONLY_PNBUF);
3737	} else {
3738		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3739		NDFREE(&tond, NDF_ONLY_PNBUF);
3740		if (tvp)
3741			vput(tvp);
3742		if (tdvp == tvp)
3743			vrele(tdvp);
3744		else
3745			vput(tdvp);
3746		vrele(fromnd.ni_dvp);
3747		vrele(fvp);
3748	}
3749	vrele(tond.ni_startdir);
3750	vn_finished_write(mp);
3751out1:
3752	if (fromnd.ni_startdir)
3753		vrele(fromnd.ni_startdir);
3754	VFS_UNLOCK_GIANT(fvfslocked);
3755	VFS_UNLOCK_GIANT(tvfslocked);
3756	if (error == -1)
3757		return (0);
3758	return (error);
3759}
3760
3761/*
3762 * Make a directory file.
3763 */
3764#ifndef _SYS_SYSPROTO_H_
3765struct mkdir_args {
3766	char	*path;
3767	int	mode;
3768};
3769#endif
3770int
3771sys_mkdir(td, uap)
3772	struct thread *td;
3773	register struct mkdir_args /* {
3774		char *path;
3775		int mode;
3776	} */ *uap;
3777{
3778
3779	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3780}
3781
3782#ifndef _SYS_SYSPROTO_H_
3783struct mkdirat_args {
3784	int	fd;
3785	char	*path;
3786	mode_t	mode;
3787};
3788#endif
3789int
3790sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
3791{
3792
3793	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
3794}
3795
3796int
3797kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3798{
3799
3800	return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
3801}
3802
3803int
3804kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
3805    int mode)
3806{
3807	struct mount *mp;
3808	struct vnode *vp;
3809	struct vattr vattr;
3810	int error;
3811	struct nameidata nd;
3812	int vfslocked;
3813
3814	AUDIT_ARG_MODE(mode);
3815restart:
3816	bwillwrite();
3817	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
3818	    AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
3819	nd.ni_cnd.cn_flags |= WILLBEDIR;
3820	if ((error = namei(&nd)) != 0)
3821		return (error);
3822	vfslocked = NDHASGIANT(&nd);
3823	vp = nd.ni_vp;
3824	if (vp != NULL) {
3825		NDFREE(&nd, NDF_ONLY_PNBUF);
3826		/*
3827		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3828		 * the strange behaviour of leaving the vnode unlocked
3829		 * if the target is the same vnode as the parent.
3830		 */
3831		if (vp == nd.ni_dvp)
3832			vrele(nd.ni_dvp);
3833		else
3834			vput(nd.ni_dvp);
3835		vrele(vp);
3836		VFS_UNLOCK_GIANT(vfslocked);
3837		return (EEXIST);
3838	}
3839	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3840		NDFREE(&nd, NDF_ONLY_PNBUF);
3841		vput(nd.ni_dvp);
3842		VFS_UNLOCK_GIANT(vfslocked);
3843		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3844			return (error);
3845		goto restart;
3846	}
3847	VATTR_NULL(&vattr);
3848	vattr.va_type = VDIR;
3849	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3850#ifdef MAC
3851	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3852	    &vattr);
3853	if (error)
3854		goto out;
3855#endif
3856	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3857#ifdef MAC
3858out:
3859#endif
3860	NDFREE(&nd, NDF_ONLY_PNBUF);
3861	vput(nd.ni_dvp);
3862	if (!error)
3863		vput(nd.ni_vp);
3864	vn_finished_write(mp);
3865	VFS_UNLOCK_GIANT(vfslocked);
3866	return (error);
3867}
3868
3869/*
3870 * Remove a directory file.
3871 */
3872#ifndef _SYS_SYSPROTO_H_
3873struct rmdir_args {
3874	char	*path;
3875};
3876#endif
3877int
3878sys_rmdir(td, uap)
3879	struct thread *td;
3880	struct rmdir_args /* {
3881		char *path;
3882	} */ *uap;
3883{
3884
3885	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3886}
3887
3888int
3889kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3890{
3891
3892	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
3893}
3894
3895int
3896kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
3897{
3898	struct mount *mp;
3899	struct vnode *vp;
3900	int error;
3901	struct nameidata nd;
3902	int vfslocked;
3903
3904restart:
3905	bwillwrite();
3906	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
3907	    AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
3908	if ((error = namei(&nd)) != 0)
3909		return (error);
3910	vfslocked = NDHASGIANT(&nd);
3911	vp = nd.ni_vp;
3912	if (vp->v_type != VDIR) {
3913		error = ENOTDIR;
3914		goto out;
3915	}
3916	/*
3917	 * No rmdir "." please.
3918	 */
3919	if (nd.ni_dvp == vp) {
3920		error = EINVAL;
3921		goto out;
3922	}
3923	/*
3924	 * The root of a mounted filesystem cannot be deleted.
3925	 */
3926	if (vp->v_vflag & VV_ROOT) {
3927		error = EBUSY;
3928		goto out;
3929	}
3930#ifdef MAC
3931	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3932	    &nd.ni_cnd);
3933	if (error)
3934		goto out;
3935#endif
3936	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3937		NDFREE(&nd, NDF_ONLY_PNBUF);
3938		vput(vp);
3939		if (nd.ni_dvp == vp)
3940			vrele(nd.ni_dvp);
3941		else
3942			vput(nd.ni_dvp);
3943		VFS_UNLOCK_GIANT(vfslocked);
3944		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3945			return (error);
3946		goto restart;
3947	}
3948	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
3949	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3950	vn_finished_write(mp);
3951out:
3952	NDFREE(&nd, NDF_ONLY_PNBUF);
3953	vput(vp);
3954	if (nd.ni_dvp == vp)
3955		vrele(nd.ni_dvp);
3956	else
3957		vput(nd.ni_dvp);
3958	VFS_UNLOCK_GIANT(vfslocked);
3959	return (error);
3960}
3961
3962#ifdef COMPAT_43
3963/*
3964 * Read a block of directory entries in a filesystem independent format.
3965 */
3966#ifndef _SYS_SYSPROTO_H_
3967struct ogetdirentries_args {
3968	int	fd;
3969	char	*buf;
3970	u_int	count;
3971	long	*basep;
3972};
3973#endif
3974int
3975ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
3976{
3977	long loff;
3978	int error;
3979
3980	error = kern_ogetdirentries(td, uap, &loff);
3981	if (error == 0)
3982		error = copyout(&loff, uap->basep, sizeof(long));
3983	return (error);
3984}
3985
3986int
3987kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
3988    long *ploff)
3989{
3990	struct vnode *vp;
3991	struct file *fp;
3992	struct uio auio, kuio;
3993	struct iovec aiov, kiov;
3994	struct dirent *dp, *edp;
3995	caddr_t dirbuf;
3996	int error, eofflag, readcnt, vfslocked;
3997	long loff;
3998	off_t foffset;
3999
4000	/* XXX arbitrary sanity limit on `count'. */
4001	if (uap->count > 64 * 1024)
4002		return (EINVAL);
4003	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
4004	    &fp)) != 0)
4005		return (error);
4006	if ((fp->f_flag & FREAD) == 0) {
4007		fdrop(fp, td);
4008		return (EBADF);
4009	}
4010	vp = fp->f_vnode;
4011	foffset = foffset_lock(fp, 0);
4012unionread:
4013	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4014	if (vp->v_type != VDIR) {
4015		VFS_UNLOCK_GIANT(vfslocked);
4016		foffset_unlock(fp, foffset, 0);
4017		fdrop(fp, td);
4018		return (EINVAL);
4019	}
4020	aiov.iov_base = uap->buf;
4021	aiov.iov_len = uap->count;
4022	auio.uio_iov = &aiov;
4023	auio.uio_iovcnt = 1;
4024	auio.uio_rw = UIO_READ;
4025	auio.uio_segflg = UIO_USERSPACE;
4026	auio.uio_td = td;
4027	auio.uio_resid = uap->count;
4028	vn_lock(vp, LK_SHARED | LK_RETRY);
4029	loff = auio.uio_offset = foffset;
4030#ifdef MAC
4031	error = mac_vnode_check_readdir(td->td_ucred, vp);
4032	if (error) {
4033		VOP_UNLOCK(vp, 0);
4034		VFS_UNLOCK_GIANT(vfslocked);
4035		foffset_unlock(fp, foffset, FOF_NOUPDATE);
4036		fdrop(fp, td);
4037		return (error);
4038	}
4039#endif
4040#	if (BYTE_ORDER != LITTLE_ENDIAN)
4041		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
4042			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
4043			    NULL, NULL);
4044			foffset = auio.uio_offset;
4045		} else
4046#	endif
4047	{
4048		kuio = auio;
4049		kuio.uio_iov = &kiov;
4050		kuio.uio_segflg = UIO_SYSSPACE;
4051		kiov.iov_len = uap->count;
4052		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
4053		kiov.iov_base = dirbuf;
4054		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
4055			    NULL, NULL);
4056		foffset = kuio.uio_offset;
4057		if (error == 0) {
4058			readcnt = uap->count - kuio.uio_resid;
4059			edp = (struct dirent *)&dirbuf[readcnt];
4060			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
4061#				if (BYTE_ORDER == LITTLE_ENDIAN)
4062					/*
4063					 * The expected low byte of
4064					 * dp->d_namlen is our dp->d_type.
4065					 * The high MBZ byte of dp->d_namlen
4066					 * is our dp->d_namlen.
4067					 */
4068					dp->d_type = dp->d_namlen;
4069					dp->d_namlen = 0;
4070#				else
4071					/*
4072					 * The dp->d_type is the high byte
4073					 * of the expected dp->d_namlen,
4074					 * so must be zero'ed.
4075					 */
4076					dp->d_type = 0;
4077#				endif
4078				if (dp->d_reclen > 0) {
4079					dp = (struct dirent *)
4080					    ((char *)dp + dp->d_reclen);
4081				} else {
4082					error = EIO;
4083					break;
4084				}
4085			}
4086			if (dp >= edp)
4087				error = uiomove(dirbuf, readcnt, &auio);
4088		}
4089		free(dirbuf, M_TEMP);
4090	}
4091	if (error) {
4092		VOP_UNLOCK(vp, 0);
4093		VFS_UNLOCK_GIANT(vfslocked);
4094		foffset_unlock(fp, foffset, 0);
4095		fdrop(fp, td);
4096		return (error);
4097	}
4098	if (uap->count == auio.uio_resid &&
4099	    (vp->v_vflag & VV_ROOT) &&
4100	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4101		struct vnode *tvp = vp;
4102		vp = vp->v_mount->mnt_vnodecovered;
4103		VREF(vp);
4104		fp->f_vnode = vp;
4105		fp->f_data = vp;
4106		foffset = 0;
4107		vput(tvp);
4108		VFS_UNLOCK_GIANT(vfslocked);
4109		goto unionread;
4110	}
4111	VOP_UNLOCK(vp, 0);
4112	VFS_UNLOCK_GIANT(vfslocked);
4113	foffset_unlock(fp, foffset, 0);
4114	fdrop(fp, td);
4115	td->td_retval[0] = uap->count - auio.uio_resid;
4116	if (error == 0)
4117		*ploff = loff;
4118	return (error);
4119}
4120#endif /* COMPAT_43 */
4121
4122/*
4123 * Read a block of directory entries in a filesystem independent format.
4124 */
4125#ifndef _SYS_SYSPROTO_H_
4126struct getdirentries_args {
4127	int	fd;
4128	char	*buf;
4129	u_int	count;
4130	long	*basep;
4131};
4132#endif
4133int
4134sys_getdirentries(td, uap)
4135	struct thread *td;
4136	register struct getdirentries_args /* {
4137		int fd;
4138		char *buf;
4139		u_int count;
4140		long *basep;
4141	} */ *uap;
4142{
4143	long base;
4144	int error;
4145
4146	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
4147	if (error)
4148		return (error);
4149	if (uap->basep != NULL)
4150		error = copyout(&base, uap->basep, sizeof(long));
4151	return (error);
4152}
4153
4154int
4155kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
4156    long *basep)
4157{
4158	struct vnode *vp;
4159	struct file *fp;
4160	struct uio auio;
4161	struct iovec aiov;
4162	int vfslocked;
4163	long loff;
4164	int error, eofflag;
4165	off_t foffset;
4166
4167	AUDIT_ARG_FD(fd);
4168	auio.uio_resid = count;
4169	if (auio.uio_resid > IOSIZE_MAX)
4170		return (EINVAL);
4171	if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
4172	    &fp)) != 0)
4173		return (error);
4174	if ((fp->f_flag & FREAD) == 0) {
4175		fdrop(fp, td);
4176		return (EBADF);
4177	}
4178	vp = fp->f_vnode;
4179	foffset = foffset_lock(fp, 0);
4180unionread:
4181	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4182	if (vp->v_type != VDIR) {
4183		VFS_UNLOCK_GIANT(vfslocked);
4184		error = EINVAL;
4185		goto fail;
4186	}
4187	aiov.iov_base = buf;
4188	aiov.iov_len = count;
4189	auio.uio_iov = &aiov;
4190	auio.uio_iovcnt = 1;
4191	auio.uio_rw = UIO_READ;
4192	auio.uio_segflg = UIO_USERSPACE;
4193	auio.uio_td = td;
4194	vn_lock(vp, LK_SHARED | LK_RETRY);
4195	AUDIT_ARG_VNODE1(vp);
4196	loff = auio.uio_offset = foffset;
4197#ifdef MAC
4198	error = mac_vnode_check_readdir(td->td_ucred, vp);
4199	if (error == 0)
4200#endif
4201		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
4202		    NULL);
4203	foffset = auio.uio_offset;
4204	if (error) {
4205		VOP_UNLOCK(vp, 0);
4206		VFS_UNLOCK_GIANT(vfslocked);
4207		goto fail;
4208	}
4209	if (count == auio.uio_resid &&
4210	    (vp->v_vflag & VV_ROOT) &&
4211	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4212		struct vnode *tvp = vp;
4213		vp = vp->v_mount->mnt_vnodecovered;
4214		VREF(vp);
4215		fp->f_vnode = vp;
4216		fp->f_data = vp;
4217		foffset = 0;
4218		vput(tvp);
4219		VFS_UNLOCK_GIANT(vfslocked);
4220		goto unionread;
4221	}
4222	VOP_UNLOCK(vp, 0);
4223	VFS_UNLOCK_GIANT(vfslocked);
4224	*basep = loff;
4225	td->td_retval[0] = count - auio.uio_resid;
4226fail:
4227	foffset_unlock(fp, foffset, 0);
4228	fdrop(fp, td);
4229	return (error);
4230}
4231
4232#ifndef _SYS_SYSPROTO_H_
4233struct getdents_args {
4234	int fd;
4235	char *buf;
4236	size_t count;
4237};
4238#endif
4239int
4240sys_getdents(td, uap)
4241	struct thread *td;
4242	register struct getdents_args /* {
4243		int fd;
4244		char *buf;
4245		u_int count;
4246	} */ *uap;
4247{
4248	struct getdirentries_args ap;
4249	ap.fd = uap->fd;
4250	ap.buf = uap->buf;
4251	ap.count = uap->count;
4252	ap.basep = NULL;
4253	return (sys_getdirentries(td, &ap));
4254}
4255
4256/*
4257 * Set the mode mask for creation of filesystem nodes.
4258 */
4259#ifndef _SYS_SYSPROTO_H_
4260struct umask_args {
4261	int	newmask;
4262};
4263#endif
4264int
4265sys_umask(td, uap)
4266	struct thread *td;
4267	struct umask_args /* {
4268		int newmask;
4269	} */ *uap;
4270{
4271	register struct filedesc *fdp;
4272
4273	FILEDESC_XLOCK(td->td_proc->p_fd);
4274	fdp = td->td_proc->p_fd;
4275	td->td_retval[0] = fdp->fd_cmask;
4276	fdp->fd_cmask = uap->newmask & ALLPERMS;
4277	FILEDESC_XUNLOCK(td->td_proc->p_fd);
4278	return (0);
4279}
4280
4281/*
4282 * Void all references to file by ripping underlying filesystem away from
4283 * vnode.
4284 */
4285#ifndef _SYS_SYSPROTO_H_
4286struct revoke_args {
4287	char	*path;
4288};
4289#endif
4290int
4291sys_revoke(td, uap)
4292	struct thread *td;
4293	register struct revoke_args /* {
4294		char *path;
4295	} */ *uap;
4296{
4297	struct vnode *vp;
4298	struct vattr vattr;
4299	int error;
4300	struct nameidata nd;
4301	int vfslocked;
4302
4303	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4304	    UIO_USERSPACE, uap->path, td);
4305	if ((error = namei(&nd)) != 0)
4306		return (error);
4307	vfslocked = NDHASGIANT(&nd);
4308	vp = nd.ni_vp;
4309	NDFREE(&nd, NDF_ONLY_PNBUF);
4310	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
4311		error = EINVAL;
4312		goto out;
4313	}
4314#ifdef MAC
4315	error = mac_vnode_check_revoke(td->td_ucred, vp);
4316	if (error)
4317		goto out;
4318#endif
4319	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
4320	if (error)
4321		goto out;
4322	if (td->td_ucred->cr_uid != vattr.va_uid) {
4323		error = priv_check(td, PRIV_VFS_ADMIN);
4324		if (error)
4325			goto out;
4326	}
4327	if (vcount(vp) > 1)
4328		VOP_REVOKE(vp, REVOKEALL);
4329out:
4330	vput(vp);
4331	VFS_UNLOCK_GIANT(vfslocked);
4332	return (error);
4333}
4334
4335/*
4336 * Convert a user file descriptor to a kernel file entry and check that, if it
4337 * is a capability, the correct rights are present. A reference on the file
4338 * entry is held upon returning.
4339 */
4340int
4341getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
4342    struct file **fpp)
4343{
4344	struct file *fp;
4345#ifdef CAPABILITIES
4346	struct file *fp_fromcap;
4347#endif
4348	int error;
4349
4350	error = 0;
4351	fp = NULL;
4352	if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
4353		return (EBADF);
4354#ifdef CAPABILITIES
4355	/*
4356	 * If the file descriptor is for a capability, test rights and use the
4357	 * file descriptor referenced by the capability.
4358	 */
4359	error = cap_funwrap(fp, rights, &fp_fromcap);
4360	if (error) {
4361		fdrop(fp, curthread);
4362		return (error);
4363	}
4364	if (fp != fp_fromcap) {
4365		fhold(fp_fromcap);
4366		fdrop(fp, curthread);
4367		fp = fp_fromcap;
4368	}
4369#endif /* CAPABILITIES */
4370
4371	/*
4372	 * The file could be not of the vnode type, or it may be not
4373	 * yet fully initialized, in which case the f_vnode pointer
4374	 * may be set, but f_ops is still badfileops.  E.g.,
4375	 * devfs_open() transiently create such situation to
4376	 * facilitate csw d_fdopen().
4377	 *
4378	 * Dupfdopen() handling in kern_openat() installs the
4379	 * half-baked file into the process descriptor table, allowing
4380	 * other thread to dereference it. Guard against the race by
4381	 * checking f_ops.
4382	 */
4383	if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
4384		fdrop(fp, curthread);
4385		return (EINVAL);
4386	}
4387	*fpp = fp;
4388	return (0);
4389}
4390
4391
4392/*
4393 * Get an (NFS) file handle.
4394 */
4395#ifndef _SYS_SYSPROTO_H_
4396struct lgetfh_args {
4397	char	*fname;
4398	fhandle_t *fhp;
4399};
4400#endif
4401int
4402sys_lgetfh(td, uap)
4403	struct thread *td;
4404	register struct lgetfh_args *uap;
4405{
4406	struct nameidata nd;
4407	fhandle_t fh;
4408	register struct vnode *vp;
4409	int vfslocked;
4410	int error;
4411
4412	error = priv_check(td, PRIV_VFS_GETFH);
4413	if (error)
4414		return (error);
4415	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4416	    UIO_USERSPACE, uap->fname, td);
4417	error = namei(&nd);
4418	if (error)
4419		return (error);
4420	vfslocked = NDHASGIANT(&nd);
4421	NDFREE(&nd, NDF_ONLY_PNBUF);
4422	vp = nd.ni_vp;
4423	bzero(&fh, sizeof(fh));
4424	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4425	error = VOP_VPTOFH(vp, &fh.fh_fid);
4426	vput(vp);
4427	VFS_UNLOCK_GIANT(vfslocked);
4428	if (error)
4429		return (error);
4430	error = copyout(&fh, uap->fhp, sizeof (fh));
4431	return (error);
4432}
4433
4434#ifndef _SYS_SYSPROTO_H_
4435struct getfh_args {
4436	char	*fname;
4437	fhandle_t *fhp;
4438};
4439#endif
4440int
4441sys_getfh(td, uap)
4442	struct thread *td;
4443	register struct getfh_args *uap;
4444{
4445	struct nameidata nd;
4446	fhandle_t fh;
4447	register struct vnode *vp;
4448	int vfslocked;
4449	int error;
4450
4451	error = priv_check(td, PRIV_VFS_GETFH);
4452	if (error)
4453		return (error);
4454	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4455	    UIO_USERSPACE, uap->fname, td);
4456	error = namei(&nd);
4457	if (error)
4458		return (error);
4459	vfslocked = NDHASGIANT(&nd);
4460	NDFREE(&nd, NDF_ONLY_PNBUF);
4461	vp = nd.ni_vp;
4462	bzero(&fh, sizeof(fh));
4463	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4464	error = VOP_VPTOFH(vp, &fh.fh_fid);
4465	vput(vp);
4466	VFS_UNLOCK_GIANT(vfslocked);
4467	if (error)
4468		return (error);
4469	error = copyout(&fh, uap->fhp, sizeof (fh));
4470	return (error);
4471}
4472
4473/*
4474 * syscall for the rpc.lockd to use to translate a NFS file handle into an
4475 * open descriptor.
4476 *
4477 * warning: do not remove the priv_check() call or this becomes one giant
4478 * security hole.
4479 */
4480#ifndef _SYS_SYSPROTO_H_
4481struct fhopen_args {
4482	const struct fhandle *u_fhp;
4483	int flags;
4484};
4485#endif
4486int
4487sys_fhopen(td, uap)
4488	struct thread *td;
4489	struct fhopen_args /* {
4490		const struct fhandle *u_fhp;
4491		int flags;
4492	} */ *uap;
4493{
4494	struct proc *p = td->td_proc;
4495	struct mount *mp;
4496	struct vnode *vp;
4497	struct fhandle fhp;
4498	struct vattr vat;
4499	struct vattr *vap = &vat;
4500	struct flock lf;
4501	struct file *fp;
4502	register struct filedesc *fdp = p->p_fd;
4503	int fmode, error, type;
4504	accmode_t accmode;
4505	struct file *nfp;
4506	int vfslocked;
4507	int indx;
4508
4509	error = priv_check(td, PRIV_VFS_FHOPEN);
4510	if (error)
4511		return (error);
4512	fmode = FFLAGS(uap->flags);
4513	/* why not allow a non-read/write open for our lockd? */
4514	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4515		return (EINVAL);
4516	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4517	if (error)
4518		return(error);
4519	/* find the mount point */
4520	mp = vfs_busyfs(&fhp.fh_fsid);
4521	if (mp == NULL)
4522		return (ESTALE);
4523	vfslocked = VFS_LOCK_GIANT(mp);
4524	/* now give me my vnode, it gets returned to me locked */
4525	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
4526	vfs_unbusy(mp);
4527	if (error)
4528		goto out;
4529	/*
4530	 * from now on we have to make sure not
4531	 * to forget about the vnode
4532	 * any error that causes an abort must vput(vp)
4533	 * just set error = err and 'goto bad;'.
4534	 */
4535
4536	/*
4537	 * from vn_open
4538	 */
4539	if (vp->v_type == VLNK) {
4540		error = EMLINK;
4541		goto bad;
4542	}
4543	if (vp->v_type == VSOCK) {
4544		error = EOPNOTSUPP;
4545		goto bad;
4546	}
4547	if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
4548		error = ENOTDIR;
4549		goto bad;
4550	}
4551	accmode = 0;
4552	if (fmode & (FWRITE | O_TRUNC)) {
4553		if (vp->v_type == VDIR) {
4554			error = EISDIR;
4555			goto bad;
4556		}
4557		error = vn_writechk(vp);
4558		if (error)
4559			goto bad;
4560		accmode |= VWRITE;
4561	}
4562	if (fmode & FREAD)
4563		accmode |= VREAD;
4564	if ((fmode & O_APPEND) && (fmode & FWRITE))
4565		accmode |= VAPPEND;
4566#ifdef MAC
4567	error = mac_vnode_check_open(td->td_ucred, vp, accmode);
4568	if (error)
4569		goto bad;
4570#endif
4571	if (accmode) {
4572		error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
4573		if (error)
4574			goto bad;
4575	}
4576	if (fmode & O_TRUNC) {
4577		vfs_ref(mp);
4578		VOP_UNLOCK(vp, 0);				/* XXX */
4579		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4580			vrele(vp);
4581			vfs_rel(mp);
4582			goto out;
4583		}
4584		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
4585		vfs_rel(mp);
4586#ifdef MAC
4587		/*
4588		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4589		 * should be right.
4590		 */
4591		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
4592		if (error == 0) {
4593#endif
4594			VATTR_NULL(vap);
4595			vap->va_size = 0;
4596			error = VOP_SETATTR(vp, vap, td->td_ucred);
4597#ifdef MAC
4598		}
4599#endif
4600		vn_finished_write(mp);
4601		if (error)
4602			goto bad;
4603	}
4604	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
4605	if (error)
4606		goto bad;
4607
4608	if (fmode & FWRITE)
4609		vp->v_writecount++;
4610
4611	/*
4612	 * end of vn_open code
4613	 */
4614
4615	if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
4616		if (fmode & FWRITE)
4617			vp->v_writecount--;
4618		goto bad;
4619	}
4620	/* An extra reference on `nfp' has been held for us by falloc(). */
4621	fp = nfp;
4622	nfp->f_vnode = vp;
4623	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
4624	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4625		lf.l_whence = SEEK_SET;
4626		lf.l_start = 0;
4627		lf.l_len = 0;
4628		if (fmode & O_EXLOCK)
4629			lf.l_type = F_WRLCK;
4630		else
4631			lf.l_type = F_RDLCK;
4632		type = F_FLOCK;
4633		if ((fmode & FNONBLOCK) == 0)
4634			type |= F_WAIT;
4635		VOP_UNLOCK(vp, 0);
4636		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4637			    type)) != 0) {
4638			/*
4639			 * The lock request failed.  Normally close the
4640			 * descriptor but handle the case where someone might
4641			 * have dup()d or close()d it when we weren't looking.
4642			 */
4643			fdclose(fdp, fp, indx, td);
4644
4645			/*
4646			 * release our private reference
4647			 */
4648			fdrop(fp, td);
4649			goto out;
4650		}
4651		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4652		atomic_set_int(&fp->f_flag, FHASLOCK);
4653	}
4654
4655	VOP_UNLOCK(vp, 0);
4656	fdrop(fp, td);
4657	VFS_UNLOCK_GIANT(vfslocked);
4658	td->td_retval[0] = indx;
4659	return (0);
4660
4661bad:
4662	vput(vp);
4663out:
4664	VFS_UNLOCK_GIANT(vfslocked);
4665	return (error);
4666}
4667
4668/*
4669 * Stat an (NFS) file handle.
4670 */
4671#ifndef _SYS_SYSPROTO_H_
4672struct fhstat_args {
4673	struct fhandle *u_fhp;
4674	struct stat *sb;
4675};
4676#endif
4677int
4678sys_fhstat(td, uap)
4679	struct thread *td;
4680	register struct fhstat_args /* {
4681		struct fhandle *u_fhp;
4682		struct stat *sb;
4683	} */ *uap;
4684{
4685	struct stat sb;
4686	fhandle_t fh;
4687	struct mount *mp;
4688	struct vnode *vp;
4689	int vfslocked;
4690	int error;
4691
4692	error = priv_check(td, PRIV_VFS_FHSTAT);
4693	if (error)
4694		return (error);
4695	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4696	if (error)
4697		return (error);
4698	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4699		return (ESTALE);
4700	vfslocked = VFS_LOCK_GIANT(mp);
4701	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
4702	vfs_unbusy(mp);
4703	if (error) {
4704		VFS_UNLOCK_GIANT(vfslocked);
4705		return (error);
4706	}
4707	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4708	vput(vp);
4709	VFS_UNLOCK_GIANT(vfslocked);
4710	if (error)
4711		return (error);
4712	error = copyout(&sb, uap->sb, sizeof(sb));
4713	return (error);
4714}
4715
4716/*
4717 * Implement fstatfs() for (NFS) file handles.
4718 */
4719#ifndef _SYS_SYSPROTO_H_
4720struct fhstatfs_args {
4721	struct fhandle *u_fhp;
4722	struct statfs *buf;
4723};
4724#endif
4725int
4726sys_fhstatfs(td, uap)
4727	struct thread *td;
4728	struct fhstatfs_args /* {
4729		struct fhandle *u_fhp;
4730		struct statfs *buf;
4731	} */ *uap;
4732{
4733	struct statfs sf;
4734	fhandle_t fh;
4735	int error;
4736
4737	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4738	if (error)
4739		return (error);
4740	error = kern_fhstatfs(td, fh, &sf);
4741	if (error)
4742		return (error);
4743	return (copyout(&sf, uap->buf, sizeof(sf)));
4744}
4745
4746int
4747kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4748{
4749	struct statfs *sp;
4750	struct mount *mp;
4751	struct vnode *vp;
4752	int vfslocked;
4753	int error;
4754
4755	error = priv_check(td, PRIV_VFS_FHSTATFS);
4756	if (error)
4757		return (error);
4758	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4759		return (ESTALE);
4760	vfslocked = VFS_LOCK_GIANT(mp);
4761	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
4762	if (error) {
4763		vfs_unbusy(mp);
4764		VFS_UNLOCK_GIANT(vfslocked);
4765		return (error);
4766	}
4767	vput(vp);
4768	error = prison_canseemount(td->td_ucred, mp);
4769	if (error)
4770		goto out;
4771#ifdef MAC
4772	error = mac_mount_check_stat(td->td_ucred, mp);
4773	if (error)
4774		goto out;
4775#endif
4776	/*
4777	 * Set these in case the underlying filesystem fails to do so.
4778	 */
4779	sp = &mp->mnt_stat;
4780	sp->f_version = STATFS_VERSION;
4781	sp->f_namemax = NAME_MAX;
4782	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4783	error = VFS_STATFS(mp, sp);
4784	if (error == 0)
4785		*buf = *sp;
4786out:
4787	vfs_unbusy(mp);
4788	VFS_UNLOCK_GIANT(vfslocked);
4789	return (error);
4790}
4791
4792int
4793kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
4794{
4795	struct file *fp;
4796	struct mount *mp;
4797	struct vnode *vp;
4798	off_t olen, ooffset;
4799	int error, vfslocked;
4800
4801	fp = NULL;
4802	vfslocked = 0;
4803	error = fget(td, fd, CAP_WRITE, &fp);
4804	if (error != 0)
4805		goto out;
4806
4807	switch (fp->f_type) {
4808	case DTYPE_VNODE:
4809		break;
4810	case DTYPE_PIPE:
4811	case DTYPE_FIFO:
4812		error = ESPIPE;
4813		goto out;
4814	default:
4815		error = ENODEV;
4816		goto out;
4817	}
4818	if ((fp->f_flag & FWRITE) == 0) {
4819		error = EBADF;
4820		goto out;
4821	}
4822	vp = fp->f_vnode;
4823	if (vp->v_type != VREG) {
4824		error = ENODEV;
4825		goto out;
4826	}
4827	if (offset < 0 || len <= 0) {
4828		error = EINVAL;
4829		goto out;
4830	}
4831	/* Check for wrap. */
4832	if (offset > OFF_MAX - len) {
4833		error = EFBIG;
4834		goto out;
4835	}
4836
4837	/* Allocating blocks may take a long time, so iterate. */
4838	for (;;) {
4839		olen = len;
4840		ooffset = offset;
4841
4842		bwillwrite();
4843		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4844		mp = NULL;
4845		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4846		if (error != 0) {
4847			VFS_UNLOCK_GIANT(vfslocked);
4848			break;
4849		}
4850		error = vn_lock(vp, LK_EXCLUSIVE);
4851		if (error != 0) {
4852			vn_finished_write(mp);
4853			VFS_UNLOCK_GIANT(vfslocked);
4854			break;
4855		}
4856#ifdef MAC
4857		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
4858		if (error == 0)
4859#endif
4860			error = VOP_ALLOCATE(vp, &offset, &len);
4861		VOP_UNLOCK(vp, 0);
4862		vn_finished_write(mp);
4863		VFS_UNLOCK_GIANT(vfslocked);
4864
4865		if (olen + ooffset != offset + len) {
4866			panic("offset + len changed from %jx/%jx to %jx/%jx",
4867			    ooffset, olen, offset, len);
4868		}
4869		if (error != 0 || len == 0)
4870			break;
4871		KASSERT(olen > len, ("Iteration did not make progress?"));
4872		maybe_yield();
4873	}
4874 out:
4875	if (fp != NULL)
4876		fdrop(fp, td);
4877	return (error);
4878}
4879
4880int
4881sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
4882{
4883
4884	td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
4885	    uap->len);
4886	return (0);
4887}
4888
4889/*
4890 * Unlike madvise(2), we do not make a best effort to remember every
4891 * possible caching hint.  Instead, we remember the last setting with
4892 * the exception that we will allow POSIX_FADV_NORMAL to adjust the
4893 * region of any current setting.
4894 */
4895int
4896kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
4897    int advice)
4898{
4899	struct fadvise_info *fa, *new;
4900	struct file *fp;
4901	struct vnode *vp;
4902	off_t end;
4903	int error;
4904
4905	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
4906		return (EINVAL);
4907	switch (advice) {
4908	case POSIX_FADV_SEQUENTIAL:
4909	case POSIX_FADV_RANDOM:
4910	case POSIX_FADV_NOREUSE:
4911		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
4912		break;
4913	case POSIX_FADV_NORMAL:
4914	case POSIX_FADV_WILLNEED:
4915	case POSIX_FADV_DONTNEED:
4916		new = NULL;
4917		break;
4918	default:
4919		return (EINVAL);
4920	}
4921	/* XXX: CAP_POSIX_FADVISE? */
4922	error = fget(td, fd, 0, &fp);
4923	if (error != 0)
4924		goto out;
4925
4926	switch (fp->f_type) {
4927	case DTYPE_VNODE:
4928		break;
4929	case DTYPE_PIPE:
4930	case DTYPE_FIFO:
4931		error = ESPIPE;
4932		goto out;
4933	default:
4934		error = ENODEV;
4935		goto out;
4936	}
4937	vp = fp->f_vnode;
4938	if (vp->v_type != VREG) {
4939		error = ENODEV;
4940		goto out;
4941	}
4942	if (len == 0)
4943		end = OFF_MAX;
4944	else
4945		end = offset + len - 1;
4946	switch (advice) {
4947	case POSIX_FADV_SEQUENTIAL:
4948	case POSIX_FADV_RANDOM:
4949	case POSIX_FADV_NOREUSE:
4950		/*
4951		 * Try to merge any existing non-standard region with
4952		 * this new region if possible, otherwise create a new
4953		 * non-standard region for this request.
4954		 */
4955		mtx_pool_lock(mtxpool_sleep, fp);
4956		fa = fp->f_advice;
4957		if (fa != NULL && fa->fa_advice == advice &&
4958		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
4959		    (end != OFF_MAX && fa->fa_start == end + 1) ||
4960		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
4961			if (offset < fa->fa_start)
4962				fa->fa_start = offset;
4963			if (end > fa->fa_end)
4964				fa->fa_end = end;
4965		} else {
4966			new->fa_advice = advice;
4967			new->fa_start = offset;
4968			new->fa_end = end;
4969			new->fa_prevstart = 0;
4970			new->fa_prevend = 0;
4971			fp->f_advice = new;
4972			new = fa;
4973		}
4974		mtx_pool_unlock(mtxpool_sleep, fp);
4975		break;
4976	case POSIX_FADV_NORMAL:
4977		/*
4978		 * If a the "normal" region overlaps with an existing
4979		 * non-standard region, trim or remove the
4980		 * non-standard region.
4981		 */
4982		mtx_pool_lock(mtxpool_sleep, fp);
4983		fa = fp->f_advice;
4984		if (fa != NULL) {
4985			if (offset <= fa->fa_start && end >= fa->fa_end) {
4986				new = fa;
4987				fp->f_advice = NULL;
4988			} else if (offset <= fa->fa_start &&
4989 			    end >= fa->fa_start)
4990				fa->fa_start = end + 1;
4991			else if (offset <= fa->fa_end && end >= fa->fa_end)
4992				fa->fa_end = offset - 1;
4993			else if (offset >= fa->fa_start && end <= fa->fa_end) {
4994				/*
4995				 * If the "normal" region is a middle
4996				 * portion of the existing
4997				 * non-standard region, just remove
4998				 * the whole thing rather than picking
4999				 * one side or the other to
5000				 * preserve.
5001				 */
5002				new = fa;
5003				fp->f_advice = NULL;
5004			}
5005		}
5006		mtx_pool_unlock(mtxpool_sleep, fp);
5007		break;
5008	case POSIX_FADV_WILLNEED:
5009	case POSIX_FADV_DONTNEED:
5010		error = VOP_ADVISE(vp, offset, end, advice);
5011		break;
5012	}
5013out:
5014	if (fp != NULL)
5015		fdrop(fp, td);
5016	free(new, M_FADVISE);
5017	return (error);
5018}
5019
5020int
5021sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
5022{
5023
5024	td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
5025	    uap->len, uap->advice);
5026	return (0);
5027}
5028