vfs_syscalls.c revision 92884
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_syscalls.c 92884 2002-03-21 15:27:39Z arr $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_ffs.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/malloc.h>
52#include <sys/mount.h>
53#include <sys/mutex.h>
54#include <sys/sysproto.h>
55#include <sys/namei.h>
56#include <sys/filedesc.h>
57#include <sys/kernel.h>
58#include <sys/fcntl.h>
59#include <sys/file.h>
60#include <sys/linker.h>
61#include <sys/stat.h>
62#include <sys/sx.h>
63#include <sys/unistd.h>
64#include <sys/vnode.h>
65#include <sys/proc.h>
66#include <sys/dirent.h>
67#include <sys/extattr.h>
68#include <sys/jail.h>
69#include <sys/sysctl.h>
70
71#include <machine/limits.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/uma.h>
77
78static int change_dir(struct nameidata *ndp, struct thread *td);
79static void checkdirs(struct vnode *olddp, struct vnode *newdp);
80static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81static int getutimes(const struct timeval *, struct timespec *);
82static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83static int setfmode(struct thread *td, struct vnode *, int);
84static int setfflags(struct thread *td, struct vnode *, int);
85static int setutimes(struct thread *td, struct vnode *,
86    const struct timespec *, int);
87static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88    struct thread *td);
89
90static int	usermount = 0;	/* if 1, non-root can mount fs. */
91
92int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
93
94SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
95
96/*
97 * Virtual File System System Calls
98 */
99
100#ifndef _SYS_SYSPROTO_H_
101struct nmount_args {
102	struct iovec    *iovp;
103	unsigned int    iovcnt;
104	int             flags;
105	};
106#endif
107/* ARGSUSED */
108int
109nmount(td, uap)
110	struct thread *td;
111	struct nmount_args /* {
112		syscallarg(struct iovec *) iovp;
113		syscallarg(unsigned int) iovcnt;
114		syscallarg(int) flags;
115	} */ *uap;
116{
117
118	return(EOPNOTSUPP);
119}
120
121/*
122 * Mount a file system.
123 */
124#ifndef _SYS_SYSPROTO_H_
125struct mount_args {
126	char	*type;
127	char	*path;
128	int	flags;
129	caddr_t	data;
130};
131#endif
132/* ARGSUSED */
133int
134mount(td, uap)
135	struct thread *td;
136	struct mount_args /* {
137		syscallarg(char *) type;
138		syscallarg(char *) path;
139		syscallarg(int) flags;
140		syscallarg(caddr_t) data;
141	} */ *uap;
142{
143	char *fstype;
144	char *fspath;
145	int error;
146
147	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
148	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
149
150	/*
151	 * vfs_mount() actually takes a kernel string for `type' and
152	 * `path' now, so extract them.
153	 */
154	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
155	if (error)
156		goto finish;
157	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
158	if (error)
159		goto finish;
160	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
161	    SCARG(uap, data));
162finish:
163	free(fstype, M_TEMP);
164	free(fspath, M_TEMP);
165	return (error);
166}
167
168/*
169 * vfs_mount(): actually attempt a filesystem mount.
170 *
171 * This routine is designed to be a "generic" entry point for routines
172 * that wish to mount a filesystem. All parameters except `fsdata' are
173 * pointers into kernel space. `fsdata' is currently still a pointer
174 * into userspace.
175 */
176int
177vfs_mount(td, fstype, fspath, fsflags, fsdata)
178	struct thread *td;
179	const char *fstype;
180	char *fspath;
181	int fsflags;
182	void *fsdata;
183{
184	struct vnode *vp;
185	struct mount *mp;
186	struct vfsconf *vfsp;
187	int error, flag = 0, flag2 = 0;
188	struct vattr va;
189	struct nameidata nd;
190
191	/*
192	 * Be ultra-paranoid about making sure the type and fspath
193	 * variables will fit in our mp buffers, including the
194	 * terminating NUL.
195	 */
196	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
197	    (strlen(fspath) >= MNAMELEN - 1))
198		return (ENAMETOOLONG);
199
200	if (usermount == 0) {
201		error = suser_td(td);
202		if (error)
203			return (error);
204	}
205	/*
206	 * Do not allow NFS export by non-root users.
207	 */
208	if (fsflags & MNT_EXPORTED) {
209		error = suser_td(td);
210		if (error)
211			return (error);
212	}
213	/*
214	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
215	 */
216	if (suser_xxx(td->td_ucred, 0, 0))
217		fsflags |= MNT_NOSUID | MNT_NODEV;
218	/*
219	 * Get vnode to be covered
220	 */
221	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
222	if ((error = namei(&nd)) != 0)
223		return (error);
224	NDFREE(&nd, NDF_ONLY_PNBUF);
225	vp = nd.ni_vp;
226	if (fsflags & MNT_UPDATE) {
227		if ((vp->v_flag & VROOT) == 0) {
228			vput(vp);
229			return (EINVAL);
230		}
231		mp = vp->v_mount;
232		flag = mp->mnt_flag;
233		flag2 = mp->mnt_kern_flag;
234		/*
235		 * We only allow the filesystem to be reloaded if it
236		 * is currently mounted read-only.
237		 */
238		if ((fsflags & MNT_RELOAD) &&
239		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
240			vput(vp);
241			return (EOPNOTSUPP);	/* Needs translation */
242		}
243		/*
244		 * Only root, or the user that did the original mount is
245		 * permitted to update it.
246		 */
247		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
248			error = suser_td(td);
249			if (error) {
250				vput(vp);
251				return (error);
252			}
253		}
254		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
255			vput(vp);
256			return (EBUSY);
257		}
258		mtx_lock(&vp->v_interlock);
259		if ((vp->v_flag & VMOUNT) != 0 ||
260		    vp->v_mountedhere != NULL) {
261			mtx_unlock(&vp->v_interlock);
262			vfs_unbusy(mp, td);
263			vput(vp);
264			return (EBUSY);
265		}
266		vp->v_flag |= VMOUNT;
267		mtx_unlock(&vp->v_interlock);
268		mp->mnt_flag |= fsflags &
269		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
270		VOP_UNLOCK(vp, 0, td);
271		goto update;
272	}
273	/*
274	 * If the user is not root, ensure that they own the directory
275	 * onto which we are attempting to mount.
276	 */
277	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
278	if (error) {
279		vput(vp);
280		return (error);
281	}
282	if (va.va_uid != td->td_ucred->cr_uid) {
283		error = suser_td(td);
284		if (error) {
285			vput(vp);
286			return (error);
287		}
288	}
289	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0))
290	    != 0) {
291		vput(vp);
292		return (error);
293	}
294	if (vp->v_type != VDIR) {
295		vput(vp);
296		return (ENOTDIR);
297	}
298	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
299		if (!strcmp(vfsp->vfc_name, fstype))
300			break;
301	if (vfsp == NULL) {
302		linker_file_t lf;
303
304		/* Only load modules for root (very important!) */
305		error = suser_td(td);
306		if (error) {
307			vput(vp);
308			return error;
309		}
310		error = securelevel_gt(td->td_ucred, 0);
311		if (error != 0) {
312			vput(vp);
313			return (EPERM);
314		}
315		error = linker_load_file(fstype, &lf);
316		if (error || lf == NULL) {
317			vput(vp);
318			if (lf == NULL)
319				error = ENODEV;
320			return error;
321		}
322		lf->userrefs++;
323		/* lookup again, see if the VFS was loaded */
324		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
325			if (!strcmp(vfsp->vfc_name, fstype))
326				break;
327		if (vfsp == NULL) {
328			lf->userrefs--;
329			linker_file_unload(lf);
330			vput(vp);
331			return (ENODEV);
332		}
333	}
334	mtx_lock(&vp->v_interlock);
335	if ((vp->v_flag & VMOUNT) != 0 ||
336	    vp->v_mountedhere != NULL) {
337		mtx_unlock(&vp->v_interlock);
338		vput(vp);
339		return (EBUSY);
340	}
341	vp->v_flag |= VMOUNT;
342	mtx_unlock(&vp->v_interlock);
343
344	/*
345	 * Allocate and initialize the filesystem.
346	 */
347	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
348	TAILQ_INIT(&mp->mnt_nvnodelist);
349	TAILQ_INIT(&mp->mnt_reservedvnlist);
350	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
351	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
352	mp->mnt_op = vfsp->vfc_vfsops;
353	mp->mnt_vfc = vfsp;
354	vfsp->vfc_refcount++;
355	mp->mnt_stat.f_type = vfsp->vfc_typenum;
356	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
357	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
358	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
359	mp->mnt_vnodecovered = vp;
360	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
361	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
362	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
363	mp->mnt_iosize_max = DFLTPHYS;
364	VOP_UNLOCK(vp, 0, td);
365update:
366	/*
367	 * Set the mount level flags.
368	 */
369	if (fsflags & MNT_RDONLY)
370		mp->mnt_flag |= MNT_RDONLY;
371	else if (mp->mnt_flag & MNT_RDONLY)
372		mp->mnt_kern_flag |= MNTK_WANTRDWR;
373	mp->mnt_flag &=~ MNT_UPDATEMASK;
374	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
375	/*
376	 * Mount the filesystem.
377	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
378	 * get.  No freeing of cn_pnbuf.
379	 */
380	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
381	if (mp->mnt_flag & MNT_UPDATE) {
382		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
383			mp->mnt_flag &= ~MNT_RDONLY;
384		mp->mnt_flag &=~
385		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
386		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
387		if (error) {
388			mp->mnt_flag = flag;
389			mp->mnt_kern_flag = flag2;
390		}
391		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
392			if (mp->mnt_syncer == NULL)
393				error = vfs_allocate_syncvnode(mp);
394		} else {
395			if (mp->mnt_syncer != NULL)
396				vrele(mp->mnt_syncer);
397			mp->mnt_syncer = NULL;
398		}
399		vfs_unbusy(mp, td);
400		mtx_lock(&vp->v_interlock);
401		vp->v_flag &= ~VMOUNT;
402		mtx_unlock(&vp->v_interlock);
403		vrele(vp);
404		return (error);
405	}
406	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
407	/*
408	 * Put the new filesystem on the mount list after root.
409	 */
410	cache_purge(vp);
411	if (!error) {
412		struct vnode *newdp;
413
414		mtx_lock(&vp->v_interlock);
415		vp->v_flag &= ~VMOUNT;
416		vp->v_mountedhere = mp;
417		mtx_unlock(&vp->v_interlock);
418		mtx_lock(&mountlist_mtx);
419		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
420		mtx_unlock(&mountlist_mtx);
421		if (VFS_ROOT(mp, &newdp))
422			panic("mount: lost mount");
423		checkdirs(vp, newdp);
424		vput(newdp);
425		VOP_UNLOCK(vp, 0, td);
426		if ((mp->mnt_flag & MNT_RDONLY) == 0)
427			error = vfs_allocate_syncvnode(mp);
428		vfs_unbusy(mp, td);
429		if ((error = VFS_START(mp, 0, td)) != 0)
430			vrele(vp);
431	} else {
432		mtx_lock(&vp->v_interlock);
433		vp->v_flag &= ~VMOUNT;
434		mtx_unlock(&vp->v_interlock);
435		mp->mnt_vfc->vfc_refcount--;
436		vfs_unbusy(mp, td);
437		free((caddr_t)mp, M_MOUNT);
438		vput(vp);
439	}
440	return (error);
441}
442
443/*
444 * Scan all active processes to see if any of them have a current
445 * or root directory of `olddp'. If so, replace them with the new
446 * mount point.
447 */
448static void
449checkdirs(olddp, newdp)
450	struct vnode *olddp, *newdp;
451{
452	struct filedesc *fdp;
453	struct proc *p;
454	int nrele;
455
456	if (olddp->v_usecount == 1)
457		return;
458	sx_slock(&allproc_lock);
459	LIST_FOREACH(p, &allproc, p_list) {
460		PROC_LOCK(p);
461		fdp = p->p_fd;
462		if (fdp == NULL) {
463			PROC_UNLOCK(p);
464			continue;
465		}
466		nrele = 0;
467		FILEDESC_LOCK(fdp);
468		if (fdp->fd_cdir == olddp) {
469			VREF(newdp);
470			fdp->fd_cdir = newdp;
471			nrele++;
472		}
473		if (fdp->fd_rdir == olddp) {
474			VREF(newdp);
475			fdp->fd_rdir = newdp;
476			nrele++;
477		}
478		FILEDESC_UNLOCK(fdp);
479		PROC_UNLOCK(p);
480		while (nrele--)
481			vrele(olddp);
482	}
483	sx_sunlock(&allproc_lock);
484	if (rootvnode == olddp) {
485		vrele(rootvnode);
486		VREF(newdp);
487		rootvnode = newdp;
488	}
489}
490
491/*
492 * Unmount a file system.
493 *
494 * Note: unmount takes a path to the vnode mounted on as argument,
495 * not special file (as before).
496 */
497#ifndef _SYS_SYSPROTO_H_
498struct unmount_args {
499	char	*path;
500	int	flags;
501};
502#endif
503/* ARGSUSED */
504int
505unmount(td, uap)
506	struct thread *td;
507	register struct unmount_args /* {
508		syscallarg(char *) path;
509		syscallarg(int) flags;
510	} */ *uap;
511{
512	register struct vnode *vp;
513	struct mount *mp;
514	int error;
515	struct nameidata nd;
516
517	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
518	    SCARG(uap, path), td);
519	if ((error = namei(&nd)) != 0)
520		return (error);
521	vp = nd.ni_vp;
522	NDFREE(&nd, NDF_ONLY_PNBUF);
523	mp = vp->v_mount;
524
525	/*
526	 * Only root, or the user that did the original mount is
527	 * permitted to unmount this filesystem.
528	 */
529	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
530		error = suser_td(td);
531		if (error) {
532			vput(vp);
533			return (error);
534		}
535	}
536
537	/*
538	 * Don't allow unmounting the root file system.
539	 */
540	if (mp->mnt_flag & MNT_ROOTFS) {
541		vput(vp);
542		return (EINVAL);
543	}
544
545	/*
546	 * Must be the root of the filesystem
547	 */
548	if ((vp->v_flag & VROOT) == 0) {
549		vput(vp);
550		return (EINVAL);
551	}
552	vput(vp);
553	return (dounmount(mp, SCARG(uap, flags), td));
554}
555
556/*
557 * Do the actual file system unmount.
558 */
559int
560dounmount(mp, flags, td)
561	struct mount *mp;
562	int flags;
563	struct thread *td;
564{
565	struct vnode *coveredvp, *fsrootvp;
566	int error;
567	int async_flag;
568
569	mtx_lock(&mountlist_mtx);
570	mp->mnt_kern_flag |= MNTK_UNMOUNT;
571	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
572	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
573	if (error) {
574		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
575		if (mp->mnt_kern_flag & MNTK_MWAIT)
576			wakeup((caddr_t)mp);
577		return (error);
578	}
579	vn_start_write(NULL, &mp, V_WAIT);
580
581	if (mp->mnt_flag & MNT_EXPUBLIC)
582		vfs_setpublicfs(NULL, NULL, NULL);
583
584	vfs_msync(mp, MNT_WAIT);
585	async_flag = mp->mnt_flag & MNT_ASYNC;
586	mp->mnt_flag &=~ MNT_ASYNC;
587	cache_purgevfs(mp);	/* remove cache entries for this file sys */
588	if (mp->mnt_syncer != NULL)
589		vrele(mp->mnt_syncer);
590	/* Move process cdir/rdir refs on fs root to underlying vnode. */
591	if (VFS_ROOT(mp, &fsrootvp) == 0) {
592		if (mp->mnt_vnodecovered != NULL)
593			checkdirs(fsrootvp, mp->mnt_vnodecovered);
594		if (fsrootvp == rootvnode) {
595			vrele(rootvnode);
596			rootvnode = NULL;
597		}
598		vput(fsrootvp);
599	}
600	if (((mp->mnt_flag & MNT_RDONLY) ||
601	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
602	    (flags & MNT_FORCE)) {
603		error = VFS_UNMOUNT(mp, flags, td);
604	}
605	vn_finished_write(mp);
606	if (error) {
607		/* Undo cdir/rdir and rootvnode changes made above. */
608		if (VFS_ROOT(mp, &fsrootvp) == 0) {
609			if (mp->mnt_vnodecovered != NULL)
610				checkdirs(mp->mnt_vnodecovered, fsrootvp);
611			if (rootvnode == NULL) {
612				rootvnode = fsrootvp;
613				vref(rootvnode);
614			}
615			vput(fsrootvp);
616		}
617		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
618			(void) vfs_allocate_syncvnode(mp);
619		mtx_lock(&mountlist_mtx);
620		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
621		mp->mnt_flag |= async_flag;
622		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
623		    &mountlist_mtx, td);
624		if (mp->mnt_kern_flag & MNTK_MWAIT)
625			wakeup((caddr_t)mp);
626		return (error);
627	}
628	mtx_lock(&mountlist_mtx);
629	TAILQ_REMOVE(&mountlist, mp, mnt_list);
630	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
631		coveredvp->v_mountedhere = NULL;
632	mp->mnt_vfc->vfc_refcount--;
633	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
634		panic("unmount: dangling vnode");
635	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
636	lockdestroy(&mp->mnt_lock);
637	if (coveredvp != NULL)
638		vrele(coveredvp);
639	if (mp->mnt_kern_flag & MNTK_MWAIT)
640		wakeup((caddr_t)mp);
641	free((caddr_t)mp, M_MOUNT);
642	return (0);
643}
644
645/*
646 * Sync each mounted filesystem.
647 */
648#ifndef _SYS_SYSPROTO_H_
649struct sync_args {
650        int     dummy;
651};
652#endif
653
654#ifdef DEBUG
655static int syncprt = 0;
656SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
657#endif
658
659/* ARGSUSED */
660int
661sync(td, uap)
662	struct thread *td;
663	struct sync_args *uap;
664{
665	struct mount *mp, *nmp;
666	int asyncflag;
667
668	mtx_lock(&mountlist_mtx);
669	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
670		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
671			nmp = TAILQ_NEXT(mp, mnt_list);
672			continue;
673		}
674		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
675		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
676			asyncflag = mp->mnt_flag & MNT_ASYNC;
677			mp->mnt_flag &= ~MNT_ASYNC;
678			vfs_msync(mp, MNT_NOWAIT);
679			VFS_SYNC(mp, MNT_NOWAIT,
680			    ((td != NULL) ? td->td_ucred : NOCRED), td);
681			mp->mnt_flag |= asyncflag;
682			vn_finished_write(mp);
683		}
684		mtx_lock(&mountlist_mtx);
685		nmp = TAILQ_NEXT(mp, mnt_list);
686		vfs_unbusy(mp, td);
687	}
688	mtx_unlock(&mountlist_mtx);
689#if 0
690/*
691 * XXX don't call vfs_bufstats() yet because that routine
692 * was not imported in the Lite2 merge.
693 */
694#ifdef DIAGNOSTIC
695	if (syncprt)
696		vfs_bufstats();
697#endif /* DIAGNOSTIC */
698#endif
699	return (0);
700}
701
702/* XXX PRISON: could be per prison flag */
703static int prison_quotas;
704#if 0
705SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
706#endif
707
708/*
709 * Change filesystem quotas.
710 */
711#ifndef _SYS_SYSPROTO_H_
712struct quotactl_args {
713	char *path;
714	int cmd;
715	int uid;
716	caddr_t arg;
717};
718#endif
719/* ARGSUSED */
720int
721quotactl(td, uap)
722	struct thread *td;
723	register struct quotactl_args /* {
724		syscallarg(char *) path;
725		syscallarg(int) cmd;
726		syscallarg(int) uid;
727		syscallarg(caddr_t) arg;
728	} */ *uap;
729{
730	struct mount *mp;
731	int error;
732	struct nameidata nd;
733
734	if (jailed(td->td_ucred) && !prison_quotas)
735		return (EPERM);
736	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
737	if ((error = namei(&nd)) != 0)
738		return (error);
739	NDFREE(&nd, NDF_ONLY_PNBUF);
740	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
741	vrele(nd.ni_vp);
742	if (error)
743		return (error);
744	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
745	    SCARG(uap, arg), td);
746	vn_finished_write(mp);
747	return (error);
748}
749
750/*
751 * Get filesystem statistics.
752 */
753#ifndef _SYS_SYSPROTO_H_
754struct statfs_args {
755	char *path;
756	struct statfs *buf;
757};
758#endif
759/* ARGSUSED */
760int
761statfs(td, uap)
762	struct thread *td;
763	register struct statfs_args /* {
764		syscallarg(char *) path;
765		syscallarg(struct statfs *) buf;
766	} */ *uap;
767{
768	register struct mount *mp;
769	register struct statfs *sp;
770	int error;
771	struct nameidata nd;
772	struct statfs sb;
773
774	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
775	if ((error = namei(&nd)) != 0)
776		return (error);
777	mp = nd.ni_vp->v_mount;
778	sp = &mp->mnt_stat;
779	NDFREE(&nd, NDF_ONLY_PNBUF);
780	vrele(nd.ni_vp);
781	error = VFS_STATFS(mp, sp, td);
782	if (error)
783		return (error);
784	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
785	if (suser_xxx(td->td_ucred, 0, 0)) {
786		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
787		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
788		sp = &sb;
789	}
790	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
791}
792
793/*
794 * Get filesystem statistics.
795 */
796#ifndef _SYS_SYSPROTO_H_
797struct fstatfs_args {
798	int fd;
799	struct statfs *buf;
800};
801#endif
802/* ARGSUSED */
803int
804fstatfs(td, uap)
805	struct thread *td;
806	register struct fstatfs_args /* {
807		syscallarg(int) fd;
808		syscallarg(struct statfs *) buf;
809	} */ *uap;
810{
811	struct file *fp;
812	struct mount *mp;
813	register struct statfs *sp;
814	int error;
815	struct statfs sb;
816
817	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
818		return (error);
819	mp = ((struct vnode *)fp->f_data)->v_mount;
820	fdrop(fp, td);
821	if (mp == NULL)
822		return (EBADF);
823	sp = &mp->mnt_stat;
824	error = VFS_STATFS(mp, sp, td);
825	if (error)
826		return (error);
827	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
828	if (suser_xxx(td->td_ucred, 0, 0)) {
829		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
830		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
831		sp = &sb;
832	}
833	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
834}
835
836/*
837 * Get statistics on all filesystems.
838 */
839#ifndef _SYS_SYSPROTO_H_
840struct getfsstat_args {
841	struct statfs *buf;
842	long bufsize;
843	int flags;
844};
845#endif
846int
847getfsstat(td, uap)
848	struct thread *td;
849	register struct getfsstat_args /* {
850		syscallarg(struct statfs *) buf;
851		syscallarg(long) bufsize;
852		syscallarg(int) flags;
853	} */ *uap;
854{
855	register struct mount *mp, *nmp;
856	register struct statfs *sp;
857	caddr_t sfsp;
858	long count, maxcount, error;
859
860	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
861	sfsp = (caddr_t)SCARG(uap, buf);
862	count = 0;
863	mtx_lock(&mountlist_mtx);
864	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
865		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
866			nmp = TAILQ_NEXT(mp, mnt_list);
867			continue;
868		}
869		if (sfsp && count < maxcount) {
870			sp = &mp->mnt_stat;
871			/*
872			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
873			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
874			 * overrides MNT_WAIT.
875			 */
876			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
877			    (SCARG(uap, flags) & MNT_WAIT)) &&
878			    (error = VFS_STATFS(mp, sp, td))) {
879				mtx_lock(&mountlist_mtx);
880				nmp = TAILQ_NEXT(mp, mnt_list);
881				vfs_unbusy(mp, td);
882				continue;
883			}
884			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
885			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
886			if (error) {
887				vfs_unbusy(mp, td);
888				return (error);
889			}
890			sfsp += sizeof(*sp);
891		}
892		count++;
893		mtx_lock(&mountlist_mtx);
894		nmp = TAILQ_NEXT(mp, mnt_list);
895		vfs_unbusy(mp, td);
896	}
897	mtx_unlock(&mountlist_mtx);
898	if (sfsp && count > maxcount)
899		td->td_retval[0] = maxcount;
900	else
901		td->td_retval[0] = count;
902	return (0);
903}
904
905/*
906 * Change current working directory to a given file descriptor.
907 */
908#ifndef _SYS_SYSPROTO_H_
909struct fchdir_args {
910	int	fd;
911};
912#endif
913/* ARGSUSED */
914int
915fchdir(td, uap)
916	struct thread *td;
917	struct fchdir_args /* {
918		syscallarg(int) fd;
919	} */ *uap;
920{
921	register struct filedesc *fdp = td->td_proc->p_fd;
922	struct vnode *vp, *tdp, *vpold;
923	struct mount *mp;
924	struct file *fp;
925	int error;
926
927	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
928		return (error);
929	vp = (struct vnode *)fp->f_data;
930	VREF(vp);
931	fdrop(fp, td);
932	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
933	if (vp->v_type != VDIR)
934		error = ENOTDIR;
935	else
936		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
937	while (!error && (mp = vp->v_mountedhere) != NULL) {
938		if (vfs_busy(mp, 0, 0, td))
939			continue;
940		error = VFS_ROOT(mp, &tdp);
941		vfs_unbusy(mp, td);
942		if (error)
943			break;
944		vput(vp);
945		vp = tdp;
946	}
947	if (error) {
948		vput(vp);
949		return (error);
950	}
951	VOP_UNLOCK(vp, 0, td);
952	FILEDESC_LOCK(fdp);
953	vpold = fdp->fd_cdir;
954	fdp->fd_cdir = vp;
955	FILEDESC_UNLOCK(fdp);
956	vrele(vpold);
957	return (0);
958}
959
960/*
961 * Change current working directory (``.'').
962 */
963#ifndef _SYS_SYSPROTO_H_
964struct chdir_args {
965	char	*path;
966};
967#endif
968/* ARGSUSED */
969int
970chdir(td, uap)
971	struct thread *td;
972	struct chdir_args /* {
973		syscallarg(char *) path;
974	} */ *uap;
975{
976	register struct filedesc *fdp = td->td_proc->p_fd;
977	int error;
978	struct nameidata nd;
979	struct vnode *vp;
980
981	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
982	    SCARG(uap, path), td);
983	if ((error = change_dir(&nd, td)) != 0)
984		return (error);
985	NDFREE(&nd, NDF_ONLY_PNBUF);
986	FILEDESC_LOCK(fdp);
987	vp = fdp->fd_cdir;
988	fdp->fd_cdir = nd.ni_vp;
989	FILEDESC_UNLOCK(fdp);
990	vrele(vp);
991	return (0);
992}
993
994/*
995 * Helper function for raised chroot(2) security function:  Refuse if
996 * any filedescriptors are open directories.
997 */
998static int
999chroot_refuse_vdir_fds(fdp)
1000	struct filedesc *fdp;
1001{
1002	struct vnode *vp;
1003	struct file *fp;
1004	int fd;
1005
1006	FILEDESC_LOCK(fdp);
1007	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1008		fp = fget_locked(fdp, fd);
1009		if (fp == NULL)
1010			continue;
1011		if (fp->f_type == DTYPE_VNODE) {
1012			vp = (struct vnode *)fp->f_data;
1013			if (vp->v_type == VDIR) {
1014				FILEDESC_UNLOCK(fdp);
1015				return (EPERM);
1016			}
1017		}
1018	}
1019	FILEDESC_UNLOCK(fdp);
1020	return (0);
1021}
1022
1023/*
1024 * This sysctl determines if we will allow a process to chroot(2) if it
1025 * has a directory open:
1026 *	0: disallowed for all processes.
1027 *	1: allowed for processes that were not already chroot(2)'ed.
1028 *	2: allowed for all processes.
1029 */
1030
1031static int chroot_allow_open_directories = 1;
1032
1033SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1034     &chroot_allow_open_directories, 0, "");
1035
1036/*
1037 * Change notion of root (``/'') directory.
1038 */
1039#ifndef _SYS_SYSPROTO_H_
1040struct chroot_args {
1041	char	*path;
1042};
1043#endif
1044/* ARGSUSED */
1045int
1046chroot(td, uap)
1047	struct thread *td;
1048	struct chroot_args /* {
1049		syscallarg(char *) path;
1050	} */ *uap;
1051{
1052	register struct filedesc *fdp = td->td_proc->p_fd;
1053	int error;
1054	struct nameidata nd;
1055	struct vnode *vp;
1056
1057	error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1058	if (error)
1059		return (error);
1060	FILEDESC_LOCK(fdp);
1061	if (chroot_allow_open_directories == 0 ||
1062	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1063		FILEDESC_UNLOCK(fdp);
1064		error = chroot_refuse_vdir_fds(fdp);
1065	} else
1066		FILEDESC_UNLOCK(fdp);
1067	if (error)
1068		return (error);
1069	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1070	    SCARG(uap, path), td);
1071	if ((error = change_dir(&nd, td)) != 0)
1072		return (error);
1073	NDFREE(&nd, NDF_ONLY_PNBUF);
1074	FILEDESC_LOCK(fdp);
1075	vp = fdp->fd_rdir;
1076	fdp->fd_rdir = nd.ni_vp;
1077	if (!fdp->fd_jdir) {
1078		fdp->fd_jdir = nd.ni_vp;
1079                VREF(fdp->fd_jdir);
1080	}
1081	FILEDESC_UNLOCK(fdp);
1082	vrele(vp);
1083	return (0);
1084}
1085
1086/*
1087 * Common routine for chroot and chdir.
1088 */
1089static int
1090change_dir(ndp, td)
1091	register struct nameidata *ndp;
1092	struct thread *td;
1093{
1094	struct vnode *vp;
1095	int error;
1096
1097	error = namei(ndp);
1098	if (error)
1099		return (error);
1100	vp = ndp->ni_vp;
1101	if (vp->v_type != VDIR)
1102		error = ENOTDIR;
1103	else
1104		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1105	if (error)
1106		vput(vp);
1107	else
1108		VOP_UNLOCK(vp, 0, td);
1109	return (error);
1110}
1111
1112/*
1113 * Check permissions, allocate an open file structure,
1114 * and call the device open routine if any.
1115 */
1116#ifndef _SYS_SYSPROTO_H_
1117struct open_args {
1118	char	*path;
1119	int	flags;
1120	int	mode;
1121};
1122#endif
1123int
1124open(td, uap)
1125	struct thread *td;
1126	register struct open_args /* {
1127		syscallarg(char *) path;
1128		syscallarg(int) flags;
1129		syscallarg(int) mode;
1130	} */ *uap;
1131{
1132	struct proc *p = td->td_proc;
1133	struct filedesc *fdp = p->p_fd;
1134	struct file *fp;
1135	struct vnode *vp;
1136	struct vattr vat;
1137	struct mount *mp;
1138	int cmode, flags, oflags;
1139	struct file *nfp;
1140	int type, indx, error;
1141	struct flock lf;
1142	struct nameidata nd;
1143
1144	oflags = SCARG(uap, flags);
1145	if ((oflags & O_ACCMODE) == O_ACCMODE)
1146		return (EINVAL);
1147	flags = FFLAGS(oflags);
1148	error = falloc(td, &nfp, &indx);
1149	if (error)
1150		return (error);
1151	fp = nfp;
1152	FILEDESC_LOCK(fdp);
1153	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1154	FILEDESC_UNLOCK(fdp);
1155	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1156	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1157	/*
1158	 * Bump the ref count to prevent another process from closing
1159	 * the descriptor while we are blocked in vn_open()
1160	 */
1161	fhold(fp);
1162	error = vn_open(&nd, &flags, cmode);
1163	if (error) {
1164		/*
1165		 * release our own reference
1166		 */
1167		fdrop(fp, td);
1168
1169		/*
1170		 * handle special fdopen() case.  bleh.  dupfdopen() is
1171		 * responsible for dropping the old contents of ofiles[indx]
1172		 * if it succeeds.
1173		 */
1174		if ((error == ENODEV || error == ENXIO) &&
1175		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1176		    (error =
1177			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1178			td->td_retval[0] = indx;
1179			return (0);
1180		}
1181		/*
1182		 * Clean up the descriptor, but only if another thread hadn't
1183		 * replaced or closed it.
1184		 */
1185		FILEDESC_LOCK(fdp);
1186		if (fdp->fd_ofiles[indx] == fp) {
1187			fdp->fd_ofiles[indx] = NULL;
1188			FILEDESC_UNLOCK(fdp);
1189			fdrop(fp, td);
1190		} else
1191			FILEDESC_UNLOCK(fdp);
1192
1193		if (error == ERESTART)
1194			error = EINTR;
1195		return (error);
1196	}
1197	td->td_dupfd = 0;
1198	NDFREE(&nd, NDF_ONLY_PNBUF);
1199	vp = nd.ni_vp;
1200
1201	/*
1202	 * There should be 2 references on the file, one from the descriptor
1203	 * table, and one for us.
1204	 *
1205	 * Handle the case where someone closed the file (via its file
1206	 * descriptor) while we were blocked.  The end result should look
1207	 * like opening the file succeeded but it was immediately closed.
1208	 */
1209	FILEDESC_LOCK(fdp);
1210	FILE_LOCK(fp);
1211	if (fp->f_count == 1) {
1212		KASSERT(fdp->fd_ofiles[indx] != fp,
1213		    ("Open file descriptor lost all refs"));
1214		FILEDESC_UNLOCK(fdp);
1215		FILE_UNLOCK(fp);
1216		VOP_UNLOCK(vp, 0, td);
1217		vn_close(vp, flags & FMASK, fp->f_cred, td);
1218		fdrop(fp, td);
1219		td->td_retval[0] = indx;
1220		return 0;
1221	}
1222
1223	fp->f_data = (caddr_t)vp;
1224	fp->f_flag = flags & FMASK;
1225	fp->f_ops = &vnops;
1226	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1227	FILEDESC_UNLOCK(fdp);
1228	FILE_UNLOCK(fp);
1229	VOP_UNLOCK(vp, 0, td);
1230	if (flags & (O_EXLOCK | O_SHLOCK)) {
1231		lf.l_whence = SEEK_SET;
1232		lf.l_start = 0;
1233		lf.l_len = 0;
1234		if (flags & O_EXLOCK)
1235			lf.l_type = F_WRLCK;
1236		else
1237			lf.l_type = F_RDLCK;
1238		type = F_FLOCK;
1239		if ((flags & FNONBLOCK) == 0)
1240			type |= F_WAIT;
1241		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1242			goto bad;
1243		fp->f_flag |= FHASLOCK;
1244	}
1245	if (flags & O_TRUNC) {
1246		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1247			goto bad;
1248		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1249		VATTR_NULL(&vat);
1250		vat.va_size = 0;
1251		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1252		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1253		VOP_UNLOCK(vp, 0, td);
1254		vn_finished_write(mp);
1255		if (error)
1256			goto bad;
1257	}
1258	/* assert that vn_open created a backing object if one is needed */
1259	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1260		("open: vmio vnode has no backing object after vn_open"));
1261	/*
1262	 * Release our private reference, leaving the one associated with
1263	 * the descriptor table intact.
1264	 */
1265	fdrop(fp, td);
1266	td->td_retval[0] = indx;
1267	return (0);
1268bad:
1269	FILEDESC_LOCK(fdp);
1270	if (fdp->fd_ofiles[indx] == fp) {
1271		fdp->fd_ofiles[indx] = NULL;
1272		FILEDESC_UNLOCK(fdp);
1273		fdrop(fp, td);
1274	} else
1275		FILEDESC_UNLOCK(fdp);
1276	return (error);
1277}
1278
1279#ifdef COMPAT_43
1280/*
1281 * Create a file.
1282 */
1283#ifndef _SYS_SYSPROTO_H_
1284struct ocreat_args {
1285	char	*path;
1286	int	mode;
1287};
1288#endif
1289int
1290ocreat(td, uap)
1291	struct thread *td;
1292	register struct ocreat_args /* {
1293		syscallarg(char *) path;
1294		syscallarg(int) mode;
1295	} */ *uap;
1296{
1297	struct open_args /* {
1298		syscallarg(char *) path;
1299		syscallarg(int) flags;
1300		syscallarg(int) mode;
1301	} */ nuap;
1302
1303	SCARG(&nuap, path) = SCARG(uap, path);
1304	SCARG(&nuap, mode) = SCARG(uap, mode);
1305	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1306	return (open(td, &nuap));
1307}
1308#endif /* COMPAT_43 */
1309
1310/*
1311 * Create a special file.
1312 */
1313#ifndef _SYS_SYSPROTO_H_
1314struct mknod_args {
1315	char	*path;
1316	int	mode;
1317	int	dev;
1318};
1319#endif
1320/* ARGSUSED */
1321int
1322mknod(td, uap)
1323	struct thread *td;
1324	register struct mknod_args /* {
1325		syscallarg(char *) path;
1326		syscallarg(int) mode;
1327		syscallarg(int) dev;
1328	} */ *uap;
1329{
1330	struct vnode *vp;
1331	struct mount *mp;
1332	struct vattr vattr;
1333	int error;
1334	int whiteout = 0;
1335	struct nameidata nd;
1336
1337	switch (SCARG(uap, mode) & S_IFMT) {
1338	case S_IFCHR:
1339	case S_IFBLK:
1340		error = suser_td(td);
1341		break;
1342	default:
1343		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1344		break;
1345	}
1346	if (error)
1347		return (error);
1348restart:
1349	bwillwrite();
1350	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1351	if ((error = namei(&nd)) != 0)
1352		return (error);
1353	vp = nd.ni_vp;
1354	if (vp != NULL) {
1355		vrele(vp);
1356		error = EEXIST;
1357	} else {
1358		VATTR_NULL(&vattr);
1359		FILEDESC_LOCK(td->td_proc->p_fd);
1360		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1361		FILEDESC_UNLOCK(td->td_proc->p_fd);
1362		vattr.va_rdev = SCARG(uap, dev);
1363		whiteout = 0;
1364
1365		switch (SCARG(uap, mode) & S_IFMT) {
1366		case S_IFMT:	/* used by badsect to flag bad sectors */
1367			vattr.va_type = VBAD;
1368			break;
1369		case S_IFCHR:
1370			vattr.va_type = VCHR;
1371			break;
1372		case S_IFBLK:
1373			vattr.va_type = VBLK;
1374			break;
1375		case S_IFWHT:
1376			whiteout = 1;
1377			break;
1378		default:
1379			error = EINVAL;
1380			break;
1381		}
1382	}
1383	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1384		NDFREE(&nd, NDF_ONLY_PNBUF);
1385		vput(nd.ni_dvp);
1386		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1387			return (error);
1388		goto restart;
1389	}
1390	if (!error) {
1391		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1392		if (whiteout)
1393			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1394		else {
1395			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1396						&nd.ni_cnd, &vattr);
1397			if (error == 0)
1398				vput(nd.ni_vp);
1399		}
1400	}
1401	NDFREE(&nd, NDF_ONLY_PNBUF);
1402	vput(nd.ni_dvp);
1403	vn_finished_write(mp);
1404	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1405	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1406	return (error);
1407}
1408
1409/*
1410 * Create a named pipe.
1411 */
1412#ifndef _SYS_SYSPROTO_H_
1413struct mkfifo_args {
1414	char	*path;
1415	int	mode;
1416};
1417#endif
1418/* ARGSUSED */
1419int
1420mkfifo(td, uap)
1421	struct thread *td;
1422	register struct mkfifo_args /* {
1423		syscallarg(char *) path;
1424		syscallarg(int) mode;
1425	} */ *uap;
1426{
1427	struct mount *mp;
1428	struct vattr vattr;
1429	int error;
1430	struct nameidata nd;
1431
1432restart:
1433	bwillwrite();
1434	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1435	if ((error = namei(&nd)) != 0)
1436		return (error);
1437	if (nd.ni_vp != NULL) {
1438		NDFREE(&nd, NDF_ONLY_PNBUF);
1439		vrele(nd.ni_vp);
1440		vput(nd.ni_dvp);
1441		return (EEXIST);
1442	}
1443	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1444		NDFREE(&nd, NDF_ONLY_PNBUF);
1445		vput(nd.ni_dvp);
1446		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1447			return (error);
1448		goto restart;
1449	}
1450	VATTR_NULL(&vattr);
1451	vattr.va_type = VFIFO;
1452	FILEDESC_LOCK(td->td_proc->p_fd);
1453	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1454	FILEDESC_UNLOCK(td->td_proc->p_fd);
1455	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1456	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1457	if (error == 0)
1458		vput(nd.ni_vp);
1459	NDFREE(&nd, NDF_ONLY_PNBUF);
1460	vput(nd.ni_dvp);
1461	vn_finished_write(mp);
1462	return (error);
1463}
1464
1465/*
1466 * Make a hard file link.
1467 */
1468#ifndef _SYS_SYSPROTO_H_
1469struct link_args {
1470	char	*path;
1471	char	*link;
1472};
1473#endif
1474/* ARGSUSED */
1475int
1476link(td, uap)
1477	struct thread *td;
1478	register struct link_args /* {
1479		syscallarg(char *) path;
1480		syscallarg(char *) link;
1481	} */ *uap;
1482{
1483	struct vnode *vp;
1484	struct mount *mp;
1485	struct nameidata nd;
1486	int error;
1487
1488	bwillwrite();
1489	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
1490	if ((error = namei(&nd)) != 0)
1491		return (error);
1492	NDFREE(&nd, NDF_ONLY_PNBUF);
1493	vp = nd.ni_vp;
1494	if (vp->v_type == VDIR) {
1495		vrele(vp);
1496		return (EPERM);		/* POSIX */
1497	}
1498	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1499		vrele(vp);
1500		return (error);
1501	}
1502	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1503	if ((error = namei(&nd)) == 0) {
1504		if (nd.ni_vp != NULL) {
1505			vrele(nd.ni_vp);
1506			error = EEXIST;
1507		} else {
1508			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1509			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1510			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1511		}
1512		NDFREE(&nd, NDF_ONLY_PNBUF);
1513		vput(nd.ni_dvp);
1514	}
1515	vrele(vp);
1516	vn_finished_write(mp);
1517	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1518	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1519	return (error);
1520}
1521
1522/*
1523 * Make a symbolic link.
1524 */
1525#ifndef _SYS_SYSPROTO_H_
1526struct symlink_args {
1527	char	*path;
1528	char	*link;
1529};
1530#endif
1531/* ARGSUSED */
1532int
1533symlink(td, uap)
1534	struct thread *td;
1535	register struct symlink_args /* {
1536		syscallarg(char *) path;
1537		syscallarg(char *) link;
1538	} */ *uap;
1539{
1540	struct mount *mp;
1541	struct vattr vattr;
1542	char *path;
1543	int error;
1544	struct nameidata nd;
1545
1546	path = uma_zalloc(namei_zone, M_WAITOK);
1547	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1548		goto out;
1549restart:
1550	bwillwrite();
1551	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1552	if ((error = namei(&nd)) != 0)
1553		goto out;
1554	if (nd.ni_vp) {
1555		NDFREE(&nd, NDF_ONLY_PNBUF);
1556		vrele(nd.ni_vp);
1557		vput(nd.ni_dvp);
1558		error = EEXIST;
1559		goto out;
1560	}
1561	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1562		NDFREE(&nd, NDF_ONLY_PNBUF);
1563		vput(nd.ni_dvp);
1564		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1565			return (error);
1566		goto restart;
1567	}
1568	VATTR_NULL(&vattr);
1569	FILEDESC_LOCK(td->td_proc->p_fd);
1570	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1571	FILEDESC_UNLOCK(td->td_proc->p_fd);
1572	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1573	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1574	NDFREE(&nd, NDF_ONLY_PNBUF);
1575	if (error == 0)
1576		vput(nd.ni_vp);
1577	vput(nd.ni_dvp);
1578	vn_finished_write(mp);
1579	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1580	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1581out:
1582	uma_zfree(namei_zone, path);
1583	return (error);
1584}
1585
1586/*
1587 * Delete a whiteout from the filesystem.
1588 */
1589/* ARGSUSED */
1590int
1591undelete(td, uap)
1592	struct thread *td;
1593	register struct undelete_args /* {
1594		syscallarg(char *) path;
1595	} */ *uap;
1596{
1597	int error;
1598	struct mount *mp;
1599	struct nameidata nd;
1600
1601restart:
1602	bwillwrite();
1603	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1604	    SCARG(uap, path), td);
1605	error = namei(&nd);
1606	if (error)
1607		return (error);
1608
1609	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1610		NDFREE(&nd, NDF_ONLY_PNBUF);
1611		if (nd.ni_vp)
1612			vrele(nd.ni_vp);
1613		vput(nd.ni_dvp);
1614		return (EEXIST);
1615	}
1616	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1617		NDFREE(&nd, NDF_ONLY_PNBUF);
1618		vput(nd.ni_dvp);
1619		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1620			return (error);
1621		goto restart;
1622	}
1623	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1624	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1625	NDFREE(&nd, NDF_ONLY_PNBUF);
1626	vput(nd.ni_dvp);
1627	vn_finished_write(mp);
1628	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1629	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1630	return (error);
1631}
1632
1633/*
1634 * Delete a name from the filesystem.
1635 */
1636#ifndef _SYS_SYSPROTO_H_
1637struct unlink_args {
1638	char	*path;
1639};
1640#endif
1641/* ARGSUSED */
1642int
1643unlink(td, uap)
1644	struct thread *td;
1645	struct unlink_args /* {
1646		syscallarg(char *) path;
1647	} */ *uap;
1648{
1649	struct mount *mp;
1650	struct vnode *vp;
1651	int error;
1652	struct nameidata nd;
1653
1654restart:
1655	bwillwrite();
1656	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1657	if ((error = namei(&nd)) != 0)
1658		return (error);
1659	vp = nd.ni_vp;
1660	if (vp->v_type == VDIR)
1661		error = EPERM;		/* POSIX */
1662	else {
1663		/*
1664		 * The root of a mounted filesystem cannot be deleted.
1665		 *
1666		 * XXX: can this only be a VDIR case?
1667		 */
1668		if (vp->v_flag & VROOT)
1669			error = EBUSY;
1670	}
1671	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1672		NDFREE(&nd, NDF_ONLY_PNBUF);
1673		vrele(vp);
1674		vput(nd.ni_dvp);
1675		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1676			return (error);
1677		goto restart;
1678	}
1679	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1680	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1681	if (!error) {
1682		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1683		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1684	}
1685	NDFREE(&nd, NDF_ONLY_PNBUF);
1686	vput(nd.ni_dvp);
1687	vput(vp);
1688	vn_finished_write(mp);
1689	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1690	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1691	return (error);
1692}
1693
1694/*
1695 * Reposition read/write file offset.
1696 */
1697#ifndef _SYS_SYSPROTO_H_
1698struct lseek_args {
1699	int	fd;
1700	int	pad;
1701	off_t	offset;
1702	int	whence;
1703};
1704#endif
1705int
1706lseek(td, uap)
1707	struct thread *td;
1708	register struct lseek_args /* {
1709		syscallarg(int) fd;
1710		syscallarg(int) pad;
1711		syscallarg(off_t) offset;
1712		syscallarg(int) whence;
1713	} */ *uap;
1714{
1715	struct ucred *cred = td->td_ucred;
1716	struct file *fp;
1717	struct vnode *vp;
1718	struct vattr vattr;
1719	off_t offset;
1720	int error, noneg;
1721
1722	if ((error = fget(td, uap->fd, &fp)) != 0)
1723		return (error);
1724	if (fp->f_type != DTYPE_VNODE) {
1725		fdrop(fp, td);
1726		return (ESPIPE);
1727	}
1728	vp = (struct vnode *)fp->f_data;
1729	noneg = (vp->v_type != VCHR);
1730	offset = SCARG(uap, offset);
1731	switch (SCARG(uap, whence)) {
1732	case L_INCR:
1733		if (noneg &&
1734		    (fp->f_offset < 0 ||
1735		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1736			return (EOVERFLOW);
1737		offset += fp->f_offset;
1738		break;
1739	case L_XTND:
1740		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1741		error = VOP_GETATTR(vp, &vattr, cred, td);
1742		VOP_UNLOCK(vp, 0, td);
1743		if (error)
1744			return (error);
1745		if (noneg &&
1746		    (vattr.va_size > OFF_MAX ||
1747		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1748			return (EOVERFLOW);
1749		offset += vattr.va_size;
1750		break;
1751	case L_SET:
1752		break;
1753	default:
1754		fdrop(fp, td);
1755		return (EINVAL);
1756	}
1757	if (noneg && offset < 0)
1758		return (EINVAL);
1759	fp->f_offset = offset;
1760	*(off_t *)(td->td_retval) = fp->f_offset;
1761	fdrop(fp, td);
1762	return (0);
1763}
1764
1765#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1766/*
1767 * Reposition read/write file offset.
1768 */
1769#ifndef _SYS_SYSPROTO_H_
1770struct olseek_args {
1771	int	fd;
1772	long	offset;
1773	int	whence;
1774};
1775#endif
1776int
1777olseek(td, uap)
1778	struct thread *td;
1779	register struct olseek_args /* {
1780		syscallarg(int) fd;
1781		syscallarg(long) offset;
1782		syscallarg(int) whence;
1783	} */ *uap;
1784{
1785	struct lseek_args /* {
1786		syscallarg(int) fd;
1787		syscallarg(int) pad;
1788		syscallarg(off_t) offset;
1789		syscallarg(int) whence;
1790	} */ nuap;
1791	int error;
1792
1793	SCARG(&nuap, fd) = SCARG(uap, fd);
1794	SCARG(&nuap, offset) = SCARG(uap, offset);
1795	SCARG(&nuap, whence) = SCARG(uap, whence);
1796	error = lseek(td, &nuap);
1797	return (error);
1798}
1799#endif /* COMPAT_43 */
1800
1801/*
1802 * Check access permissions using passed credentials.
1803 */
1804static int
1805vn_access(vp, user_flags, cred, td)
1806	struct vnode	*vp;
1807	int		user_flags;
1808	struct ucred	*cred;
1809	struct thread	*td;
1810{
1811	int error, flags;
1812
1813	/* Flags == 0 means only check for existence. */
1814	error = 0;
1815	if (user_flags) {
1816		flags = 0;
1817		if (user_flags & R_OK)
1818			flags |= VREAD;
1819		if (user_flags & W_OK)
1820			flags |= VWRITE;
1821		if (user_flags & X_OK)
1822			flags |= VEXEC;
1823		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1824			error = VOP_ACCESS(vp, flags, cred, td);
1825	}
1826	return (error);
1827}
1828
1829/*
1830 * Check access permissions using "real" credentials.
1831 */
1832#ifndef _SYS_SYSPROTO_H_
1833struct access_args {
1834	char	*path;
1835	int	flags;
1836};
1837#endif
1838int
1839access(td, uap)
1840	struct thread *td;
1841	register struct access_args /* {
1842		syscallarg(char *) path;
1843		syscallarg(int) flags;
1844	} */ *uap;
1845{
1846	struct ucred *cred, *tmpcred;
1847	register struct vnode *vp;
1848	int error;
1849	struct nameidata nd;
1850
1851	/*
1852	 * Create and modify a temporary credential instead of one that
1853	 * is potentially shared.  This could also mess up socket
1854	 * buffer accounting which can run in an interrupt context.
1855	 *
1856	 * XXX - Depending on how "threads" are finally implemented, it
1857	 * may be better to explicitly pass the credential to namei()
1858	 * rather than to modify the potentially shared process structure.
1859	 */
1860	cred = td->td_ucred;
1861	tmpcred = crdup(cred);
1862	tmpcred->cr_uid = cred->cr_ruid;
1863	tmpcred->cr_groups[0] = cred->cr_rgid;
1864	td->td_ucred = tmpcred;
1865	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1866	    SCARG(uap, path), td);
1867	if ((error = namei(&nd)) != 0)
1868		goto out1;
1869	vp = nd.ni_vp;
1870
1871	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
1872	NDFREE(&nd, NDF_ONLY_PNBUF);
1873	vput(vp);
1874out1:
1875	td->td_ucred = cred;
1876	crfree(tmpcred);
1877	return (error);
1878}
1879
1880/*
1881 * Check access permissions using "effective" credentials.
1882 */
1883#ifndef _SYS_SYSPROTO_H_
1884struct eaccess_args {
1885	char	*path;
1886	int	flags;
1887};
1888#endif
1889int
1890eaccess(td, uap)
1891	struct thread *td;
1892	register struct eaccess_args /* {
1893		syscallarg(char *) path;
1894		syscallarg(int) flags;
1895	} */ *uap;
1896{
1897	struct nameidata nd;
1898	struct vnode *vp;
1899	int error;
1900
1901	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1902	    SCARG(uap, path), td);
1903	if ((error = namei(&nd)) != 0)
1904		return (error);
1905	vp = nd.ni_vp;
1906
1907	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1908	NDFREE(&nd, NDF_ONLY_PNBUF);
1909	vput(vp);
1910	return (error);
1911}
1912
1913#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1914/*
1915 * Get file status; this version follows links.
1916 */
1917#ifndef _SYS_SYSPROTO_H_
1918struct ostat_args {
1919	char	*path;
1920	struct ostat *ub;
1921};
1922#endif
1923/* ARGSUSED */
1924int
1925ostat(td, uap)
1926	struct thread *td;
1927	register struct ostat_args /* {
1928		syscallarg(char *) path;
1929		syscallarg(struct ostat *) ub;
1930	} */ *uap;
1931{
1932	struct stat sb;
1933	struct ostat osb;
1934	int error;
1935	struct nameidata nd;
1936
1937	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1938	    SCARG(uap, path), td);
1939	if ((error = namei(&nd)) != 0)
1940		return (error);
1941	NDFREE(&nd, NDF_ONLY_PNBUF);
1942	error = vn_stat(nd.ni_vp, &sb, td);
1943	vput(nd.ni_vp);
1944	if (error)
1945		return (error);
1946	cvtstat(&sb, &osb);
1947	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1948	return (error);
1949}
1950
1951/*
1952 * Get file status; this version does not follow links.
1953 */
1954#ifndef _SYS_SYSPROTO_H_
1955struct olstat_args {
1956	char	*path;
1957	struct ostat *ub;
1958};
1959#endif
1960/* ARGSUSED */
1961int
1962olstat(td, uap)
1963	struct thread *td;
1964	register struct olstat_args /* {
1965		syscallarg(char *) path;
1966		syscallarg(struct ostat *) ub;
1967	} */ *uap;
1968{
1969	struct vnode *vp;
1970	struct stat sb;
1971	struct ostat osb;
1972	int error;
1973	struct nameidata nd;
1974
1975	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1976	    SCARG(uap, path), td);
1977	if ((error = namei(&nd)) != 0)
1978		return (error);
1979	vp = nd.ni_vp;
1980	error = vn_stat(vp, &sb, td);
1981	NDFREE(&nd, NDF_ONLY_PNBUF);
1982	vput(vp);
1983	if (error)
1984		return (error);
1985	cvtstat(&sb, &osb);
1986	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1987	return (error);
1988}
1989
1990/*
1991 * Convert from an old to a new stat structure.
1992 */
1993void
1994cvtstat(st, ost)
1995	struct stat *st;
1996	struct ostat *ost;
1997{
1998
1999	ost->st_dev = st->st_dev;
2000	ost->st_ino = st->st_ino;
2001	ost->st_mode = st->st_mode;
2002	ost->st_nlink = st->st_nlink;
2003	ost->st_uid = st->st_uid;
2004	ost->st_gid = st->st_gid;
2005	ost->st_rdev = st->st_rdev;
2006	if (st->st_size < (quad_t)1 << 32)
2007		ost->st_size = st->st_size;
2008	else
2009		ost->st_size = -2;
2010	ost->st_atime = st->st_atime;
2011	ost->st_mtime = st->st_mtime;
2012	ost->st_ctime = st->st_ctime;
2013	ost->st_blksize = st->st_blksize;
2014	ost->st_blocks = st->st_blocks;
2015	ost->st_flags = st->st_flags;
2016	ost->st_gen = st->st_gen;
2017}
2018#endif /* COMPAT_43 || COMPAT_SUNOS */
2019
2020/*
2021 * Get file status; this version follows links.
2022 */
2023#ifndef _SYS_SYSPROTO_H_
2024struct stat_args {
2025	char	*path;
2026	struct stat *ub;
2027};
2028#endif
2029/* ARGSUSED */
2030int
2031stat(td, uap)
2032	struct thread *td;
2033	register struct stat_args /* {
2034		syscallarg(char *) path;
2035		syscallarg(struct stat *) ub;
2036	} */ *uap;
2037{
2038	struct stat sb;
2039	int error;
2040	struct nameidata nd;
2041
2042#ifdef LOOKUP_SHARED
2043	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2044	    UIO_USERSPACE, SCARG(uap, path), td);
2045#else
2046	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2047	    SCARG(uap, path), td);
2048#endif
2049	if ((error = namei(&nd)) != 0)
2050		return (error);
2051	error = vn_stat(nd.ni_vp, &sb, td);
2052	NDFREE(&nd, NDF_ONLY_PNBUF);
2053	vput(nd.ni_vp);
2054	if (error)
2055		return (error);
2056	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2057	return (error);
2058}
2059
2060/*
2061 * Get file status; this version does not follow links.
2062 */
2063#ifndef _SYS_SYSPROTO_H_
2064struct lstat_args {
2065	char	*path;
2066	struct stat *ub;
2067};
2068#endif
2069/* ARGSUSED */
2070int
2071lstat(td, uap)
2072	struct thread *td;
2073	register struct lstat_args /* {
2074		syscallarg(char *) path;
2075		syscallarg(struct stat *) ub;
2076	} */ *uap;
2077{
2078	int error;
2079	struct vnode *vp;
2080	struct stat sb;
2081	struct nameidata nd;
2082
2083	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2084	    SCARG(uap, path), td);
2085	if ((error = namei(&nd)) != 0)
2086		return (error);
2087	vp = nd.ni_vp;
2088	error = vn_stat(vp, &sb, td);
2089	NDFREE(&nd, NDF_ONLY_PNBUF);
2090	vput(vp);
2091	if (error)
2092		return (error);
2093	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2094	return (error);
2095}
2096
2097/*
2098 * Implementation of the NetBSD stat() function.
2099 * XXX This should probably be collapsed with the FreeBSD version,
2100 * as the differences are only due to vn_stat() clearing spares at
2101 * the end of the structures.  vn_stat could be split to avoid this,
2102 * and thus collapse the following to close to zero code.
2103 */
2104void
2105cvtnstat(sb, nsb)
2106	struct stat *sb;
2107	struct nstat *nsb;
2108{
2109	nsb->st_dev = sb->st_dev;
2110	nsb->st_ino = sb->st_ino;
2111	nsb->st_mode = sb->st_mode;
2112	nsb->st_nlink = sb->st_nlink;
2113	nsb->st_uid = sb->st_uid;
2114	nsb->st_gid = sb->st_gid;
2115	nsb->st_rdev = sb->st_rdev;
2116	nsb->st_atimespec = sb->st_atimespec;
2117	nsb->st_mtimespec = sb->st_mtimespec;
2118	nsb->st_ctimespec = sb->st_ctimespec;
2119	nsb->st_size = sb->st_size;
2120	nsb->st_blocks = sb->st_blocks;
2121	nsb->st_blksize = sb->st_blksize;
2122	nsb->st_flags = sb->st_flags;
2123	nsb->st_gen = sb->st_gen;
2124	nsb->st_qspare[0] = sb->st_qspare[0];
2125	nsb->st_qspare[1] = sb->st_qspare[1];
2126}
2127
2128#ifndef _SYS_SYSPROTO_H_
2129struct nstat_args {
2130	char	*path;
2131	struct nstat *ub;
2132};
2133#endif
2134/* ARGSUSED */
2135int
2136nstat(td, uap)
2137	struct thread *td;
2138	register struct nstat_args /* {
2139		syscallarg(char *) path;
2140		syscallarg(struct nstat *) ub;
2141	} */ *uap;
2142{
2143	struct stat sb;
2144	struct nstat nsb;
2145	int error;
2146	struct nameidata nd;
2147
2148	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2149	    SCARG(uap, path), td);
2150	if ((error = namei(&nd)) != 0)
2151		return (error);
2152	NDFREE(&nd, NDF_ONLY_PNBUF);
2153	error = vn_stat(nd.ni_vp, &sb, td);
2154	vput(nd.ni_vp);
2155	if (error)
2156		return (error);
2157	cvtnstat(&sb, &nsb);
2158	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2159	return (error);
2160}
2161
2162/*
2163 * NetBSD lstat.  Get file status; this version does not follow links.
2164 */
2165#ifndef _SYS_SYSPROTO_H_
2166struct lstat_args {
2167	char	*path;
2168	struct stat *ub;
2169};
2170#endif
2171/* ARGSUSED */
2172int
2173nlstat(td, uap)
2174	struct thread *td;
2175	register struct nlstat_args /* {
2176		syscallarg(char *) path;
2177		syscallarg(struct nstat *) ub;
2178	} */ *uap;
2179{
2180	int error;
2181	struct vnode *vp;
2182	struct stat sb;
2183	struct nstat nsb;
2184	struct nameidata nd;
2185
2186	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2187	    SCARG(uap, path), td);
2188	if ((error = namei(&nd)) != 0)
2189		return (error);
2190	vp = nd.ni_vp;
2191	NDFREE(&nd, NDF_ONLY_PNBUF);
2192	error = vn_stat(vp, &sb, td);
2193	vput(vp);
2194	if (error)
2195		return (error);
2196	cvtnstat(&sb, &nsb);
2197	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2198	return (error);
2199}
2200
2201/*
2202 * Get configurable pathname variables.
2203 */
2204#ifndef _SYS_SYSPROTO_H_
2205struct pathconf_args {
2206	char	*path;
2207	int	name;
2208};
2209#endif
2210/* ARGSUSED */
2211int
2212pathconf(td, uap)
2213	struct thread *td;
2214	register struct pathconf_args /* {
2215		syscallarg(char *) path;
2216		syscallarg(int) name;
2217	} */ *uap;
2218{
2219	int error;
2220	struct nameidata nd;
2221
2222	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2223	    SCARG(uap, path), td);
2224	if ((error = namei(&nd)) != 0)
2225		return (error);
2226	NDFREE(&nd, NDF_ONLY_PNBUF);
2227	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2228	vput(nd.ni_vp);
2229	return (error);
2230}
2231
2232/*
2233 * Return target name of a symbolic link.
2234 */
2235#ifndef _SYS_SYSPROTO_H_
2236struct readlink_args {
2237	char	*path;
2238	char	*buf;
2239	int	count;
2240};
2241#endif
2242/* ARGSUSED */
2243int
2244readlink(td, uap)
2245	struct thread *td;
2246	register struct readlink_args /* {
2247		syscallarg(char *) path;
2248		syscallarg(char *) buf;
2249		syscallarg(int) count;
2250	} */ *uap;
2251{
2252	register struct vnode *vp;
2253	struct iovec aiov;
2254	struct uio auio;
2255	int error;
2256	struct nameidata nd;
2257
2258	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2259	    SCARG(uap, path), td);
2260	if ((error = namei(&nd)) != 0)
2261		return (error);
2262	NDFREE(&nd, NDF_ONLY_PNBUF);
2263	vp = nd.ni_vp;
2264	if (vp->v_type != VLNK)
2265		error = EINVAL;
2266	else {
2267		aiov.iov_base = SCARG(uap, buf);
2268		aiov.iov_len = SCARG(uap, count);
2269		auio.uio_iov = &aiov;
2270		auio.uio_iovcnt = 1;
2271		auio.uio_offset = 0;
2272		auio.uio_rw = UIO_READ;
2273		auio.uio_segflg = UIO_USERSPACE;
2274		auio.uio_td = td;
2275		auio.uio_resid = SCARG(uap, count);
2276		error = VOP_READLINK(vp, &auio, td->td_ucred);
2277	}
2278	vput(vp);
2279	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2280	return (error);
2281}
2282
2283/*
2284 * Common implementation code for chflags() and fchflags().
2285 */
2286static int
2287setfflags(td, vp, flags)
2288	struct thread *td;
2289	struct vnode *vp;
2290	int flags;
2291{
2292	int error;
2293	struct mount *mp;
2294	struct vattr vattr;
2295
2296	/*
2297	 * Prevent non-root users from setting flags on devices.  When
2298	 * a device is reused, users can retain ownership of the device
2299	 * if they are allowed to set flags and programs assume that
2300	 * chown can't fail when done as root.
2301	 */
2302	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2303		error = suser_xxx(td->td_ucred, td->td_proc, PRISON_ROOT);
2304		if (error)
2305			return (error);
2306	}
2307
2308	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2309		return (error);
2310	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2311	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2312	VATTR_NULL(&vattr);
2313	vattr.va_flags = flags;
2314	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2315	VOP_UNLOCK(vp, 0, td);
2316	vn_finished_write(mp);
2317	return (error);
2318}
2319
2320/*
2321 * Change flags of a file given a path name.
2322 */
2323#ifndef _SYS_SYSPROTO_H_
2324struct chflags_args {
2325	char	*path;
2326	int	flags;
2327};
2328#endif
2329/* ARGSUSED */
2330int
2331chflags(td, uap)
2332	struct thread *td;
2333	register struct chflags_args /* {
2334		syscallarg(char *) path;
2335		syscallarg(int) flags;
2336	} */ *uap;
2337{
2338	int error;
2339	struct nameidata nd;
2340
2341	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2342	if ((error = namei(&nd)) != 0)
2343		return (error);
2344	NDFREE(&nd, NDF_ONLY_PNBUF);
2345	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2346	vrele(nd.ni_vp);
2347	return error;
2348}
2349
2350/*
2351 * Change flags of a file given a file descriptor.
2352 */
2353#ifndef _SYS_SYSPROTO_H_
2354struct fchflags_args {
2355	int	fd;
2356	int	flags;
2357};
2358#endif
2359/* ARGSUSED */
2360int
2361fchflags(td, uap)
2362	struct thread *td;
2363	register struct fchflags_args /* {
2364		syscallarg(int) fd;
2365		syscallarg(int) flags;
2366	} */ *uap;
2367{
2368	struct file *fp;
2369	int error;
2370
2371	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2372		return (error);
2373	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2374	fdrop(fp, td);
2375	return (error);
2376}
2377
2378/*
2379 * Common implementation code for chmod(), lchmod() and fchmod().
2380 */
2381static int
2382setfmode(td, vp, mode)
2383	struct thread *td;
2384	struct vnode *vp;
2385	int mode;
2386{
2387	int error;
2388	struct mount *mp;
2389	struct vattr vattr;
2390
2391	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2392		return (error);
2393	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2394	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2395	VATTR_NULL(&vattr);
2396	vattr.va_mode = mode & ALLPERMS;
2397	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2398	VOP_UNLOCK(vp, 0, td);
2399	vn_finished_write(mp);
2400	return error;
2401}
2402
2403/*
2404 * Change mode of a file given path name.
2405 */
2406#ifndef _SYS_SYSPROTO_H_
2407struct chmod_args {
2408	char	*path;
2409	int	mode;
2410};
2411#endif
2412/* ARGSUSED */
2413int
2414chmod(td, uap)
2415	struct thread *td;
2416	register struct chmod_args /* {
2417		syscallarg(char *) path;
2418		syscallarg(int) mode;
2419	} */ *uap;
2420{
2421	int error;
2422	struct nameidata nd;
2423
2424	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2425	if ((error = namei(&nd)) != 0)
2426		return (error);
2427	NDFREE(&nd, NDF_ONLY_PNBUF);
2428	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2429	vrele(nd.ni_vp);
2430	return error;
2431}
2432
2433/*
2434 * Change mode of a file given path name (don't follow links.)
2435 */
2436#ifndef _SYS_SYSPROTO_H_
2437struct lchmod_args {
2438	char	*path;
2439	int	mode;
2440};
2441#endif
2442/* ARGSUSED */
2443int
2444lchmod(td, uap)
2445	struct thread *td;
2446	register struct lchmod_args /* {
2447		syscallarg(char *) path;
2448		syscallarg(int) mode;
2449	} */ *uap;
2450{
2451	int error;
2452	struct nameidata nd;
2453
2454	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2455	if ((error = namei(&nd)) != 0)
2456		return (error);
2457	NDFREE(&nd, NDF_ONLY_PNBUF);
2458	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2459	vrele(nd.ni_vp);
2460	return error;
2461}
2462
2463/*
2464 * Change mode of a file given a file descriptor.
2465 */
2466#ifndef _SYS_SYSPROTO_H_
2467struct fchmod_args {
2468	int	fd;
2469	int	mode;
2470};
2471#endif
2472/* ARGSUSED */
2473int
2474fchmod(td, uap)
2475	struct thread *td;
2476	register struct fchmod_args /* {
2477		syscallarg(int) fd;
2478		syscallarg(int) mode;
2479	} */ *uap;
2480{
2481	struct file *fp;
2482	struct vnode *vp;
2483	int error;
2484
2485	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2486		return (error);
2487	vp = (struct vnode *)fp->f_data;
2488	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2489	fdrop(fp, td);
2490	return (error);
2491}
2492
2493/*
2494 * Common implementation for chown(), lchown(), and fchown()
2495 */
2496static int
2497setfown(td, vp, uid, gid)
2498	struct thread *td;
2499	struct vnode *vp;
2500	uid_t uid;
2501	gid_t gid;
2502{
2503	int error;
2504	struct mount *mp;
2505	struct vattr vattr;
2506
2507	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2508		return (error);
2509	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2510	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2511	VATTR_NULL(&vattr);
2512	vattr.va_uid = uid;
2513	vattr.va_gid = gid;
2514	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2515	VOP_UNLOCK(vp, 0, td);
2516	vn_finished_write(mp);
2517	return error;
2518}
2519
2520/*
2521 * Set ownership given a path name.
2522 */
2523#ifndef _SYS_SYSPROTO_H_
2524struct chown_args {
2525	char	*path;
2526	int	uid;
2527	int	gid;
2528};
2529#endif
2530/* ARGSUSED */
2531int
2532chown(td, uap)
2533	struct thread *td;
2534	register struct chown_args /* {
2535		syscallarg(char *) path;
2536		syscallarg(int) uid;
2537		syscallarg(int) gid;
2538	} */ *uap;
2539{
2540	int error;
2541	struct nameidata nd;
2542
2543	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2544	if ((error = namei(&nd)) != 0)
2545		return (error);
2546	NDFREE(&nd, NDF_ONLY_PNBUF);
2547	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2548	vrele(nd.ni_vp);
2549	return (error);
2550}
2551
2552/*
2553 * Set ownership given a path name, do not cross symlinks.
2554 */
2555#ifndef _SYS_SYSPROTO_H_
2556struct lchown_args {
2557	char	*path;
2558	int	uid;
2559	int	gid;
2560};
2561#endif
2562/* ARGSUSED */
2563int
2564lchown(td, uap)
2565	struct thread *td;
2566	register struct lchown_args /* {
2567		syscallarg(char *) path;
2568		syscallarg(int) uid;
2569		syscallarg(int) gid;
2570	} */ *uap;
2571{
2572	int error;
2573	struct nameidata nd;
2574
2575	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2576	if ((error = namei(&nd)) != 0)
2577		return (error);
2578	NDFREE(&nd, NDF_ONLY_PNBUF);
2579	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2580	vrele(nd.ni_vp);
2581	return (error);
2582}
2583
2584/*
2585 * Set ownership given a file descriptor.
2586 */
2587#ifndef _SYS_SYSPROTO_H_
2588struct fchown_args {
2589	int	fd;
2590	int	uid;
2591	int	gid;
2592};
2593#endif
2594/* ARGSUSED */
2595int
2596fchown(td, uap)
2597	struct thread *td;
2598	register struct fchown_args /* {
2599		syscallarg(int) fd;
2600		syscallarg(int) uid;
2601		syscallarg(int) gid;
2602	} */ *uap;
2603{
2604	struct file *fp;
2605	struct vnode *vp;
2606	int error;
2607
2608	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2609		return (error);
2610	vp = (struct vnode *)fp->f_data;
2611	error = setfown(td, (struct vnode *)fp->f_data,
2612		SCARG(uap, uid), SCARG(uap, gid));
2613	fdrop(fp, td);
2614	return (error);
2615}
2616
2617/*
2618 * Common implementation code for utimes(), lutimes(), and futimes().
2619 */
2620static int
2621getutimes(usrtvp, tsp)
2622	const struct timeval *usrtvp;
2623	struct timespec *tsp;
2624{
2625	struct timeval tv[2];
2626	int error;
2627
2628	if (usrtvp == NULL) {
2629		microtime(&tv[0]);
2630		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2631		tsp[1] = tsp[0];
2632	} else {
2633		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2634			return (error);
2635		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2636		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2637	}
2638	return 0;
2639}
2640
2641/*
2642 * Common implementation code for utimes(), lutimes(), and futimes().
2643 */
2644static int
2645setutimes(td, vp, ts, nullflag)
2646	struct thread *td;
2647	struct vnode *vp;
2648	const struct timespec *ts;
2649	int nullflag;
2650{
2651	int error;
2652	struct mount *mp;
2653	struct vattr vattr;
2654
2655	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2656		return (error);
2657	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2658	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2659	VATTR_NULL(&vattr);
2660	vattr.va_atime = ts[0];
2661	vattr.va_mtime = ts[1];
2662	if (nullflag)
2663		vattr.va_vaflags |= VA_UTIMES_NULL;
2664	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2665	VOP_UNLOCK(vp, 0, td);
2666	vn_finished_write(mp);
2667	return error;
2668}
2669
2670/*
2671 * Set the access and modification times of a file.
2672 */
2673#ifndef _SYS_SYSPROTO_H_
2674struct utimes_args {
2675	char	*path;
2676	struct	timeval *tptr;
2677};
2678#endif
2679/* ARGSUSED */
2680int
2681utimes(td, uap)
2682	struct thread *td;
2683	register struct utimes_args /* {
2684		syscallarg(char *) path;
2685		syscallarg(struct timeval *) tptr;
2686	} */ *uap;
2687{
2688	struct timespec ts[2];
2689	struct timeval *usrtvp;
2690	int error;
2691	struct nameidata nd;
2692
2693	usrtvp = SCARG(uap, tptr);
2694	if ((error = getutimes(usrtvp, ts)) != 0)
2695		return (error);
2696	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2697	if ((error = namei(&nd)) != 0)
2698		return (error);
2699	NDFREE(&nd, NDF_ONLY_PNBUF);
2700	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2701	vrele(nd.ni_vp);
2702	return (error);
2703}
2704
2705/*
2706 * Set the access and modification times of a file.
2707 */
2708#ifndef _SYS_SYSPROTO_H_
2709struct lutimes_args {
2710	char	*path;
2711	struct	timeval *tptr;
2712};
2713#endif
2714/* ARGSUSED */
2715int
2716lutimes(td, uap)
2717	struct thread *td;
2718	register struct lutimes_args /* {
2719		syscallarg(char *) path;
2720		syscallarg(struct timeval *) tptr;
2721	} */ *uap;
2722{
2723	struct timespec ts[2];
2724	struct timeval *usrtvp;
2725	int error;
2726	struct nameidata nd;
2727
2728	usrtvp = SCARG(uap, tptr);
2729	if ((error = getutimes(usrtvp, ts)) != 0)
2730		return (error);
2731	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2732	if ((error = namei(&nd)) != 0)
2733		return (error);
2734	NDFREE(&nd, NDF_ONLY_PNBUF);
2735	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2736	vrele(nd.ni_vp);
2737	return (error);
2738}
2739
2740/*
2741 * Set the access and modification times of a file.
2742 */
2743#ifndef _SYS_SYSPROTO_H_
2744struct futimes_args {
2745	int	fd;
2746	struct	timeval *tptr;
2747};
2748#endif
2749/* ARGSUSED */
2750int
2751futimes(td, uap)
2752	struct thread *td;
2753	register struct futimes_args /* {
2754		syscallarg(int ) fd;
2755		syscallarg(struct timeval *) tptr;
2756	} */ *uap;
2757{
2758	struct timespec ts[2];
2759	struct file *fp;
2760	struct timeval *usrtvp;
2761	int error;
2762
2763	usrtvp = SCARG(uap, tptr);
2764	if ((error = getutimes(usrtvp, ts)) != 0)
2765		return (error);
2766	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2767		return (error);
2768	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2769	fdrop(fp, td);
2770	return (error);
2771}
2772
2773/*
2774 * Truncate a file given its path name.
2775 */
2776#ifndef _SYS_SYSPROTO_H_
2777struct truncate_args {
2778	char	*path;
2779	int	pad;
2780	off_t	length;
2781};
2782#endif
2783/* ARGSUSED */
2784int
2785truncate(td, uap)
2786	struct thread *td;
2787	register struct truncate_args /* {
2788		syscallarg(char *) path;
2789		syscallarg(int) pad;
2790		syscallarg(off_t) length;
2791	} */ *uap;
2792{
2793	struct mount *mp;
2794	struct vnode *vp;
2795	struct vattr vattr;
2796	int error;
2797	struct nameidata nd;
2798
2799	if (uap->length < 0)
2800		return(EINVAL);
2801	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2802	if ((error = namei(&nd)) != 0)
2803		return (error);
2804	vp = nd.ni_vp;
2805	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2806		vrele(vp);
2807		return (error);
2808	}
2809	NDFREE(&nd, NDF_ONLY_PNBUF);
2810	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2811	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2812	if (vp->v_type == VDIR)
2813		error = EISDIR;
2814	else if ((error = vn_writechk(vp)) == 0 &&
2815	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2816		VATTR_NULL(&vattr);
2817		vattr.va_size = SCARG(uap, length);
2818		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2819	}
2820	vput(vp);
2821	vn_finished_write(mp);
2822	return (error);
2823}
2824
2825/*
2826 * Truncate a file given a file descriptor.
2827 */
2828#ifndef _SYS_SYSPROTO_H_
2829struct ftruncate_args {
2830	int	fd;
2831	int	pad;
2832	off_t	length;
2833};
2834#endif
2835/* ARGSUSED */
2836int
2837ftruncate(td, uap)
2838	struct thread *td;
2839	register struct ftruncate_args /* {
2840		syscallarg(int) fd;
2841		syscallarg(int) pad;
2842		syscallarg(off_t) length;
2843	} */ *uap;
2844{
2845	struct mount *mp;
2846	struct vattr vattr;
2847	struct vnode *vp;
2848	struct file *fp;
2849	int error;
2850
2851	if (uap->length < 0)
2852		return(EINVAL);
2853	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2854		return (error);
2855	if ((fp->f_flag & FWRITE) == 0) {
2856		fdrop(fp, td);
2857		return (EINVAL);
2858	}
2859	vp = (struct vnode *)fp->f_data;
2860	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2861		fdrop(fp, td);
2862		return (error);
2863	}
2864	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2865	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2866	if (vp->v_type == VDIR)
2867		error = EISDIR;
2868	else if ((error = vn_writechk(vp)) == 0) {
2869		VATTR_NULL(&vattr);
2870		vattr.va_size = SCARG(uap, length);
2871		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2872	}
2873	VOP_UNLOCK(vp, 0, td);
2874	vn_finished_write(mp);
2875	fdrop(fp, td);
2876	return (error);
2877}
2878
2879#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2880/*
2881 * Truncate a file given its path name.
2882 */
2883#ifndef _SYS_SYSPROTO_H_
2884struct otruncate_args {
2885	char	*path;
2886	long	length;
2887};
2888#endif
2889/* ARGSUSED */
2890int
2891otruncate(td, uap)
2892	struct thread *td;
2893	register struct otruncate_args /* {
2894		syscallarg(char *) path;
2895		syscallarg(long) length;
2896	} */ *uap;
2897{
2898	struct truncate_args /* {
2899		syscallarg(char *) path;
2900		syscallarg(int) pad;
2901		syscallarg(off_t) length;
2902	} */ nuap;
2903
2904	SCARG(&nuap, path) = SCARG(uap, path);
2905	SCARG(&nuap, length) = SCARG(uap, length);
2906	return (truncate(td, &nuap));
2907}
2908
2909/*
2910 * Truncate a file given a file descriptor.
2911 */
2912#ifndef _SYS_SYSPROTO_H_
2913struct oftruncate_args {
2914	int	fd;
2915	long	length;
2916};
2917#endif
2918/* ARGSUSED */
2919int
2920oftruncate(td, uap)
2921	struct thread *td;
2922	register struct oftruncate_args /* {
2923		syscallarg(int) fd;
2924		syscallarg(long) length;
2925	} */ *uap;
2926{
2927	struct ftruncate_args /* {
2928		syscallarg(int) fd;
2929		syscallarg(int) pad;
2930		syscallarg(off_t) length;
2931	} */ nuap;
2932
2933	SCARG(&nuap, fd) = SCARG(uap, fd);
2934	SCARG(&nuap, length) = SCARG(uap, length);
2935	return (ftruncate(td, &nuap));
2936}
2937#endif /* COMPAT_43 || COMPAT_SUNOS */
2938
2939/*
2940 * Sync an open file.
2941 */
2942#ifndef _SYS_SYSPROTO_H_
2943struct fsync_args {
2944	int	fd;
2945};
2946#endif
2947/* ARGSUSED */
2948int
2949fsync(td, uap)
2950	struct thread *td;
2951	struct fsync_args /* {
2952		syscallarg(int) fd;
2953	} */ *uap;
2954{
2955	struct vnode *vp;
2956	struct mount *mp;
2957	struct file *fp;
2958	vm_object_t obj;
2959	int error;
2960
2961	GIANT_REQUIRED;
2962
2963	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2964		return (error);
2965	vp = (struct vnode *)fp->f_data;
2966	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2967		fdrop(fp, td);
2968		return (error);
2969	}
2970	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2971	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2972		vm_object_page_clean(obj, 0, 0, 0);
2973	}
2974	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2975#ifdef SOFTUPDATES
2976	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2977	    error = softdep_fsync(vp);
2978#endif
2979
2980	VOP_UNLOCK(vp, 0, td);
2981	vn_finished_write(mp);
2982	fdrop(fp, td);
2983	return (error);
2984}
2985
/*
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * td is the calling thread; uap carries the user-space path names `from'
 * and `to'.  Returns 0 on success or an errno value.  Internally,
 * error == -1 marks "source and target are the same directory entry";
 * that case skips VOP_RENAME() and is reported to the caller as success.
 */
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
	char	*from;
	char	*to;
};
#endif
/* ARGSUSED */
int
rename(td, uap)
	struct thread *td;
	register struct rename_args /* {
		syscallarg(char *) from;
		syscallarg(char *) to;
	} */ *uap;
{
	struct mount *mp;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;

	bwillwrite();
	/*
	 * Look up the source.  The parent (ni_dvp), the source vnode (ni_vp)
	 * and ni_startdir obtained here are all released further down.
	 */
	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
	    SCARG(uap, from), td);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvp = fromnd.ni_vp;
	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
		/* Nothing looked up for the target yet: undo the source only. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	/* Look up the target, asking for parent and leaf to come back locked. */
	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
	    UIO_USERSPACE, SCARG(uap, to), td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		/*
		 * NOTE(review): this path jumps past vn_finished_write(mp)
		 * although vn_start_write() succeeded above -- confirm
		 * whether that is intended.
		 */
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	/* An existing target must be of the same kind (dir / non-dir). */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory the target lives in. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number with the same name in the same directory),
	 * then there is nothing to do.
	 */
	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
	      fromnd.ni_cnd.cn_namelen))
		error = -1;
out:
	if (!error) {
		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
		if (fromnd.ni_dvp != tdvp) {
			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		}
		if (tvp) {
			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
		}
		/*
		 * No vrele()/vput() follows on this branch, unlike the error
		 * branch below; presumably VOP_RENAME() disposes of the four
		 * vnodes itself -- verify against the VOP_RENAME contract.
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/* VOP_RENAME() is not called: release everything here. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		/* When parent and leaf are the same vnode, unlock it only once. */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	/* -1 is the internal "same entry, nothing to do" marker. */
	if (error == -1)
		return (0);
	return (error);
}
3096
3097/*
3098 * Make a directory file.
3099 */
3100#ifndef _SYS_SYSPROTO_H_
3101struct mkdir_args {
3102	char	*path;
3103	int	mode;
3104};
3105#endif
3106/* ARGSUSED */
3107int
3108mkdir(td, uap)
3109	struct thread *td;
3110	register struct mkdir_args /* {
3111		syscallarg(char *) path;
3112		syscallarg(int) mode;
3113	} */ *uap;
3114{
3115
3116	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3117}
3118
3119int
3120vn_mkdir(path, mode, segflg, td)
3121	char *path;
3122	int mode;
3123	enum uio_seg segflg;
3124	struct thread *td;
3125{
3126	struct mount *mp;
3127	struct vnode *vp;
3128	struct vattr vattr;
3129	int error;
3130	struct nameidata nd;
3131
3132restart:
3133	bwillwrite();
3134	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3135	nd.ni_cnd.cn_flags |= WILLBEDIR;
3136	if ((error = namei(&nd)) != 0)
3137		return (error);
3138	vp = nd.ni_vp;
3139	if (vp != NULL) {
3140		NDFREE(&nd, NDF_ONLY_PNBUF);
3141		vrele(vp);
3142		vput(nd.ni_dvp);
3143		return (EEXIST);
3144	}
3145	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3146		NDFREE(&nd, NDF_ONLY_PNBUF);
3147		vput(nd.ni_dvp);
3148		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3149			return (error);
3150		goto restart;
3151	}
3152	VATTR_NULL(&vattr);
3153	vattr.va_type = VDIR;
3154	FILEDESC_LOCK(td->td_proc->p_fd);
3155	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3156	FILEDESC_UNLOCK(td->td_proc->p_fd);
3157	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3158	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3159	NDFREE(&nd, NDF_ONLY_PNBUF);
3160	vput(nd.ni_dvp);
3161	if (!error)
3162		vput(nd.ni_vp);
3163	vn_finished_write(mp);
3164	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3165	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3166	return (error);
3167}
3168
3169/*
3170 * Remove a directory file.
3171 */
3172#ifndef _SYS_SYSPROTO_H_
3173struct rmdir_args {
3174	char	*path;
3175};
3176#endif
3177/* ARGSUSED */
3178int
3179rmdir(td, uap)
3180	struct thread *td;
3181	struct rmdir_args /* {
3182		syscallarg(char *) path;
3183	} */ *uap;
3184{
3185	struct mount *mp;
3186	struct vnode *vp;
3187	int error;
3188	struct nameidata nd;
3189
3190restart:
3191	bwillwrite();
3192	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3193	    SCARG(uap, path), td);
3194	if ((error = namei(&nd)) != 0)
3195		return (error);
3196	vp = nd.ni_vp;
3197	if (vp->v_type != VDIR) {
3198		error = ENOTDIR;
3199		goto out;
3200	}
3201	/*
3202	 * No rmdir "." please.
3203	 */
3204	if (nd.ni_dvp == vp) {
3205		error = EINVAL;
3206		goto out;
3207	}
3208	/*
3209	 * The root of a mounted filesystem cannot be deleted.
3210	 */
3211	if (vp->v_flag & VROOT) {
3212		error = EBUSY;
3213		goto out;
3214	}
3215	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3216		NDFREE(&nd, NDF_ONLY_PNBUF);
3217		if (nd.ni_dvp == vp)
3218			vrele(nd.ni_dvp);
3219		else
3220			vput(nd.ni_dvp);
3221		vput(vp);
3222		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3223			return (error);
3224		goto restart;
3225	}
3226	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3227	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3228	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3229	vn_finished_write(mp);
3230out:
3231	NDFREE(&nd, NDF_ONLY_PNBUF);
3232	if (nd.ni_dvp == vp)
3233		vrele(nd.ni_dvp);
3234	else
3235		vput(nd.ni_dvp);
3236	vput(vp);
3237	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3238	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3239	return (error);
3240}
3241
3242#ifdef COMPAT_43
/*
 * Read a block of directory entries in a file system independent format.
 *
 * Old (COMPAT_43) variant.  Unless the big-endian shortcut below applies,
 * entries are read into a temporary kernel buffer, each record's
 * d_type/d_namlen pair is rewritten as described in the loop, and the
 * result is then copied to the user buffer.
 *
 * On success td->td_retval[0] holds the number of bytes transferred and
 * the directory offset from before the read is copied out to `basep'
 * (unconditionally -- there is no NULL check on basep here).
 * Fails with EINVAL if count exceeds 64KB or the descriptor is not a
 * directory, and EBADF if the descriptor lacks FREAD.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (SCARG(uap, count) > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = (struct vnode *)fp->f_data;
	/* Re-entered from below when the read has to be retried on another vnode. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the user's buffer. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets reported via basep. */
	loff = auio.uio_offset = fp->f_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* Read straight into the user buffer, no per-entry fix-up. */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* Same request as auio, but aimed at a temporary kernel buffer. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = SCARG(uap, count) - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			/* Walk the records by d_reclen, fixing each one up in place. */
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero-length record would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Only copy out if the walk reached the end of the data. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether another vnode should be read. */
	if (SCARG(uap, count) == auio.uio_resid) {
		if (union_dircheckp) {
			/* A return of -1 means "retry with the updated vp". */
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Switch the open file over to the vnode covered by
			 * this mount and read that directory from offset 0.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
3381#endif /* COMPAT_43 */
3382
3383/*
3384 * Read a block of directory entries in a file system independent format.
3385 */
3386#ifndef _SYS_SYSPROTO_H_
3387struct getdirentries_args {
3388	int	fd;
3389	char	*buf;
3390	u_int	count;
3391	long	*basep;
3392};
3393#endif
3394int
3395getdirentries(td, uap)
3396	struct thread *td;
3397	register struct getdirentries_args /* {
3398		syscallarg(int) fd;
3399		syscallarg(char *) buf;
3400		syscallarg(u_int) count;
3401		syscallarg(long *) basep;
3402	} */ *uap;
3403{
3404	struct vnode *vp;
3405	struct file *fp;
3406	struct uio auio;
3407	struct iovec aiov;
3408	long loff;
3409	int error, eofflag;
3410
3411	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3412		return (error);
3413	if ((fp->f_flag & FREAD) == 0) {
3414		fdrop(fp, td);
3415		return (EBADF);
3416	}
3417	vp = (struct vnode *)fp->f_data;
3418unionread:
3419	if (vp->v_type != VDIR) {
3420		fdrop(fp, td);
3421		return (EINVAL);
3422	}
3423	aiov.iov_base = SCARG(uap, buf);
3424	aiov.iov_len = SCARG(uap, count);
3425	auio.uio_iov = &aiov;
3426	auio.uio_iovcnt = 1;
3427	auio.uio_rw = UIO_READ;
3428	auio.uio_segflg = UIO_USERSPACE;
3429	auio.uio_td = td;
3430	auio.uio_resid = SCARG(uap, count);
3431	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3432	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3433	loff = auio.uio_offset = fp->f_offset;
3434	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3435	fp->f_offset = auio.uio_offset;
3436	VOP_UNLOCK(vp, 0, td);
3437	if (error) {
3438		fdrop(fp, td);
3439		return (error);
3440	}
3441	if (SCARG(uap, count) == auio.uio_resid) {
3442		if (union_dircheckp) {
3443			error = union_dircheckp(td, &vp, fp);
3444			if (error == -1)
3445				goto unionread;
3446			if (error) {
3447				fdrop(fp, td);
3448				return (error);
3449			}
3450		}
3451		if ((vp->v_flag & VROOT) &&
3452		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3453			struct vnode *tvp = vp;
3454			vp = vp->v_mount->mnt_vnodecovered;
3455			VREF(vp);
3456			fp->f_data = (caddr_t) vp;
3457			fp->f_offset = 0;
3458			vrele(tvp);
3459			goto unionread;
3460		}
3461	}
3462	if (SCARG(uap, basep) != NULL) {
3463		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3464		    sizeof(long));
3465	}
3466	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3467	fdrop(fp, td);
3468	return (error);
3469}
3470#ifndef _SYS_SYSPROTO_H_
3471struct getdents_args {
3472	int fd;
3473	char *buf;
3474	size_t count;
3475};
3476#endif
3477int
3478getdents(td, uap)
3479	struct thread *td;
3480	register struct getdents_args /* {
3481		syscallarg(int) fd;
3482		syscallarg(char *) buf;
3483		syscallarg(u_int) count;
3484	} */ *uap;
3485{
3486	struct getdirentries_args ap;
3487	ap.fd = uap->fd;
3488	ap.buf = uap->buf;
3489	ap.count = uap->count;
3490	ap.basep = NULL;
3491	return getdirentries(td, &ap);
3492}
3493
3494/*
3495 * Set the mode mask for creation of filesystem nodes.
3496 *
3497 * MP SAFE
3498 */
3499#ifndef _SYS_SYSPROTO_H_
3500struct umask_args {
3501	int	newmask;
3502};
3503#endif
3504int
3505umask(td, uap)
3506	struct thread *td;
3507	struct umask_args /* {
3508		syscallarg(int) newmask;
3509	} */ *uap;
3510{
3511	register struct filedesc *fdp;
3512
3513	FILEDESC_LOCK(td->td_proc->p_fd);
3514	fdp = td->td_proc->p_fd;
3515	td->td_retval[0] = fdp->fd_cmask;
3516	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3517	FILEDESC_UNLOCK(td->td_proc->p_fd);
3518	return (0);
3519}
3520
3521/*
3522 * Void all references to file by ripping underlying filesystem
3523 * away from vnode.
3524 */
3525#ifndef _SYS_SYSPROTO_H_
3526struct revoke_args {
3527	char	*path;
3528};
3529#endif
3530/* ARGSUSED */
3531int
3532revoke(td, uap)
3533	struct thread *td;
3534	register struct revoke_args /* {
3535		syscallarg(char *) path;
3536	} */ *uap;
3537{
3538	struct mount *mp;
3539	struct vnode *vp;
3540	struct vattr vattr;
3541	int error;
3542	struct nameidata nd;
3543
3544	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3545	    td);
3546	if ((error = namei(&nd)) != 0)
3547		return (error);
3548	vp = nd.ni_vp;
3549	NDFREE(&nd, NDF_ONLY_PNBUF);
3550	if (vp->v_type != VCHR) {
3551		vput(vp);
3552		return (EINVAL);
3553	}
3554	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3555	if (error) {
3556		vput(vp);
3557		return (error);
3558	}
3559	VOP_UNLOCK(vp, 0, td);
3560	if (td->td_ucred->cr_uid != vattr.va_uid) {
3561		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
3562		if (error)
3563			goto out;
3564	}
3565	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3566		goto out;
3567	if (vcount(vp) > 1)
3568		VOP_REVOKE(vp, REVOKEALL);
3569	vn_finished_write(mp);
3570out:
3571	vrele(vp);
3572	return (error);
3573}
3574
3575/*
3576 * Convert a user file descriptor to a kernel file entry.
3577 * The file entry is locked upon returning.
3578 */
3579int
3580getvnode(fdp, fd, fpp)
3581	struct filedesc *fdp;
3582	int fd;
3583	struct file **fpp;
3584{
3585	int error;
3586	struct file *fp;
3587
3588	fp = NULL;
3589	if (fdp == NULL)
3590		error = EBADF;
3591	else {
3592		FILEDESC_LOCK(fdp);
3593		if ((u_int)fd >= fdp->fd_nfiles ||
3594		    (fp = fdp->fd_ofiles[fd]) == NULL)
3595			error = EBADF;
3596		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3597			fp = NULL;
3598			error = EINVAL;
3599		} else {
3600			fhold(fp);
3601			error = 0;
3602		}
3603		FILEDESC_UNLOCK(fdp);
3604	}
3605	*fpp = fp;
3606	return (error);
3607}
3608/*
3609 * Get (NFS) file handle
3610 */
3611#ifndef _SYS_SYSPROTO_H_
3612struct getfh_args {
3613	char	*fname;
3614	fhandle_t *fhp;
3615};
3616#endif
3617int
3618getfh(td, uap)
3619	struct thread *td;
3620	register struct getfh_args *uap;
3621{
3622	struct nameidata nd;
3623	fhandle_t fh;
3624	register struct vnode *vp;
3625	int error;
3626
3627	/*
3628	 * Must be super user
3629	 */
3630	error = suser_td(td);
3631	if (error)
3632		return (error);
3633	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3634	error = namei(&nd);
3635	if (error)
3636		return (error);
3637	NDFREE(&nd, NDF_ONLY_PNBUF);
3638	vp = nd.ni_vp;
3639	bzero(&fh, sizeof(fh));
3640	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3641	error = VFS_VPTOFH(vp, &fh.fh_fid);
3642	vput(vp);
3643	if (error)
3644		return (error);
3645	error = copyout(&fh, uap->fhp, sizeof (fh));
3646	return (error);
3647}
3648
3649/*
3650 * syscall for the rpc.lockd to use to translate a NFS file handle into
3651 * an open descriptor.
3652 *
3653 * warning: do not remove the suser() call or this becomes one giant
3654 * security hole.
3655 */
3656#ifndef _SYS_SYSPROTO_H_
3657struct fhopen_args {
3658	const struct fhandle *u_fhp;
3659	int flags;
3660};
3661#endif
3662int
3663fhopen(td, uap)
3664	struct thread *td;
3665	struct fhopen_args /* {
3666		syscallarg(const struct fhandle *) u_fhp;
3667		syscallarg(int) flags;
3668	} */ *uap;
3669{
3670	struct proc *p = td->td_proc;
3671	struct mount *mp;
3672	struct vnode *vp;
3673	struct fhandle fhp;
3674	struct vattr vat;
3675	struct vattr *vap = &vat;
3676	struct flock lf;
3677	struct file *fp;
3678	register struct filedesc *fdp = p->p_fd;
3679	int fmode, mode, error, type;
3680	struct file *nfp;
3681	int indx;
3682
3683	/*
3684	 * Must be super user
3685	 */
3686	error = suser_td(td);
3687	if (error)
3688		return (error);
3689
3690	fmode = FFLAGS(SCARG(uap, flags));
3691	/* why not allow a non-read/write open for our lockd? */
3692	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3693		return (EINVAL);
3694	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3695	if (error)
3696		return(error);
3697	/* find the mount point */
3698	mp = vfs_getvfs(&fhp.fh_fsid);
3699	if (mp == NULL)
3700		return (ESTALE);
3701	/* now give me my vnode, it gets returned to me locked */
3702	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3703	if (error)
3704		return (error);
3705 	/*
3706	 * from now on we have to make sure not
3707	 * to forget about the vnode
3708	 * any error that causes an abort must vput(vp)
3709	 * just set error = err and 'goto bad;'.
3710	 */
3711
3712	/*
3713	 * from vn_open
3714	 */
3715	if (vp->v_type == VLNK) {
3716		error = EMLINK;
3717		goto bad;
3718	}
3719	if (vp->v_type == VSOCK) {
3720		error = EOPNOTSUPP;
3721		goto bad;
3722	}
3723	mode = 0;
3724	if (fmode & (FWRITE | O_TRUNC)) {
3725		if (vp->v_type == VDIR) {
3726			error = EISDIR;
3727			goto bad;
3728		}
3729		error = vn_writechk(vp);
3730		if (error)
3731			goto bad;
3732		mode |= VWRITE;
3733	}
3734	if (fmode & FREAD)
3735		mode |= VREAD;
3736	if (mode) {
3737		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3738		if (error)
3739			goto bad;
3740	}
3741	if (fmode & O_TRUNC) {
3742		VOP_UNLOCK(vp, 0, td);				/* XXX */
3743		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3744			vrele(vp);
3745			return (error);
3746		}
3747		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3748		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3749		VATTR_NULL(vap);
3750		vap->va_size = 0;
3751		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3752		vn_finished_write(mp);
3753		if (error)
3754			goto bad;
3755	}
3756	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3757	if (error)
3758		goto bad;
3759	/*
3760	 * Make sure that a VM object is created for VMIO support.
3761	 */
3762	if (vn_canvmio(vp) == TRUE) {
3763		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3764			goto bad;
3765	}
3766	if (fmode & FWRITE)
3767		vp->v_writecount++;
3768
3769	/*
3770	 * end of vn_open code
3771	 */
3772
3773	if ((error = falloc(td, &nfp, &indx)) != 0) {
3774		if (fmode & FWRITE)
3775			vp->v_writecount--;
3776		goto bad;
3777	}
3778	fp = nfp;
3779
3780	/*
3781	 * Hold an extra reference to avoid having fp ripped out
3782	 * from under us while we block in the lock op
3783	 */
3784	fhold(fp);
3785	nfp->f_data = (caddr_t)vp;
3786	nfp->f_flag = fmode & FMASK;
3787	nfp->f_ops = &vnops;
3788	nfp->f_type = DTYPE_VNODE;
3789	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3790		lf.l_whence = SEEK_SET;
3791		lf.l_start = 0;
3792		lf.l_len = 0;
3793		if (fmode & O_EXLOCK)
3794			lf.l_type = F_WRLCK;
3795		else
3796			lf.l_type = F_RDLCK;
3797		type = F_FLOCK;
3798		if ((fmode & FNONBLOCK) == 0)
3799			type |= F_WAIT;
3800		VOP_UNLOCK(vp, 0, td);
3801		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3802			/*
3803			 * The lock request failed.  Normally close the
3804			 * descriptor but handle the case where someone might
3805			 * have dup()d or close()d it when we weren't looking.
3806			 */
3807			FILEDESC_LOCK(fdp);
3808			if (fdp->fd_ofiles[indx] == fp) {
3809				fdp->fd_ofiles[indx] = NULL;
3810				FILEDESC_UNLOCK(fdp);
3811				fdrop(fp, td);
3812			} else
3813				FILEDESC_UNLOCK(fdp);
3814			/*
3815			 * release our private reference
3816			 */
3817			fdrop(fp, td);
3818			return(error);
3819		}
3820		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3821		fp->f_flag |= FHASLOCK;
3822	}
3823	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3824		vfs_object_create(vp, td, td->td_ucred);
3825
3826	VOP_UNLOCK(vp, 0, td);
3827	fdrop(fp, td);
3828	td->td_retval[0] = indx;
3829	return (0);
3830
3831bad:
3832	vput(vp);
3833	return (error);
3834}
3835
3836/*
3837 * Stat an (NFS) file handle.
3838 */
3839#ifndef _SYS_SYSPROTO_H_
3840struct fhstat_args {
3841	struct fhandle *u_fhp;
3842	struct stat *sb;
3843};
3844#endif
3845int
3846fhstat(td, uap)
3847	struct thread *td;
3848	register struct fhstat_args /* {
3849		syscallarg(struct fhandle *) u_fhp;
3850		syscallarg(struct stat *) sb;
3851	} */ *uap;
3852{
3853	struct stat sb;
3854	fhandle_t fh;
3855	struct mount *mp;
3856	struct vnode *vp;
3857	int error;
3858
3859	/*
3860	 * Must be super user
3861	 */
3862	error = suser_td(td);
3863	if (error)
3864		return (error);
3865
3866	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3867	if (error)
3868		return (error);
3869
3870	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3871		return (ESTALE);
3872	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3873		return (error);
3874	error = vn_stat(vp, &sb, td);
3875	vput(vp);
3876	if (error)
3877		return (error);
3878	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3879	return (error);
3880}
3881
3882/*
3883 * Implement fstatfs() for (NFS) file handles.
3884 */
3885#ifndef _SYS_SYSPROTO_H_
3886struct fhstatfs_args {
3887	struct fhandle *u_fhp;
3888	struct statfs *buf;
3889};
3890#endif
3891int
3892fhstatfs(td, uap)
3893	struct thread *td;
3894	struct fhstatfs_args /* {
3895		syscallarg(struct fhandle) *u_fhp;
3896		syscallarg(struct statfs) *buf;
3897	} */ *uap;
3898{
3899	struct statfs *sp;
3900	struct mount *mp;
3901	struct vnode *vp;
3902	struct statfs sb;
3903	fhandle_t fh;
3904	int error;
3905
3906	/*
3907	 * Must be super user
3908	 */
3909	error = suser_td(td);
3910	if (error)
3911		return (error);
3912
3913	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3914		return (error);
3915
3916	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3917		return (ESTALE);
3918	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3919		return (error);
3920	mp = vp->v_mount;
3921	sp = &mp->mnt_stat;
3922	vput(vp);
3923	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3924		return (error);
3925	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3926	if (suser_xxx(td->td_ucred, 0, 0)) {
3927		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3928		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3929		sp = &sb;
3930	}
3931	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3932}
3933
3934/*
3935 * Syscall to push extended attribute configuration information into the
3936 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3937 * a command (int cmd), and attribute name and misc data.  For now, the
3938 * attribute name is left in userspace for consumption by the VFS_op.
3939 * It will probably be changed to be copied into sysspace by the
3940 * syscall in the future, once issues with various consumers of the
3941 * attribute code have raised their hands.
3942 *
3943 * Currently this is used only by UFS Extended Attributes.
3944 */
3945int
3946extattrctl(td, uap)
3947	struct thread *td;
3948	struct extattrctl_args *uap;
3949{
3950	struct vnode *filename_vp;
3951	struct nameidata nd;
3952	struct mount *mp, *mp_writable;
3953	char attrname[EXTATTR_MAXNAMELEN];
3954	int error;
3955
3956	/*
3957	 * SCARG(uap, attrname) not always defined.  We check again later
3958	 * when we invoke the VFS call so as to pass in NULL there if needed.
3959	 */
3960	if (SCARG(uap, attrname) != NULL) {
3961		error = copyinstr(SCARG(uap, attrname), attrname,
3962		    EXTATTR_MAXNAMELEN, NULL);
3963		if (error)
3964			return (error);
3965	}
3966
3967	/*
3968	 * SCARG(uap, filename) not always defined.  If it is, grab
3969	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3970	 */
3971	filename_vp = NULL;
3972	if (SCARG(uap, filename) != NULL) {
3973		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3974		    SCARG(uap, filename), td);
3975		if ((error = namei(&nd)) != 0)
3976			return (error);
3977		filename_vp = nd.ni_vp;
3978		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3979	}
3980
3981	/* SCARG(uap, path) always defined. */
3982	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3983	if ((error = namei(&nd)) != 0) {
3984		if (filename_vp != NULL)
3985			vput(filename_vp);
3986		return (error);
3987	}
3988	mp = nd.ni_vp->v_mount;
3989	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3990	NDFREE(&nd, 0);
3991	if (error) {
3992		if (filename_vp != NULL)
3993			vput(filename_vp);
3994		return (error);
3995	}
3996
3997	if (SCARG(uap, attrname) != NULL) {
3998		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3999		    SCARG(uap, attrnamespace), attrname, td);
4000	} else {
4001		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
4002		    SCARG(uap, attrnamespace), NULL, td);
4003	}
4004
4005	vn_finished_write(mp_writable);
4006	/*
4007	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4008	 * filename_vp, so vrele it if it is defined.
4009	 */
4010	if (filename_vp != NULL)
4011		vrele(filename_vp);
4012
4013	return (error);
4014}
4015
4016/*-
4017 * Set a named extended attribute on a file or directory
4018 *
4019 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4020 *            kernelspace string pointer "attrname", userspace buffer
4021 *            pointer "data", buffer length "nbytes", thread "td".
4022 * Returns: 0 on success, an error number otherwise
4023 * Locks: none
4024 * References: vp must be a valid reference for the duration of the call
4025 */
4026static int
4027extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4028    void *data, size_t nbytes, struct thread *td)
4029{
4030	struct mount *mp;
4031	struct uio auio;
4032	struct iovec aiov;
4033	ssize_t cnt;
4034	int error;
4035
4036	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4037		return (error);
4038	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4039	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4040
4041	aiov.iov_base = data;
4042	aiov.iov_len = nbytes;
4043	auio.uio_iov = &aiov;
4044	auio.uio_iovcnt = 1;
4045	auio.uio_offset = 0;
4046	if (nbytes > INT_MAX) {
4047		error = EINVAL;
4048		goto done;
4049	}
4050	auio.uio_resid = nbytes;
4051	auio.uio_rw = UIO_WRITE;
4052	auio.uio_segflg = UIO_USERSPACE;
4053	auio.uio_td = td;
4054	cnt = nbytes;
4055
4056	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4057	    td->td_ucred, td);
4058	cnt -= auio.uio_resid;
4059	td->td_retval[0] = cnt;
4060
4061done:
4062	VOP_UNLOCK(vp, 0, td);
4063	vn_finished_write(mp);
4064	return (error);
4065}
4066
4067int
4068extattr_set_file(td, uap)
4069	struct thread *td;
4070	struct extattr_set_file_args *uap;
4071{
4072	struct nameidata nd;
4073	char attrname[EXTATTR_MAXNAMELEN];
4074	int error;
4075
4076	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4077	    NULL);
4078	if (error)
4079		return (error);
4080
4081	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4082	if ((error = namei(&nd)) != 0)
4083		return (error);
4084	NDFREE(&nd, NDF_ONLY_PNBUF);
4085
4086	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4087	    SCARG(uap, data), SCARG(uap, nbytes), td);
4088
4089	vrele(nd.ni_vp);
4090	return (error);
4091}
4092
4093int
4094extattr_set_fd(td, uap)
4095	struct thread *td;
4096	struct extattr_set_fd_args *uap;
4097{
4098	struct file *fp;
4099	char attrname[EXTATTR_MAXNAMELEN];
4100	int error;
4101
4102	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4103	    NULL);
4104	if (error)
4105		return (error);
4106
4107	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4108		return (error);
4109
4110	error = extattr_set_vp((struct vnode *)fp->f_data,
4111	    SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4112	    SCARG(uap, nbytes), td);
4113	fdrop(fp, td);
4114
4115	return (error);
4116}
4117
4118/*-
4119 * Get a named extended attribute on a file or directory
4120 *
4121 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4122 *            kernelspace string pointer "attrname", userspace buffer
4123 *            pointer "data", buffer length "nbytes", thread "td".
4124 * Returns: 0 on success, an error number otherwise
4125 * Locks: none
4126 * References: vp must be a valid reference for the duration of the call
4127 */
4128static int
4129extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4130    void *data, size_t nbytes, struct thread *td)
4131{
4132	struct uio auio;
4133	struct iovec aiov;
4134	ssize_t cnt;
4135	size_t size;
4136	int error;
4137
4138	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4139	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4140
4141	/*
4142	 * Slightly unusual semantics: if the user provides a NULL data
4143	 * pointer, they don't want to receive the data, just the
4144	 * maximum read length.
4145	 */
4146	if (data != NULL) {
4147		aiov.iov_base = data;
4148		aiov.iov_len = nbytes;
4149		auio.uio_iov = &aiov;
4150		auio.uio_offset = 0;
4151		if (nbytes > INT_MAX) {
4152			error = EINVAL;
4153			goto done;
4154		}
4155		auio.uio_resid = nbytes;
4156		auio.uio_rw = UIO_READ;
4157		auio.uio_segflg = UIO_USERSPACE;
4158		auio.uio_td = td;
4159		cnt = nbytes;
4160		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
4161		    NULL, td->td_ucred, td);
4162		cnt -= auio.uio_resid;
4163		td->td_retval[0] = cnt;
4164	} else {
4165		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL,
4166		    &size, td->td_ucred, td);
4167		td->td_retval[0] = size;
4168	}
4169done:
4170	VOP_UNLOCK(vp, 0, td);
4171	return (error);
4172}
4173
4174int
4175extattr_get_file(td, uap)
4176	struct thread *td;
4177	struct extattr_get_file_args *uap;
4178{
4179	struct nameidata nd;
4180	char attrname[EXTATTR_MAXNAMELEN];
4181	int error;
4182
4183	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4184	    NULL);
4185	if (error)
4186		return (error);
4187
4188	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4189	if ((error = namei(&nd)) != 0)
4190		return (error);
4191	NDFREE(&nd, NDF_ONLY_PNBUF);
4192
4193	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4194	    SCARG(uap, data), SCARG(uap, nbytes), td);
4195
4196	vrele(nd.ni_vp);
4197	return (error);
4198}
4199
4200int
4201extattr_get_fd(td, uap)
4202	struct thread *td;
4203	struct extattr_get_fd_args *uap;
4204{
4205	struct file *fp;
4206	char attrname[EXTATTR_MAXNAMELEN];
4207	int error;
4208
4209	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4210	    NULL);
4211	if (error)
4212		return (error);
4213
4214	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4215		return (error);
4216
4217	error = extattr_get_vp((struct vnode *)fp->f_data,
4218	    SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4219	    SCARG(uap, nbytes), td);
4220
4221	fdrop(fp, td);
4222	return (error);
4223}
4224
4225/*
4226 * extattr_delete_vp(): Delete a named extended attribute on a file or
4227 *                      directory
4228 *
4229 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4230 *            kernelspace string pointer "attrname", proc "p"
4231 * Returns: 0 on success, an error number otherwise
4232 * Locks: none
4233 * References: vp must be a valid reference for the duration of the call
4234 */
4235static int
4236extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4237    struct thread *td)
4238{
4239	struct mount *mp;
4240	int error;
4241
4242	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4243		return (error);
4244	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4245	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4246
4247	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4248	    td);
4249
4250	VOP_UNLOCK(vp, 0, td);
4251	vn_finished_write(mp);
4252	return (error);
4253}
4254
4255int
4256extattr_delete_file(td, uap)
4257	struct thread *td;
4258	struct extattr_delete_file_args *uap;
4259{
4260	struct nameidata nd;
4261	char attrname[EXTATTR_MAXNAMELEN];
4262	int error;
4263
4264	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4265	     NULL);
4266	if (error)
4267		return(error);
4268
4269	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4270	if ((error = namei(&nd)) != 0)
4271		return(error);
4272	NDFREE(&nd, NDF_ONLY_PNBUF);
4273
4274	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4275	    attrname, td);
4276
4277	vrele(nd.ni_vp);
4278	return(error);
4279}
4280
4281int
4282extattr_delete_fd(td, uap)
4283	struct thread *td;
4284	struct extattr_delete_fd_args *uap;
4285{
4286	struct file *fp;
4287	struct vnode *vp;
4288	char attrname[EXTATTR_MAXNAMELEN];
4289	int error;
4290
4291	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4292	    NULL);
4293	if (error)
4294		return (error);
4295
4296	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4297		return (error);
4298	vp = (struct vnode *)fp->f_data;
4299
4300	error = extattr_delete_vp((struct vnode *)fp->f_data,
4301	    SCARG(uap, attrnamespace), attrname, td);
4302
4303	fdrop(fp, td);
4304	return (error);
4305}
4306