ufs_vnops.c revision 207141
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 207141 2010-04-24 07:05:35Z jeff $");
39
40#include "opt_quota.h"
41#include "opt_suiddir.h"
42#include "opt_ufs.h"
43#include "opt_ffs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/malloc.h>
48#include <sys/namei.h>
49#include <sys/kernel.h>
50#include <sys/fcntl.h>
51#include <sys/stat.h>
52#include <sys/bio.h>
53#include <sys/buf.h>
54#include <sys/mount.h>
55#include <sys/priv.h>
56#include <sys/refcount.h>
57#include <sys/unistd.h>
58#include <sys/vnode.h>
59#include <sys/dirent.h>
60#include <sys/lockf.h>
61#include <sys/conf.h>
62#include <sys/acl.h>
63
64#include <machine/mutex.h>
65
66#include <security/mac/mac_framework.h>
67
68#include <sys/file.h>		/* XXX */
69
70#include <vm/vm.h>
71#include <vm/vm_extern.h>
72
73#include <fs/fifofs/fifo.h>
74
75#include <ufs/ufs/acl.h>
76#include <ufs/ufs/extattr.h>
77#include <ufs/ufs/quota.h>
78#include <ufs/ufs/inode.h>
79#include <ufs/ufs/dir.h>
80#include <ufs/ufs/ufsmount.h>
81#include <ufs/ufs/ufs_extern.h>
82#ifdef UFS_DIRHASH
83#include <ufs/ufs/dirhash.h>
84#endif
85#ifdef UFS_GJOURNAL
86#include <ufs/ufs/gjournal.h>
87#endif
88
89#include <ufs/ffs/ffs_extern.h>
90
/*
 * Forward declarations of the file-local (static) vnode operations and
 * helper routines, followed by the registration of the "ufs" sysctl node
 * under _vfs.
 */
91static vop_accessx_t	ufs_accessx;
92static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
93static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
94static vop_close_t	ufs_close;
95static vop_create_t	ufs_create;
96static vop_getattr_t	ufs_getattr;
97static vop_link_t	ufs_link;
98static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
99static vop_markatime_t	ufs_markatime;
100static vop_mkdir_t	ufs_mkdir;
101static vop_mknod_t	ufs_mknod;
102static vop_open_t	ufs_open;
103static vop_pathconf_t	ufs_pathconf;
104static vop_print_t	ufs_print;
105static vop_readlink_t	ufs_readlink;
106static vop_remove_t	ufs_remove;
107static vop_rename_t	ufs_rename;
108static vop_rmdir_t	ufs_rmdir;
109static vop_setattr_t	ufs_setattr;
110static vop_strategy_t	ufs_strategy;
111static vop_symlink_t	ufs_symlink;
112static vop_whiteout_t	ufs_whiteout;
113static vop_close_t	ufsfifo_close;
114static vop_kqfilter_t	ufsfifo_kqfilter;
115static vop_pathconf_t	ufsfifo_pathconf;
116
/* Read-only "ufs" node under _vfs; other UFS sysctls hang off it. */
117SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
118
119/*
120 * A virgin directory (no blushing please).
121 */
/*
 * Template holding two entries, "." and "..".  The first entry uses the
 * value 12 and the second DIRBLKSIZ - 12, so together they account for
 * one DIRBLKSIZ block.  NOTE(review): the positional fields are
 * presumably inode number, record length, entry type, name length and
 * name -- confirm against struct dirtemplate.
 */
122static struct dirtemplate mastertemplate = {
123	0, 12, DT_DIR, 1, ".",
124	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
125};
/*
 * Same template without the DT_DIR field in each entry.
 * NOTE(review): presumably for the old directory format -- confirm
 * against struct odirtemplate.
 */
126static struct odirtemplate omastertemplate = {
127	0, 12, 1, ".",
128	0, DIRBLKSIZ - 12, 2, ".."
129};
130
131static void
132ufs_itimes_locked(struct vnode *vp)
133{
134	struct inode *ip;
135	struct timespec ts;
136
137	ASSERT_VI_LOCKED(vp, __func__);
138
139	ip = VTOI(vp);
140	if (UFS_RDONLY(ip))
141		goto out;
142	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
143		return;
144
145	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
146		ip->i_flag |= IN_LAZYMOD;
147	else if (((vp->v_mount->mnt_kern_flag &
148		    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
149		    (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
150		ip->i_flag |= IN_MODIFIED;
151	else if (ip->i_flag & IN_ACCESS)
152		ip->i_flag |= IN_LAZYACCESS;
153	vfs_timestamp(&ts);
154	if (ip->i_flag & IN_ACCESS) {
155		DIP_SET(ip, i_atime, ts.tv_sec);
156		DIP_SET(ip, i_atimensec, ts.tv_nsec);
157	}
158	if (ip->i_flag & IN_UPDATE) {
159		DIP_SET(ip, i_mtime, ts.tv_sec);
160		DIP_SET(ip, i_mtimensec, ts.tv_nsec);
161	}
162	if (ip->i_flag & IN_CHANGE) {
163		DIP_SET(ip, i_ctime, ts.tv_sec);
164		DIP_SET(ip, i_ctimensec, ts.tv_nsec);
165		DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1);
166	}
167
168 out:
169	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
170}
171
/*
 * Wrapper around ufs_itimes_locked() for callers that do not already
 * hold the vnode interlock: take it, apply the pending time updates,
 * and release it again.
 */
void
ufs_itimes(struct vnode *vp)
{
	VI_LOCK(vp);
	ufs_itimes_locked(vp);
	VI_UNLOCK(vp);
}
180
181/*
182 * Create a regular file
183 */
184static int
185ufs_create(ap)
186	struct vop_create_args /* {
187		struct vnode *a_dvp;
188		struct vnode **a_vpp;
189		struct componentname *a_cnp;
190		struct vattr *a_vap;
191	} */ *ap;
192{
193	int error;
194
195	error =
196	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
197	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
198	if (error)
199		return (error);
200	return (0);
201}
202
203/*
204 * Mknod vnode call
205 */
206/* ARGSUSED */
207static int
208ufs_mknod(ap)
209	struct vop_mknod_args /* {
210		struct vnode *a_dvp;
211		struct vnode **a_vpp;
212		struct componentname *a_cnp;
213		struct vattr *a_vap;
214	} */ *ap;
215{
216	struct vattr *vap = ap->a_vap;
217	struct vnode **vpp = ap->a_vpp;
218	struct inode *ip;
219	ino_t ino;
220	int error;
221
222	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
223	    ap->a_dvp, vpp, ap->a_cnp);
224	if (error)
225		return (error);
226	ip = VTOI(*vpp);
227	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
228	if (vap->va_rdev != VNOVAL) {
229		/*
230		 * Want to be able to use this to make badblock
231		 * inodes, so don't truncate the dev number.
232		 */
233		DIP_SET(ip, i_rdev, vap->va_rdev);
234	}
235	/*
236	 * Remove inode, then reload it through VFS_VGET so it is
237	 * checked to see if it is an alias of an existing entry in
238	 * the inode cache.  XXX I don't believe this is necessary now.
239	 */
240	(*vpp)->v_type = VNON;
241	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
242	vgone(*vpp);
243	vput(*vpp);
244	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
245	if (error) {
246		*vpp = NULL;
247		return (error);
248	}
249	return (0);
250}
251
252/*
253 * Open called.
254 */
255/* ARGSUSED */
256static int
257ufs_open(struct vop_open_args *ap)
258{
259	struct vnode *vp = ap->a_vp;
260	struct inode *ip;
261
262	if (vp->v_type == VCHR || vp->v_type == VBLK)
263		return (EOPNOTSUPP);
264
265	ip = VTOI(vp);
266	/*
267	 * Files marked append-only must be opened for appending.
268	 */
269	if ((ip->i_flags & APPEND) &&
270	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
271		return (EPERM);
272	vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
273	return (0);
274}
275
276/*
277 * Close called.
278 *
279 * Update the times on the inode.
280 */
281/* ARGSUSED */
282static int
283ufs_close(ap)
284	struct vop_close_args /* {
285		struct vnode *a_vp;
286		int  a_fflag;
287		struct ucred *a_cred;
288		struct thread *a_td;
289	} */ *ap;
290{
291	struct vnode *vp = ap->a_vp;
292	int usecount;
293
294	VI_LOCK(vp);
295	usecount = vp->v_usecount;
296	if (usecount > 1)
297		ufs_itimes_locked(vp);
298	VI_UNLOCK(vp);
299	return (0);
300}
301
302static int
303ufs_accessx(ap)
304	struct vop_accessx_args /* {
305		struct vnode *a_vp;
306		accmode_t a_accmode;
307		struct ucred *a_cred;
308		struct thread *a_td;
309	} */ *ap;
310{
311	struct vnode *vp = ap->a_vp;
312	struct inode *ip = VTOI(vp);
313	accmode_t accmode = ap->a_accmode;
314	int error;
315#ifdef QUOTA
316	int relocked;
317#endif
318#ifdef UFS_ACL
319	struct acl *acl;
320	acl_type_t type;
321#endif
322
323	/*
324	 * Disallow write attempts on read-only filesystems;
325	 * unless the file is a socket, fifo, or a block or
326	 * character device resident on the filesystem.
327	 */
328	if (accmode & VMODIFY_PERMS) {
329		switch (vp->v_type) {
330		case VDIR:
331		case VLNK:
332		case VREG:
333			if (vp->v_mount->mnt_flag & MNT_RDONLY)
334				return (EROFS);
335#ifdef QUOTA
336			/*
337			 * Inode is accounted in the quotas only if struct
338			 * dquot is attached to it. VOP_ACCESS() is called
339			 * from vn_open_cred() and provides a convenient
340			 * point to call getinoquota().
341			 */
342			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
343
344				/*
345				 * Upgrade vnode lock, since getinoquota()
346				 * requires exclusive lock to modify inode.
347				 */
348				relocked = 1;
349				vhold(vp);
350				vn_lock(vp, LK_UPGRADE | LK_RETRY);
351				VI_LOCK(vp);
352				if (vp->v_iflag & VI_DOOMED) {
353					vdropl(vp);
354					error = ENOENT;
355					goto relock;
356				}
357				vdropl(vp);
358			} else
359				relocked = 0;
360			error = getinoquota(ip);
361relock:
362			if (relocked)
363				vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
364			if (error != 0)
365				return (error);
366#endif
367			break;
368		default:
369			break;
370		}
371	}
372
373	/*
374	 * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
375	 * is here, because without it, * it would be impossible for the owner
376	 * to remove the IMMUTABLE flag.
377	 */
378	if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
379	    (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
380		return (EPERM);
381
382#ifdef UFS_ACL
383	if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) {
384		if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
385			type = ACL_TYPE_NFS4;
386		else
387			type = ACL_TYPE_ACCESS;
388
389		acl = acl_alloc(M_WAITOK);
390		if (type == ACL_TYPE_NFS4)
391			error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td);
392		else
393			error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td);
394		switch (error) {
395		case 0:
396			if (type == ACL_TYPE_NFS4) {
397				error = vaccess_acl_nfs4(vp->v_type, ip->i_uid,
398				    ip->i_gid, acl, accmode, ap->a_cred, NULL);
399			} else {
400				error = vfs_unixify_accmode(&accmode);
401				if (error == 0)
402					error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
403					    ip->i_gid, acl, accmode, ap->a_cred, NULL);
404			}
405			break;
406		default:
407			if (error != EOPNOTSUPP)
408				printf(
409"ufs_accessx(): Error retrieving ACL on object (%d).\n",
410				    error);
411			/*
412			 * XXX: Fall back until debugged.  Should
413			 * eventually possibly log an error, and return
414			 * EPERM for safety.
415			 */
416			error = vfs_unixify_accmode(&accmode);
417			if (error == 0)
418				error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
419				    ip->i_gid, accmode, ap->a_cred, NULL);
420		}
421		acl_free(acl);
422
423		return (error);
424	}
425#endif /* !UFS_ACL */
426	error = vfs_unixify_accmode(&accmode);
427	if (error == 0)
428		error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
429		    accmode, ap->a_cred, NULL);
430	return (error);
431}
432
433/* ARGSUSED */
434static int
435ufs_getattr(ap)
436	struct vop_getattr_args /* {
437		struct vnode *a_vp;
438		struct vattr *a_vap;
439		struct ucred *a_cred;
440	} */ *ap;
441{
442	struct vnode *vp = ap->a_vp;
443	struct inode *ip = VTOI(vp);
444	struct vattr *vap = ap->a_vap;
445
446	VI_LOCK(vp);
447	ufs_itimes_locked(vp);
448	if (ip->i_ump->um_fstype == UFS1) {
449		vap->va_atime.tv_sec = ip->i_din1->di_atime;
450		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
451	} else {
452		vap->va_atime.tv_sec = ip->i_din2->di_atime;
453		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
454	}
455	VI_UNLOCK(vp);
456	/*
457	 * Copy from inode table
458	 */
459	vap->va_fsid = dev2udev(ip->i_dev);
460	vap->va_fileid = ip->i_number;
461	vap->va_mode = ip->i_mode & ~IFMT;
462	vap->va_nlink = ip->i_effnlink;
463	vap->va_uid = ip->i_uid;
464	vap->va_gid = ip->i_gid;
465	if (ip->i_ump->um_fstype == UFS1) {
466		vap->va_rdev = ip->i_din1->di_rdev;
467		vap->va_size = ip->i_din1->di_size;
468		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
469		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
470		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
471		vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
472		vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
473		vap->va_filerev = ip->i_din1->di_modrev;
474	} else {
475		vap->va_rdev = ip->i_din2->di_rdev;
476		vap->va_size = ip->i_din2->di_size;
477		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
478		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
479		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
480		vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
481		vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
482		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
483		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
484		vap->va_filerev = ip->i_din2->di_modrev;
485	}
486	vap->va_flags = ip->i_flags;
487	vap->va_gen = ip->i_gen;
488	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
489	vap->va_type = IFTOVT(ip->i_mode);
490	return (0);
491}
492
493/*
494 * Set attribute vnode op. called from several syscalls
495 */
496static int
497ufs_setattr(ap)
498	struct vop_setattr_args /* {
499		struct vnode *a_vp;
500		struct vattr *a_vap;
501		struct ucred *a_cred;
502	} */ *ap;
503{
504	struct vattr *vap = ap->a_vap;
505	struct vnode *vp = ap->a_vp;
506	struct inode *ip = VTOI(vp);
507	struct ucred *cred = ap->a_cred;
508	struct thread *td = curthread;
509	int error;
510
511	/*
512	 * Check for unsettable attributes.
513	 */
514	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
515	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
516	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
517	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
518		return (EINVAL);
519	}
520	if (vap->va_flags != VNOVAL) {
521		if (vp->v_mount->mnt_flag & MNT_RDONLY)
522			return (EROFS);
523		/*
524		 * Callers may only modify the file flags on objects they
525		 * have VADMIN rights for.
526		 */
527		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
528			return (error);
529		/*
530		 * Unprivileged processes are not permitted to unset system
531		 * flags, or modify flags if any system flags are set.
532		 * Privileged non-jail processes may not modify system flags
533		 * if securelevel > 0 and any existing system flags are set.
534		 * Privileged jail processes behave like privileged non-jail
535		 * processes if the security.jail.chflags_allowed sysctl is
536		 * is non-zero; otherwise, they behave like unprivileged
537		 * processes.
538		 */
539		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
540			if (ip->i_flags
541			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
542				error = securelevel_gt(cred, 0);
543				if (error)
544					return (error);
545			}
546			/* Snapshot flag cannot be set or cleared */
547			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
548			     (ip->i_flags & SF_SNAPSHOT) == 0) ||
549			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
550			     (ip->i_flags & SF_SNAPSHOT) != 0))
551				return (EPERM);
552			ip->i_flags = vap->va_flags;
553			DIP_SET(ip, i_flags, vap->va_flags);
554		} else {
555			if (ip->i_flags
556			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
557			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
558				return (EPERM);
559			ip->i_flags &= SF_SETTABLE;
560			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
561			DIP_SET(ip, i_flags, ip->i_flags);
562		}
563		ip->i_flag |= IN_CHANGE;
564		if (vap->va_flags & (IMMUTABLE | APPEND))
565			return (0);
566	}
567	if (ip->i_flags & (IMMUTABLE | APPEND))
568		return (EPERM);
569	/*
570	 * Go through the fields and update iff not VNOVAL.
571	 */
572	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
573		if (vp->v_mount->mnt_flag & MNT_RDONLY)
574			return (EROFS);
575		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
576		    td)) != 0)
577			return (error);
578	}
579	if (vap->va_size != VNOVAL) {
580		/*
581		 * XXX most of the following special cases should be in
582		 * callers instead of in N filesystems.  The VDIR check
583		 * mostly already is.
584		 */
585		switch (vp->v_type) {
586		case VDIR:
587			return (EISDIR);
588		case VLNK:
589		case VREG:
590			/*
591			 * Truncation should have an effect in these cases.
592			 * Disallow it if the filesystem is read-only or
593			 * the file is being snapshotted.
594			 */
595			if (vp->v_mount->mnt_flag & MNT_RDONLY)
596				return (EROFS);
597			if ((ip->i_flags & SF_SNAPSHOT) != 0)
598				return (EPERM);
599			break;
600		default:
601			/*
602			 * According to POSIX, the result is unspecified
603			 * for file types other than regular files,
604			 * directories and shared memory objects.  We
605			 * don't support shared memory objects in the file
606			 * system, and have dubious support for truncating
607			 * symlinks.  Just ignore the request in other cases.
608			 */
609			return (0);
610		}
611		if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
612		    cred, td)) != 0)
613			return (error);
614	}
615	if (vap->va_atime.tv_sec != VNOVAL ||
616	    vap->va_mtime.tv_sec != VNOVAL ||
617	    vap->va_birthtime.tv_sec != VNOVAL) {
618		if (vp->v_mount->mnt_flag & MNT_RDONLY)
619			return (EROFS);
620		if ((ip->i_flags & SF_SNAPSHOT) != 0)
621			return (EPERM);
622		/*
623		 * From utimes(2):
624		 * If times is NULL, ... The caller must be the owner of
625		 * the file, have permission to write the file, or be the
626		 * super-user.
627		 * If times is non-NULL, ... The caller must be the owner of
628		 * the file or be the super-user.
629		 *
630		 * Possibly for historical reasons, try to use VADMIN in
631		 * preference to VWRITE for a NULL timestamp.  This means we
632		 * will return EACCES in preference to EPERM if neither
633		 * check succeeds.
634		 */
635		if (vap->va_vaflags & VA_UTIMES_NULL) {
636			/*
637			 * NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes
638			 *
639			 * "A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
640			 * will be allowed to set the times [..] to the current
641			 * server time."
642			 *
643			 * XXX: Calling it four times seems a little excessive.
644			 */
645			error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
646			if (error)
647				error = VOP_ACCESS(vp, VWRITE, cred, td);
648		} else
649			error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
650		if (error)
651			return (error);
652		if (vap->va_atime.tv_sec != VNOVAL)
653			ip->i_flag |= IN_ACCESS;
654		if (vap->va_mtime.tv_sec != VNOVAL)
655			ip->i_flag |= IN_CHANGE | IN_UPDATE;
656		if (vap->va_birthtime.tv_sec != VNOVAL &&
657		    ip->i_ump->um_fstype == UFS2)
658			ip->i_flag |= IN_MODIFIED;
659		ufs_itimes(vp);
660		if (vap->va_atime.tv_sec != VNOVAL) {
661			DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
662			DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
663		}
664		if (vap->va_mtime.tv_sec != VNOVAL) {
665			DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
666			DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
667		}
668		if (vap->va_birthtime.tv_sec != VNOVAL &&
669		    ip->i_ump->um_fstype == UFS2) {
670			ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
671			ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
672		}
673		error = UFS_UPDATE(vp, 0);
674		if (error)
675			return (error);
676	}
677	error = 0;
678	if (vap->va_mode != (mode_t)VNOVAL) {
679		if (vp->v_mount->mnt_flag & MNT_RDONLY)
680			return (EROFS);
681		if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
682		   (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
683			return (EPERM);
684		error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
685	}
686	return (error);
687}
688
#ifdef UFS_ACL
/*
 * Bring the NFSv4 ACL of vp in line with a new file mode: read the
 * current ACL, let acl_nfs4_sync_acl_from_mode() adjust it for mode and
 * file_owner_id, and write it back.  The cred argument is not used.
 *
 * Returns 0 on success or the error from reading/writing the ACL.
 */
static int
ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
    int file_owner_id, struct ucred *cred, struct thread *td)
{
	struct acl *aclp;
	int error;

	aclp = acl_alloc(M_WAITOK);
	/*
	 * We don't have to handle EOPNOTSUPP here, as the filesystem claims
	 * it supports ACLs.
	 */
	error = ufs_getacl_nfs4_internal(vp, aclp, td);
	if (error == 0) {
		acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id);
		error = ufs_setacl_nfs4_internal(vp, aclp, td);
	}
	acl_free(aclp);
	return (error);
}
#endif /* UFS_ACL */
714
715/*
716 * Mark this file's access time for update for vfs_mark_atime().  This
717 * is called from execve() and mmap().
718 */
719static int
720ufs_markatime(ap)
721	struct vop_markatime_args /* {
722		struct vnode *a_vp;
723	} */ *ap;
724{
725	struct vnode *vp = ap->a_vp;
726	struct inode *ip = VTOI(vp);
727
728	VI_LOCK(vp);
729	ip->i_flag |= IN_ACCESS;
730	VI_UNLOCK(vp);
731	return (0);
732}
733
734/*
735 * Change the mode on a file.
736 * Inode must be locked before calling.
737 */
738static int
739ufs_chmod(vp, mode, cred, td)
740	struct vnode *vp;
741	int mode;
742	struct ucred *cred;
743	struct thread *td;
744{
745	struct inode *ip = VTOI(vp);
746	int error;
747
748	/*
749	 * To modify the permissions on a file, must possess VADMIN
750	 * for that file.
751	 */
752	if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td)))
753		return (error);
754	/*
755	 * Privileged processes may set the sticky bit on non-directories,
756	 * as well as set the setgid bit on a file with a group that the
757	 * process is not a member of.  Both of these are allowed in
758	 * jail(8).
759	 */
760	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
761		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
762			return (EFTYPE);
763	}
764	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
765		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
766		if (error)
767			return (error);
768	}
769
770	/*
771	 * Deny setting setuid if we are not the file owner.
772	 */
773	if ((mode & ISUID) && ip->i_uid != cred->cr_uid) {
774		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
775		if (error)
776			return (error);
777	}
778
779	ip->i_mode &= ~ALLPERMS;
780	ip->i_mode |= (mode & ALLPERMS);
781	DIP_SET(ip, i_mode, ip->i_mode);
782	ip->i_flag |= IN_CHANGE;
783#ifdef UFS_ACL
784	if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
785		error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
786#endif
787	return (error);
788}
789
790/*
791 * Perform chown operation on inode ip;
792 * inode must be locked prior to call.
793 */
794static int
795ufs_chown(vp, uid, gid, cred, td)
796	struct vnode *vp;
797	uid_t uid;
798	gid_t gid;
799	struct ucred *cred;
800	struct thread *td;
801{
802	struct inode *ip = VTOI(vp);
803	uid_t ouid;
804	gid_t ogid;
805	int error = 0;
806#ifdef QUOTA
807	int i;
808	ufs2_daddr_t change;
809#endif
810
811	if (uid == (uid_t)VNOVAL)
812		uid = ip->i_uid;
813	if (gid == (gid_t)VNOVAL)
814		gid = ip->i_gid;
815	/*
816	 * To modify the ownership of a file, must possess VADMIN for that
817	 * file.
818	 */
819	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
820		return (error);
821	/*
822	 * To change the owner of a file, or change the group of a file to a
823	 * group of which we are not a member, the caller must have
824	 * privilege.
825	 */
826	if (((uid != ip->i_uid && uid != cred->cr_uid) ||
827	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
828	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
829		return (error);
830	ogid = ip->i_gid;
831	ouid = ip->i_uid;
832#ifdef QUOTA
833	if ((error = getinoquota(ip)) != 0)
834		return (error);
835	if (ouid == uid) {
836		dqrele(vp, ip->i_dquot[USRQUOTA]);
837		ip->i_dquot[USRQUOTA] = NODQUOT;
838	}
839	if (ogid == gid) {
840		dqrele(vp, ip->i_dquot[GRPQUOTA]);
841		ip->i_dquot[GRPQUOTA] = NODQUOT;
842	}
843	change = DIP(ip, i_blocks);
844	(void) chkdq(ip, -change, cred, CHOWN);
845	(void) chkiq(ip, -1, cred, CHOWN);
846	for (i = 0; i < MAXQUOTAS; i++) {
847		dqrele(vp, ip->i_dquot[i]);
848		ip->i_dquot[i] = NODQUOT;
849	}
850#endif
851	ip->i_gid = gid;
852	DIP_SET(ip, i_gid, gid);
853	ip->i_uid = uid;
854	DIP_SET(ip, i_uid, uid);
855#ifdef QUOTA
856	if ((error = getinoquota(ip)) == 0) {
857		if (ouid == uid) {
858			dqrele(vp, ip->i_dquot[USRQUOTA]);
859			ip->i_dquot[USRQUOTA] = NODQUOT;
860		}
861		if (ogid == gid) {
862			dqrele(vp, ip->i_dquot[GRPQUOTA]);
863			ip->i_dquot[GRPQUOTA] = NODQUOT;
864		}
865		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
866			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
867				goto good;
868			else
869				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
870		}
871		for (i = 0; i < MAXQUOTAS; i++) {
872			dqrele(vp, ip->i_dquot[i]);
873			ip->i_dquot[i] = NODQUOT;
874		}
875	}
876	ip->i_gid = ogid;
877	DIP_SET(ip, i_gid, ogid);
878	ip->i_uid = ouid;
879	DIP_SET(ip, i_uid, ouid);
880	if (getinoquota(ip) == 0) {
881		if (ouid == uid) {
882			dqrele(vp, ip->i_dquot[USRQUOTA]);
883			ip->i_dquot[USRQUOTA] = NODQUOT;
884		}
885		if (ogid == gid) {
886			dqrele(vp, ip->i_dquot[GRPQUOTA]);
887			ip->i_dquot[GRPQUOTA] = NODQUOT;
888		}
889		(void) chkdq(ip, change, cred, FORCE|CHOWN);
890		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
891		(void) getinoquota(ip);
892	}
893	return (error);
894good:
895	if (getinoquota(ip))
896		panic("ufs_chown: lost quota");
897#endif /* QUOTA */
898	ip->i_flag |= IN_CHANGE;
899	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
900		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
901			ip->i_mode &= ~(ISUID | ISGID);
902			DIP_SET(ip, i_mode, ip->i_mode);
903		}
904	}
905	return (0);
906}
907
908static int
909ufs_remove(ap)
910	struct vop_remove_args /* {
911		struct vnode *a_dvp;
912		struct vnode *a_vp;
913		struct componentname *a_cnp;
914	} */ *ap;
915{
916	struct inode *ip;
917	struct vnode *vp = ap->a_vp;
918	struct vnode *dvp = ap->a_dvp;
919	int error;
920	struct thread *td;
921
922	td = curthread;
923	ip = VTOI(vp);
924	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
925	    (VTOI(dvp)->i_flags & APPEND)) {
926		error = EPERM;
927		goto out;
928	}
929#ifdef UFS_GJOURNAL
930	ufs_gjournal_orphan(vp);
931#endif
932	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
933	if (ip->i_nlink <= 0)
934		vp->v_vflag |= VV_NOSYNC;
935	if ((ip->i_flags & SF_SNAPSHOT) != 0) {
936		/*
937		 * Avoid deadlock where another thread is trying to
938		 * update the inodeblock for dvp and is waiting on
939		 * snaplk.  Temporary unlock the vnode lock for the
940		 * unlinked file and sync the directory.  This should
941		 * allow vput() of the directory to not block later on
942		 * while holding the snapshot vnode locked, assuming
943		 * that the directory hasn't been unlinked too.
944		 */
945		VOP_UNLOCK(vp, 0);
946		(void) VOP_FSYNC(dvp, MNT_WAIT, td);
947		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
948	}
949out:
950	return (error);
951}
952
953/*
954 * link vnode call
955 */
956static int
957ufs_link(ap)
958	struct vop_link_args /* {
959		struct vnode *a_tdvp;
960		struct vnode *a_vp;
961		struct componentname *a_cnp;
962	} */ *ap;
963{
964	struct vnode *vp = ap->a_vp;
965	struct vnode *tdvp = ap->a_tdvp;
966	struct componentname *cnp = ap->a_cnp;
967	struct inode *ip;
968	struct direct newdir;
969	int error;
970
971#ifdef INVARIANTS
972	if ((cnp->cn_flags & HASBUF) == 0)
973		panic("ufs_link: no name");
974#endif
975	if (tdvp->v_mount != vp->v_mount) {
976		error = EXDEV;
977		goto out;
978	}
979	if (VTOI(tdvp)->i_effnlink < 2)
980		panic("ufs_link: Bad link count %d on parent",
981		    VTOI(tdvp)->i_effnlink);
982	ip = VTOI(vp);
983	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
984		error = EMLINK;
985		goto out;
986	}
987	if (ip->i_flags & (IMMUTABLE | APPEND)) {
988		error = EPERM;
989		goto out;
990	}
991	ip->i_effnlink++;
992	ip->i_nlink++;
993	DIP_SET(ip, i_nlink, ip->i_nlink);
994	ip->i_flag |= IN_CHANGE;
995	if (DOINGSOFTDEP(vp))
996		softdep_setup_link(VTOI(tdvp), ip);
997	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
998	if (!error) {
999		ufs_makedirentry(ip, cnp, &newdir);
1000		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
1001	}
1002
1003	if (error) {
1004		ip->i_effnlink--;
1005		ip->i_nlink--;
1006		DIP_SET(ip, i_nlink, ip->i_nlink);
1007		ip->i_flag |= IN_CHANGE;
1008		if (DOINGSOFTDEP(vp))
1009			softdep_revert_link(VTOI(tdvp), ip);
1010	}
1011out:
1012	return (error);
1013}
1014
1015/*
1016 * whiteout vnode call
1017 */
1018static int
1019ufs_whiteout(ap)
1020	struct vop_whiteout_args /* {
1021		struct vnode *a_dvp;
1022		struct componentname *a_cnp;
1023		int a_flags;
1024	} */ *ap;
1025{
1026	struct vnode *dvp = ap->a_dvp;
1027	struct componentname *cnp = ap->a_cnp;
1028	struct direct newdir;
1029	int error = 0;
1030
1031	switch (ap->a_flags) {
1032	case LOOKUP:
1033		/* 4.4 format directories support whiteout operations */
1034		if (dvp->v_mount->mnt_maxsymlinklen > 0)
1035			return (0);
1036		return (EOPNOTSUPP);
1037
1038	case CREATE:
1039		/* create a new directory whiteout */
1040#ifdef INVARIANTS
1041		if ((cnp->cn_flags & SAVENAME) == 0)
1042			panic("ufs_whiteout: missing name");
1043		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
1044			panic("ufs_whiteout: old format filesystem");
1045#endif
1046
1047		newdir.d_ino = WINO;
1048		newdir.d_namlen = cnp->cn_namelen;
1049		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
1050		newdir.d_type = DT_WHT;
1051		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
1052		break;
1053
1054	case DELETE:
1055		/* remove an existing directory whiteout */
1056#ifdef INVARIANTS
1057		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
1058			panic("ufs_whiteout: old format filesystem");
1059#endif
1060
1061		cnp->cn_flags &= ~DOWHITEOUT;
1062		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
1063		break;
1064	default:
1065		panic("ufs_whiteout: unknown op");
1066	}
1067	return (error);
1068}
1069
/*
 * Counter exported read-only under the _vfs_ufs sysctl node.  The
 * variable is declared volatile, so __DEVOLATILE is used to pass its
 * address as a plain int pointer to SYSCTL_INT.
 */
1070static volatile int rename_restarts;
1071SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
1072    __DEVOLATILE(int *, &rename_restarts), 0,
1073    "Times rename had to restart due to lock contention");
1074
1075/*
1076 * Rename system call.
1077 * 	rename("foo", "bar");
1078 * is essentially
1079 *	unlink("bar");
1080 *	link("foo", "bar");
1081 *	unlink("foo");
1082 * but ``atomically''.  Can't do full commit without saving state in the
1083 * inode on disk which isn't feasible at this time.  Best we can do is
1084 * always guarantee the target exists.
1085 *
1086 * Basic algorithm is:
1087 *
1088 * 1) Bump link count on source while we're linking it to the
1089 *    target.  This also ensures the inode won't be deleted out
1090 *    from underneath us while we work (it may be truncated by
1091 *    a concurrent `trunc' or `open' for creation).
1092 * 2) Link source to destination.  If destination already exists,
1093 *    delete it first.
1094 * 3) Unlink source reference to inode if still around. If a
1095 *    directory was moved and the parent of the destination
1096 *    is different from the source, patch the ".." entry in the
1097 *    directory.
1098 */
1099static int
1100ufs_rename(ap)
1101	struct vop_rename_args  /* {
1102		struct vnode *a_fdvp;
1103		struct vnode *a_fvp;
1104		struct componentname *a_fcnp;
1105		struct vnode *a_tdvp;
1106		struct vnode *a_tvp;
1107		struct componentname *a_tcnp;
1108	} */ *ap;
1109{
1110	struct vnode *tvp = ap->a_tvp;
1111	struct vnode *tdvp = ap->a_tdvp;
1112	struct vnode *fvp = ap->a_fvp;
1113	struct vnode *fdvp = ap->a_fdvp;
1114	struct vnode *nvp;
1115	struct componentname *tcnp = ap->a_tcnp;
1116	struct componentname *fcnp = ap->a_fcnp;
1117	struct thread *td = fcnp->cn_thread;
1118	struct inode *fip, *tip, *tdp, *fdp;
1119	struct direct newdir;
1120	off_t endoff;
1121	int doingdirectory, newparent;
1122	int error = 0, ioflag;
1123	struct mount *mp;
1124	ino_t ino;
1125
1126#ifdef INVARIANTS
1127	if ((tcnp->cn_flags & HASBUF) == 0 ||
1128	    (fcnp->cn_flags & HASBUF) == 0)
1129		panic("ufs_rename: no name");
1130#endif
1131	endoff = 0;
1132	mp = tdvp->v_mount;
1133	VOP_UNLOCK(tdvp, 0);
1134	if (tvp && tvp != tdvp)
1135		VOP_UNLOCK(tvp, 0);
1136	/*
1137	 * Check for cross-device rename.
1138	 */
1139	if ((fvp->v_mount != tdvp->v_mount) ||
1140	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1141		error = EXDEV;
1142		mp = NULL;
1143		goto releout;
1144	}
1145	error = vfs_busy(mp, 0);
1146	if (error) {
1147		mp = NULL;
1148		goto releout;
1149	}
1150relock:
1151	/*
1152	 * We need to acquire 2 to 4 locks depending on whether tvp is NULL
1153	 * and fdvp and tdvp are the same directory.  Subsequently we need
1154	 * to double-check all paths and in the directory rename case we
1155	 * need to verify that we are not creating a directory loop.  To
1156	 * handle this we acquire all but fdvp using non-blocking
1157	 * acquisitions.  If we fail to acquire any lock in the path we will
1158	 * drop all held locks, acquire the new lock in a blocking fashion,
1159	 * and then release it and restart the rename.  This acquire/release
1160	 * step ensures that we do not spin on a lock waiting for release.
1161	 */
1162	error = vn_lock(fdvp, LK_EXCLUSIVE);
1163	if (error)
1164		goto releout;
1165	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1166		VOP_UNLOCK(fdvp, 0);
1167		error = vn_lock(tdvp, LK_EXCLUSIVE);
1168		if (error)
1169			goto releout;
1170		VOP_UNLOCK(tdvp, 0);
1171		atomic_add_int(&rename_restarts, 1);
1172		goto relock;
1173	}
1174	/*
1175	 * Re-resolve fvp to be certain it still exists and fetch the
1176	 * correct vnode.
1177	 */
1178	error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
1179	if (error) {
1180		VOP_UNLOCK(fdvp, 0);
1181		VOP_UNLOCK(tdvp, 0);
1182		goto releout;
1183	}
1184	error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
1185	if (error) {
1186		VOP_UNLOCK(fdvp, 0);
1187		VOP_UNLOCK(tdvp, 0);
1188		if (error != EBUSY)
1189			goto releout;
1190		error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
1191		if (error != 0)
1192			goto releout;
1193		VOP_UNLOCK(nvp, 0);
1194		vrele(fvp);
1195		fvp = nvp;
1196		atomic_add_int(&rename_restarts, 1);
1197		goto relock;
1198	}
1199	vrele(fvp);
1200	fvp = nvp;
1201	/*
1202	 * Re-resolve tvp and acquire the vnode lock if present.
1203	 */
1204	error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
1205	if (error != 0 && error != EJUSTRETURN) {
1206		VOP_UNLOCK(fdvp, 0);
1207		VOP_UNLOCK(tdvp, 0);
1208		VOP_UNLOCK(fvp, 0);
1209		goto releout;
1210	}
1211	/*
1212	 * If tvp disappeared we just carry on.
1213	 */
1214	if (error == EJUSTRETURN && tvp != NULL) {
1215		vrele(tvp);
1216		tvp = NULL;
1217	}
1218	/*
1219	 * Get the tvp ino if the lookup succeeded.  We may have to restart
1220	 * if the non-blocking acquire fails.
1221	 */
1222	if (error == 0) {
1223		nvp = NULL;
1224		error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
1225		if (tvp)
1226			vrele(tvp);
1227		tvp = nvp;
1228		if (error) {
1229			VOP_UNLOCK(fdvp, 0);
1230			VOP_UNLOCK(tdvp, 0);
1231			VOP_UNLOCK(fvp, 0);
1232			if (error != EBUSY)
1233				goto releout;
1234			error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
1235			if (error != 0)
1236				goto releout;
1237			VOP_UNLOCK(nvp, 0);
1238			atomic_add_int(&rename_restarts, 1);
1239			goto relock;
1240		}
1241	}
1242	fdp = VTOI(fdvp);
1243	fip = VTOI(fvp);
1244	tdp = VTOI(tdvp);
1245	tip = NULL;
1246	if (tvp)
1247		tip = VTOI(tvp);
1248	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1249	    (VTOI(tdvp)->i_flags & APPEND))) {
1250		error = EPERM;
1251		goto unlockout;
1252	}
1253	/*
1254	 * Renaming a file to itself has no effect.  The upper layers should
1255	 * not call us in that case.  However, things could change after
1256	 * we drop the locks above.
1257	 */
1258	if (fvp == tvp) {
1259		error = 0;
1260		goto unlockout;
1261	}
1262	doingdirectory = 0;
1263	newparent = 0;
1264	ino = fip->i_number;
1265	if (fip->i_nlink >= LINK_MAX) {
1266		error = EMLINK;
1267		goto unlockout;
1268	}
1269	if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
1270	    || (fdp->i_flags & APPEND)) {
1271		error = EPERM;
1272		goto unlockout;
1273	}
1274	if ((fip->i_mode & IFMT) == IFDIR) {
1275		/*
1276		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1277		 */
1278		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1279		    fdp == fip ||
1280		    (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
1281			error = EINVAL;
1282			goto unlockout;
1283		}
1284		if (fdp->i_number != tdp->i_number)
1285			newparent = tdp->i_number;
1286		doingdirectory = 1;
1287	}
1288	if (fvp->v_mountedhere != NULL || (tvp && tvp->v_mountedhere != NULL)) {
1289		error = EXDEV;
1290		goto unlockout;
1291	}
1292
1293	/*
1294	 * If ".." must be changed (ie the directory gets a new
1295	 * parent) then the source directory must not be in the
1296	 * directory hierarchy above the target, as this would
1297	 * orphan everything below the source directory. Also
1298	 * the user must have write permission in the source so
1299	 * as to be able to change "..".
1300	 */
1301	if (doingdirectory && newparent) {
1302		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
1303		if (error)
1304			goto unlockout;
1305		error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
1306		    &ino);
1307		/*
1308		 * We encountered a lock that we have to wait for.  Unlock
1309		 * everything else and VGET before restarting.
1310		 */
1311		if (ino) {
1312			VOP_UNLOCK(fdvp, 0);
1313			VOP_UNLOCK(fvp, 0);
1314			VOP_UNLOCK(tdvp, 0);
1315			if (tvp)
1316				VOP_UNLOCK(tvp, 0);
1317			error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
1318			if (error == 0)
1319				vput(nvp);
1320			atomic_add_int(&rename_restarts, 1);
1321			goto relock;
1322		}
1323		if (error)
1324			goto unlockout;
1325		if ((tcnp->cn_flags & SAVESTART) == 0)
1326			panic("ufs_rename: lost to startdir");
1327	}
1328	if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
1329	    tdp->i_effnlink == 0)
1330		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
1331
1332	/*
1333	 * 1) Bump link count while we're moving stuff
1334	 *    around.  If we crash somewhere before
1335	 *    completing our work, the link count
1336	 *    may be wrong, but correctable.
1337	 */
1338	fip->i_effnlink++;
1339	fip->i_nlink++;
1340	DIP_SET(fip, i_nlink, fip->i_nlink);
1341	fip->i_flag |= IN_CHANGE;
1342	if (DOINGSOFTDEP(fvp))
1343		softdep_setup_link(tdp, fip);
1344	error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
1345	if (error)
1346		goto bad;
1347
1348	/*
1349	 * 2) If target doesn't exist, link the target
1350	 *    to the source and unlink the source.
1351	 *    Otherwise, rewrite the target directory
1352	 *    entry to reference the source inode and
1353	 *    expunge the original entry's existence.
1354	 */
1355	if (tip == NULL) {
1356		if (tdp->i_dev != fip->i_dev)
1357			panic("ufs_rename: EXDEV");
1358		if (doingdirectory && newparent) {
1359			/*
1360			 * Account for ".." in new directory.
1361			 * When source and destination have the same
1362			 * parent we don't adjust the link count.  The
1363			 * actual link modification is completed when
1364			 * .. is rewritten below.
1365			 */
1366			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
1367				error = EMLINK;
1368				goto bad;
1369			}
1370		}
1371		ufs_makedirentry(fip, tcnp, &newdir);
1372		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
1373		if (error)
1374			goto bad;
1375		/* Setup tdvp for directory compaction if needed. */
1376		if (tdp->i_count && tdp->i_endoff &&
1377		    tdp->i_endoff < tdp->i_size)
1378			endoff = tdp->i_endoff;
1379	} else {
1380		if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
1381			panic("ufs_rename: EXDEV");
1382		/*
1383		 * Short circuit rename(foo, foo).
1384		 */
1385		if (tip->i_number == fip->i_number)
1386			panic("ufs_rename: same file");
1387		/*
1388		 * If the parent directory is "sticky", then the caller
1389		 * must possess VADMIN for the parent directory, or the
1390		 * destination of the rename.  This implements append-only
1391		 * directories.
1392		 */
1393		if ((tdp->i_mode & S_ISTXT) &&
1394		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
1395		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
1396			error = EPERM;
1397			goto bad;
1398		}
1399		/*
1400		 * Target must be empty if a directory and have no links
1401		 * to it. Also, ensure source and target are compatible
1402		 * (both directories, or both not directories).
1403		 */
1404		if ((tip->i_mode & IFMT) == IFDIR) {
1405			if ((tip->i_effnlink > 2) ||
1406			    !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
1407				error = ENOTEMPTY;
1408				goto bad;
1409			}
1410			if (!doingdirectory) {
1411				error = ENOTDIR;
1412				goto bad;
1413			}
1414			cache_purge(tdvp);
1415		} else if (doingdirectory) {
1416			error = EISDIR;
1417			goto bad;
1418		}
1419		if (doingdirectory) {
1420			if (!newparent) {
1421				tdp->i_effnlink--;
1422				if (DOINGSOFTDEP(tdvp))
1423					softdep_change_linkcnt(tdp);
1424			}
1425			tip->i_effnlink--;
1426			if (DOINGSOFTDEP(tvp))
1427				softdep_change_linkcnt(tip);
1428		}
1429		error = ufs_dirrewrite(tdp, tip, fip->i_number,
1430		    IFTODT(fip->i_mode),
1431		    (doingdirectory && newparent) ? newparent : doingdirectory);
1432		if (error) {
1433			if (doingdirectory) {
1434				if (!newparent) {
1435					tdp->i_effnlink++;
1436					if (DOINGSOFTDEP(tdvp))
1437						softdep_change_linkcnt(tdp);
1438				}
1439				tip->i_effnlink++;
1440				if (DOINGSOFTDEP(tvp))
1441					softdep_change_linkcnt(tip);
1442			}
1443		}
1444		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1445			/*
1446			 * Truncate inode. The only stuff left in the directory
1447			 * is "." and "..". The "." reference is inconsequential
1448			 * since we are quashing it. We have removed the "."
1449			 * reference and the reference in the parent directory,
1450			 * but there may be other hard links. The soft
1451			 * dependency code will arrange to do these operations
1452			 * after the parent directory entry has been deleted on
1453			 * disk, so when running with that code we avoid doing
1454			 * them now.
1455			 */
1456			if (!newparent) {
1457				tdp->i_nlink--;
1458				DIP_SET(tdp, i_nlink, tdp->i_nlink);
1459				tdp->i_flag |= IN_CHANGE;
1460			}
1461			tip->i_nlink--;
1462			DIP_SET(tip, i_nlink, tip->i_nlink);
1463			tip->i_flag |= IN_CHANGE;
1464			ioflag = IO_NORMAL;
1465			if (!DOINGASYNC(tvp))
1466				ioflag |= IO_SYNC;
1467			/* Don't go to bad here as the new link exists. */
1468			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
1469			    tcnp->cn_cred, tcnp->cn_thread)) != 0)
1470				goto unlockout;
1471		}
1472	}
1473
1474	/*
1475	 * 3) Unlink the source.  We have to resolve the path again to
1476	 * fixup the directory offset and count for ufs_dirremove.
1477	 */
1478	if (fdvp == tdvp) {
1479		error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
1480		if (error)
1481			panic("ufs_rename: from entry went away!");
1482		if (ino != fip->i_number)
1483			panic("ufs_rename: ino mismatch %d != %d\n", ino,
1484			    fip->i_number);
1485	}
1486	/*
1487	 * If the source is a directory with a
1488	 * new parent, the link count of the old
1489	 * parent directory must be decremented
1490	 * and ".." set to point to the new parent.
1491	 */
1492	if (doingdirectory && newparent) {
1493		/*
1494		 * If tip exists we simply use its link, otherwise we must
1495		 * add a new one.
1496		 */
1497		if (tip == NULL) {
1498			tdp->i_effnlink++;
1499			tdp->i_nlink++;
1500			DIP_SET(tdp, i_nlink, tdp->i_nlink);
1501			tdp->i_flag |= IN_CHANGE;
1502			if (DOINGSOFTDEP(tdvp))
1503				softdep_setup_dotdot_link(tdp, fip);
1504			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1505						   DOINGASYNC(tdvp)));
1506			/* Don't go to bad here as the new link exists. */
1507			if (error)
1508				goto unlockout;
1509		}
1510		fip->i_offset = mastertemplate.dot_reclen;
1511		ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
1512		cache_purge(fdvp);
1513	}
1514	error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
1515
1516unlockout:
1517	vput(fdvp);
1518	vput(fvp);
1519	if (tvp)
1520		vput(tvp);
1521	/*
1522	 * If compaction or fsync was requested do it now that other locks
1523	 * are no longer needed.
1524	 */
1525	if (error == 0 && endoff != 0) {
1526#ifdef UFS_DIRHASH
1527		if (tdp->i_dirhash != NULL)
1528			ufsdirhash_dirtrunc(tdp, endoff);
1529#endif
1530		UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
1531		    td);
1532	}
1533	if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
1534		error = VOP_FSYNC(tdvp, MNT_WAIT, td);
1535	vput(tdvp);
1536	if (mp)
1537		vfs_unbusy(mp);
1538	return (error);
1539
1540bad:
1541	fip->i_effnlink--;
1542	fip->i_nlink--;
1543	DIP_SET(fip, i_nlink, fip->i_nlink);
1544	fip->i_flag |= IN_CHANGE;
1545	if (DOINGSOFTDEP(fvp))
1546		softdep_revert_link(tdp, fip);
1547	goto unlockout;
1548
1549releout:
1550	vrele(fdvp);
1551	vrele(fvp);
1552	vrele(tdvp);
1553	if (tvp)
1554		vrele(tvp);
1555	if (mp)
1556		vfs_unbusy(mp);
1557
1558	return (error);
1559}
1560
1561#ifdef UFS_ACL
1562static int
1563ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
1564    mode_t dmode, struct ucred *cred, struct thread *td)
1565{
1566	int error;
1567	struct inode *ip = VTOI(tvp);
1568	struct acl *dacl, *acl;
1569
1570	acl = acl_alloc(M_WAITOK);
1571	dacl = acl_alloc(M_WAITOK);
1572
1573	/*
1574	 * Retrieve default ACL from parent, if any.
1575	 */
1576	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
1577	switch (error) {
1578	case 0:
1579		/*
1580		 * Retrieved a default ACL, so merge mode and ACL if
1581		 * necessary.  If the ACL is empty, fall through to
1582		 * the "not defined or available" case.
1583		 */
1584		if (acl->acl_cnt != 0) {
1585			dmode = acl_posix1e_newfilemode(dmode, acl);
1586			ip->i_mode = dmode;
1587			DIP_SET(ip, i_mode, dmode);
1588			*dacl = *acl;
1589			ufs_sync_acl_from_inode(ip, acl);
1590			break;
1591		}
1592		/* FALLTHROUGH */
1593
1594	case EOPNOTSUPP:
1595		/*
1596		 * Just use the mode as-is.
1597		 */
1598		ip->i_mode = dmode;
1599		DIP_SET(ip, i_mode, dmode);
1600		error = 0;
1601		goto out;
1602
1603	default:
1604		goto out;
1605	}
1606
1607	/*
1608	 * XXX: If we abort now, will Soft Updates notify the extattr
1609	 * code that the EAs for the file need to be released?
1610	 */
1611	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
1612	if (error == 0)
1613		error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
1614	switch (error) {
1615	case 0:
1616		break;
1617
1618	case EOPNOTSUPP:
1619		/*
1620		 * XXX: This should not happen, as EOPNOTSUPP above
1621		 * was supposed to free acl.
1622		 */
1623		printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
1624		/*
1625		panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
1626		 */
1627		break;
1628
1629	default:
1630		goto out;
1631	}
1632
1633out:
1634	acl_free(acl);
1635	acl_free(dacl);
1636
1637	return (error);
1638}
1639
1640static int
1641ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
1642    mode_t mode, struct ucred *cred, struct thread *td)
1643{
1644	int error;
1645	struct inode *ip = VTOI(tvp);
1646	struct acl *acl;
1647
1648	acl = acl_alloc(M_WAITOK);
1649
1650	/*
1651	 * Retrieve default ACL for parent, if any.
1652	 */
1653	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
1654	switch (error) {
1655	case 0:
1656		/*
1657		 * Retrieved a default ACL, so merge mode and ACL if
1658		 * necessary.
1659		 */
1660		if (acl->acl_cnt != 0) {
1661			/*
1662			 * Two possible ways for default ACL to not
1663			 * be present.  First, the EA can be
1664			 * undefined, or second, the default ACL can
1665			 * be blank.  If it's blank, fall through to
1666			 * the it's not defined case.
1667			 */
1668			mode = acl_posix1e_newfilemode(mode, acl);
1669			ip->i_mode = mode;
1670			DIP_SET(ip, i_mode, mode);
1671			ufs_sync_acl_from_inode(ip, acl);
1672			break;
1673		}
1674		/* FALLTHROUGH */
1675
1676	case EOPNOTSUPP:
1677		/*
1678		 * Just use the mode as-is.
1679		 */
1680		ip->i_mode = mode;
1681		DIP_SET(ip, i_mode, mode);
1682		error = 0;
1683		goto out;
1684
1685	default:
1686		goto out;
1687	}
1688
1689	/*
1690	 * XXX: If we abort now, will Soft Updates notify the extattr
1691	 * code that the EAs for the file need to be released?
1692	 */
1693	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
1694	switch (error) {
1695	case 0:
1696		break;
1697
1698	case EOPNOTSUPP:
1699		/*
1700		 * XXX: This should not happen, as EOPNOTSUPP above was
1701		 * supposed to free acl.
1702		 */
1703		printf("ufs_makeinode: VOP_GETACL() but no "
1704		    "VOP_SETACL()\n");
1705		/* panic("ufs_makeinode: VOP_GETACL() but no "
1706		    "VOP_SETACL()"); */
1707		break;
1708
1709	default:
1710		goto out;
1711	}
1712
1713out:
1714	acl_free(acl);
1715
1716	return (error);
1717}
1718
1719static int
1720ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
1721    mode_t child_mode, struct ucred *cred, struct thread *td)
1722{
1723	int error;
1724	struct acl *parent_aclp, *child_aclp;
1725
1726	parent_aclp = acl_alloc(M_WAITOK);
1727	child_aclp = acl_alloc(M_WAITOK | M_ZERO);
1728
1729	error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td);
1730	if (error)
1731		goto out;
1732	acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
1733	    child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
1734	error = ufs_setacl_nfs4_internal(tvp, child_aclp, td);
1735	if (error)
1736		goto out;
1737out:
1738	acl_free(parent_aclp);
1739	acl_free(child_aclp);
1740
1741	return (error);
1742}
1743#endif
1744
1745/*
1746 * Mkdir system call
1747 */
1748static int
1749ufs_mkdir(ap)
1750	struct vop_mkdir_args /* {
1751		struct vnode *a_dvp;
1752		struct vnode **a_vpp;
1753		struct componentname *a_cnp;
1754		struct vattr *a_vap;
1755	} */ *ap;
1756{
1757	struct vnode *dvp = ap->a_dvp;
1758	struct vattr *vap = ap->a_vap;
1759	struct componentname *cnp = ap->a_cnp;
1760	struct inode *ip, *dp;
1761	struct vnode *tvp;
1762	struct buf *bp;
1763	struct dirtemplate dirtemplate, *dtp;
1764	struct direct newdir;
1765	int error, dmode;
1766	long blkoff;
1767
1768#ifdef INVARIANTS
1769	if ((cnp->cn_flags & HASBUF) == 0)
1770		panic("ufs_mkdir: no name");
1771#endif
1772	dp = VTOI(dvp);
1773	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1774		error = EMLINK;
1775		goto out;
1776	}
1777	dmode = vap->va_mode & 0777;
1778	dmode |= IFDIR;
1779	/*
1780	 * Must simulate part of ufs_makeinode here to acquire the inode,
1781	 * but not have it entered in the parent directory. The entry is
1782	 * made later after writing "." and ".." entries.
1783	 */
1784	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1785	if (error)
1786		goto out;
1787	ip = VTOI(tvp);
1788	ip->i_gid = dp->i_gid;
1789	DIP_SET(ip, i_gid, dp->i_gid);
1790#ifdef SUIDDIR
1791	{
1792#ifdef QUOTA
1793		struct ucred ucred, *ucp;
1794		gid_t ucred_group;
1795		ucp = cnp->cn_cred;
1796#endif
1797		/*
1798		 * If we are hacking owners here, (only do this where told to)
1799		 * and we are not giving it TO root, (would subvert quotas)
1800		 * then go ahead and give it to the other user.
1801		 * The new directory also inherits the SUID bit.
1802		 * If user's UID and dir UID are the same,
1803		 * 'give it away' so that the SUID is still forced on.
1804		 */
1805		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1806		    (dp->i_mode & ISUID) && dp->i_uid) {
1807			dmode |= ISUID;
1808			ip->i_uid = dp->i_uid;
1809			DIP_SET(ip, i_uid, dp->i_uid);
1810#ifdef QUOTA
1811			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1812				/*
1813				 * Make sure the correct user gets charged
1814				 * for the space.
1815				 * Make a dummy credential for the victim.
1816				 * XXX This seems to never be accessed out of
1817				 * our context so a stack variable is ok.
1818				 */
1819				refcount_init(&ucred.cr_ref, 1);
1820				ucred.cr_uid = ip->i_uid;
1821				ucred.cr_ngroups = 1;
1822				ucred.cr_groups = &ucred_group;
1823				ucred.cr_groups[0] = dp->i_gid;
1824				ucp = &ucred;
1825			}
1826#endif
1827		} else {
1828			ip->i_uid = cnp->cn_cred->cr_uid;
1829			DIP_SET(ip, i_uid, ip->i_uid);
1830		}
1831#ifdef QUOTA
1832		if ((error = getinoquota(ip)) ||
1833	    	    (error = chkiq(ip, 1, ucp, 0))) {
1834			UFS_VFREE(tvp, ip->i_number, dmode);
1835			vput(tvp);
1836			return (error);
1837		}
1838#endif
1839	}
1840#else	/* !SUIDDIR */
1841	ip->i_uid = cnp->cn_cred->cr_uid;
1842	DIP_SET(ip, i_uid, ip->i_uid);
1843#ifdef QUOTA
1844	if ((error = getinoquota(ip)) ||
1845	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1846		UFS_VFREE(tvp, ip->i_number, dmode);
1847		vput(tvp);
1848		return (error);
1849	}
1850#endif
1851#endif	/* !SUIDDIR */
1852	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1853	ip->i_mode = dmode;
1854	DIP_SET(ip, i_mode, dmode);
1855	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1856	ip->i_effnlink = 2;
1857	ip->i_nlink = 2;
1858	DIP_SET(ip, i_nlink, 2);
1859
1860	if (cnp->cn_flags & ISWHITEOUT) {
1861		ip->i_flags |= UF_OPAQUE;
1862		DIP_SET(ip, i_flags, ip->i_flags);
1863	}
1864
1865	/*
1866	 * Bump link count in parent directory to reflect work done below.
1867	 * Should be done before reference is created so cleanup is
1868	 * possible if we crash.
1869	 */
1870	dp->i_effnlink++;
1871	dp->i_nlink++;
1872	DIP_SET(dp, i_nlink, dp->i_nlink);
1873	dp->i_flag |= IN_CHANGE;
1874	if (DOINGSOFTDEP(dvp))
1875		softdep_setup_mkdir(dp, ip);
1876	error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1877	if (error)
1878		goto bad;
1879#ifdef MAC
1880	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
1881		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
1882		    dvp, tvp, cnp);
1883		if (error)
1884			goto bad;
1885	}
1886#endif
1887#ifdef UFS_ACL
1888	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
1889		error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
1890		    cnp->cn_cred, cnp->cn_thread);
1891		if (error)
1892			goto bad;
1893	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
1894		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
1895		    cnp->cn_cred, cnp->cn_thread);
1896		if (error)
1897			goto bad;
1898	}
1899#endif /* !UFS_ACL */
1900
1901	/*
1902	 * Initialize directory with "." and ".." from static template.
1903	 */
1904	if (dvp->v_mount->mnt_maxsymlinklen > 0)
1905		dtp = &mastertemplate;
1906	else
1907		dtp = (struct dirtemplate *)&omastertemplate;
1908	dirtemplate = *dtp;
1909	dirtemplate.dot_ino = ip->i_number;
1910	dirtemplate.dotdot_ino = dp->i_number;
1911	if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1912	    BA_CLRBUF, &bp)) != 0)
1913		goto bad;
1914	ip->i_size = DIRBLKSIZ;
1915	DIP_SET(ip, i_size, DIRBLKSIZ);
1916	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1917	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1918	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1919	if (DOINGSOFTDEP(tvp)) {
1920		/*
1921		 * Ensure that the entire newly allocated block is a
1922		 * valid directory so that future growth within the
1923		 * block does not have to ensure that the block is
1924		 * written before the inode.
1925		 */
1926		blkoff = DIRBLKSIZ;
1927		while (blkoff < bp->b_bcount) {
1928			((struct direct *)
1929			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1930			blkoff += DIRBLKSIZ;
1931		}
1932	}
1933	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1934				       DOINGASYNC(tvp)))) != 0) {
1935		(void)bwrite(bp);
1936		goto bad;
1937	}
1938	/*
1939	 * Directory set up, now install its entry in the parent directory.
1940	 *
1941	 * If we are not doing soft dependencies, then we must write out the
1942	 * buffer containing the new directory body before entering the new
1943	 * name in the parent. If we are doing soft dependencies, then the
1944	 * buffer containing the new directory body will be passed to and
1945	 * released in the soft dependency code after the code has attached
1946	 * an appropriate ordering dependency to the buffer which ensures that
1947	 * the buffer is written before the new name is written in the parent.
1948	 */
1949	if (DOINGASYNC(dvp))
1950		bdwrite(bp);
1951	else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
1952		goto bad;
1953	ufs_makedirentry(ip, cnp, &newdir);
1954	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0);
1955
1956bad:
1957	if (error == 0) {
1958		*ap->a_vpp = tvp;
1959	} else {
1960		dp->i_effnlink--;
1961		dp->i_nlink--;
1962		DIP_SET(dp, i_nlink, dp->i_nlink);
1963		dp->i_flag |= IN_CHANGE;
1964		/*
1965		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1966		 * do this for us because we set the link count to 0.
1967		 */
1968		ip->i_effnlink = 0;
1969		ip->i_nlink = 0;
1970		DIP_SET(ip, i_nlink, 0);
1971		ip->i_flag |= IN_CHANGE;
1972		if (DOINGSOFTDEP(tvp))
1973			softdep_revert_mkdir(dp, ip);
1974
1975		vput(tvp);
1976	}
1977out:
1978	return (error);
1979}
1980
1981/*
1982 * Rmdir system call.
1983 */
1984static int
1985ufs_rmdir(ap)
1986	struct vop_rmdir_args /* {
1987		struct vnode *a_dvp;
1988		struct vnode *a_vp;
1989		struct componentname *a_cnp;
1990	} */ *ap;
1991{
1992	struct vnode *vp = ap->a_vp;
1993	struct vnode *dvp = ap->a_dvp;
1994	struct componentname *cnp = ap->a_cnp;
1995	struct inode *ip, *dp;
1996	int error, ioflag;
1997
1998	ip = VTOI(vp);
1999	dp = VTOI(dvp);
2000
2001	/*
2002	 * Do not remove a directory that is in the process of being renamed.
2003	 * Verify the directory is empty (and valid). Rmdir ".." will not be
2004	 * valid since ".." will contain a reference to the current directory
2005	 * and thus be non-empty. Do not allow the removal of mounted on
2006	 * directories (this can happen when an NFS exported filesystem
2007	 * tries to remove a locally mounted on directory).
2008	 */
2009	error = 0;
2010	if (ip->i_effnlink < 2) {
2011		error = EINVAL;
2012		goto out;
2013	}
2014	if (dp->i_effnlink < 3)
2015		panic("ufs_dirrem: Bad link count %d on parent",
2016		    dp->i_effnlink);
2017	if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
2018		error = ENOTEMPTY;
2019		goto out;
2020	}
2021	if ((dp->i_flags & APPEND)
2022	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
2023		error = EPERM;
2024		goto out;
2025	}
2026	if (vp->v_mountedhere != 0) {
2027		error = EINVAL;
2028		goto out;
2029	}
2030#ifdef UFS_GJOURNAL
2031	ufs_gjournal_orphan(vp);
2032#endif
2033	/*
2034	 * Delete reference to directory before purging
2035	 * inode.  If we crash in between, the directory
2036	 * will be reattached to lost+found,
2037	 */
2038	dp->i_effnlink--;
2039	ip->i_effnlink--;
2040	if (DOINGSOFTDEP(vp))
2041		softdep_setup_rmdir(dp, ip);
2042	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
2043	if (error) {
2044		dp->i_effnlink++;
2045		ip->i_effnlink++;
2046		if (DOINGSOFTDEP(vp))
2047			softdep_revert_rmdir(dp, ip);
2048		goto out;
2049	}
2050	cache_purge(dvp);
2051	/*
2052	 * Truncate inode. The only stuff left in the directory is "." and
2053	 * "..". The "." reference is inconsequential since we are quashing
2054	 * it. The soft dependency code will arrange to do these operations
2055	 * after the parent directory entry has been deleted on disk, so
2056	 * when running with that code we avoid doing them now.
2057	 */
2058	if (!DOINGSOFTDEP(vp)) {
2059		dp->i_nlink--;
2060		DIP_SET(dp, i_nlink, dp->i_nlink);
2061		dp->i_flag |= IN_CHANGE;
2062		ip->i_nlink--;
2063		DIP_SET(ip, i_nlink, ip->i_nlink);
2064		ip->i_flag |= IN_CHANGE;
2065		ioflag = IO_NORMAL;
2066		if (!DOINGASYNC(vp))
2067			ioflag |= IO_SYNC;
2068		error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
2069		    cnp->cn_thread);
2070	}
2071	cache_purge(vp);
2072#ifdef UFS_DIRHASH
2073	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
2074	if (ip->i_dirhash != NULL)
2075		ufsdirhash_free(ip);
2076#endif
2077out:
2078	return (error);
2079}
2080
2081/*
2082 * symlink -- make a symbolic link
2083 */
2084static int
2085ufs_symlink(ap)
2086	struct vop_symlink_args /* {
2087		struct vnode *a_dvp;
2088		struct vnode **a_vpp;
2089		struct componentname *a_cnp;
2090		struct vattr *a_vap;
2091		char *a_target;
2092	} */ *ap;
2093{
2094	struct vnode *vp, **vpp = ap->a_vpp;
2095	struct inode *ip;
2096	int len, error;
2097
2098	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
2099	    vpp, ap->a_cnp);
2100	if (error)
2101		return (error);
2102	vp = *vpp;
2103	len = strlen(ap->a_target);
2104	if (len < vp->v_mount->mnt_maxsymlinklen) {
2105		ip = VTOI(vp);
2106		bcopy(ap->a_target, SHORTLINK(ip), len);
2107		ip->i_size = len;
2108		DIP_SET(ip, i_size, len);
2109		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2110	} else
2111		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
2112		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
2113		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
2114	if (error)
2115		vput(vp);
2116	return (error);
2117}
2118
2119/*
2120 * Vnode op for reading directories.
2121 *
2122 * The routine below assumes that the on-disk format of a directory
2123 * is the same as that defined by <sys/dirent.h>. If the on-disk
2124 * format changes, then it will be necessary to do a conversion
2125 * from the on-disk format that read returns to the format defined
2126 * by <sys/dirent.h>.
2127 */
2128int
2129ufs_readdir(ap)
2130	struct vop_readdir_args /* {
2131		struct vnode *a_vp;
2132		struct uio *a_uio;
2133		struct ucred *a_cred;
2134		int *a_eofflag;
2135		int *a_ncookies;
2136		u_long **a_cookies;
2137	} */ *ap;
2138{
2139	struct uio *uio = ap->a_uio;
2140	int error;
2141	size_t count, lost;
2142	off_t off;
2143
2144	if (ap->a_ncookies != NULL)
2145		/*
2146		 * Ensure that the block is aligned.  The caller can use
2147		 * the cookies to determine where in the block to start.
2148		 */
2149		uio->uio_offset &= ~(DIRBLKSIZ - 1);
2150	off = uio->uio_offset;
2151	count = uio->uio_resid;
2152	/* Make sure we don't return partial entries. */
2153	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
2154		return (EINVAL);
2155	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
2156	lost = uio->uio_resid - count;
2157	uio->uio_resid = count;
2158	uio->uio_iov->iov_len = count;
2159#	if (BYTE_ORDER == LITTLE_ENDIAN)
2160		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
2161			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
2162		} else {
2163			struct dirent *dp, *edp;
2164			struct uio auio;
2165			struct iovec aiov;
2166			caddr_t dirbuf;
2167			int readcnt;
2168			u_char tmp;
2169
2170			auio = *uio;
2171			auio.uio_iov = &aiov;
2172			auio.uio_iovcnt = 1;
2173			auio.uio_segflg = UIO_SYSSPACE;
2174			aiov.iov_len = count;
2175			dirbuf = malloc(count, M_TEMP, M_WAITOK);
2176			aiov.iov_base = dirbuf;
2177			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
2178			if (error == 0) {
2179				readcnt = count - auio.uio_resid;
2180				edp = (struct dirent *)&dirbuf[readcnt];
2181				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
2182					tmp = dp->d_namlen;
2183					dp->d_namlen = dp->d_type;
2184					dp->d_type = tmp;
2185					if (dp->d_reclen > 0) {
2186						dp = (struct dirent *)
2187						    ((char *)dp + dp->d_reclen);
2188					} else {
2189						error = EIO;
2190						break;
2191					}
2192				}
2193				if (dp >= edp)
2194					error = uiomove(dirbuf, readcnt, uio);
2195			}
2196			free(dirbuf, M_TEMP);
2197		}
2198#	else
2199		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
2200#	endif
2201	if (!error && ap->a_ncookies != NULL) {
2202		struct dirent* dpStart;
2203		struct dirent* dpEnd;
2204		struct dirent* dp;
2205		int ncookies;
2206		u_long *cookies;
2207		u_long *cookiep;
2208
2209		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
2210			panic("ufs_readdir: unexpected uio from NFS server");
2211		dpStart = (struct dirent *)
2212		    ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
2213		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
2214		for (dp = dpStart, ncookies = 0;
2215		     dp < dpEnd;
2216		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
2217			ncookies++;
2218		cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
2219		    M_WAITOK);
2220		for (dp = dpStart, cookiep = cookies;
2221		     dp < dpEnd;
2222		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
2223			off += dp->d_reclen;
2224			*cookiep++ = (u_long) off;
2225		}
2226		*ap->a_ncookies = ncookies;
2227		*ap->a_cookies = cookies;
2228	}
2229	uio->uio_resid += lost;
2230	if (ap->a_eofflag)
2231	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
2232	return (error);
2233}
2234
2235/*
2236 * Return target name of a symbolic link
2237 */
2238static int
2239ufs_readlink(ap)
2240	struct vop_readlink_args /* {
2241		struct vnode *a_vp;
2242		struct uio *a_uio;
2243		struct ucred *a_cred;
2244	} */ *ap;
2245{
2246	struct vnode *vp = ap->a_vp;
2247	struct inode *ip = VTOI(vp);
2248	doff_t isize;
2249
2250	isize = ip->i_size;
2251	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
2252	    DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
2253		return (uiomove(SHORTLINK(ip), isize, ap->a_uio));
2254	}
2255	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
2256}
2257
2258/*
2259 * Calculate the logical to physical mapping if not done already,
2260 * then call the device strategy routine.
2261 *
2262 * In order to be able to swap to a file, the ufs_bmaparray() operation may not
2263 * deadlock on memory.  See ufs_bmap() for details.
2264 */
2265static int
2266ufs_strategy(ap)
2267	struct vop_strategy_args /* {
2268		struct vnode *a_vp;
2269		struct buf *a_bp;
2270	} */ *ap;
2271{
2272	struct buf *bp = ap->a_bp;
2273	struct vnode *vp = ap->a_vp;
2274	struct bufobj *bo;
2275	struct inode *ip;
2276	ufs2_daddr_t blkno;
2277	int error;
2278
2279	ip = VTOI(vp);
2280	if (bp->b_blkno == bp->b_lblkno) {
2281		error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
2282		bp->b_blkno = blkno;
2283		if (error) {
2284			bp->b_error = error;
2285			bp->b_ioflags |= BIO_ERROR;
2286			bufdone(bp);
2287			return (0);
2288		}
2289		if ((long)bp->b_blkno == -1)
2290			vfs_bio_clrbuf(bp);
2291	}
2292	if ((long)bp->b_blkno == -1) {
2293		bufdone(bp);
2294		return (0);
2295	}
2296	bp->b_iooffset = dbtob(bp->b_blkno);
2297	bo = ip->i_umbufobj;
2298	BO_STRATEGY(bo, bp);
2299	return (0);
2300}
2301
2302/*
2303 * Print out the contents of an inode.
2304 */
2305static int
2306ufs_print(ap)
2307	struct vop_print_args /* {
2308		struct vnode *a_vp;
2309	} */ *ap;
2310{
2311	struct vnode *vp = ap->a_vp;
2312	struct inode *ip = VTOI(vp);
2313
2314	printf("\tino %lu, on dev %s", (u_long)ip->i_number,
2315	    devtoname(ip->i_dev));
2316	if (vp->v_type == VFIFO)
2317		fifo_printinfo(vp);
2318	printf("\n");
2319	return (0);
2320}
2321
2322/*
2323 * Close wrapper for fifos.
2324 *
2325 * Update the times on the inode then do device close.
2326 */
2327static int
2328ufsfifo_close(ap)
2329	struct vop_close_args /* {
2330		struct vnode *a_vp;
2331		int  a_fflag;
2332		struct ucred *a_cred;
2333		struct thread *a_td;
2334	} */ *ap;
2335{
2336	struct vnode *vp = ap->a_vp;
2337	int usecount;
2338
2339	VI_LOCK(vp);
2340	usecount = vp->v_usecount;
2341	if (usecount > 1)
2342		ufs_itimes_locked(vp);
2343	VI_UNLOCK(vp);
2344	return (fifo_specops.vop_close(ap));
2345}
2346
2347/*
2348 * Kqfilter wrapper for fifos.
2349 *
2350 * Fall through to ufs kqfilter routines if needed
2351 */
2352static int
2353ufsfifo_kqfilter(ap)
2354	struct vop_kqfilter_args *ap;
2355{
2356	int error;
2357
2358	error = fifo_specops.vop_kqfilter(ap);
2359	if (error)
2360		error = vfs_kqfilter(ap);
2361	return (error);
2362}
2363
2364/*
2365 * Return POSIX pathconf information applicable to fifos.
2366 */
2367static int
2368ufsfifo_pathconf(ap)
2369	struct vop_pathconf_args /* {
2370		struct vnode *a_vp;
2371		int a_name;
2372		int *a_retval;
2373	} */ *ap;
2374{
2375
2376	switch (ap->a_name) {
2377	case _PC_ACL_EXTENDED:
2378	case _PC_ACL_NFS4:
2379	case _PC_ACL_PATH_MAX:
2380	case _PC_MAC_PRESENT:
2381		return (ufs_pathconf(ap));
2382	default:
2383		return (fifo_specops.vop_pathconf(ap));
2384	}
2385	/* NOTREACHED */
2386}
2387
2388/*
2389 * Return POSIX pathconf information applicable to ufs filesystems.
2390 */
2391static int
2392ufs_pathconf(ap)
2393	struct vop_pathconf_args /* {
2394		struct vnode *a_vp;
2395		int a_name;
2396		int *a_retval;
2397	} */ *ap;
2398{
2399	int error;
2400
2401	error = 0;
2402	switch (ap->a_name) {
2403	case _PC_LINK_MAX:
2404		*ap->a_retval = LINK_MAX;
2405		break;
2406	case _PC_NAME_MAX:
2407		*ap->a_retval = NAME_MAX;
2408		break;
2409	case _PC_PATH_MAX:
2410		*ap->a_retval = PATH_MAX;
2411		break;
2412	case _PC_PIPE_BUF:
2413		*ap->a_retval = PIPE_BUF;
2414		break;
2415	case _PC_CHOWN_RESTRICTED:
2416		*ap->a_retval = 1;
2417		break;
2418	case _PC_NO_TRUNC:
2419		*ap->a_retval = 1;
2420		break;
2421	case _PC_ACL_EXTENDED:
2422#ifdef UFS_ACL
2423		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
2424			*ap->a_retval = 1;
2425		else
2426			*ap->a_retval = 0;
2427#else
2428		*ap->a_retval = 0;
2429#endif
2430		break;
2431
2432	case _PC_ACL_NFS4:
2433#ifdef UFS_ACL
2434		if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
2435			*ap->a_retval = 1;
2436		else
2437			*ap->a_retval = 0;
2438#else
2439		*ap->a_retval = 0;
2440#endif
2441		break;
2442
2443	case _PC_ACL_PATH_MAX:
2444#ifdef UFS_ACL
2445		if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS))
2446			*ap->a_retval = ACL_MAX_ENTRIES;
2447		else
2448			*ap->a_retval = 3;
2449#else
2450		*ap->a_retval = 3;
2451#endif
2452		break;
2453	case _PC_MAC_PRESENT:
2454#ifdef MAC
2455		if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
2456			*ap->a_retval = 1;
2457		else
2458			*ap->a_retval = 0;
2459#else
2460		*ap->a_retval = 0;
2461#endif
2462		break;
2463	case _PC_ASYNC_IO:
2464		/* _PC_ASYNC_IO should have been handled by upper layers. */
2465		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
2466		error = EINVAL;
2467		break;
2468	case _PC_PRIO_IO:
2469		*ap->a_retval = 0;
2470		break;
2471	case _PC_SYNC_IO:
2472		*ap->a_retval = 0;
2473		break;
2474	case _PC_ALLOC_SIZE_MIN:
2475		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2476		break;
2477	case _PC_FILESIZEBITS:
2478		*ap->a_retval = 64;
2479		break;
2480	case _PC_REC_INCR_XFER_SIZE:
2481		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2482		break;
2483	case _PC_REC_MAX_XFER_SIZE:
2484		*ap->a_retval = -1; /* means ``unlimited'' */
2485		break;
2486	case _PC_REC_MIN_XFER_SIZE:
2487		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2488		break;
2489	case _PC_REC_XFER_ALIGN:
2490		*ap->a_retval = PAGE_SIZE;
2491		break;
2492	case _PC_SYMLINK_MAX:
2493		*ap->a_retval = MAXPATHLEN;
2494		break;
2495
2496	default:
2497		error = EINVAL;
2498		break;
2499	}
2500	return (error);
2501}
2502
2503/*
2504 * Initialize the vnode associated with a new inode, handle aliased
2505 * vnodes.
2506 */
2507int
2508ufs_vinit(mntp, fifoops, vpp)
2509	struct mount *mntp;
2510	struct vop_vector *fifoops;
2511	struct vnode **vpp;
2512{
2513	struct inode *ip;
2514	struct vnode *vp;
2515
2516	vp = *vpp;
2517	ip = VTOI(vp);
2518	vp->v_type = IFTOVT(ip->i_mode);
2519	if (vp->v_type == VFIFO)
2520		vp->v_op = fifoops;
2521	ASSERT_VOP_LOCKED(vp, "ufs_vinit");
2522	if (ip->i_number == ROOTINO)
2523		vp->v_vflag |= VV_ROOT;
2524	*vpp = vp;
2525	return (0);
2526}
2527
2528/*
2529 * Allocate a new inode.
2530 * Vnode dvp must be locked.
2531 */
2532static int
2533ufs_makeinode(mode, dvp, vpp, cnp)
2534	int mode;
2535	struct vnode *dvp;
2536	struct vnode **vpp;
2537	struct componentname *cnp;
2538{
2539	struct inode *ip, *pdir;
2540	struct direct newdir;
2541	struct vnode *tvp;
2542	int error;
2543
2544	pdir = VTOI(dvp);
2545#ifdef INVARIANTS
2546	if ((cnp->cn_flags & HASBUF) == 0)
2547		panic("ufs_makeinode: no name");
2548#endif
2549	*vpp = NULL;
2550	if ((mode & IFMT) == 0)
2551		mode |= IFREG;
2552
2553	if (VTOI(dvp)->i_effnlink < 2)
2554		panic("ufs_makeinode: Bad link count %d on parent",
2555		    VTOI(dvp)->i_effnlink);
2556	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2557	if (error)
2558		return (error);
2559	ip = VTOI(tvp);
2560	ip->i_gid = pdir->i_gid;
2561	DIP_SET(ip, i_gid, pdir->i_gid);
2562#ifdef SUIDDIR
2563	{
2564#ifdef QUOTA
2565		struct ucred ucred, *ucp;
2566		gid_t ucred_group;
2567		ucp = cnp->cn_cred;
2568#endif
2569		/*
2570		 * If we are not the owner of the directory,
2571		 * and we are hacking owners here, (only do this where told to)
2572		 * and we are not giving it TO root, (would subvert quotas)
2573		 * then go ahead and give it to the other user.
2574		 * Note that this drops off the execute bits for security.
2575		 */
2576		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2577		    (pdir->i_mode & ISUID) &&
2578		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2579			ip->i_uid = pdir->i_uid;
2580			DIP_SET(ip, i_uid, ip->i_uid);
2581			mode &= ~07111;
2582#ifdef QUOTA
2583			/*
2584			 * Make sure the correct user gets charged
2585			 * for the space.
2586			 * Quickly knock up a dummy credential for the victim.
2587			 * XXX This seems to never be accessed out of our
2588			 * context so a stack variable is ok.
2589			 */
2590			refcount_init(&ucred.cr_ref, 1);
2591			ucred.cr_uid = ip->i_uid;
2592			ucred.cr_ngroups = 1;
2593			ucred.cr_groups = &ucred_group;
2594			ucred.cr_groups[0] = pdir->i_gid;
2595			ucp = &ucred;
2596#endif
2597		} else {
2598			ip->i_uid = cnp->cn_cred->cr_uid;
2599			DIP_SET(ip, i_uid, ip->i_uid);
2600		}
2601
2602#ifdef QUOTA
2603		if ((error = getinoquota(ip)) ||
2604	    	    (error = chkiq(ip, 1, ucp, 0))) {
2605			UFS_VFREE(tvp, ip->i_number, mode);
2606			vput(tvp);
2607			return (error);
2608		}
2609#endif
2610	}
2611#else	/* !SUIDDIR */
2612	ip->i_uid = cnp->cn_cred->cr_uid;
2613	DIP_SET(ip, i_uid, ip->i_uid);
2614#ifdef QUOTA
2615	if ((error = getinoquota(ip)) ||
2616	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2617		UFS_VFREE(tvp, ip->i_number, mode);
2618		vput(tvp);
2619		return (error);
2620	}
2621#endif
2622#endif	/* !SUIDDIR */
2623	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2624	ip->i_mode = mode;
2625	DIP_SET(ip, i_mode, mode);
2626	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2627	ip->i_effnlink = 1;
2628	ip->i_nlink = 1;
2629	DIP_SET(ip, i_nlink, 1);
2630	if (DOINGSOFTDEP(tvp))
2631		softdep_setup_create(VTOI(dvp), ip);
2632	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2633	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
2634		ip->i_mode &= ~ISGID;
2635		DIP_SET(ip, i_mode, ip->i_mode);
2636	}
2637
2638	if (cnp->cn_flags & ISWHITEOUT) {
2639		ip->i_flags |= UF_OPAQUE;
2640		DIP_SET(ip, i_flags, ip->i_flags);
2641	}
2642
2643	/*
2644	 * Make sure inode goes to disk before directory entry.
2645	 */
2646	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2647	if (error)
2648		goto bad;
2649#ifdef MAC
2650	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
2651		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
2652		    dvp, tvp, cnp);
2653		if (error)
2654			goto bad;
2655	}
2656#endif
2657#ifdef UFS_ACL
2658	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
2659		error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
2660		    cnp->cn_cred, cnp->cn_thread);
2661		if (error)
2662			goto bad;
2663	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
2664		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
2665		    cnp->cn_cred, cnp->cn_thread);
2666		if (error)
2667			goto bad;
2668	}
2669#endif /* !UFS_ACL */
2670	ufs_makedirentry(ip, cnp, &newdir);
2671	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
2672	if (error)
2673		goto bad;
2674	*vpp = tvp;
2675	return (0);
2676
2677bad:
2678	/*
2679	 * Write error occurred trying to update the inode
2680	 * or the directory so must deallocate the inode.
2681	 */
2682	ip->i_effnlink = 0;
2683	ip->i_nlink = 0;
2684	DIP_SET(ip, i_nlink, 0);
2685	ip->i_flag |= IN_CHANGE;
2686	if (DOINGSOFTDEP(tvp))
2687		softdep_revert_create(VTOI(dvp), ip);
2688	vput(tvp);
2689	return (error);
2690}
2691
2692/* Global vfs data structures for ufs. */
2693struct vop_vector ufs_vnodeops = {
2694	.vop_default =		&default_vnodeops,
2695	.vop_fsync =		VOP_PANIC,
2696	.vop_read =		VOP_PANIC,
2697	.vop_reallocblks =	VOP_PANIC,
2698	.vop_write =		VOP_PANIC,
2699	.vop_accessx =		ufs_accessx,
2700	.vop_bmap =		ufs_bmap,
2701	.vop_cachedlookup =	ufs_lookup,
2702	.vop_close =		ufs_close,
2703	.vop_create =		ufs_create,
2704	.vop_getattr =		ufs_getattr,
2705	.vop_inactive =		ufs_inactive,
2706	.vop_link =		ufs_link,
2707	.vop_lookup =		vfs_cache_lookup,
2708	.vop_markatime =	ufs_markatime,
2709	.vop_mkdir =		ufs_mkdir,
2710	.vop_mknod =		ufs_mknod,
2711	.vop_open =		ufs_open,
2712	.vop_pathconf =		ufs_pathconf,
2713	.vop_poll =		vop_stdpoll,
2714	.vop_print =		ufs_print,
2715	.vop_readdir =		ufs_readdir,
2716	.vop_readlink =		ufs_readlink,
2717	.vop_reclaim =		ufs_reclaim,
2718	.vop_remove =		ufs_remove,
2719	.vop_rename =		ufs_rename,
2720	.vop_rmdir =		ufs_rmdir,
2721	.vop_setattr =		ufs_setattr,
2722#ifdef MAC
2723	.vop_setlabel =		vop_stdsetlabel_ea,
2724#endif
2725	.vop_strategy =		ufs_strategy,
2726	.vop_symlink =		ufs_symlink,
2727	.vop_whiteout =		ufs_whiteout,
2728#ifdef UFS_EXTATTR
2729	.vop_getextattr =	ufs_getextattr,
2730	.vop_deleteextattr =	ufs_deleteextattr,
2731	.vop_setextattr =	ufs_setextattr,
2732#endif
2733#ifdef UFS_ACL
2734	.vop_getacl =		ufs_getacl,
2735	.vop_setacl =		ufs_setacl,
2736	.vop_aclcheck =		ufs_aclcheck,
2737#endif
2738};
2739
2740struct vop_vector ufs_fifoops = {
2741	.vop_default =		&fifo_specops,
2742	.vop_fsync =		VOP_PANIC,
2743	.vop_accessx =		ufs_accessx,
2744	.vop_close =		ufsfifo_close,
2745	.vop_getattr =		ufs_getattr,
2746	.vop_inactive =		ufs_inactive,
2747	.vop_kqfilter =		ufsfifo_kqfilter,
2748	.vop_markatime =	ufs_markatime,
2749	.vop_pathconf = 	ufsfifo_pathconf,
2750	.vop_print =		ufs_print,
2751	.vop_read =		VOP_PANIC,
2752	.vop_reclaim =		ufs_reclaim,
2753	.vop_setattr =		ufs_setattr,
2754#ifdef MAC
2755	.vop_setlabel =		vop_stdsetlabel_ea,
2756#endif
2757	.vop_write =		VOP_PANIC,
2758#ifdef UFS_EXTATTR
2759	.vop_getextattr =	ufs_getextattr,
2760	.vop_deleteextattr =	ufs_deleteextattr,
2761	.vop_setextattr =	ufs_setextattr,
2762#endif
2763#ifdef UFS_ACL
2764	.vop_getacl =		ufs_getacl,
2765	.vop_setacl =		ufs_setacl,
2766	.vop_aclcheck =		ufs_aclcheck,
2767#endif
2768};
2769