ufs_vnops.c revision 67893
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
39 * $FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 67893 2000-10-29 16:06:56Z phk $
40 */
41
42#include "opt_quota.h"
43#include "opt_suiddir.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/namei.h>
48#include <sys/kernel.h>
49#include <sys/fcntl.h>
50#include <sys/stat.h>
51#include <sys/bio.h>
52#include <sys/buf.h>
53#include <sys/mount.h>
54#include <sys/unistd.h>
55#include <sys/vnode.h>
56#include <sys/malloc.h>
57#include <sys/dirent.h>
58#include <sys/lockf.h>
59#include <sys/event.h>
60#include <sys/conf.h>
61
62#include <machine/mutex.h>
63
64#include <vm/vm.h>
65#include <vm/vm_extern.h>
66
67#include <miscfs/fifofs/fifo.h>
68
69#include <ufs/ufs/extattr.h>
70#include <ufs/ufs/quota.h>
71#include <ufs/ufs/inode.h>
72#include <ufs/ufs/dir.h>
73#include <ufs/ufs/ufsmount.h>
74#include <ufs/ufs/ufs_extern.h>
75
76static int ufs_access __P((struct vop_access_args *));
77static int ufs_advlock __P((struct vop_advlock_args *));
78static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *));
79static int ufs_chown __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *));
80static int ufs_close __P((struct vop_close_args *));
81static int ufs_create __P((struct vop_create_args *));
82static int ufs_getattr __P((struct vop_getattr_args *));
83static int ufs_link __P((struct vop_link_args *));
84static int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *));
85static int ufs_missingop __P((struct vop_generic_args *ap));
86static int ufs_mkdir __P((struct vop_mkdir_args *));
87static int ufs_mknod __P((struct vop_mknod_args *));
88static int ufs_mmap __P((struct vop_mmap_args *));
89static int ufs_open __P((struct vop_open_args *));
90static int ufs_pathconf __P((struct vop_pathconf_args *));
91static int ufs_print __P((struct vop_print_args *));
92static int ufs_readdir __P((struct vop_readdir_args *));
93static int ufs_readlink __P((struct vop_readlink_args *));
94static int ufs_remove __P((struct vop_remove_args *));
95static int ufs_rename __P((struct vop_rename_args *));
96static int ufs_rmdir __P((struct vop_rmdir_args *));
97static int ufs_setattr __P((struct vop_setattr_args *));
98static int ufs_strategy __P((struct vop_strategy_args *));
99static int ufs_symlink __P((struct vop_symlink_args *));
100static int ufs_whiteout __P((struct vop_whiteout_args *));
101static int ufsfifo_close __P((struct vop_close_args *));
102static int ufsfifo_read __P((struct vop_read_args *));
103static int ufsfifo_write __P((struct vop_write_args *));
104static int ufsspec_close __P((struct vop_close_args *));
105static int ufsspec_read __P((struct vop_read_args *));
106static int ufsspec_write __P((struct vop_write_args *));
107
/*
 * Overlay giving access to the two 32-bit halves of a 64-bit quantity.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};

/*
 * SETHIGH(q, h): replace the high-order 32-bit word of the 64-bit lvalue
 * q with h; the other word is preserved.  _QUAD_HIGHWORD selects which
 * element of val[] is treated as the high word.
 */
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}

/*
 * SETLOW(q, l): replace the low-order 32-bit word of the 64-bit lvalue
 * q with l; the other word is preserved.  _QUAD_LOWWORD selects which
 * element of val[] is treated as the low word.
 */
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}

/*
 * VN_KNOTE(vp, b): hand hint b to KNOTE() for the note list embedded in
 * the vnode's poll info.  The vp argument is parenthesized so that any
 * pointer-valued expression (not only a plain identifier) may be passed.
 */
#define VN_KNOTE(vp, b) \
	KNOTE(&(vp)->v_pollinfo.vpi_selinfo.si_note, (b))
126
/*
 * A virgin directory (no blushing please).
 *
 * Initial contents for a freshly created directory: one record named "."
 * followed by one named "..".  The two record lengths (12 and
 * DIRBLKSIZ - 12) together account for exactly one DIRBLKSIZ block.
 *
 * NOTE(review): the initializers are positional; the fields presumably
 * read inode, record length, entry type, name length, name -- confirm
 * against struct dirtemplate.
 */
static struct dirtemplate mastertemplate = {
	0, 12, DT_DIR, 1, ".",
	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
};
/*
 * Variant of the template without the DT_DIR type member.
 * NOTE(review): presumably for old-format directories -- confirm against
 * struct odirtemplate.
 */
static struct odirtemplate omastertemplate = {
	0, 12, 1, ".",
	0, DIRBLKSIZ - 12, 2, ".."
};
138
139void
140ufs_itimes(vp)
141	struct vnode *vp;
142{
143	struct inode *ip;
144	struct timespec ts;
145
146	ip = VTOI(vp);
147	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
148		return;
149	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
150		vfs_timestamp(&ts);
151		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
152		    !DOINGSOFTDEP(vp))
153			ip->i_flag |= IN_LAZYMOD;
154		else
155			ip->i_flag |= IN_MODIFIED;
156		if (ip->i_flag & IN_ACCESS) {
157			ip->i_atime = ts.tv_sec;
158			ip->i_atimensec = ts.tv_nsec;
159		}
160		if (ip->i_flag & IN_UPDATE) {
161			ip->i_mtime = ts.tv_sec;
162			ip->i_mtimensec = ts.tv_nsec;
163			ip->i_modrev++;
164		}
165		if (ip->i_flag & IN_CHANGE) {
166			ip->i_ctime = ts.tv_sec;
167			ip->i_ctimensec = ts.tv_nsec;
168		}
169	}
170	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
171}
172
173/*
174 * Create a regular file
175 */
176int
177ufs_create(ap)
178	struct vop_create_args /* {
179		struct vnode *a_dvp;
180		struct vnode **a_vpp;
181		struct componentname *a_cnp;
182		struct vattr *a_vap;
183	} */ *ap;
184{
185	int error;
186
187	error =
188	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
189	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
190	if (error)
191		return (error);
192	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
193	return (0);
194}
195
196/*
197 * Mknod vnode call
198 */
199/* ARGSUSED */
200int
201ufs_mknod(ap)
202	struct vop_mknod_args /* {
203		struct vnode *a_dvp;
204		struct vnode **a_vpp;
205		struct componentname *a_cnp;
206		struct vattr *a_vap;
207	} */ *ap;
208{
209	struct vattr *vap = ap->a_vap;
210	struct vnode **vpp = ap->a_vpp;
211	struct inode *ip;
212	int error;
213
214	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
215	    ap->a_dvp, vpp, ap->a_cnp);
216	if (error)
217		return (error);
218	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
219	ip = VTOI(*vpp);
220	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
221	if (vap->va_rdev != VNOVAL) {
222		/*
223		 * Want to be able to use this to make badblock
224		 * inodes, so don't truncate the dev number.
225		 */
226		ip->i_rdev = vap->va_rdev;
227	}
228	/*
229	 * Remove inode, then reload it through VFS_VGET so it is
230	 * checked to see if it is an alias of an existing entry in
231	 * the inode cache.
232	 */
233	vput(*vpp);
234	(*vpp)->v_type = VNON;
235	vgone(*vpp);
236	error = VFS_VGET(ap->a_dvp->v_mount, ip->i_ino, vpp);
237	if (error) {
238		*vpp = NULL;
239		return (error);
240	}
241	return (0);
242}
243
244/*
245 * Open called.
246 *
247 * Nothing to do.
248 */
249/* ARGSUSED */
250int
251ufs_open(ap)
252	struct vop_open_args /* {
253		struct vnode *a_vp;
254		int  a_mode;
255		struct ucred *a_cred;
256		struct proc *a_p;
257	} */ *ap;
258{
259
260	/*
261	 * Files marked append-only must be opened for appending.
262	 */
263	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
264	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
265		return (EPERM);
266	return (0);
267}
268
269/*
270 * Close called.
271 *
272 * Update the times on the inode.
273 */
274/* ARGSUSED */
275int
276ufs_close(ap)
277	struct vop_close_args /* {
278		struct vnode *a_vp;
279		int  a_fflag;
280		struct ucred *a_cred;
281		struct proc *a_p;
282	} */ *ap;
283{
284	register struct vnode *vp = ap->a_vp;
285
286	mtx_enter(&vp->v_interlock, MTX_DEF);
287	if (vp->v_usecount > 1)
288		ufs_itimes(vp);
289	mtx_exit(&vp->v_interlock, MTX_DEF);
290	return (0);
291}
292
293int
294ufs_access(ap)
295	struct vop_access_args /* {
296		struct vnode *a_vp;
297		int  a_mode;
298		struct ucred *a_cred;
299		struct proc *a_p;
300	} */ *ap;
301{
302	struct vnode *vp = ap->a_vp;
303	struct inode *ip = VTOI(vp);
304	mode_t mode = ap->a_mode;
305#ifdef QUOTA
306	int error;
307#endif
308
309	/*
310	 * Disallow write attempts on read-only file systems;
311	 * unless the file is a socket, fifo, or a block or
312	 * character device resident on the file system.
313	 */
314	if (mode & VWRITE) {
315		switch (vp->v_type) {
316		case VDIR:
317		case VLNK:
318		case VREG:
319			if (vp->v_mount->mnt_flag & MNT_RDONLY)
320				return (EROFS);
321#ifdef QUOTA
322			if ((error = getinoquota(ip)) != 0)
323				return (error);
324#endif
325			break;
326		default:
327			break;
328		}
329	}
330
331	/* If immutable bit set, nobody gets to write it. */
332	if ((mode & VWRITE) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
333		return (EPERM);
334
335	return (vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
336	    ap->a_mode, ap->a_cred, NULL));
337}
338
339/* ARGSUSED */
340int
341ufs_getattr(ap)
342	struct vop_getattr_args /* {
343		struct vnode *a_vp;
344		struct vattr *a_vap;
345		struct ucred *a_cred;
346		struct proc *a_p;
347	} */ *ap;
348{
349	register struct vnode *vp = ap->a_vp;
350	register struct inode *ip = VTOI(vp);
351	register struct vattr *vap = ap->a_vap;
352
353	ufs_itimes(vp);
354	/*
355	 * Copy from inode table
356	 */
357	vap->va_fsid = dev2udev(ip->i_dev);
358	vap->va_fileid = ip->i_number;
359	vap->va_mode = ip->i_mode & ~IFMT;
360	vap->va_nlink = VFSTOUFS(vp->v_mount)->um_i_effnlink_valid ?
361	    ip->i_effnlink : ip->i_nlink;
362	vap->va_uid = ip->i_uid;
363	vap->va_gid = ip->i_gid;
364	vap->va_rdev = ip->i_rdev;
365	vap->va_size = ip->i_din.di_size;
366	vap->va_atime.tv_sec = ip->i_atime;
367	vap->va_atime.tv_nsec = ip->i_atimensec;
368	vap->va_mtime.tv_sec = ip->i_mtime;
369	vap->va_mtime.tv_nsec = ip->i_mtimensec;
370	vap->va_ctime.tv_sec = ip->i_ctime;
371	vap->va_ctime.tv_nsec = ip->i_ctimensec;
372	vap->va_flags = ip->i_flags;
373	vap->va_gen = ip->i_gen;
374	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
375	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
376	vap->va_type = IFTOVT(ip->i_mode);
377	vap->va_filerev = ip->i_modrev;
378	return (0);
379}
380
381/*
382 * Set attribute vnode op. called from several syscalls
383 */
384int
385ufs_setattr(ap)
386	struct vop_setattr_args /* {
387		struct vnode *a_vp;
388		struct vattr *a_vap;
389		struct ucred *a_cred;
390		struct proc *a_p;
391	} */ *ap;
392{
393	struct vattr *vap = ap->a_vap;
394	struct vnode *vp = ap->a_vp;
395	struct inode *ip = VTOI(vp);
396	struct ucred *cred = ap->a_cred;
397	struct proc *p = ap->a_p;
398	int error;
399
400	/*
401	 * Check for unsettable attributes.
402	 */
403	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
404	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
405	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
406	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
407		return (EINVAL);
408	}
409	if (vap->va_flags != VNOVAL) {
410		if (vp->v_mount->mnt_flag & MNT_RDONLY)
411			return (EROFS);
412		/*
413		 * Callers may only modify the file flags on objects they
414		 * have VADMIN rights for.
415		 */
416		if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
417			return (error);
418		/*
419		 * Unprivileged processes and privileged processes in
420		 * jail() are not permitted to set system flags.
421		 * Privileged processes not in jail() may only set system
422		 * flags if the securelevel <= 0.
423		 */
424		if (!suser_xxx(cred, NULL, 0)) {
425			if ((ip->i_flags
426			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) &&
427			    securelevel > 0)
428				return (EPERM);
429			/* Snapshot flag cannot be set or cleared */
430			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
431			     (ip->i_flags & SF_SNAPSHOT) == 0) ||
432			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
433			     (ip->i_flags & SF_SNAPSHOT) != 0))
434				return (EPERM);
435			ip->i_flags = vap->va_flags;
436		} else {
437			if (ip->i_flags
438			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
439			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
440				return (EPERM);
441			ip->i_flags &= SF_SETTABLE;
442			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
443		}
444		ip->i_flag |= IN_CHANGE;
445		if (vap->va_flags & (IMMUTABLE | APPEND))
446			return (0);
447	}
448	if (ip->i_flags & (IMMUTABLE | APPEND))
449		return (EPERM);
450	/*
451	 * Go through the fields and update iff not VNOVAL.
452	 */
453	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
454		if (vp->v_mount->mnt_flag & MNT_RDONLY)
455			return (EROFS);
456		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
457		    p)) != 0)
458			return (error);
459	}
460	if (vap->va_size != VNOVAL) {
461		/*
462		 * Disallow write attempts on read-only file systems;
463		 * unless the file is a socket, fifo, or a block or
464		 * character device resident on the file system.
465		 */
466		switch (vp->v_type) {
467		case VDIR:
468			return (EISDIR);
469		case VLNK:
470		case VREG:
471			if (vp->v_mount->mnt_flag & MNT_RDONLY)
472				return (EROFS);
473			if ((ip->i_flags & SF_SNAPSHOT) != 0)
474				return (EPERM);
475			break;
476		default:
477			break;
478		}
479		if ((error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0)
480			return (error);
481	}
482	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
483		if (vp->v_mount->mnt_flag & MNT_RDONLY)
484			return (EROFS);
485		if ((ip->i_flags & SF_SNAPSHOT) != 0)
486			return (EPERM);
487		/*
488		 * From utimes(2):
489		 * If times is NULL, ... The caller must be the owner of
490		 * the file, have permission to write the file, or be the
491		 * super-user.
492		 * If times is non-NULL, ... The caller must be the owner of
493		 * the file or be the super-user.
494		 */
495		if ((error = VOP_ACCESS(vp, VADMIN, cred, p)) &&
496		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
497		    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
498			return (error);
499		if (vap->va_atime.tv_sec != VNOVAL)
500			ip->i_flag |= IN_ACCESS;
501		if (vap->va_mtime.tv_sec != VNOVAL)
502			ip->i_flag |= IN_CHANGE | IN_UPDATE;
503		ufs_itimes(vp);
504		if (vap->va_atime.tv_sec != VNOVAL) {
505			ip->i_atime = vap->va_atime.tv_sec;
506			ip->i_atimensec = vap->va_atime.tv_nsec;
507		}
508		if (vap->va_mtime.tv_sec != VNOVAL) {
509			ip->i_mtime = vap->va_mtime.tv_sec;
510			ip->i_mtimensec = vap->va_mtime.tv_nsec;
511		}
512		error = UFS_UPDATE(vp, 0);
513		if (error)
514			return (error);
515	}
516	error = 0;
517	if (vap->va_mode != (mode_t)VNOVAL) {
518		if (vp->v_mount->mnt_flag & MNT_RDONLY)
519			return (EROFS);
520		if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
521		   (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
522			return (EPERM);
523		error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
524	}
525	VN_KNOTE(vp, NOTE_ATTRIB);
526	return (error);
527}
528
529/*
530 * Change the mode on a file.
531 * Inode must be locked before calling.
532 */
533static int
534ufs_chmod(vp, mode, cred, p)
535	register struct vnode *vp;
536	register int mode;
537	register struct ucred *cred;
538	struct proc *p;
539{
540	register struct inode *ip = VTOI(vp);
541	int error;
542
543	/*
544	 * To modify the permissions on a file, must possess VADMIN
545	 * for that file.
546	 */
547	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
548		return (error);
549	/*
550	 * Privileged processes may set the sticky bit on non-directories,
551	 * as well as set the setgid bit on a file with a group that the
552	 * process is not a member of.
553	 */
554	if (suser_xxx(cred, NULL, PRISON_ROOT)) {
555		if (vp->v_type != VDIR && (mode & S_ISTXT))
556			return (EFTYPE);
557		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
558			return (EPERM);
559	}
560	ip->i_mode &= ~ALLPERMS;
561	ip->i_mode |= (mode & ALLPERMS);
562	ip->i_flag |= IN_CHANGE;
563	return (0);
564}
565
566/*
567 * Perform chown operation on inode ip;
568 * inode must be locked prior to call.
569 */
570static int
571ufs_chown(vp, uid, gid, cred, p)
572	register struct vnode *vp;
573	uid_t uid;
574	gid_t gid;
575	struct ucred *cred;
576	struct proc *p;
577{
578	register struct inode *ip = VTOI(vp);
579	uid_t ouid;
580	gid_t ogid;
581	int error = 0;
582#ifdef QUOTA
583	register int i;
584	long change;
585#endif
586
587	if (uid == (uid_t)VNOVAL)
588		uid = ip->i_uid;
589	if (gid == (gid_t)VNOVAL)
590		gid = ip->i_gid;
591	/*
592	 * To modify the ownership of a file, must possess VADMIN
593	 * for that file.
594	 */
595	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
596		return (error);
597	/*
598	 * To change the owner of a file, or change the group of a file
599	 * to a group of which we are not a member, the caller must
600	 * have privilege.
601	 */
602	if ((uid != ip->i_uid ||
603	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
604	    (error = suser_xxx(cred, p, PRISON_ROOT)))
605		return (error);
606	ogid = ip->i_gid;
607	ouid = ip->i_uid;
608#ifdef QUOTA
609	if ((error = getinoquota(ip)) != 0)
610		return (error);
611	if (ouid == uid) {
612		dqrele(vp, ip->i_dquot[USRQUOTA]);
613		ip->i_dquot[USRQUOTA] = NODQUOT;
614	}
615	if (ogid == gid) {
616		dqrele(vp, ip->i_dquot[GRPQUOTA]);
617		ip->i_dquot[GRPQUOTA] = NODQUOT;
618	}
619	change = ip->i_blocks;
620	(void) chkdq(ip, -change, cred, CHOWN);
621	(void) chkiq(ip, -1, cred, CHOWN);
622	for (i = 0; i < MAXQUOTAS; i++) {
623		dqrele(vp, ip->i_dquot[i]);
624		ip->i_dquot[i] = NODQUOT;
625	}
626#endif
627	ip->i_gid = gid;
628	ip->i_uid = uid;
629#ifdef QUOTA
630	if ((error = getinoquota(ip)) == 0) {
631		if (ouid == uid) {
632			dqrele(vp, ip->i_dquot[USRQUOTA]);
633			ip->i_dquot[USRQUOTA] = NODQUOT;
634		}
635		if (ogid == gid) {
636			dqrele(vp, ip->i_dquot[GRPQUOTA]);
637			ip->i_dquot[GRPQUOTA] = NODQUOT;
638		}
639		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
640			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
641				goto good;
642			else
643				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
644		}
645		for (i = 0; i < MAXQUOTAS; i++) {
646			dqrele(vp, ip->i_dquot[i]);
647			ip->i_dquot[i] = NODQUOT;
648		}
649	}
650	ip->i_gid = ogid;
651	ip->i_uid = ouid;
652	if (getinoquota(ip) == 0) {
653		if (ouid == uid) {
654			dqrele(vp, ip->i_dquot[USRQUOTA]);
655			ip->i_dquot[USRQUOTA] = NODQUOT;
656		}
657		if (ogid == gid) {
658			dqrele(vp, ip->i_dquot[GRPQUOTA]);
659			ip->i_dquot[GRPQUOTA] = NODQUOT;
660		}
661		(void) chkdq(ip, change, cred, FORCE|CHOWN);
662		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
663		(void) getinoquota(ip);
664	}
665	return (error);
666good:
667	if (getinoquota(ip))
668		panic("ufs_chown: lost quota");
669#endif /* QUOTA */
670	ip->i_flag |= IN_CHANGE;
671	if (suser_xxx(cred, NULL, PRISON_ROOT) && (ouid != uid || ogid != gid))
672		ip->i_mode &= ~(ISUID | ISGID);
673	return (0);
674}
675
676/*
677 * Mmap a file
678 *
679 * NB Currently unsupported.
680 */
681/* ARGSUSED */
682int
683ufs_mmap(ap)
684	struct vop_mmap_args /* {
685		struct vnode *a_vp;
686		int  a_fflags;
687		struct ucred *a_cred;
688		struct proc *a_p;
689	} */ *ap;
690{
691
692	return (EINVAL);
693}
694
695int
696ufs_remove(ap)
697	struct vop_remove_args /* {
698		struct vnode *a_dvp;
699		struct vnode *a_vp;
700		struct componentname *a_cnp;
701	} */ *ap;
702{
703	struct inode *ip;
704	struct vnode *vp = ap->a_vp;
705	struct vnode *dvp = ap->a_dvp;
706	int error;
707
708	ip = VTOI(vp);
709	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
710	    (VTOI(dvp)->i_flags & APPEND)) {
711		error = EPERM;
712		goto out;
713	}
714	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
715	VN_KNOTE(vp, NOTE_DELETE);
716	VN_KNOTE(dvp, NOTE_WRITE);
717out:
718	return (error);
719}
720
721/*
722 * link vnode call
723 */
724int
725ufs_link(ap)
726	struct vop_link_args /* {
727		struct vnode *a_tdvp;
728		struct vnode *a_vp;
729		struct componentname *a_cnp;
730	} */ *ap;
731{
732	struct vnode *vp = ap->a_vp;
733	struct vnode *tdvp = ap->a_tdvp;
734	struct componentname *cnp = ap->a_cnp;
735	struct proc *p = cnp->cn_proc;
736	struct inode *ip;
737	struct direct newdir;
738	int error;
739
740#ifdef DIAGNOSTIC
741	if ((cnp->cn_flags & HASBUF) == 0)
742		panic("ufs_link: no name");
743#endif
744	if (tdvp->v_mount != vp->v_mount) {
745		error = EXDEV;
746		goto out2;
747	}
748	if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) {
749		goto out2;
750	}
751	ip = VTOI(vp);
752	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
753		error = EMLINK;
754		goto out1;
755	}
756	if (ip->i_flags & (IMMUTABLE | APPEND)) {
757		error = EPERM;
758		goto out1;
759	}
760	ip->i_effnlink++;
761	ip->i_nlink++;
762	ip->i_flag |= IN_CHANGE;
763	if (DOINGSOFTDEP(vp))
764		softdep_change_linkcnt(ip);
765	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
766	if (!error) {
767		ufs_makedirentry(ip, cnp, &newdir);
768		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
769	}
770
771	if (error) {
772		ip->i_effnlink--;
773		ip->i_nlink--;
774		ip->i_flag |= IN_CHANGE;
775		if (DOINGSOFTDEP(vp))
776			softdep_change_linkcnt(ip);
777	}
778out1:
779	if (tdvp != vp)
780		VOP_UNLOCK(vp, 0, p);
781out2:
782	VN_KNOTE(vp, NOTE_LINK);
783	VN_KNOTE(tdvp, NOTE_WRITE);
784	return (error);
785}
786
787/*
788 * whiteout vnode call
789 */
790int
791ufs_whiteout(ap)
792	struct vop_whiteout_args /* {
793		struct vnode *a_dvp;
794		struct componentname *a_cnp;
795		int a_flags;
796	} */ *ap;
797{
798	struct vnode *dvp = ap->a_dvp;
799	struct componentname *cnp = ap->a_cnp;
800	struct direct newdir;
801	int error = 0;
802
803	switch (ap->a_flags) {
804	case LOOKUP:
805		/* 4.4 format directories support whiteout operations */
806		if (dvp->v_mount->mnt_maxsymlinklen > 0)
807			return (0);
808		return (EOPNOTSUPP);
809
810	case CREATE:
811		/* create a new directory whiteout */
812#ifdef DIAGNOSTIC
813		if ((cnp->cn_flags & SAVENAME) == 0)
814			panic("ufs_whiteout: missing name");
815		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
816			panic("ufs_whiteout: old format filesystem");
817#endif
818
819		newdir.d_ino = WINO;
820		newdir.d_namlen = cnp->cn_namelen;
821		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
822		newdir.d_type = DT_WHT;
823		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
824		break;
825
826	case DELETE:
827		/* remove an existing directory whiteout */
828#ifdef DIAGNOSTIC
829		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
830			panic("ufs_whiteout: old format filesystem");
831#endif
832
833		cnp->cn_flags &= ~DOWHITEOUT;
834		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
835		break;
836	default:
837		panic("ufs_whiteout: unknown op");
838	}
839	return (error);
840}
841
842/*
843 * Rename system call.
844 * 	rename("foo", "bar");
845 * is essentially
846 *	unlink("bar");
847 *	link("foo", "bar");
848 *	unlink("foo");
849 * but ``atomically''.  Can't do full commit without saving state in the
850 * inode on disk which isn't feasible at this time.  Best we can do is
851 * always guarantee the target exists.
852 *
853 * Basic algorithm is:
854 *
855 * 1) Bump link count on source while we're linking it to the
856 *    target.  This also ensure the inode won't be deleted out
857 *    from underneath us while we work (it may be truncated by
858 *    a concurrent `trunc' or `open' for creation).
859 * 2) Link source to destination.  If destination already exists,
860 *    delete it first.
861 * 3) Unlink source reference to inode if still around. If a
862 *    directory was moved and the parent of the destination
863 *    is different from the source, patch the ".." entry in the
864 *    directory.
865 */
866int
867ufs_rename(ap)
868	struct vop_rename_args  /* {
869		struct vnode *a_fdvp;
870		struct vnode *a_fvp;
871		struct componentname *a_fcnp;
872		struct vnode *a_tdvp;
873		struct vnode *a_tvp;
874		struct componentname *a_tcnp;
875	} */ *ap;
876{
877	struct vnode *tvp = ap->a_tvp;
878	register struct vnode *tdvp = ap->a_tdvp;
879	struct vnode *fvp = ap->a_fvp;
880	struct vnode *fdvp = ap->a_fdvp;
881	struct componentname *tcnp = ap->a_tcnp;
882	struct componentname *fcnp = ap->a_fcnp;
883	struct proc *p = fcnp->cn_proc;
884	struct inode *ip, *xp, *dp;
885	struct direct newdir;
886	int doingdirectory = 0, oldparent = 0, newparent = 0;
887	int error = 0, ioflag;
888
889#ifdef DIAGNOSTIC
890	if ((tcnp->cn_flags & HASBUF) == 0 ||
891	    (fcnp->cn_flags & HASBUF) == 0)
892		panic("ufs_rename: no name");
893#endif
894	/*
895	 * Check for cross-device rename.
896	 */
897	if ((fvp->v_mount != tdvp->v_mount) ||
898	    (tvp && (fvp->v_mount != tvp->v_mount))) {
899		error = EXDEV;
900abortit:
901		if (tdvp == tvp)
902			vrele(tdvp);
903		else
904			vput(tdvp);
905		if (tvp)
906			vput(tvp);
907		vrele(fdvp);
908		vrele(fvp);
909		return (error);
910	}
911
912	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
913	    (VTOI(tdvp)->i_flags & APPEND))) {
914		error = EPERM;
915		goto abortit;
916	}
917
918	/*
919	 * Check if just deleting a link name or if we've lost a race.
920	 * If another process completes the same rename after we've looked
921	 * up the source and have blocked looking up the target, then the
922	 * source and target inodes may be identical now although the
923	 * names were never linked.
924	 */
925	if (fvp == tvp) {
926		if (fvp->v_type == VDIR) {
927			/*
928			 * Linked directories are impossible, so we must
929			 * have lost the race.  Pretend that the rename
930			 * completed before the lookup.
931			 */
932#ifdef UFS_RENAME_DEBUG
933			printf("ufs_rename: fvp == tvp for directories\n");
934#endif
935			error = ENOENT;
936			goto abortit;
937		}
938
939		/* Release destination completely. */
940		vput(tdvp);
941		vput(tvp);
942
943		/*
944		 * Delete source.  There is another race now that everything
945		 * is unlocked, but this doesn't cause any new complications.
946		 * Relookup() may find a file that is unrelated to the
947		 * original one, or it may fail.  Too bad.
948		 */
949		vrele(fdvp);
950		vrele(fvp);
951		fcnp->cn_flags &= ~MODMASK;
952		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
953		if ((fcnp->cn_flags & SAVESTART) == 0)
954			panic("ufs_rename: lost from startdir");
955		fcnp->cn_nameiop = DELETE;
956		VREF(fdvp);
957		error = relookup(fdvp, &fvp, fcnp);
958		if (error == 0)
959			vrele(fdvp);
960		if (fvp == NULL) {
961#ifdef UFS_RENAME_DEBUG
962			printf("ufs_rename: from name disappeared\n");
963#endif
964			return (ENOENT);
965		}
966		error = VOP_REMOVE(fdvp, fvp, fcnp);
967		if (fdvp == fvp)
968			vrele(fdvp);
969		else
970			vput(fdvp);
971		vput(fvp);
972		return (error);
973	}
974	if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0)
975		goto abortit;
976	dp = VTOI(fdvp);
977	ip = VTOI(fvp);
978	if (ip->i_nlink >= LINK_MAX) {
979		VOP_UNLOCK(fvp, 0, p);
980		error = EMLINK;
981		goto abortit;
982	}
983	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
984	    || (dp->i_flags & APPEND)) {
985		VOP_UNLOCK(fvp, 0, p);
986		error = EPERM;
987		goto abortit;
988	}
989	if ((ip->i_mode & IFMT) == IFDIR) {
990		/*
991		 * Avoid ".", "..", and aliases of "." for obvious reasons.
992		 */
993		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
994		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
995		    (ip->i_flag & IN_RENAME)) {
996			VOP_UNLOCK(fvp, 0, p);
997			error = EINVAL;
998			goto abortit;
999		}
1000		ip->i_flag |= IN_RENAME;
1001		oldparent = dp->i_number;
1002		doingdirectory = 1;
1003	}
1004	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXX right place? */
1005	vrele(fdvp);
1006
1007	/*
1008	 * When the target exists, both the directory
1009	 * and target vnodes are returned locked.
1010	 */
1011	dp = VTOI(tdvp);
1012	xp = NULL;
1013	if (tvp)
1014		xp = VTOI(tvp);
1015
1016	/*
1017	 * 1) Bump link count while we're moving stuff
1018	 *    around.  If we crash somewhere before
1019	 *    completing our work, the link count
1020	 *    may be wrong, but correctable.
1021	 */
1022	ip->i_effnlink++;
1023	ip->i_nlink++;
1024	ip->i_flag |= IN_CHANGE;
1025	if (DOINGSOFTDEP(fvp))
1026		softdep_change_linkcnt(ip);
1027	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
1028				       DOINGASYNC(fvp)))) != 0) {
1029		VOP_UNLOCK(fvp, 0, p);
1030		goto bad;
1031	}
1032
1033	/*
1034	 * If ".." must be changed (ie the directory gets a new
1035	 * parent) then the source directory must not be in the
1036	 * directory heirarchy above the target, as this would
1037	 * orphan everything below the source directory. Also
1038	 * the user must have write permission in the source so
1039	 * as to be able to change "..". We must repeat the call
1040	 * to namei, as the parent directory is unlocked by the
1041	 * call to checkpath().
1042	 */
1043	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
1044	VOP_UNLOCK(fvp, 0, p);
1045	if (oldparent != dp->i_number)
1046		newparent = dp->i_number;
1047	if (doingdirectory && newparent) {
1048		if (error)	/* write access check above */
1049			goto bad;
1050		if (xp != NULL)
1051			vput(tvp);
1052		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
1053		if (error)
1054			goto out;
1055		if ((tcnp->cn_flags & SAVESTART) == 0)
1056			panic("ufs_rename: lost to startdir");
1057		VREF(tdvp);
1058		error = relookup(tdvp, &tvp, tcnp);
1059		if (error)
1060			goto out;
1061		vrele(tdvp);
1062		dp = VTOI(tdvp);
1063		xp = NULL;
1064		if (tvp)
1065			xp = VTOI(tvp);
1066	}
1067	/*
1068	 * 2) If target doesn't exist, link the target
1069	 *    to the source and unlink the source.
1070	 *    Otherwise, rewrite the target directory
1071	 *    entry to reference the source inode and
1072	 *    expunge the original entry's existence.
1073	 */
1074	if (xp == NULL) {
1075		if (dp->i_dev != ip->i_dev)
1076			panic("ufs_rename: EXDEV");
1077		/*
1078		 * Account for ".." in new directory.
1079		 * When source and destination have the same
1080		 * parent we don't fool with the link count.
1081		 */
1082		if (doingdirectory && newparent) {
1083			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1084				error = EMLINK;
1085				goto bad;
1086			}
1087			dp->i_effnlink++;
1088			dp->i_nlink++;
1089			dp->i_flag |= IN_CHANGE;
1090			if (DOINGSOFTDEP(tdvp))
1091				softdep_change_linkcnt(dp);
1092			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1093						   DOINGASYNC(tdvp)));
1094			if (error)
1095				goto bad;
1096		}
1097		ufs_makedirentry(ip, tcnp, &newdir);
1098		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
1099		if (error) {
1100			if (doingdirectory && newparent) {
1101				dp->i_effnlink--;
1102				dp->i_nlink--;
1103				dp->i_flag |= IN_CHANGE;
1104				if (DOINGSOFTDEP(tdvp))
1105					softdep_change_linkcnt(dp);
1106				(void)UFS_UPDATE(tdvp, 1);
1107			}
1108			goto bad;
1109		}
1110		VN_KNOTE(tdvp, NOTE_WRITE);
1111		vput(tdvp);
1112	} else {
1113		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1114			panic("ufs_rename: EXDEV");
1115		/*
1116		 * Short circuit rename(foo, foo).
1117		 */
1118		if (xp->i_number == ip->i_number)
1119			panic("ufs_rename: same file");
1120		/*
1121		 * If the parent directory is "sticky", then the caller
1122		 * must possess VADMIN for the parent directory, or the
1123		 * destination of the rename.  This implements append-only
1124		 * directories.
1125		 */
1126		if ((dp->i_mode & S_ISTXT) &&
1127		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, p) &&
1128		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, p)) {
1129			error = EPERM;
1130			goto bad;
1131		}
1132		/*
1133		 * Target must be empty if a directory and have no links
1134		 * to it. Also, ensure source and target are compatible
1135		 * (both directories, or both not directories).
1136		 */
1137		if ((xp->i_mode&IFMT) == IFDIR) {
1138			if ((xp->i_effnlink > 2) ||
1139			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
1140				error = ENOTEMPTY;
1141				goto bad;
1142			}
1143			if (!doingdirectory) {
1144				error = ENOTDIR;
1145				goto bad;
1146			}
1147			cache_purge(tdvp);
1148		} else if (doingdirectory) {
1149			error = EISDIR;
1150			goto bad;
1151		}
1152		error = ufs_dirrewrite(dp, xp, ip->i_number,
1153		    IFTODT(ip->i_mode),
1154		    (doingdirectory && newparent) ? newparent : doingdirectory);
1155		if (error)
1156			goto bad;
1157		if (doingdirectory) {
1158			if (!newparent) {
1159				dp->i_effnlink--;
1160				if (DOINGSOFTDEP(tdvp))
1161					softdep_change_linkcnt(dp);
1162			}
1163			xp->i_effnlink--;
1164			if (DOINGSOFTDEP(tvp))
1165				softdep_change_linkcnt(xp);
1166		}
1167		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1168			/*
1169			 * Truncate inode. The only stuff left in the directory
1170			 * is "." and "..". The "." reference is inconsequential
1171			 * since we are quashing it. We have removed the "."
1172			 * reference and the reference in the parent directory,
1173			 * but there may be other hard links. The soft
1174			 * dependency code will arrange to do these operations
1175			 * after the parent directory entry has been deleted on
1176			 * disk, so when running with that code we avoid doing
1177			 * them now.
1178			 */
1179			if (!newparent) {
1180				dp->i_nlink--;
1181				dp->i_flag |= IN_CHANGE;
1182			}
1183			xp->i_nlink--;
1184			xp->i_flag |= IN_CHANGE;
1185			ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
1186			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
1187			    tcnp->cn_cred, tcnp->cn_proc)) != 0)
1188				goto bad;
1189		}
1190		VN_KNOTE(tdvp, NOTE_WRITE);
1191		vput(tdvp);
1192		VN_KNOTE(tvp, NOTE_DELETE);
1193		vput(tvp);
1194		xp = NULL;
1195	}
1196
1197	/*
1198	 * 3) Unlink the source.
1199	 */
1200	fcnp->cn_flags &= ~MODMASK;
1201	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1202	if ((fcnp->cn_flags & SAVESTART) == 0)
1203		panic("ufs_rename: lost from startdir");
1204	VREF(fdvp);
1205	error = relookup(fdvp, &fvp, fcnp);
1206	if (error == 0)
1207		vrele(fdvp);
1208	if (fvp != NULL) {
1209		xp = VTOI(fvp);
1210		dp = VTOI(fdvp);
1211	} else {
1212		/*
1213		 * From name has disappeared.
1214		 */
1215		if (doingdirectory)
1216			panic("ufs_rename: lost dir entry");
1217		vrele(ap->a_fvp);
1218		return (0);
1219	}
1220	/*
1221	 * Ensure that the directory entry still exists and has not
1222	 * changed while the new name has been entered. If the source is
1223	 * a file then the entry may have been unlinked or renamed. In
1224	 * either case there is no further work to be done. If the source
1225	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
1226	 * flag ensures that it cannot be moved by another rename or removed
1227	 * by a rmdir.
1228	 */
1229	if (xp != ip) {
1230		if (doingdirectory)
1231			panic("ufs_rename: lost dir entry");
1232	} else {
1233		/*
1234		 * If the source is a directory with a
1235		 * new parent, the link count of the old
1236		 * parent directory must be decremented
1237		 * and ".." set to point to the new parent.
1238		 */
1239		if (doingdirectory && newparent) {
1240			xp->i_offset = mastertemplate.dot_reclen;
1241			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
1242			cache_purge(fdvp);
1243		}
1244		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
1245		xp->i_flag &= ~IN_RENAME;
1246	}
1247	VN_KNOTE(fvp, NOTE_RENAME);
1248	if (dp)
1249		vput(fdvp);
1250	if (xp)
1251		vput(fvp);
1252	vrele(ap->a_fvp);
1253	return (error);
1254
1255bad:
1256	if (xp)
1257		vput(ITOV(xp));
1258	vput(ITOV(dp));
1259out:
1260	if (doingdirectory)
1261		ip->i_flag &= ~IN_RENAME;
1262	if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
1263		ip->i_effnlink--;
1264		ip->i_nlink--;
1265		ip->i_flag |= IN_CHANGE;
1266		ip->i_flag &= ~IN_RENAME;
1267		if (DOINGSOFTDEP(fvp))
1268			softdep_change_linkcnt(ip);
1269		vput(fvp);
1270	} else
1271		vrele(fvp);
1272	return (error);
1273}
1274
1275/*
1276 * Mkdir system call
1277 */
1278int
1279ufs_mkdir(ap)
1280	struct vop_mkdir_args /* {
1281		struct vnode *a_dvp;
1282		struct vnode **a_vpp;
1283		struct componentname *a_cnp;
1284		struct vattr *a_vap;
1285	} */ *ap;
1286{
1287	register struct vnode *dvp = ap->a_dvp;
1288	register struct vattr *vap = ap->a_vap;
1289	register struct componentname *cnp = ap->a_cnp;
1290	register struct inode *ip, *dp;
1291	struct vnode *tvp;
1292	struct buf *bp;
1293	struct dirtemplate dirtemplate, *dtp;
1294	struct direct newdir;
1295	int error, dmode;
1296	long blkoff;
1297
1298#ifdef DIAGNOSTIC
1299	if ((cnp->cn_flags & HASBUF) == 0)
1300		panic("ufs_mkdir: no name");
1301#endif
1302	dp = VTOI(dvp);
1303	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1304		error = EMLINK;
1305		goto out;
1306	}
1307	dmode = vap->va_mode & 0777;
1308	dmode |= IFDIR;
1309	/*
1310	 * Must simulate part of ufs_makeinode here to acquire the inode,
1311	 * but not have it entered in the parent directory. The entry is
1312	 * made later after writing "." and ".." entries.
1313	 */
1314	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1315	if (error)
1316		goto out;
1317	ip = VTOI(tvp);
1318	ip->i_gid = dp->i_gid;
1319#ifdef SUIDDIR
1320	{
1321#ifdef QUOTA
1322		struct ucred ucred, *ucp;
1323		ucp = cnp->cn_cred;
1324#endif
1325		/*
1326		 * If we are hacking owners here, (only do this where told to)
1327		 * and we are not giving it TO root, (would subvert quotas)
1328		 * then go ahead and give it to the other user.
1329		 * The new directory also inherits the SUID bit.
1330		 * If user's UID and dir UID are the same,
1331		 * 'give it away' so that the SUID is still forced on.
1332		 */
1333		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1334		    (dp->i_mode & ISUID) && dp->i_uid) {
1335			dmode |= ISUID;
1336			ip->i_uid = dp->i_uid;
1337#ifdef QUOTA
1338			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1339				/*
1340				 * Make sure the correct user gets charged
1341				 * for the space.
1342				 * Make a dummy credential for the victim.
1343				 * XXX This seems to never be accessed out of
1344				 * our context so a stack variable is ok.
1345				 */
1346				ucred.cr_ref = 1;
1347				ucred.cr_uid = ip->i_uid;
1348				ucred.cr_ngroups = 1;
1349				ucred.cr_groups[0] = dp->i_gid;
1350				ucp = &ucred;
1351			}
1352#endif
1353		} else
1354			ip->i_uid = cnp->cn_cred->cr_uid;
1355#ifdef QUOTA
1356		if ((error = getinoquota(ip)) ||
1357	    	    (error = chkiq(ip, 1, ucp, 0))) {
1358			UFS_VFREE(tvp, ip->i_number, dmode);
1359			vput(tvp);
1360			return (error);
1361		}
1362#endif
1363	}
1364#else	/* !SUIDDIR */
1365	ip->i_uid = cnp->cn_cred->cr_uid;
1366#ifdef QUOTA
1367	if ((error = getinoquota(ip)) ||
1368	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1369		UFS_VFREE(tvp, ip->i_number, dmode);
1370		vput(tvp);
1371		return (error);
1372	}
1373#endif
1374#endif	/* !SUIDDIR */
1375	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1376	ip->i_mode = dmode;
1377	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1378	ip->i_effnlink = 2;
1379	ip->i_nlink = 2;
1380	if (DOINGSOFTDEP(tvp))
1381		softdep_change_linkcnt(ip);
1382	if (cnp->cn_flags & ISWHITEOUT)
1383		ip->i_flags |= UF_OPAQUE;
1384
1385	/*
1386	 * Bump link count in parent directory to reflect work done below.
1387	 * Should be done before reference is created so cleanup is
1388	 * possible if we crash.
1389	 */
1390	dp->i_effnlink++;
1391	dp->i_nlink++;
1392	dp->i_flag |= IN_CHANGE;
1393	if (DOINGSOFTDEP(dvp))
1394		softdep_change_linkcnt(dp);
1395	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1396	if (error)
1397		goto bad;
1398
1399	/*
1400	 * Initialize directory with "." and ".." from static template.
1401	 */
1402	if (dvp->v_mount->mnt_maxsymlinklen > 0
1403	)
1404		dtp = &mastertemplate;
1405	else
1406		dtp = (struct dirtemplate *)&omastertemplate;
1407	dirtemplate = *dtp;
1408	dirtemplate.dot_ino = ip->i_number;
1409	dirtemplate.dotdot_ino = dp->i_number;
1410	if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1411	    B_CLRBUF, &bp)) != 0)
1412		goto bad;
1413	ip->i_size = DIRBLKSIZ;
1414	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1415	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1416	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1417	if (DOINGSOFTDEP(tvp)) {
1418		/*
1419		 * Ensure that the entire newly allocated block is a
1420		 * valid directory so that future growth within the
1421		 * block does not have to ensure that the block is
1422		 * written before the inode.
1423		 */
1424		blkoff = DIRBLKSIZ;
1425		while (blkoff < bp->b_bcount) {
1426			((struct direct *)
1427			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1428			blkoff += DIRBLKSIZ;
1429		}
1430	}
1431	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1432				       DOINGASYNC(tvp)))) != 0) {
1433		(void)BUF_WRITE(bp);
1434		goto bad;
1435	}
1436	/*
1437	 * Directory set up, now install its entry in the parent directory.
1438	 *
1439	 * If we are not doing soft dependencies, then we must write out the
1440	 * buffer containing the new directory body before entering the new
1441	 * name in the parent. If we are doing soft dependencies, then the
1442	 * buffer containing the new directory body will be passed to and
1443	 * released in the soft dependency code after the code has attached
1444	 * an appropriate ordering dependency to the buffer which ensures that
1445	 * the buffer is written before the new name is written in the parent.
1446	 */
1447	if (DOINGASYNC(dvp))
1448		bdwrite(bp);
1449	else if (!DOINGSOFTDEP(dvp) && ((error = BUF_WRITE(bp))))
1450		goto bad;
1451	ufs_makedirentry(ip, cnp, &newdir);
1452	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
1453
1454bad:
1455	if (error == 0) {
1456		VN_KNOTE(dvp, NOTE_WRITE);
1457		*ap->a_vpp = tvp;
1458	} else {
1459		dp->i_effnlink--;
1460		dp->i_nlink--;
1461		dp->i_flag |= IN_CHANGE;
1462		if (DOINGSOFTDEP(dvp))
1463			softdep_change_linkcnt(dp);
1464		/*
1465		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1466		 * do this for us because we set the link count to 0.
1467		 */
1468		ip->i_effnlink = 0;
1469		ip->i_nlink = 0;
1470		ip->i_flag |= IN_CHANGE;
1471		if (DOINGSOFTDEP(tvp))
1472			softdep_change_linkcnt(ip);
1473		vput(tvp);
1474	}
1475out:
1476	return (error);
1477}
1478
1479/*
1480 * Rmdir system call.
1481 */
1482int
1483ufs_rmdir(ap)
1484	struct vop_rmdir_args /* {
1485		struct vnode *a_dvp;
1486		struct vnode *a_vp;
1487		struct componentname *a_cnp;
1488	} */ *ap;
1489{
1490	struct vnode *vp = ap->a_vp;
1491	struct vnode *dvp = ap->a_dvp;
1492	struct componentname *cnp = ap->a_cnp;
1493	struct inode *ip, *dp;
1494	int error, ioflag;
1495
1496	ip = VTOI(vp);
1497	dp = VTOI(dvp);
1498
1499	/*
1500	 * Do not remove a directory that is in the process of being renamed.
1501	 * Verify the directory is empty (and valid). Rmdir ".." will not be
1502	 * valid since ".." will contain a reference to the current directory
1503	 * and thus be non-empty. Do not allow the removal of mounted on
1504	 * directories (this can happen when an NFS exported filesystem
1505	 * tries to remove a locally mounted on directory).
1506	 */
1507	error = 0;
1508	if (ip->i_flag & IN_RENAME) {
1509		error = EINVAL;
1510		goto out;
1511	}
1512	if (ip->i_effnlink != 2 ||
1513	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1514		error = ENOTEMPTY;
1515		goto out;
1516	}
1517	if ((dp->i_flags & APPEND)
1518	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1519		error = EPERM;
1520		goto out;
1521	}
1522	if (vp->v_mountedhere != 0) {
1523		error = EINVAL;
1524		goto out;
1525	}
1526	/*
1527	 * Delete reference to directory before purging
1528	 * inode.  If we crash in between, the directory
1529	 * will be reattached to lost+found,
1530	 */
1531	dp->i_effnlink--;
1532	ip->i_effnlink--;
1533	if (DOINGSOFTDEP(vp)) {
1534		softdep_change_linkcnt(dp);
1535		softdep_change_linkcnt(ip);
1536	}
1537	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
1538	if (error) {
1539		dp->i_effnlink++;
1540		ip->i_effnlink++;
1541		if (DOINGSOFTDEP(vp)) {
1542			softdep_change_linkcnt(dp);
1543			softdep_change_linkcnt(ip);
1544		}
1545		goto out;
1546	}
1547	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1548	cache_purge(dvp);
1549	/*
1550	 * Truncate inode. The only stuff left in the directory is "." and
1551	 * "..". The "." reference is inconsequential since we are quashing
1552	 * it. The soft dependency code will arrange to do these operations
1553	 * after the parent directory entry has been deleted on disk, so
1554	 * when running with that code we avoid doing them now.
1555	 */
1556	if (!DOINGSOFTDEP(vp)) {
1557		dp->i_nlink--;
1558		dp->i_flag |= IN_CHANGE;
1559		ip->i_nlink--;
1560		ip->i_flag |= IN_CHANGE;
1561		ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
1562		error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
1563		    cnp->cn_proc);
1564	}
1565	cache_purge(vp);
1566out:
1567	VN_KNOTE(vp, NOTE_DELETE);
1568	return (error);
1569}
1570
1571/*
1572 * symlink -- make a symbolic link
1573 */
1574int
1575ufs_symlink(ap)
1576	struct vop_symlink_args /* {
1577		struct vnode *a_dvp;
1578		struct vnode **a_vpp;
1579		struct componentname *a_cnp;
1580		struct vattr *a_vap;
1581		char *a_target;
1582	} */ *ap;
1583{
1584	register struct vnode *vp, **vpp = ap->a_vpp;
1585	register struct inode *ip;
1586	int len, error;
1587
1588	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1589	    vpp, ap->a_cnp);
1590	if (error)
1591		return (error);
1592	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1593	vp = *vpp;
1594	len = strlen(ap->a_target);
1595	if (len < vp->v_mount->mnt_maxsymlinklen) {
1596		ip = VTOI(vp);
1597		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
1598		ip->i_size = len;
1599		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1600	} else
1601		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1602		    UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0,
1603		    (struct proc *)0);
1604	if (error)
1605		vput(vp);
1606	return (error);
1607}
1608
1609/*
1610 * Vnode op for reading directories.
1611 *
1612 * The routine below assumes that the on-disk format of a directory
1613 * is the same as that defined by <sys/dirent.h>. If the on-disk
1614 * format changes, then it will be necessary to do a conversion
1615 * from the on-disk format that read returns to the format defined
1616 * by <sys/dirent.h>.
1617 */
1618int
1619ufs_readdir(ap)
1620	struct vop_readdir_args /* {
1621		struct vnode *a_vp;
1622		struct uio *a_uio;
1623		struct ucred *a_cred;
1624		int *a_eofflag;
1625		int *ncookies;
1626		u_long **a_cookies;
1627	} */ *ap;
1628{
1629	register struct uio *uio = ap->a_uio;
1630	int error;
1631	size_t count, lost;
1632	off_t off;
1633
1634	if (ap->a_ncookies != NULL)
1635		/*
1636		 * Ensure that the block is aligned.  The caller can use
1637		 * the cookies to determine where in the block to start.
1638		 */
1639		uio->uio_offset &= ~(DIRBLKSIZ - 1);
1640	off = uio->uio_offset;
1641	count = uio->uio_resid;
1642	/* Make sure we don't return partial entries. */
1643	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
1644		return (EINVAL);
1645	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
1646	lost = uio->uio_resid - count;
1647	uio->uio_resid = count;
1648	uio->uio_iov->iov_len = count;
1649#	if (BYTE_ORDER == LITTLE_ENDIAN)
1650		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
1651			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1652		} else {
1653			struct dirent *dp, *edp;
1654			struct uio auio;
1655			struct iovec aiov;
1656			caddr_t dirbuf;
1657			int readcnt;
1658			u_char tmp;
1659
1660			auio = *uio;
1661			auio.uio_iov = &aiov;
1662			auio.uio_iovcnt = 1;
1663			auio.uio_segflg = UIO_SYSSPACE;
1664			aiov.iov_len = count;
1665			MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
1666			aiov.iov_base = dirbuf;
1667			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
1668			if (error == 0) {
1669				readcnt = count - auio.uio_resid;
1670				edp = (struct dirent *)&dirbuf[readcnt];
1671				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
1672					tmp = dp->d_namlen;
1673					dp->d_namlen = dp->d_type;
1674					dp->d_type = tmp;
1675					if (dp->d_reclen > 0) {
1676						dp = (struct dirent *)
1677						    ((char *)dp + dp->d_reclen);
1678					} else {
1679						error = EIO;
1680						break;
1681					}
1682				}
1683				if (dp >= edp)
1684					error = uiomove(dirbuf, readcnt, uio);
1685			}
1686			FREE(dirbuf, M_TEMP);
1687		}
1688#	else
1689		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1690#	endif
1691	if (!error && ap->a_ncookies != NULL) {
1692		struct dirent* dpStart;
1693		struct dirent* dpEnd;
1694		struct dirent* dp;
1695		int ncookies;
1696		u_long *cookies;
1697		u_long *cookiep;
1698
1699		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1700			panic("ufs_readdir: unexpected uio from NFS server");
1701		dpStart = (struct dirent *)
1702		     (uio->uio_iov->iov_base - (uio->uio_offset - off));
1703		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
1704		for (dp = dpStart, ncookies = 0;
1705		     dp < dpEnd;
1706		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
1707			ncookies++;
1708		MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
1709		    M_WAITOK);
1710		for (dp = dpStart, cookiep = cookies;
1711		     dp < dpEnd;
1712		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
1713			off += dp->d_reclen;
1714			*cookiep++ = (u_long) off;
1715		}
1716		*ap->a_ncookies = ncookies;
1717		*ap->a_cookies = cookies;
1718	}
1719	uio->uio_resid += lost;
1720	if (ap->a_eofflag)
1721	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
1722	return (error);
1723}
1724
1725/*
1726 * Return target name of a symbolic link
1727 */
1728int
1729ufs_readlink(ap)
1730	struct vop_readlink_args /* {
1731		struct vnode *a_vp;
1732		struct uio *a_uio;
1733		struct ucred *a_cred;
1734	} */ *ap;
1735{
1736	register struct vnode *vp = ap->a_vp;
1737	register struct inode *ip = VTOI(vp);
1738	int isize;
1739
1740	isize = ip->i_size;
1741	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1742	    (ip->i_din.di_blocks == 0)) {	/* XXX - for old fastlink support */
1743		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
1744		return (0);
1745	}
1746	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1747}
1748
1749/*
1750 * Calculate the logical to physical mapping if not done already,
1751 * then call the device strategy routine.
1752 *
1753 * In order to be able to swap to a file, the VOP_BMAP operation may not
1754 * deadlock on memory.  See ufs_bmap() for details.
1755 */
1756int
1757ufs_strategy(ap)
1758	struct vop_strategy_args /* {
1759		struct vnode *a_vp;
1760		struct buf *a_bp;
1761	} */ *ap;
1762{
1763	register struct buf *bp = ap->a_bp;
1764	register struct vnode *vp = ap->a_vp;
1765	register struct inode *ip;
1766	int error;
1767
1768	ip = VTOI(vp);
1769	if (vp->v_type == VBLK || vp->v_type == VCHR)
1770		panic("ufs_strategy: spec");
1771	if (bp->b_blkno == bp->b_lblkno) {
1772		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
1773		if (error) {
1774			bp->b_error = error;
1775			bp->b_ioflags |= BIO_ERROR;
1776			bufdone(bp);
1777			return (error);
1778		}
1779		if ((long)bp->b_blkno == -1)
1780			vfs_bio_clrbuf(bp);
1781	}
1782	if ((long)bp->b_blkno == -1) {
1783		bufdone(bp);
1784		return (0);
1785	}
1786	vp = ip->i_devvp;
1787	bp->b_dev = vp->v_rdev;
1788	VOP_STRATEGY(vp, bp);
1789	return (0);
1790}
1791
1792/*
1793 * Print out the contents of an inode.
1794 */
1795int
1796ufs_print(ap)
1797	struct vop_print_args /* {
1798		struct vnode *a_vp;
1799	} */ *ap;
1800{
1801	register struct vnode *vp = ap->a_vp;
1802	register struct inode *ip = VTOI(vp);
1803
1804	printf("tag VT_UFS, ino %lu, on dev %s (%d, %d)",
1805	    (u_long)ip->i_number, devtoname(ip->i_dev), major(ip->i_dev),
1806	    minor(ip->i_dev));
1807	if (vp->v_type == VFIFO)
1808		fifo_printinfo(vp);
1809	lockmgr_printinfo(&vp->v_lock);
1810	printf("\n");
1811	return (0);
1812}
1813
1814/*
1815 * Read wrapper for special devices.
1816 */
1817int
1818ufsspec_read(ap)
1819	struct vop_read_args /* {
1820		struct vnode *a_vp;
1821		struct uio *a_uio;
1822		int  a_ioflag;
1823		struct ucred *a_cred;
1824	} */ *ap;
1825{
1826	int error, resid;
1827	struct inode *ip;
1828	struct uio *uio;
1829
1830	uio = ap->a_uio;
1831	resid = uio->uio_resid;
1832	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap);
1833	/*
1834	 * The inode may have been revoked during the call, so it must not
1835	 * be accessed blindly here or in the other wrapper functions.
1836	 */
1837	ip = VTOI(ap->a_vp);
1838	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1839		ip->i_flag |= IN_ACCESS;
1840	return (error);
1841}
1842
1843/*
1844 * Write wrapper for special devices.
1845 */
1846int
1847ufsspec_write(ap)
1848	struct vop_write_args /* {
1849		struct vnode *a_vp;
1850		struct uio *a_uio;
1851		int  a_ioflag;
1852		struct ucred *a_cred;
1853	} */ *ap;
1854{
1855	int error, resid;
1856	struct inode *ip;
1857	struct uio *uio;
1858
1859	uio = ap->a_uio;
1860	resid = uio->uio_resid;
1861	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap);
1862	ip = VTOI(ap->a_vp);
1863	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1864		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1865	return (error);
1866}
1867
1868/*
1869 * Close wrapper for special devices.
1870 *
1871 * Update the times on the inode then do device close.
1872 */
1873int
1874ufsspec_close(ap)
1875	struct vop_close_args /* {
1876		struct vnode *a_vp;
1877		int  a_fflag;
1878		struct ucred *a_cred;
1879		struct proc *a_p;
1880	} */ *ap;
1881{
1882	struct vnode *vp = ap->a_vp;
1883
1884	mtx_enter(&vp->v_interlock, MTX_DEF);
1885	if (vp->v_usecount > 1)
1886		ufs_itimes(vp);
1887	mtx_exit(&vp->v_interlock, MTX_DEF);
1888	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
1889}
1890
1891/*
1892 * Read wrapper for fifos.
1893 */
1894int
1895ufsfifo_read(ap)
1896	struct vop_read_args /* {
1897		struct vnode *a_vp;
1898		struct uio *a_uio;
1899		int  a_ioflag;
1900		struct ucred *a_cred;
1901	} */ *ap;
1902{
1903	int error, resid;
1904	struct inode *ip;
1905	struct uio *uio;
1906
1907	uio = ap->a_uio;
1908	resid = uio->uio_resid;
1909	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap);
1910	ip = VTOI(ap->a_vp);
1911	if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
1912	    (uio->uio_resid != resid || (error == 0 && resid != 0)))
1913		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1914	return (error);
1915}
1916
1917/*
1918 * Write wrapper for fifos.
1919 */
1920int
1921ufsfifo_write(ap)
1922	struct vop_write_args /* {
1923		struct vnode *a_vp;
1924		struct uio *a_uio;
1925		int  a_ioflag;
1926		struct ucred *a_cred;
1927	} */ *ap;
1928{
1929	int error, resid;
1930	struct inode *ip;
1931	struct uio *uio;
1932
1933	uio = ap->a_uio;
1934	resid = uio->uio_resid;
1935	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap);
1936	ip = VTOI(ap->a_vp);
1937	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1938		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1939	return (error);
1940}
1941
1942/*
1943 * Close wrapper for fifos.
1944 *
1945 * Update the times on the inode then do device close.
1946 */
1947int
1948ufsfifo_close(ap)
1949	struct vop_close_args /* {
1950		struct vnode *a_vp;
1951		int  a_fflag;
1952		struct ucred *a_cred;
1953		struct proc *a_p;
1954	} */ *ap;
1955{
1956	struct vnode *vp = ap->a_vp;
1957
1958	mtx_enter(&vp->v_interlock, MTX_DEF);
1959	if (vp->v_usecount > 1)
1960		ufs_itimes(vp);
1961	mtx_exit(&vp->v_interlock, MTX_DEF);
1962	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
1963}
1964
1965/*
1966 * Return POSIX pathconf information applicable to ufs filesystems.
1967 */
1968int
1969ufs_pathconf(ap)
1970	struct vop_pathconf_args /* {
1971		struct vnode *a_vp;
1972		int a_name;
1973		int *a_retval;
1974	} */ *ap;
1975{
1976
1977	switch (ap->a_name) {
1978	case _PC_LINK_MAX:
1979		*ap->a_retval = LINK_MAX;
1980		return (0);
1981	case _PC_NAME_MAX:
1982		*ap->a_retval = NAME_MAX;
1983		return (0);
1984	case _PC_PATH_MAX:
1985		*ap->a_retval = PATH_MAX;
1986		return (0);
1987	case _PC_PIPE_BUF:
1988		*ap->a_retval = PIPE_BUF;
1989		return (0);
1990	case _PC_CHOWN_RESTRICTED:
1991		*ap->a_retval = 1;
1992		return (0);
1993	case _PC_NO_TRUNC:
1994		*ap->a_retval = 1;
1995		return (0);
1996	default:
1997		return (EINVAL);
1998	}
1999	/* NOTREACHED */
2000}
2001
2002/*
2003 * Advisory record locking support
2004 */
2005int
2006ufs_advlock(ap)
2007	struct vop_advlock_args /* {
2008		struct vnode *a_vp;
2009		caddr_t  a_id;
2010		int  a_op;
2011		struct flock *a_fl;
2012		int  a_flags;
2013	} */ *ap;
2014{
2015	register struct inode *ip = VTOI(ap->a_vp);
2016
2017	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
2018}
2019
2020/*
2021 * Initialize the vnode associated with a new inode, handle aliased
2022 * vnodes.
2023 */
2024int
2025ufs_vinit(mntp, specops, fifoops, vpp)
2026	struct mount *mntp;
2027	vop_t **specops;
2028	vop_t **fifoops;
2029	struct vnode **vpp;
2030{
2031	struct inode *ip;
2032	struct vnode *vp;
2033	struct timeval tv;
2034
2035	vp = *vpp;
2036	ip = VTOI(vp);
2037	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2038	case VCHR:
2039	case VBLK:
2040		vp->v_op = specops;
2041		vp = addaliasu(vp, ip->i_rdev);
2042		ip->i_vnode = vp;
2043		break;
2044	case VFIFO:
2045		vp->v_op = fifoops;
2046		break;
2047	default:
2048		break;
2049
2050	}
2051	if (ip->i_number == ROOTINO)
2052		vp->v_flag |= VROOT;
2053	/*
2054	 * Initialize modrev times
2055	 */
2056	getmicrouptime(&tv);
2057	SETHIGH(ip->i_modrev, tv.tv_sec);
2058	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
2059	*vpp = vp;
2060	return (0);
2061}
2062
2063/*
2064 * Allocate a new inode.
2065 */
2066int
2067ufs_makeinode(mode, dvp, vpp, cnp)
2068	int mode;
2069	struct vnode *dvp;
2070	struct vnode **vpp;
2071	struct componentname *cnp;
2072{
2073	register struct inode *ip, *pdir;
2074	struct direct newdir;
2075	struct vnode *tvp;
2076	int error;
2077
2078	pdir = VTOI(dvp);
2079#ifdef DIAGNOSTIC
2080	if ((cnp->cn_flags & HASBUF) == 0)
2081		panic("ufs_makeinode: no name");
2082#endif
2083	*vpp = NULL;
2084	if ((mode & IFMT) == 0)
2085		mode |= IFREG;
2086
2087	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2088	if (error)
2089		return (error);
2090	ip = VTOI(tvp);
2091	ip->i_gid = pdir->i_gid;
2092#ifdef SUIDDIR
2093	{
2094#ifdef QUOTA
2095		struct ucred ucred, *ucp;
2096		ucp = cnp->cn_cred;
2097#endif
2098		/*
2099		 * If we are not the owner of the directory,
2100		 * and we are hacking owners here, (only do this where told to)
2101		 * and we are not giving it TO root, (would subvert quotas)
2102		 * then go ahead and give it to the other user.
2103		 * Note that this drops off the execute bits for security.
2104		 */
2105		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2106		    (pdir->i_mode & ISUID) &&
2107		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2108			ip->i_uid = pdir->i_uid;
2109			mode &= ~07111;
2110#ifdef QUOTA
2111			/*
2112			 * Make sure the correct user gets charged
2113			 * for the space.
2114			 * Quickly knock up a dummy credential for the victim.
2115			 * XXX This seems to never be accessed out of our
2116			 * context so a stack variable is ok.
2117			 */
2118			ucred.cr_ref = 1;
2119			ucred.cr_uid = ip->i_uid;
2120			ucred.cr_ngroups = 1;
2121			ucred.cr_groups[0] = pdir->i_gid;
2122			ucp = &ucred;
2123#endif
2124		} else
2125			ip->i_uid = cnp->cn_cred->cr_uid;
2126
2127#ifdef QUOTA
2128		if ((error = getinoquota(ip)) ||
2129	    	    (error = chkiq(ip, 1, ucp, 0))) {
2130			UFS_VFREE(tvp, ip->i_number, mode);
2131			vput(tvp);
2132			return (error);
2133		}
2134#endif
2135	}
2136#else	/* !SUIDDIR */
2137	ip->i_uid = cnp->cn_cred->cr_uid;
2138#ifdef QUOTA
2139	if ((error = getinoquota(ip)) ||
2140	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2141		UFS_VFREE(tvp, ip->i_number, mode);
2142		vput(tvp);
2143		return (error);
2144	}
2145#endif
2146#endif	/* !SUIDDIR */
2147	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2148	ip->i_mode = mode;
2149	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2150	ip->i_effnlink = 1;
2151	ip->i_nlink = 1;
2152	if (DOINGSOFTDEP(tvp))
2153		softdep_change_linkcnt(ip);
2154	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2155	    suser_xxx(cnp->cn_cred, NULL, PRISON_ROOT))
2156		ip->i_mode &= ~ISGID;
2157
2158	if (cnp->cn_flags & ISWHITEOUT)
2159		ip->i_flags |= UF_OPAQUE;
2160
2161	/*
2162	 * Make sure inode goes to disk before directory entry.
2163	 */
2164	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2165	if (error)
2166		goto bad;
2167	ufs_makedirentry(ip, cnp, &newdir);
2168	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
2169	if (error)
2170		goto bad;
2171	*vpp = tvp;
2172	return (0);
2173
2174bad:
2175	/*
2176	 * Write error occurred trying to update the inode
2177	 * or the directory so must deallocate the inode.
2178	 */
2179	ip->i_effnlink = 0;
2180	ip->i_nlink = 0;
2181	ip->i_flag |= IN_CHANGE;
2182	if (DOINGSOFTDEP(tvp))
2183		softdep_change_linkcnt(ip);
2184	vput(tvp);
2185	return (error);
2186}
2187
2188static int
2189ufs_missingop(ap)
2190	struct vop_generic_args *ap;
2191{
2192
2193	panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name);
2194	return (EOPNOTSUPP);
2195}
2196
2197/* Global vfs data structures for ufs. */
2198static vop_t **ufs_vnodeop_p;
2199static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
2200	{ &vop_default_desc,		(vop_t *) vop_defaultop },
2201	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2202	{ &vop_read_desc,		(vop_t *) ufs_missingop },
2203	{ &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
2204	{ &vop_write_desc,		(vop_t *) ufs_missingop },
2205	{ &vop_access_desc,		(vop_t *) ufs_access },
2206	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
2207	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
2208	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
2209	{ &vop_close_desc,		(vop_t *) ufs_close },
2210	{ &vop_create_desc,		(vop_t *) ufs_create },
2211	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2212	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2213	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2214	{ &vop_link_desc,		(vop_t *) ufs_link },
2215	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2216	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
2217	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
2218	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
2219	{ &vop_mmap_desc,		(vop_t *) ufs_mmap },
2220	{ &vop_open_desc,		(vop_t *) ufs_open },
2221	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
2222	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
2223	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
2224	{ &vop_print_desc,		(vop_t *) ufs_print },
2225	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
2226	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
2227	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2228	{ &vop_remove_desc,		(vop_t *) ufs_remove },
2229	{ &vop_rename_desc,		(vop_t *) ufs_rename },
2230	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
2231	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2232	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
2233	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
2234	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2235	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
2236	{ NULL, NULL }
2237};
2238static struct vnodeopv_desc ufs_vnodeop_opv_desc =
2239	{ &ufs_vnodeop_p, ufs_vnodeop_entries };
2240
2241static vop_t **ufs_specop_p;
2242static struct vnodeopv_entry_desc ufs_specop_entries[] = {
2243	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
2244	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2245	{ &vop_access_desc,		(vop_t *) ufs_access },
2246	{ &vop_close_desc,		(vop_t *) ufsspec_close },
2247	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2248	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2249	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2250	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2251	{ &vop_print_desc,		(vop_t *) ufs_print },
2252	{ &vop_read_desc,		(vop_t *) ufsspec_read },
2253	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2254	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2255	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2256	{ &vop_write_desc,		(vop_t *) ufsspec_write },
2257	{ NULL, NULL }
2258};
2259static struct vnodeopv_desc ufs_specop_opv_desc =
2260	{ &ufs_specop_p, ufs_specop_entries };
2261
2262static vop_t **ufs_fifoop_p;
2263static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
2264	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
2265	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2266	{ &vop_access_desc,		(vop_t *) ufs_access },
2267	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
2268	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2269	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2270	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2271	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2272	{ &vop_print_desc,		(vop_t *) ufs_print },
2273	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
2274	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2275	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2276	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2277	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
2278	{ NULL, NULL }
2279};
2280static struct vnodeopv_desc ufs_fifoop_opv_desc =
2281	{ &ufs_fifoop_p, ufs_fifoop_entries };
2282
/*
 * Hand each of the three ufs vector descriptions to VNODEOP_SET
 * (presumably this registers them so the vectors get filled in --
 * confirm against the macro's definition).
 */
VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);
2286
2287int
2288ufs_vnoperate(ap)
2289	struct vop_generic_args /* {
2290		struct vnodeop_desc *a_desc;
2291	} */ *ap;
2292{
2293	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2294}
2295
2296int
2297ufs_vnoperatefifo(ap)
2298	struct vop_generic_args /* {
2299		struct vnodeop_desc *a_desc;
2300	} */ *ap;
2301{
2302	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
2303}
2304
2305int
2306ufs_vnoperatespec(ap)
2307	struct vop_generic_args /* {
2308		struct vnodeop_desc *a_desc;
2309	} */ *ap;
2310{
2311	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
2312}
2313