ufs_vnops.c revision 121205
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 121205 2003-10-18 14:10:28Z phk $");
43
44#include "opt_mac.h"
45#include "opt_quota.h"
46#include "opt_suiddir.h"
47#include "opt_ufs.h"
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/malloc.h>
52#include <sys/namei.h>
53#include <sys/kernel.h>
54#include <sys/fcntl.h>
55#include <sys/stat.h>
56#include <sys/bio.h>
57#include <sys/buf.h>
58#include <sys/mount.h>
59#include <sys/unistd.h>
60#include <sys/vnode.h>
61#include <sys/dirent.h>
62#include <sys/lockf.h>
63#include <sys/event.h>
64#include <sys/conf.h>
65#include <sys/acl.h>
66#include <sys/mac.h>
67
68#include <machine/mutex.h>
69
70#include <sys/file.h>		/* XXX */
71
72#include <vm/vm.h>
73#include <vm/vm_extern.h>
74
75#include <fs/fifofs/fifo.h>
76
77#include <ufs/ufs/acl.h>
78#include <ufs/ufs/extattr.h>
79#include <ufs/ufs/quota.h>
80#include <ufs/ufs/inode.h>
81#include <ufs/ufs/dir.h>
82#include <ufs/ufs/ufsmount.h>
83#include <ufs/ufs/ufs_extern.h>
84#ifdef UFS_DIRHASH
85#include <ufs/ufs/dirhash.h>
86#endif
87
88static int ufs_access(struct vop_access_args *);
89static int ufs_advlock(struct vop_advlock_args *);
90static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
91static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
92static int ufs_close(struct vop_close_args *);
93static int ufs_create(struct vop_create_args *);
94static int ufs_getattr(struct vop_getattr_args *);
95static int ufs_link(struct vop_link_args *);
96static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
97static int ufs_mkdir(struct vop_mkdir_args *);
98static int ufs_mknod(struct vop_mknod_args *);
99static int ufs_open(struct vop_open_args *);
100static int ufs_pathconf(struct vop_pathconf_args *);
101static int ufs_print(struct vop_print_args *);
102static int ufs_readlink(struct vop_readlink_args *);
103static int ufs_remove(struct vop_remove_args *);
104static int ufs_rename(struct vop_rename_args *);
105static int ufs_rmdir(struct vop_rmdir_args *);
106static int ufs_setattr(struct vop_setattr_args *);
107static int ufs_strategy(struct vop_strategy_args *);
108static int ufs_symlink(struct vop_symlink_args *);
109static int ufs_whiteout(struct vop_whiteout_args *);
110static int ufsfifo_close(struct vop_close_args *);
111static int ufsfifo_kqfilter(struct vop_kqfilter_args *);
112static int ufsfifo_read(struct vop_read_args *);
113static int ufsfifo_write(struct vop_write_args *);
114static int ufsspec_close(struct vop_close_args *);
115static int ufsspec_read(struct vop_read_args *);
116static int ufsspec_write(struct vop_write_args *);
117static int filt_ufsread(struct knote *kn, long hint);
118static int filt_ufswrite(struct knote *kn, long hint);
119static int filt_ufsvnode(struct knote *kn, long hint);
120static void filt_ufsdetach(struct knote *kn);
121static int ufs_kqfilter(struct vop_kqfilter_args *ap);
122
/*
 * Overlay used to address the two 32-bit halves of a 64-bit quantity.
 * Which array slot is the high or low half is selected by
 * _QUAD_HIGHWORD / _QUAD_LOWWORD, which must be defined wherever the
 * macros below are expanded.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};

/*
 * SETHIGH(q, h): replace the high 32 bits of the lvalue q with h.
 * SETLOW(q, l):  replace the low 32 bits of the lvalue q with l.
 *
 * Both expand to a single statement (do { } while (0)) so that they can
 * be followed by a semicolon anywhere a statement is allowed, including
 * as the unbraced body of an if that has an else.  q is evaluated twice.
 */
#define SETHIGH(q, h) do { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
} while (0)
#define SETLOW(q, l) do { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
} while (0)
139
140/*
141 * A virgin directory (no blushing please).
142 */
143static struct dirtemplate mastertemplate = {
144	0, 12, DT_DIR, 1, ".",
145	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
146};
147static struct odirtemplate omastertemplate = {
148	0, 12, 1, ".",
149	0, DIRBLKSIZ - 12, 2, ".."
150};
151
152void
153ufs_itimes(vp)
154	struct vnode *vp;
155{
156	struct inode *ip;
157	struct timespec ts;
158
159	ip = VTOI(vp);
160	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
161		return;
162	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
163		ip->i_flag |= IN_LAZYMOD;
164	else
165		ip->i_flag |= IN_MODIFIED;
166	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
167		vfs_timestamp(&ts);
168		if (ip->i_flag & IN_ACCESS) {
169			DIP(ip, i_atime) = ts.tv_sec;
170			DIP(ip, i_atimensec) = ts.tv_nsec;
171		}
172		if (ip->i_flag & IN_UPDATE) {
173			DIP(ip, i_mtime) = ts.tv_sec;
174			DIP(ip, i_mtimensec) = ts.tv_nsec;
175			ip->i_modrev++;
176		}
177		if (ip->i_flag & IN_CHANGE) {
178			DIP(ip, i_ctime) = ts.tv_sec;
179			DIP(ip, i_ctimensec) = ts.tv_nsec;
180		}
181	}
182	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
183}
184
185/*
186 * Create a regular file
187 */
188static int
189ufs_create(ap)
190	struct vop_create_args /* {
191		struct vnode *a_dvp;
192		struct vnode **a_vpp;
193		struct componentname *a_cnp;
194		struct vattr *a_vap;
195	} */ *ap;
196{
197	int error;
198
199	error =
200	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
201	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
202	if (error)
203		return (error);
204	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
205	return (0);
206}
207
208/*
209 * Mknod vnode call
210 */
211/* ARGSUSED */
212static int
213ufs_mknod(ap)
214	struct vop_mknod_args /* {
215		struct vnode *a_dvp;
216		struct vnode **a_vpp;
217		struct componentname *a_cnp;
218		struct vattr *a_vap;
219	} */ *ap;
220{
221	struct vattr *vap = ap->a_vap;
222	struct vnode **vpp = ap->a_vpp;
223	struct inode *ip;
224	ino_t ino;
225	int error;
226
227	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
228	    ap->a_dvp, vpp, ap->a_cnp);
229	if (error)
230		return (error);
231	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
232	ip = VTOI(*vpp);
233	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
234	if (vap->va_rdev != VNOVAL) {
235		/*
236		 * Want to be able to use this to make badblock
237		 * inodes, so don't truncate the dev number.
238		 */
239		DIP(ip, i_rdev) = vap->va_rdev;
240	}
241	/*
242	 * Remove inode, then reload it through VFS_VGET so it is
243	 * checked to see if it is an alias of an existing entry in
244	 * the inode cache.
245	 */
246	vput(*vpp);
247	(*vpp)->v_type = VNON;
248	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
249	vgone(*vpp);
250	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
251	if (error) {
252		*vpp = NULL;
253		return (error);
254	}
255	return (0);
256}
257
258/*
259 * Open called.
260 *
261 * Nothing to do.
262 */
263/* ARGSUSED */
264static int
265ufs_open(ap)
266	struct vop_open_args /* {
267		struct vnode *a_vp;
268		int  a_mode;
269		struct ucred *a_cred;
270		struct thread *a_td;
271	} */ *ap;
272{
273
274	/*
275	 * Files marked append-only must be opened for appending.
276	 */
277	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
278	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
279		return (EPERM);
280	return (0);
281}
282
283/*
284 * Close called.
285 *
286 * Update the times on the inode.
287 */
288/* ARGSUSED */
289static int
290ufs_close(ap)
291	struct vop_close_args /* {
292		struct vnode *a_vp;
293		int  a_fflag;
294		struct ucred *a_cred;
295		struct thread *a_td;
296	} */ *ap;
297{
298	struct vnode *vp = ap->a_vp;
299	struct mount *mp;
300
301	VI_LOCK(vp);
302	if (vp->v_usecount > 1) {
303		ufs_itimes(vp);
304		VI_UNLOCK(vp);
305	} else {
306		VI_UNLOCK(vp);
307		/*
308		 * If we are closing the last reference to an unlinked
309		 * file, then it will be freed by the inactive routine.
310		 * Because the freeing causes a the filesystem to be
311		 * modified, it must be held up during periods when the
312		 * filesystem is suspended.
313		 *
314		 * XXX - EAGAIN is returned to prevent vn_close from
315		 * repeating the vrele operation.
316		 */
317		if (vp->v_type == VREG && VTOI(vp)->i_effnlink == 0) {
318			(void) vn_start_write(vp, &mp, V_WAIT);
319			vrele(vp);
320			vn_finished_write(mp);
321			return (EAGAIN);
322		}
323	}
324	return (0);
325}
326
327static int
328ufs_access(ap)
329	struct vop_access_args /* {
330		struct vnode *a_vp;
331		int  a_mode;
332		struct ucred *a_cred;
333		struct thread *a_td;
334	} */ *ap;
335{
336	struct vnode *vp = ap->a_vp;
337	struct inode *ip = VTOI(vp);
338	mode_t mode = ap->a_mode;
339	int error;
340#ifdef UFS_ACL
341	struct acl *acl;
342#endif
343
344	/*
345	 * Disallow write attempts on read-only filesystems;
346	 * unless the file is a socket, fifo, or a block or
347	 * character device resident on the filesystem.
348	 */
349	if (mode & VWRITE) {
350		switch (vp->v_type) {
351		case VDIR:
352		case VLNK:
353		case VREG:
354			if (vp->v_mount->mnt_flag & MNT_RDONLY)
355				return (EROFS);
356#ifdef QUOTA
357			if ((error = getinoquota(ip)) != 0)
358				return (error);
359#endif
360			break;
361		default:
362			break;
363		}
364	}
365
366	/* If immutable bit set, nobody gets to write it. */
367	if ((mode & VWRITE) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
368		return (EPERM);
369
370#ifdef UFS_ACL
371	if ((vp->v_mount->mnt_flag & MNT_ACLS) != 0) {
372		MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, M_WAITOK);
373		error = VOP_GETACL(vp, ACL_TYPE_ACCESS, acl, ap->a_cred,
374		    ap->a_td);
375		switch (error) {
376		case EOPNOTSUPP:
377			error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
378			    ip->i_gid, ap->a_mode, ap->a_cred, NULL);
379			break;
380		case 0:
381			error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
382			    ip->i_gid, acl, ap->a_mode, ap->a_cred, NULL);
383			break;
384		default:
385			printf(
386"ufs_access(): Error retrieving ACL on object (%d).\n",
387			    error);
388			/*
389			 * XXX: Fall back until debugged.  Should
390			 * eventually possibly log an error, and return
391			 * EPERM for safety.
392			 */
393			error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
394			    ip->i_gid, ap->a_mode, ap->a_cred, NULL);
395		}
396		FREE(acl, M_ACL);
397	} else
398#endif /* !UFS_ACL */
399		error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
400		    ap->a_mode, ap->a_cred, NULL);
401	return (error);
402}
403
404/* ARGSUSED */
405static int
406ufs_getattr(ap)
407	struct vop_getattr_args /* {
408		struct vnode *a_vp;
409		struct vattr *a_vap;
410		struct ucred *a_cred;
411		struct thread *a_td;
412	} */ *ap;
413{
414	struct vnode *vp = ap->a_vp;
415	struct inode *ip = VTOI(vp);
416	struct vattr *vap = ap->a_vap;
417
418	ufs_itimes(vp);
419	/*
420	 * Copy from inode table
421	 */
422	vap->va_fsid = dev2udev(ip->i_dev);
423	vap->va_fileid = ip->i_number;
424	vap->va_mode = ip->i_mode & ~IFMT;
425	vap->va_nlink = ip->i_effnlink;
426	vap->va_uid = ip->i_uid;
427	vap->va_gid = ip->i_gid;
428	if (ip->i_ump->um_fstype == UFS1) {
429		vap->va_rdev = ip->i_din1->di_rdev;
430		vap->va_size = ip->i_din1->di_size;
431		vap->va_atime.tv_sec = ip->i_din1->di_atime;
432		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
433		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
434		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
435		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
436		vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
437		vap->va_birthtime.tv_sec = 0;
438		vap->va_birthtime.tv_nsec = 0;
439		vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
440	} else {
441		vap->va_rdev = ip->i_din2->di_rdev;
442		vap->va_size = ip->i_din2->di_size;
443		vap->va_atime.tv_sec = ip->i_din2->di_atime;
444		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
445		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
446		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
447		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
448		vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
449		vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
450		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
451		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
452	}
453	vap->va_flags = ip->i_flags;
454	vap->va_gen = ip->i_gen;
455	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
456	vap->va_type = IFTOVT(ip->i_mode);
457	vap->va_filerev = ip->i_modrev;
458	return (0);
459}
460
461/*
462 * Set attribute vnode op. called from several syscalls
463 */
464static int
465ufs_setattr(ap)
466	struct vop_setattr_args /* {
467		struct vnode *a_vp;
468		struct vattr *a_vap;
469		struct ucred *a_cred;
470		struct thread *a_td;
471	} */ *ap;
472{
473	struct vattr *vap = ap->a_vap;
474	struct vnode *vp = ap->a_vp;
475	struct inode *ip = VTOI(vp);
476	struct ucred *cred = ap->a_cred;
477	struct thread *td = ap->a_td;
478	int error;
479
480	/*
481	 * Check for unsettable attributes.
482	 */
483	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
484	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
485	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
486	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
487		return (EINVAL);
488	}
489	if (vap->va_flags != VNOVAL) {
490		if (vp->v_mount->mnt_flag & MNT_RDONLY)
491			return (EROFS);
492		/*
493		 * Callers may only modify the file flags on objects they
494		 * have VADMIN rights for.
495		 */
496		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
497			return (error);
498		/*
499		 * Unprivileged processes and privileged processes in
500		 * jail() are not permitted to unset system flags, or
501		 * modify flags if any system flags are set.
502		 * Privileged non-jail processes may not modify system flags
503		 * if securelevel > 0 and any existing system flags are set.
504		 */
505		if (!suser_cred(cred, PRISON_ROOT)) {
506			if (ip->i_flags
507			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
508				error = securelevel_gt(cred, 0);
509				if (error)
510					return (error);
511			}
512			/* Snapshot flag cannot be set or cleared */
513			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
514			     (ip->i_flags & SF_SNAPSHOT) == 0) ||
515			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
516			     (ip->i_flags & SF_SNAPSHOT) != 0))
517				return (EPERM);
518			ip->i_flags = vap->va_flags;
519			DIP(ip, i_flags) = vap->va_flags;
520		} else {
521			if (ip->i_flags
522			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
523			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
524				return (EPERM);
525			ip->i_flags &= SF_SETTABLE;
526			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
527			DIP(ip, i_flags) = ip->i_flags;
528		}
529		ip->i_flag |= IN_CHANGE;
530		if (vap->va_flags & (IMMUTABLE | APPEND))
531			return (0);
532	}
533	if (ip->i_flags & (IMMUTABLE | APPEND))
534		return (EPERM);
535	/*
536	 * Go through the fields and update iff not VNOVAL.
537	 */
538	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
539		if (vp->v_mount->mnt_flag & MNT_RDONLY)
540			return (EROFS);
541		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
542		    td)) != 0)
543			return (error);
544	}
545	if (vap->va_size != VNOVAL) {
546		/*
547		 * Disallow write attempts on read-only filesystems;
548		 * unless the file is a socket, fifo, or a block or
549		 * character device resident on the filesystem.
550		 */
551		switch (vp->v_type) {
552		case VDIR:
553			return (EISDIR);
554		case VLNK:
555		case VREG:
556			if (vp->v_mount->mnt_flag & MNT_RDONLY)
557				return (EROFS);
558			if ((ip->i_flags & SF_SNAPSHOT) != 0)
559				return (EPERM);
560			break;
561		default:
562			break;
563		}
564		if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
565		    cred, td)) != 0)
566			return (error);
567	}
568	if (vap->va_atime.tv_sec != VNOVAL ||
569	    vap->va_mtime.tv_sec != VNOVAL ||
570	    vap->va_birthtime.tv_sec != VNOVAL) {
571		if (vp->v_mount->mnt_flag & MNT_RDONLY)
572			return (EROFS);
573		if ((ip->i_flags & SF_SNAPSHOT) != 0)
574			return (EPERM);
575		/*
576		 * From utimes(2):
577		 * If times is NULL, ... The caller must be the owner of
578		 * the file, have permission to write the file, or be the
579		 * super-user.
580		 * If times is non-NULL, ... The caller must be the owner of
581		 * the file or be the super-user.
582		 */
583		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) &&
584		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
585		    (error = VOP_ACCESS(vp, VWRITE, cred, td))))
586			return (error);
587		if (vap->va_atime.tv_sec != VNOVAL)
588			ip->i_flag |= IN_ACCESS;
589		if (vap->va_mtime.tv_sec != VNOVAL)
590			ip->i_flag |= IN_CHANGE | IN_UPDATE;
591		if (vap->va_birthtime.tv_sec != VNOVAL &&
592		    ip->i_ump->um_fstype == UFS2)
593			ip->i_flag |= IN_MODIFIED;
594		ufs_itimes(vp);
595		if (vap->va_atime.tv_sec != VNOVAL) {
596			DIP(ip, i_atime) = vap->va_atime.tv_sec;
597			DIP(ip, i_atimensec) = vap->va_atime.tv_nsec;
598		}
599		if (vap->va_mtime.tv_sec != VNOVAL) {
600			DIP(ip, i_mtime) = vap->va_mtime.tv_sec;
601			DIP(ip, i_mtimensec) = vap->va_mtime.tv_nsec;
602		}
603		if (vap->va_birthtime.tv_sec != VNOVAL &&
604		    ip->i_ump->um_fstype == UFS2) {
605			ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
606			ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
607		}
608		error = UFS_UPDATE(vp, 0);
609		if (error)
610			return (error);
611	}
612	error = 0;
613	if (vap->va_mode != (mode_t)VNOVAL) {
614		if (vp->v_mount->mnt_flag & MNT_RDONLY)
615			return (EROFS);
616		if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
617		   (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
618			return (EPERM);
619		error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
620	}
621	VN_KNOTE(vp, NOTE_ATTRIB);
622	return (error);
623}
624
625/*
626 * Change the mode on a file.
627 * Inode must be locked before calling.
628 */
629static int
630ufs_chmod(vp, mode, cred, td)
631	struct vnode *vp;
632	int mode;
633	struct ucred *cred;
634	struct thread *td;
635{
636	struct inode *ip = VTOI(vp);
637	int error;
638
639	/*
640	 * To modify the permissions on a file, must possess VADMIN
641	 * for that file.
642	 */
643	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
644		return (error);
645	/*
646	 * Privileged processes may set the sticky bit on non-directories,
647	 * as well as set the setgid bit on a file with a group that the
648	 * process is not a member of.  Both of these are allowed in
649	 * jail(8).
650	 */
651	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
652		if (suser_cred(cred, PRISON_ROOT))
653			return (EFTYPE);
654	}
655	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
656		error = suser_cred(cred, PRISON_ROOT);
657		if (error)
658			return (error);
659	}
660	ip->i_mode &= ~ALLPERMS;
661	ip->i_mode |= (mode & ALLPERMS);
662	DIP(ip, i_mode) = ip->i_mode;
663	ip->i_flag |= IN_CHANGE;
664	return (0);
665}
666
667/*
668 * Perform chown operation on inode ip;
669 * inode must be locked prior to call.
670 */
671static int
672ufs_chown(vp, uid, gid, cred, td)
673	struct vnode *vp;
674	uid_t uid;
675	gid_t gid;
676	struct ucred *cred;
677	struct thread *td;
678{
679	struct inode *ip = VTOI(vp);
680	uid_t ouid;
681	gid_t ogid;
682	int error = 0;
683#ifdef QUOTA
684	int i;
685	ufs2_daddr_t change;
686#endif
687
688	if (uid == (uid_t)VNOVAL)
689		uid = ip->i_uid;
690	if (gid == (gid_t)VNOVAL)
691		gid = ip->i_gid;
692	/*
693	 * To modify the ownership of a file, must possess VADMIN
694	 * for that file.
695	 */
696	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
697		return (error);
698	/*
699	 * To change the owner of a file, or change the group of a file
700	 * to a group of which we are not a member, the caller must
701	 * have privilege.
702	 */
703	if ((uid != ip->i_uid ||
704	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
705	    (error = suser_cred(cred, PRISON_ROOT)))
706		return (error);
707	ogid = ip->i_gid;
708	ouid = ip->i_uid;
709#ifdef QUOTA
710	if ((error = getinoquota(ip)) != 0)
711		return (error);
712	if (ouid == uid) {
713		dqrele(vp, ip->i_dquot[USRQUOTA]);
714		ip->i_dquot[USRQUOTA] = NODQUOT;
715	}
716	if (ogid == gid) {
717		dqrele(vp, ip->i_dquot[GRPQUOTA]);
718		ip->i_dquot[GRPQUOTA] = NODQUOT;
719	}
720	change = DIP(ip, i_blocks);
721	(void) chkdq(ip, -change, cred, CHOWN);
722	(void) chkiq(ip, -1, cred, CHOWN);
723	for (i = 0; i < MAXQUOTAS; i++) {
724		dqrele(vp, ip->i_dquot[i]);
725		ip->i_dquot[i] = NODQUOT;
726	}
727#endif
728	ip->i_gid = gid;
729	DIP(ip, i_gid) = gid;
730	ip->i_uid = uid;
731	DIP(ip, i_uid) = uid;
732#ifdef QUOTA
733	if ((error = getinoquota(ip)) == 0) {
734		if (ouid == uid) {
735			dqrele(vp, ip->i_dquot[USRQUOTA]);
736			ip->i_dquot[USRQUOTA] = NODQUOT;
737		}
738		if (ogid == gid) {
739			dqrele(vp, ip->i_dquot[GRPQUOTA]);
740			ip->i_dquot[GRPQUOTA] = NODQUOT;
741		}
742		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
743			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
744				goto good;
745			else
746				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
747		}
748		for (i = 0; i < MAXQUOTAS; i++) {
749			dqrele(vp, ip->i_dquot[i]);
750			ip->i_dquot[i] = NODQUOT;
751		}
752	}
753	ip->i_gid = ogid;
754	DIP(ip, i_gid) = ogid;
755	ip->i_uid = ouid;
756	DIP(ip, i_uid) = ouid;
757	if (getinoquota(ip) == 0) {
758		if (ouid == uid) {
759			dqrele(vp, ip->i_dquot[USRQUOTA]);
760			ip->i_dquot[USRQUOTA] = NODQUOT;
761		}
762		if (ogid == gid) {
763			dqrele(vp, ip->i_dquot[GRPQUOTA]);
764			ip->i_dquot[GRPQUOTA] = NODQUOT;
765		}
766		(void) chkdq(ip, change, cred, FORCE|CHOWN);
767		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
768		(void) getinoquota(ip);
769	}
770	return (error);
771good:
772	if (getinoquota(ip))
773		panic("ufs_chown: lost quota");
774#endif /* QUOTA */
775	ip->i_flag |= IN_CHANGE;
776	if (suser_cred(cred, PRISON_ROOT) && (ouid != uid || ogid != gid)) {
777		ip->i_mode &= ~(ISUID | ISGID);
778		DIP(ip, i_mode) = ip->i_mode;
779	}
780	return (0);
781}
782
783static int
784ufs_remove(ap)
785	struct vop_remove_args /* {
786		struct vnode *a_dvp;
787		struct vnode *a_vp;
788		struct componentname *a_cnp;
789	} */ *ap;
790{
791	struct inode *ip;
792	struct vnode *vp = ap->a_vp;
793	struct vnode *dvp = ap->a_dvp;
794	int error;
795
796	ip = VTOI(vp);
797	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
798	    (VTOI(dvp)->i_flags & APPEND)) {
799		error = EPERM;
800		goto out;
801	}
802	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
803	if (ip->i_nlink <= 0)
804		vp->v_vflag |= VV_NOSYNC;
805	VN_KNOTE(vp, NOTE_DELETE);
806	VN_KNOTE(dvp, NOTE_WRITE);
807out:
808	return (error);
809}
810
811/*
812 * link vnode call
813 */
814static int
815ufs_link(ap)
816	struct vop_link_args /* {
817		struct vnode *a_tdvp;
818		struct vnode *a_vp;
819		struct componentname *a_cnp;
820	} */ *ap;
821{
822	struct vnode *vp = ap->a_vp;
823	struct vnode *tdvp = ap->a_tdvp;
824	struct componentname *cnp = ap->a_cnp;
825	struct inode *ip;
826	struct direct newdir;
827	int error;
828
829#ifdef DIAGNOSTIC
830	if ((cnp->cn_flags & HASBUF) == 0)
831		panic("ufs_link: no name");
832#endif
833	if (tdvp->v_mount != vp->v_mount) {
834		error = EXDEV;
835		goto out;
836	}
837	ip = VTOI(vp);
838	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
839		error = EMLINK;
840		goto out;
841	}
842	if (ip->i_flags & (IMMUTABLE | APPEND)) {
843		error = EPERM;
844		goto out;
845	}
846	ip->i_effnlink++;
847	ip->i_nlink++;
848	DIP(ip, i_nlink) = ip->i_nlink;
849	ip->i_flag |= IN_CHANGE;
850	if (DOINGSOFTDEP(vp))
851		softdep_change_linkcnt(ip);
852	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
853	if (!error) {
854		ufs_makedirentry(ip, cnp, &newdir);
855		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
856	}
857
858	if (error) {
859		ip->i_effnlink--;
860		ip->i_nlink--;
861		DIP(ip, i_nlink) = ip->i_nlink;
862		ip->i_flag |= IN_CHANGE;
863		if (DOINGSOFTDEP(vp))
864			softdep_change_linkcnt(ip);
865	}
866out:
867	VN_KNOTE(vp, NOTE_LINK);
868	VN_KNOTE(tdvp, NOTE_WRITE);
869	return (error);
870}
871
872/*
873 * whiteout vnode call
874 */
875static int
876ufs_whiteout(ap)
877	struct vop_whiteout_args /* {
878		struct vnode *a_dvp;
879		struct componentname *a_cnp;
880		int a_flags;
881	} */ *ap;
882{
883	struct vnode *dvp = ap->a_dvp;
884	struct componentname *cnp = ap->a_cnp;
885	struct direct newdir;
886	int error = 0;
887
888	switch (ap->a_flags) {
889	case LOOKUP:
890		/* 4.4 format directories support whiteout operations */
891		if (dvp->v_mount->mnt_maxsymlinklen > 0)
892			return (0);
893		return (EOPNOTSUPP);
894
895	case CREATE:
896		/* create a new directory whiteout */
897#ifdef DIAGNOSTIC
898		if ((cnp->cn_flags & SAVENAME) == 0)
899			panic("ufs_whiteout: missing name");
900		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
901			panic("ufs_whiteout: old format filesystem");
902#endif
903
904		newdir.d_ino = WINO;
905		newdir.d_namlen = cnp->cn_namelen;
906		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
907		newdir.d_type = DT_WHT;
908		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
909		break;
910
911	case DELETE:
912		/* remove an existing directory whiteout */
913#ifdef DIAGNOSTIC
914		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
915			panic("ufs_whiteout: old format filesystem");
916#endif
917
918		cnp->cn_flags &= ~DOWHITEOUT;
919		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
920		break;
921	default:
922		panic("ufs_whiteout: unknown op");
923	}
924	return (error);
925}
926
927/*
928 * Rename system call.
929 * 	rename("foo", "bar");
930 * is essentially
931 *	unlink("bar");
932 *	link("foo", "bar");
933 *	unlink("foo");
934 * but ``atomically''.  Can't do full commit without saving state in the
935 * inode on disk which isn't feasible at this time.  Best we can do is
936 * always guarantee the target exists.
937 *
938 * Basic algorithm is:
939 *
940 * 1) Bump link count on source while we're linking it to the
941 *    target.  This also ensures the inode won't be deleted out
942 *    from underneath us while we work (it may be truncated by
943 *    a concurrent `trunc' or `open' for creation).
944 * 2) Link source to destination.  If destination already exists,
945 *    delete it first.
946 * 3) Unlink source reference to inode if still around. If a
947 *    directory was moved and the parent of the destination
948 *    is different from the source, patch the ".." entry in the
949 *    directory.
950 */
951static int
952ufs_rename(ap)
953	struct vop_rename_args  /* {
954		struct vnode *a_fdvp;
955		struct vnode *a_fvp;
956		struct componentname *a_fcnp;
957		struct vnode *a_tdvp;
958		struct vnode *a_tvp;
959		struct componentname *a_tcnp;
960	} */ *ap;
961{
962	struct vnode *tvp = ap->a_tvp;
963	struct vnode *tdvp = ap->a_tdvp;
964	struct vnode *fvp = ap->a_fvp;
965	struct vnode *fdvp = ap->a_fdvp;
966	struct componentname *tcnp = ap->a_tcnp;
967	struct componentname *fcnp = ap->a_fcnp;
968	struct thread *td = fcnp->cn_thread;
969	struct inode *ip, *xp, *dp;
970	struct direct newdir;
971	int doingdirectory = 0, oldparent = 0, newparent = 0;
972	int error = 0, ioflag;
973
974#ifdef DIAGNOSTIC
975	if ((tcnp->cn_flags & HASBUF) == 0 ||
976	    (fcnp->cn_flags & HASBUF) == 0)
977		panic("ufs_rename: no name");
978#endif
979	/*
980	 * Check for cross-device rename.
981	 */
982	if ((fvp->v_mount != tdvp->v_mount) ||
983	    (tvp && (fvp->v_mount != tvp->v_mount))) {
984		error = EXDEV;
985abortit:
986		if (tdvp == tvp)
987			vrele(tdvp);
988		else
989			vput(tdvp);
990		if (tvp)
991			vput(tvp);
992		vrele(fdvp);
993		vrele(fvp);
994		return (error);
995	}
996
997	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
998	    (VTOI(tdvp)->i_flags & APPEND))) {
999		error = EPERM;
1000		goto abortit;
1001	}
1002
1003	/*
1004	 * Renaming a file to itself has no effect.  The upper layers should
1005	 * not call us in that case.  Temporarily just warn if they do.
1006	 */
1007	if (fvp == tvp) {
1008		printf("ufs_rename: fvp == tvp (can't happen)\n");
1009		error = 0;
1010		goto abortit;
1011	}
1012
1013	if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0)
1014		goto abortit;
1015	dp = VTOI(fdvp);
1016	ip = VTOI(fvp);
1017	if (ip->i_nlink >= LINK_MAX) {
1018		VOP_UNLOCK(fvp, 0, td);
1019		error = EMLINK;
1020		goto abortit;
1021	}
1022	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
1023	    || (dp->i_flags & APPEND)) {
1024		VOP_UNLOCK(fvp, 0, td);
1025		error = EPERM;
1026		goto abortit;
1027	}
1028	if ((ip->i_mode & IFMT) == IFDIR) {
1029		/*
1030		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1031		 */
1032		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1033		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
1034		    (ip->i_flag & IN_RENAME)) {
1035			VOP_UNLOCK(fvp, 0, td);
1036			error = EINVAL;
1037			goto abortit;
1038		}
1039		ip->i_flag |= IN_RENAME;
1040		oldparent = dp->i_number;
1041		doingdirectory = 1;
1042	}
1043	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXX right place? */
1044	vrele(fdvp);
1045
1046	/*
1047	 * When the target exists, both the directory
1048	 * and target vnodes are returned locked.
1049	 */
1050	dp = VTOI(tdvp);
1051	xp = NULL;
1052	if (tvp)
1053		xp = VTOI(tvp);
1054
1055	/*
1056	 * 1) Bump link count while we're moving stuff
1057	 *    around.  If we crash somewhere before
1058	 *    completing our work, the link count
1059	 *    may be wrong, but correctable.
1060	 */
1061	ip->i_effnlink++;
1062	ip->i_nlink++;
1063	DIP(ip, i_nlink) = ip->i_nlink;
1064	ip->i_flag |= IN_CHANGE;
1065	if (DOINGSOFTDEP(fvp))
1066		softdep_change_linkcnt(ip);
1067	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
1068				       DOINGASYNC(fvp)))) != 0) {
1069		VOP_UNLOCK(fvp, 0, td);
1070		goto bad;
1071	}
1072
1073	/*
1074	 * If ".." must be changed (ie the directory gets a new
1075	 * parent) then the source directory must not be in the
1076	 * directory hierarchy above the target, as this would
1077	 * orphan everything below the source directory. Also
1078	 * the user must have write permission in the source so
1079	 * as to be able to change "..". We must repeat the call
1080	 * to namei, as the parent directory is unlocked by the
1081	 * call to checkpath().
1082	 */
1083	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
1084	VOP_UNLOCK(fvp, 0, td);
1085	if (oldparent != dp->i_number)
1086		newparent = dp->i_number;
1087	if (doingdirectory && newparent) {
1088		if (error)	/* write access check above */
1089			goto bad;
1090		if (xp != NULL)
1091			vput(tvp);
1092		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
1093		if (error)
1094			goto out;
1095		if ((tcnp->cn_flags & SAVESTART) == 0)
1096			panic("ufs_rename: lost to startdir");
1097		VREF(tdvp);
1098		error = relookup(tdvp, &tvp, tcnp);
1099		if (error)
1100			goto out;
1101		vrele(tdvp);
1102		dp = VTOI(tdvp);
1103		xp = NULL;
1104		if (tvp)
1105			xp = VTOI(tvp);
1106	}
1107	/*
1108	 * 2) If target doesn't exist, link the target
1109	 *    to the source and unlink the source.
1110	 *    Otherwise, rewrite the target directory
1111	 *    entry to reference the source inode and
1112	 *    expunge the original entry's existence.
1113	 */
1114	if (xp == NULL) {
1115		if (dp->i_dev != ip->i_dev)
1116			panic("ufs_rename: EXDEV");
1117		/*
1118		 * Account for ".." in new directory.
1119		 * When source and destination have the same
1120		 * parent we don't fool with the link count.
1121		 */
1122		if (doingdirectory && newparent) {
1123			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1124				error = EMLINK;
1125				goto bad;
1126			}
1127			dp->i_effnlink++;
1128			dp->i_nlink++;
1129			DIP(dp, i_nlink) = dp->i_nlink;
1130			dp->i_flag |= IN_CHANGE;
1131			if (DOINGSOFTDEP(tdvp))
1132				softdep_change_linkcnt(dp);
1133			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1134						   DOINGASYNC(tdvp)));
1135			if (error)
1136				goto bad;
1137		}
1138		ufs_makedirentry(ip, tcnp, &newdir);
1139		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
1140		if (error) {
1141			if (doingdirectory && newparent) {
1142				dp->i_effnlink--;
1143				dp->i_nlink--;
1144				DIP(dp, i_nlink) = dp->i_nlink;
1145				dp->i_flag |= IN_CHANGE;
1146				if (DOINGSOFTDEP(tdvp))
1147					softdep_change_linkcnt(dp);
1148				(void)UFS_UPDATE(tdvp, 1);
1149			}
1150			goto bad;
1151		}
1152		VN_KNOTE(tdvp, NOTE_WRITE);
1153		vput(tdvp);
1154	} else {
1155		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1156			panic("ufs_rename: EXDEV");
1157		/*
1158		 * Short circuit rename(foo, foo).
1159		 */
1160		if (xp->i_number == ip->i_number)
1161			panic("ufs_rename: same file");
1162		/*
1163		 * If the parent directory is "sticky", then the caller
1164		 * must possess VADMIN for the parent directory, or the
1165		 * destination of the rename.  This implements append-only
1166		 * directories.
1167		 */
1168		if ((dp->i_mode & S_ISTXT) &&
1169		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
1170		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
1171			error = EPERM;
1172			goto bad;
1173		}
1174		/*
1175		 * Target must be empty if a directory and have no links
1176		 * to it. Also, ensure source and target are compatible
1177		 * (both directories, or both not directories).
1178		 */
1179		if ((xp->i_mode&IFMT) == IFDIR) {
1180			if ((xp->i_effnlink > 2) ||
1181			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
1182				error = ENOTEMPTY;
1183				goto bad;
1184			}
1185			if (!doingdirectory) {
1186				error = ENOTDIR;
1187				goto bad;
1188			}
1189			cache_purge(tdvp);
1190		} else if (doingdirectory) {
1191			error = EISDIR;
1192			goto bad;
1193		}
1194		error = ufs_dirrewrite(dp, xp, ip->i_number,
1195		    IFTODT(ip->i_mode),
1196		    (doingdirectory && newparent) ? newparent : doingdirectory);
1197		if (error)
1198			goto bad;
1199		if (doingdirectory) {
1200			if (!newparent) {
1201				dp->i_effnlink--;
1202				if (DOINGSOFTDEP(tdvp))
1203					softdep_change_linkcnt(dp);
1204			}
1205			xp->i_effnlink--;
1206			if (DOINGSOFTDEP(tvp))
1207				softdep_change_linkcnt(xp);
1208		}
1209		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1210			/*
1211			 * Truncate inode. The only stuff left in the directory
1212			 * is "." and "..". The "." reference is inconsequential
1213			 * since we are quashing it. We have removed the "."
1214			 * reference and the reference in the parent directory,
1215			 * but there may be other hard links. The soft
1216			 * dependency code will arrange to do these operations
1217			 * after the parent directory entry has been deleted on
1218			 * disk, so when running with that code we avoid doing
1219			 * them now.
1220			 */
1221			if (!newparent) {
1222				dp->i_nlink--;
1223				DIP(dp, i_nlink) = dp->i_nlink;
1224				dp->i_flag |= IN_CHANGE;
1225			}
1226			xp->i_nlink--;
1227			DIP(xp, i_nlink) = xp->i_nlink;
1228			xp->i_flag |= IN_CHANGE;
1229			ioflag = IO_NORMAL;
1230			if (DOINGASYNC(tvp))
1231				ioflag |= IO_SYNC;
1232			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
1233			    tcnp->cn_cred, tcnp->cn_thread)) != 0)
1234				goto bad;
1235		}
1236		VN_KNOTE(tdvp, NOTE_WRITE);
1237		vput(tdvp);
1238		VN_KNOTE(tvp, NOTE_DELETE);
1239		vput(tvp);
1240		xp = NULL;
1241	}
1242
1243	/*
1244	 * 3) Unlink the source.
1245	 */
1246	fcnp->cn_flags &= ~MODMASK;
1247	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1248	if ((fcnp->cn_flags & SAVESTART) == 0)
1249		panic("ufs_rename: lost from startdir");
1250	VREF(fdvp);
1251	error = relookup(fdvp, &fvp, fcnp);
1252	if (error == 0)
1253		vrele(fdvp);
1254	if (fvp != NULL) {
1255		xp = VTOI(fvp);
1256		dp = VTOI(fdvp);
1257	} else {
1258		/*
1259		 * From name has disappeared.  IN_RENAME is not sufficient
1260		 * to protect against directory races due to timing windows,
1261		 * so we have to remove the panic.  XXX the only real way
1262		 * to solve this issue is at a much higher level.  By the
1263		 * time we hit ufs_rename() it's too late.
1264		 */
1265#if 0
1266		if (doingdirectory)
1267			panic("ufs_rename: lost dir entry");
1268#endif
1269		vrele(ap->a_fvp);
1270		return (0);
1271	}
1272	/*
1273	 * Ensure that the directory entry still exists and has not
1274	 * changed while the new name has been entered. If the source is
1275	 * a file then the entry may have been unlinked or renamed. In
1276	 * either case there is no further work to be done. If the source
1277	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
1278	 * flag ensures that it cannot be moved by another rename or removed
1279	 * by a rmdir.
1280	 */
1281	if (xp != ip) {
1282		/*
1283		 * From name resolves to a different inode.  IN_RENAME is
1284		 * not sufficient protection against timing window races
1285		 * so we can't panic here.  XXX the only real way
1286		 * to solve this issue is at a much higher level.  By the
1287		 * time we hit ufs_rename() it's too late.
1288		 */
1289#if 0
1290		if (doingdirectory)
1291			panic("ufs_rename: lost dir entry");
1292#endif
1293	} else {
1294		/*
1295		 * If the source is a directory with a
1296		 * new parent, the link count of the old
1297		 * parent directory must be decremented
1298		 * and ".." set to point to the new parent.
1299		 */
1300		if (doingdirectory && newparent) {
1301			xp->i_offset = mastertemplate.dot_reclen;
1302			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
1303			cache_purge(fdvp);
1304		}
1305		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
1306		xp->i_flag &= ~IN_RENAME;
1307	}
1308	VN_KNOTE(fvp, NOTE_RENAME);
1309	if (dp)
1310		vput(fdvp);
1311	if (xp)
1312		vput(fvp);
1313	vrele(ap->a_fvp);
1314	return (error);
1315
1316bad:
1317	if (xp)
1318		vput(ITOV(xp));
1319	vput(ITOV(dp));
1320out:
1321	if (doingdirectory)
1322		ip->i_flag &= ~IN_RENAME;
1323	if (vn_lock(fvp, LK_EXCLUSIVE, td) == 0) {
1324		ip->i_effnlink--;
1325		ip->i_nlink--;
1326		DIP(ip, i_nlink) = ip->i_nlink;
1327		ip->i_flag |= IN_CHANGE;
1328		ip->i_flag &= ~IN_RENAME;
1329		if (DOINGSOFTDEP(fvp))
1330			softdep_change_linkcnt(ip);
1331		vput(fvp);
1332	} else
1333		vrele(fvp);
1334	return (error);
1335}
1336
1337/*
1338 * Mkdir system call
1339 */
1340static int
1341ufs_mkdir(ap)
1342	struct vop_mkdir_args /* {
1343		struct vnode *a_dvp;
1344		struct vnode **a_vpp;
1345		struct componentname *a_cnp;
1346		struct vattr *a_vap;
1347	} */ *ap;
1348{
1349	struct vnode *dvp = ap->a_dvp;
1350	struct vattr *vap = ap->a_vap;
1351	struct componentname *cnp = ap->a_cnp;
1352	struct inode *ip, *dp;
1353	struct vnode *tvp;
1354	struct buf *bp;
1355	struct dirtemplate dirtemplate, *dtp;
1356	struct direct newdir;
1357#ifdef UFS_ACL
1358	struct acl *acl, *dacl;
1359#endif
1360	int error, dmode;
1361	long blkoff;
1362
1363#ifdef DIAGNOSTIC
1364	if ((cnp->cn_flags & HASBUF) == 0)
1365		panic("ufs_mkdir: no name");
1366#endif
1367	dp = VTOI(dvp);
1368	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1369		error = EMLINK;
1370		goto out;
1371	}
1372	dmode = vap->va_mode & 0777;
1373	dmode |= IFDIR;
1374	/*
1375	 * Must simulate part of ufs_makeinode here to acquire the inode,
1376	 * but not have it entered in the parent directory. The entry is
1377	 * made later after writing "." and ".." entries.
1378	 */
1379	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1380	if (error)
1381		goto out;
1382	ip = VTOI(tvp);
1383	ip->i_gid = dp->i_gid;
1384	DIP(ip, i_gid) = dp->i_gid;
1385#ifdef SUIDDIR
1386	{
1387#ifdef QUOTA
1388		struct ucred ucred, *ucp;
1389		ucp = cnp->cn_cred;
1390#endif
1391		/*
1392		 * If we are hacking owners here, (only do this where told to)
1393		 * and we are not giving it TO root, (would subvert quotas)
1394		 * then go ahead and give it to the other user.
1395		 * The new directory also inherits the SUID bit.
1396		 * If user's UID and dir UID are the same,
1397		 * 'give it away' so that the SUID is still forced on.
1398		 */
1399		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1400		    (dp->i_mode & ISUID) && dp->i_uid) {
1401			dmode |= ISUID;
1402			ip->i_uid = dp->i_uid;
1403			DIP(ip, i_uid) = dp->i_uid;
1404#ifdef QUOTA
1405			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1406				/*
1407				 * Make sure the correct user gets charged
1408				 * for the space.
1409				 * Make a dummy credential for the victim.
1410				 * XXX This seems to never be accessed out of
1411				 * our context so a stack variable is ok.
1412				 */
1413				ucred.cr_ref = 1;
1414				ucred.cr_uid = ip->i_uid;
1415				ucred.cr_ngroups = 1;
1416				ucred.cr_groups[0] = dp->i_gid;
1417				ucp = &ucred;
1418			}
1419#endif
1420		} else {
1421			ip->i_uid = cnp->cn_cred->cr_uid;
1422			DIP(ip, i_uid) = ip->i_uid;
1423		}
1424#ifdef QUOTA
1425		if ((error = getinoquota(ip)) ||
1426	    	    (error = chkiq(ip, 1, ucp, 0))) {
1427			UFS_VFREE(tvp, ip->i_number, dmode);
1428			vput(tvp);
1429			return (error);
1430		}
1431#endif
1432	}
1433#else	/* !SUIDDIR */
1434	ip->i_uid = cnp->cn_cred->cr_uid;
1435	DIP(ip, i_uid) = ip->i_uid;
1436#ifdef QUOTA
1437	if ((error = getinoquota(ip)) ||
1438	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1439		UFS_VFREE(tvp, ip->i_number, dmode);
1440		vput(tvp);
1441		return (error);
1442	}
1443#endif
1444#endif	/* !SUIDDIR */
1445	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1446#ifdef UFS_ACL
1447	acl = dacl = NULL;
1448	if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) {
1449		MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, M_WAITOK);
1450		MALLOC(dacl, struct acl *, sizeof(*dacl), M_ACL, M_WAITOK);
1451
1452		/*
1453		 * Retrieve default ACL from parent, if any.
1454		 */
1455		error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred,
1456		    cnp->cn_thread);
1457		switch (error) {
1458		case 0:
1459			/*
1460			 * Retrieved a default ACL, so merge mode and ACL if
1461			 * necessary.  If the ACL is empty, fall through to
1462			 * the "not defined or available" case.
1463			 */
1464			if (acl->acl_cnt != 0) {
1465				dmode = acl_posix1e_newfilemode(dmode, acl);
1466				ip->i_mode = dmode;
1467				DIP(ip, i_mode) = dmode;
1468				*dacl = *acl;
1469				ufs_sync_acl_from_inode(ip, acl);
1470				break;
1471			}
1472			/* FALLTHROUGH */
1473
1474		case EOPNOTSUPP:
1475			/*
1476			 * Just use the mode as-is.
1477			 */
1478			ip->i_mode = dmode;
1479			DIP(ip, i_mode) = dmode;
1480			FREE(acl, M_ACL);
1481			FREE(dacl, M_ACL);
1482			dacl = acl = NULL;
1483			break;
1484
1485		default:
1486			UFS_VFREE(tvp, ip->i_number, dmode);
1487			vput(tvp);
1488			FREE(acl, M_ACL);
1489				FREE(dacl, M_ACL);
1490			return (error);
1491		}
1492	} else {
1493#endif /* !UFS_ACL */
1494		ip->i_mode = dmode;
1495		DIP(ip, i_mode) = dmode;
1496#ifdef UFS_ACL
1497	}
1498#endif
1499	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1500	ip->i_effnlink = 2;
1501	ip->i_nlink = 2;
1502	DIP(ip, i_nlink) = 2;
1503	if (DOINGSOFTDEP(tvp))
1504		softdep_change_linkcnt(ip);
1505	if (cnp->cn_flags & ISWHITEOUT) {
1506		ip->i_flags |= UF_OPAQUE;
1507		DIP(ip, i_flags) = ip->i_flags;
1508	}
1509
1510	/*
1511	 * Bump link count in parent directory to reflect work done below.
1512	 * Should be done before reference is created so cleanup is
1513	 * possible if we crash.
1514	 */
1515	dp->i_effnlink++;
1516	dp->i_nlink++;
1517	DIP(dp, i_nlink) = dp->i_nlink;
1518	dp->i_flag |= IN_CHANGE;
1519	if (DOINGSOFTDEP(dvp))
1520		softdep_change_linkcnt(dp);
1521	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1522	if (error)
1523		goto bad;
1524#ifdef MAC
1525	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
1526		error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount,
1527		    dvp, tvp, cnp);
1528		if (error)
1529			goto bad;
1530	}
1531#endif
1532#ifdef UFS_ACL
1533	if (acl != NULL) {
1534		/*
1535		 * XXX: If we abort now, will Soft Updates notify the extattr
1536		 * code that the EAs for the file need to be released?
1537		 */
1538		error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred,
1539		    cnp->cn_thread);
1540		if (error == 0)
1541			error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl,
1542			    cnp->cn_cred, cnp->cn_thread);
1543		switch (error) {
1544		case 0:
1545			break;
1546
1547		case EOPNOTSUPP:
1548			/*
1549			 * XXX: This should not happen, as EOPNOTSUPP above
1550			 * was supposed to free acl.
1551			 */
1552			printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
1553			/*
1554			panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
1555			 */
1556			break;
1557
1558		default:
1559			FREE(acl, M_ACL);
1560			FREE(dacl, M_ACL);
1561			goto bad;
1562		}
1563		FREE(acl, M_ACL);
1564		FREE(dacl, M_ACL);
1565	}
1566#endif /* !UFS_ACL */
1567
1568	/*
1569	 * Initialize directory with "." and ".." from static template.
1570	 */
1571	if (dvp->v_mount->mnt_maxsymlinklen > 0
1572	)
1573		dtp = &mastertemplate;
1574	else
1575		dtp = (struct dirtemplate *)&omastertemplate;
1576	dirtemplate = *dtp;
1577	dirtemplate.dot_ino = ip->i_number;
1578	dirtemplate.dotdot_ino = dp->i_number;
1579	if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1580	    BA_CLRBUF, &bp)) != 0)
1581		goto bad;
1582	ip->i_size = DIRBLKSIZ;
1583	DIP(ip, i_size) = DIRBLKSIZ;
1584	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1585	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1586	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1587	if (DOINGSOFTDEP(tvp)) {
1588		/*
1589		 * Ensure that the entire newly allocated block is a
1590		 * valid directory so that future growth within the
1591		 * block does not have to ensure that the block is
1592		 * written before the inode.
1593		 */
1594		blkoff = DIRBLKSIZ;
1595		while (blkoff < bp->b_bcount) {
1596			((struct direct *)
1597			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1598			blkoff += DIRBLKSIZ;
1599		}
1600	}
1601	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1602				       DOINGASYNC(tvp)))) != 0) {
1603		(void)BUF_WRITE(bp);
1604		goto bad;
1605	}
1606	/*
1607	 * Directory set up, now install its entry in the parent directory.
1608	 *
1609	 * If we are not doing soft dependencies, then we must write out the
1610	 * buffer containing the new directory body before entering the new
1611	 * name in the parent. If we are doing soft dependencies, then the
1612	 * buffer containing the new directory body will be passed to and
1613	 * released in the soft dependency code after the code has attached
1614	 * an appropriate ordering dependency to the buffer which ensures that
1615	 * the buffer is written before the new name is written in the parent.
1616	 */
1617	if (DOINGASYNC(dvp))
1618		bdwrite(bp);
1619	else if (!DOINGSOFTDEP(dvp) && ((error = BUF_WRITE(bp))))
1620		goto bad;
1621	ufs_makedirentry(ip, cnp, &newdir);
1622	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
1623
1624bad:
1625	if (error == 0) {
1626		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1627		*ap->a_vpp = tvp;
1628	} else {
1629		dp->i_effnlink--;
1630		dp->i_nlink--;
1631		DIP(dp, i_nlink) = dp->i_nlink;
1632		dp->i_flag |= IN_CHANGE;
1633		if (DOINGSOFTDEP(dvp))
1634			softdep_change_linkcnt(dp);
1635		/*
1636		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1637		 * do this for us because we set the link count to 0.
1638		 */
1639		ip->i_effnlink = 0;
1640		ip->i_nlink = 0;
1641		DIP(ip, i_nlink) = 0;
1642		ip->i_flag |= IN_CHANGE;
1643		if (DOINGSOFTDEP(tvp))
1644			softdep_change_linkcnt(ip);
1645		vput(tvp);
1646	}
1647out:
1648	return (error);
1649}
1650
1651/*
1652 * Rmdir system call.
1653 */
1654static int
1655ufs_rmdir(ap)
1656	struct vop_rmdir_args /* {
1657		struct vnode *a_dvp;
1658		struct vnode *a_vp;
1659		struct componentname *a_cnp;
1660	} */ *ap;
1661{
1662	struct vnode *vp = ap->a_vp;
1663	struct vnode *dvp = ap->a_dvp;
1664	struct componentname *cnp = ap->a_cnp;
1665	struct inode *ip, *dp;
1666	int error, ioflag;
1667
1668	ip = VTOI(vp);
1669	dp = VTOI(dvp);
1670
1671	/*
1672	 * Do not remove a directory that is in the process of being renamed.
1673	 * Verify the directory is empty (and valid). Rmdir ".." will not be
1674	 * valid since ".." will contain a reference to the current directory
1675	 * and thus be non-empty. Do not allow the removal of mounted on
1676	 * directories (this can happen when an NFS exported filesystem
1677	 * tries to remove a locally mounted on directory).
1678	 */
1679	error = 0;
1680	if (ip->i_flag & IN_RENAME) {
1681		error = EINVAL;
1682		goto out;
1683	}
1684	if (ip->i_effnlink != 2 ||
1685	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1686		error = ENOTEMPTY;
1687		goto out;
1688	}
1689	if ((dp->i_flags & APPEND)
1690	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1691		error = EPERM;
1692		goto out;
1693	}
1694	if (vp->v_mountedhere != 0) {
1695		error = EINVAL;
1696		goto out;
1697	}
1698	/*
1699	 * Delete reference to directory before purging
1700	 * inode.  If we crash in between, the directory
1701	 * will be reattached to lost+found,
1702	 */
1703	dp->i_effnlink--;
1704	ip->i_effnlink--;
1705	if (DOINGSOFTDEP(vp)) {
1706		softdep_change_linkcnt(dp);
1707		softdep_change_linkcnt(ip);
1708	}
1709	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
1710	if (error) {
1711		dp->i_effnlink++;
1712		ip->i_effnlink++;
1713		if (DOINGSOFTDEP(vp)) {
1714			softdep_change_linkcnt(dp);
1715			softdep_change_linkcnt(ip);
1716		}
1717		goto out;
1718	}
1719	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1720	cache_purge(dvp);
1721	/*
1722	 * Truncate inode. The only stuff left in the directory is "." and
1723	 * "..". The "." reference is inconsequential since we are quashing
1724	 * it. The soft dependency code will arrange to do these operations
1725	 * after the parent directory entry has been deleted on disk, so
1726	 * when running with that code we avoid doing them now.
1727	 */
1728	if (!DOINGSOFTDEP(vp)) {
1729		dp->i_nlink--;
1730		DIP(dp, i_nlink) = dp->i_nlink;
1731		dp->i_flag |= IN_CHANGE;
1732		ip->i_nlink--;
1733		DIP(ip, i_nlink) = ip->i_nlink;
1734		ip->i_flag |= IN_CHANGE;
1735		ioflag = IO_NORMAL;
1736		if (DOINGASYNC(vp))
1737			ioflag |= IO_SYNC;
1738		error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
1739		    cnp->cn_thread);
1740	}
1741	cache_purge(vp);
1742#ifdef UFS_DIRHASH
1743	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
1744	if (ip->i_dirhash != NULL)
1745		ufsdirhash_free(ip);
1746#endif
1747out:
1748	VN_KNOTE(vp, NOTE_DELETE);
1749	return (error);
1750}
1751
1752/*
1753 * symlink -- make a symbolic link
1754 */
1755static int
1756ufs_symlink(ap)
1757	struct vop_symlink_args /* {
1758		struct vnode *a_dvp;
1759		struct vnode **a_vpp;
1760		struct componentname *a_cnp;
1761		struct vattr *a_vap;
1762		char *a_target;
1763	} */ *ap;
1764{
1765	struct vnode *vp, **vpp = ap->a_vpp;
1766	struct inode *ip;
1767	int len, error;
1768
1769	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1770	    vpp, ap->a_cnp);
1771	if (error)
1772		return (error);
1773	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1774	vp = *vpp;
1775	len = strlen(ap->a_target);
1776	if (len < vp->v_mount->mnt_maxsymlinklen) {
1777		ip = VTOI(vp);
1778		bcopy(ap->a_target, SHORTLINK(ip), len);
1779		ip->i_size = len;
1780		DIP(ip, i_size) = len;
1781		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1782	} else
1783		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1784		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1785		    ap->a_cnp->cn_cred, NOCRED, (int *)0, (struct thread *)0);
1786	if (error)
1787		vput(vp);
1788	return (error);
1789}
1790
1791/*
1792 * Vnode op for reading directories.
1793 *
1794 * The routine below assumes that the on-disk format of a directory
1795 * is the same as that defined by <sys/dirent.h>. If the on-disk
1796 * format changes, then it will be necessary to do a conversion
1797 * from the on-disk format that read returns to the format defined
1798 * by <sys/dirent.h>.
1799 */
1800int
1801ufs_readdir(ap)
1802	struct vop_readdir_args /* {
1803		struct vnode *a_vp;
1804		struct uio *a_uio;
1805		struct ucred *a_cred;
1806		int *a_eofflag;
1807		int *ncookies;
1808		u_long **a_cookies;
1809	} */ *ap;
1810{
1811	struct uio *uio = ap->a_uio;
1812	int error;
1813	size_t count, lost;
1814	off_t off;
1815
1816	if (ap->a_ncookies != NULL)
1817		/*
1818		 * Ensure that the block is aligned.  The caller can use
1819		 * the cookies to determine where in the block to start.
1820		 */
1821		uio->uio_offset &= ~(DIRBLKSIZ - 1);
1822	off = uio->uio_offset;
1823	count = uio->uio_resid;
1824	/* Make sure we don't return partial entries. */
1825	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
1826		return (EINVAL);
1827	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
1828	lost = uio->uio_resid - count;
1829	uio->uio_resid = count;
1830	uio->uio_iov->iov_len = count;
1831#	if (BYTE_ORDER == LITTLE_ENDIAN)
1832		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
1833			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1834		} else {
1835			struct dirent *dp, *edp;
1836			struct uio auio;
1837			struct iovec aiov;
1838			caddr_t dirbuf;
1839			int readcnt;
1840			u_char tmp;
1841
1842			auio = *uio;
1843			auio.uio_iov = &aiov;
1844			auio.uio_iovcnt = 1;
1845			auio.uio_segflg = UIO_SYSSPACE;
1846			aiov.iov_len = count;
1847			MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
1848			aiov.iov_base = dirbuf;
1849			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
1850			if (error == 0) {
1851				readcnt = count - auio.uio_resid;
1852				edp = (struct dirent *)&dirbuf[readcnt];
1853				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
1854					tmp = dp->d_namlen;
1855					dp->d_namlen = dp->d_type;
1856					dp->d_type = tmp;
1857					if (dp->d_reclen > 0) {
1858						dp = (struct dirent *)
1859						    ((char *)dp + dp->d_reclen);
1860					} else {
1861						error = EIO;
1862						break;
1863					}
1864				}
1865				if (dp >= edp)
1866					error = uiomove(dirbuf, readcnt, uio);
1867			}
1868			FREE(dirbuf, M_TEMP);
1869		}
1870#	else
1871		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1872#	endif
1873	if (!error && ap->a_ncookies != NULL) {
1874		struct dirent* dpStart;
1875		struct dirent* dpEnd;
1876		struct dirent* dp;
1877		int ncookies;
1878		u_long *cookies;
1879		u_long *cookiep;
1880
1881		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1882			panic("ufs_readdir: unexpected uio from NFS server");
1883		dpStart = (struct dirent *)
1884		    ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
1885		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
1886		for (dp = dpStart, ncookies = 0;
1887		     dp < dpEnd;
1888		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
1889			ncookies++;
1890		MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
1891		    M_WAITOK);
1892		for (dp = dpStart, cookiep = cookies;
1893		     dp < dpEnd;
1894		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
1895			off += dp->d_reclen;
1896			*cookiep++ = (u_long) off;
1897		}
1898		*ap->a_ncookies = ncookies;
1899		*ap->a_cookies = cookies;
1900	}
1901	uio->uio_resid += lost;
1902	if (ap->a_eofflag)
1903	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
1904	return (error);
1905}
1906
1907/*
1908 * Return target name of a symbolic link
1909 */
1910static int
1911ufs_readlink(ap)
1912	struct vop_readlink_args /* {
1913		struct vnode *a_vp;
1914		struct uio *a_uio;
1915		struct ucred *a_cred;
1916	} */ *ap;
1917{
1918	struct vnode *vp = ap->a_vp;
1919	struct inode *ip = VTOI(vp);
1920	doff_t isize;
1921
1922	isize = ip->i_size;
1923	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1924	    DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
1925		uiomove(SHORTLINK(ip), isize, ap->a_uio);
1926		return (0);
1927	}
1928	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1929}
1930
1931/*
1932 * Calculate the logical to physical mapping if not done already,
1933 * then call the device strategy routine.
1934 *
1935 * In order to be able to swap to a file, the ufs_bmaparray() operation may not
1936 * deadlock on memory.  See ufs_bmap() for details.
1937 */
1938static int
1939ufs_strategy(ap)
1940	struct vop_strategy_args /* {
1941		struct vnode *a_vp;
1942		struct buf *a_bp;
1943	} */ *ap;
1944{
1945	struct buf *bp = ap->a_bp;
1946	struct vnode *vp = ap->a_vp;
1947	struct inode *ip;
1948	ufs2_daddr_t blkno;
1949	int error;
1950
1951	KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
1952	    __func__, ap->a_vp, ap->a_bp->b_vp));
1953	ip = VTOI(vp);
1954	if (bp->b_blkno == bp->b_lblkno) {
1955		error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
1956		bp->b_blkno = blkno;
1957		if (error) {
1958			bp->b_error = error;
1959			bp->b_ioflags |= BIO_ERROR;
1960			bufdone(bp);
1961			return (error);
1962		}
1963		if ((long)bp->b_blkno == -1)
1964			vfs_bio_clrbuf(bp);
1965	}
1966	if ((long)bp->b_blkno == -1) {
1967		bufdone(bp);
1968		return (0);
1969	}
1970	vp = ip->i_devvp;
1971	bp->b_dev = vp->v_rdev;
1972	bp->b_iooffset = dbtob(bp->b_blkno);
1973	VOP_SPECSTRATEGY(vp, bp);
1974	return (0);
1975}
1976
1977/*
1978 * Print out the contents of an inode.
1979 */
1980static int
1981ufs_print(ap)
1982	struct vop_print_args /* {
1983		struct vnode *a_vp;
1984	} */ *ap;
1985{
1986	struct vnode *vp = ap->a_vp;
1987	struct inode *ip = VTOI(vp);
1988
1989	printf("\tino %lu, on dev %s (%d, %d)", (u_long)ip->i_number,
1990	    devtoname(ip->i_dev), major(ip->i_dev), minor(ip->i_dev));
1991	if (vp->v_type == VFIFO)
1992		fifo_printinfo(vp);
1993	printf("\n");
1994	return (0);
1995}
1996
1997/*
1998 * Read wrapper for special devices.
1999 */
2000static int
2001ufsspec_read(ap)
2002	struct vop_read_args /* {
2003		struct vnode *a_vp;
2004		struct uio *a_uio;
2005		int  a_ioflag;
2006		struct ucred *a_cred;
2007	} */ *ap;
2008{
2009	int error, resid;
2010	struct inode *ip;
2011	struct uio *uio;
2012
2013	uio = ap->a_uio;
2014	resid = uio->uio_resid;
2015	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap);
2016	/*
2017	 * The inode may have been revoked during the call, so it must not
2018	 * be accessed blindly here or in the other wrapper functions.
2019	 */
2020	ip = VTOI(ap->a_vp);
2021	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
2022		ip->i_flag |= IN_ACCESS;
2023	return (error);
2024}
2025
2026/*
2027 * Write wrapper for special devices.
2028 */
2029static int
2030ufsspec_write(ap)
2031	struct vop_write_args /* {
2032		struct vnode *a_vp;
2033		struct uio *a_uio;
2034		int  a_ioflag;
2035		struct ucred *a_cred;
2036	} */ *ap;
2037{
2038	int error, resid;
2039	struct inode *ip;
2040	struct uio *uio;
2041
2042	uio = ap->a_uio;
2043	resid = uio->uio_resid;
2044	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap);
2045	ip = VTOI(ap->a_vp);
2046	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
2047		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
2048	return (error);
2049}
2050
2051/*
2052 * Close wrapper for special devices.
2053 *
2054 * Update the times on the inode then do device close.
2055 */
2056static int
2057ufsspec_close(ap)
2058	struct vop_close_args /* {
2059		struct vnode *a_vp;
2060		int  a_fflag;
2061		struct ucred *a_cred;
2062		struct thread *a_td;
2063	} */ *ap;
2064{
2065	struct vnode *vp = ap->a_vp;
2066
2067	VI_LOCK(vp);
2068	if (vp->v_usecount > 1)
2069		ufs_itimes(vp);
2070	VI_UNLOCK(vp);
2071	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
2072}
2073
2074/*
2075 * Read wrapper for fifos.
2076 */
2077static int
2078ufsfifo_read(ap)
2079	struct vop_read_args /* {
2080		struct vnode *a_vp;
2081		struct uio *a_uio;
2082		int  a_ioflag;
2083		struct ucred *a_cred;
2084	} */ *ap;
2085{
2086	int error, resid;
2087	struct inode *ip;
2088	struct uio *uio;
2089
2090	uio = ap->a_uio;
2091	resid = uio->uio_resid;
2092	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap);
2093	ip = VTOI(ap->a_vp);
2094	if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
2095	    (uio->uio_resid != resid || (error == 0 && resid != 0)))
2096		ip->i_flag |= IN_ACCESS;
2097	return (error);
2098}
2099
2100/*
2101 * Write wrapper for fifos.
2102 */
2103static int
2104ufsfifo_write(ap)
2105	struct vop_write_args /* {
2106		struct vnode *a_vp;
2107		struct uio *a_uio;
2108		int  a_ioflag;
2109		struct ucred *a_cred;
2110	} */ *ap;
2111{
2112	int error, resid;
2113	struct inode *ip;
2114	struct uio *uio;
2115
2116	uio = ap->a_uio;
2117	resid = uio->uio_resid;
2118	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap);
2119	ip = VTOI(ap->a_vp);
2120	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
2121		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2122	return (error);
2123}
2124
2125/*
2126 * Close wrapper for fifos.
2127 *
2128 * Update the times on the inode then do device close.
2129 */
2130static int
2131ufsfifo_close(ap)
2132	struct vop_close_args /* {
2133		struct vnode *a_vp;
2134		int  a_fflag;
2135		struct ucred *a_cred;
2136		struct thread *a_td;
2137	} */ *ap;
2138{
2139	struct vnode *vp = ap->a_vp;
2140
2141	VI_LOCK(vp);
2142	if (vp->v_usecount > 1)
2143		ufs_itimes(vp);
2144	VI_UNLOCK(vp);
2145	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
2146}
2147
2148/*
2149 * Kqfilter wrapper for fifos.
2150 *
2151 * Fall through to ufs kqfilter routines if needed
2152 */
2153static int
2154ufsfifo_kqfilter(ap)
2155	struct vop_kqfilter_args *ap;
2156{
2157	int error;
2158
2159	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilter), ap);
2160	if (error)
2161		error = ufs_kqfilter(ap);
2162	return (error);
2163}
2164
2165/*
2166 * Return POSIX pathconf information applicable to ufs filesystems.
2167 */
2168static int
2169ufs_pathconf(ap)
2170	struct vop_pathconf_args /* {
2171		struct vnode *a_vp;
2172		int a_name;
2173		int *a_retval;
2174	} */ *ap;
2175{
2176	int error;
2177
2178	error = 0;
2179	switch (ap->a_name) {
2180	case _PC_LINK_MAX:
2181		*ap->a_retval = LINK_MAX;
2182		break;
2183	case _PC_NAME_MAX:
2184		*ap->a_retval = NAME_MAX;
2185		break;
2186	case _PC_PATH_MAX:
2187		*ap->a_retval = PATH_MAX;
2188		break;
2189	case _PC_PIPE_BUF:
2190		*ap->a_retval = PIPE_BUF;
2191		break;
2192	case _PC_CHOWN_RESTRICTED:
2193		*ap->a_retval = 1;
2194		break;
2195	case _PC_NO_TRUNC:
2196		*ap->a_retval = 1;
2197		break;
2198	case _PC_ACL_EXTENDED:
2199#ifdef UFS_ACL
2200		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
2201			*ap->a_retval = 1;
2202		else
2203			*ap->a_retval = 0;
2204#else
2205		*ap->a_retval = 0;
2206#endif
2207		break;
2208	case _PC_ACL_PATH_MAX:
2209#ifdef UFS_ACL
2210		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
2211			*ap->a_retval = ACL_MAX_ENTRIES;
2212		else
2213			*ap->a_retval = 3;
2214#else
2215		*ap->a_retval = 3;
2216#endif
2217		break;
2218	case _PC_MAC_PRESENT:
2219#ifdef MAC
2220		if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
2221			*ap->a_retval = 1;
2222		else
2223			*ap->a_retval = 0;
2224#else
2225		*ap->a_retval = 0;
2226#endif
2227		break;
2228	case _PC_ASYNC_IO:
2229		/* _PC_ASYNC_IO should have been handled by upper layers. */
2230		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
2231		error = EINVAL;
2232		break;
2233	case _PC_PRIO_IO:
2234		*ap->a_retval = 0;
2235		break;
2236	case _PC_SYNC_IO:
2237		*ap->a_retval = 0;
2238		break;
2239	case _PC_ALLOC_SIZE_MIN:
2240		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2241		break;
2242	case _PC_FILESIZEBITS:
2243		*ap->a_retval = 64;
2244		break;
2245	case _PC_REC_INCR_XFER_SIZE:
2246		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2247		break;
2248	case _PC_REC_MAX_XFER_SIZE:
2249		*ap->a_retval = -1; /* means ``unlimited'' */
2250		break;
2251	case _PC_REC_MIN_XFER_SIZE:
2252		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2253		break;
2254	case _PC_REC_XFER_ALIGN:
2255		*ap->a_retval = PAGE_SIZE;
2256		break;
2257	case _PC_SYMLINK_MAX:
2258		*ap->a_retval = MAXPATHLEN;
2259		break;
2260
2261	default:
2262		error = EINVAL;
2263		break;
2264	}
2265	return (error);
2266}
2267
2268/*
2269 * Advisory record locking support
2270 */
2271static int
2272ufs_advlock(ap)
2273	struct vop_advlock_args /* {
2274		struct vnode *a_vp;
2275		caddr_t  a_id;
2276		int  a_op;
2277		struct flock *a_fl;
2278		int  a_flags;
2279	} */ *ap;
2280{
2281	struct inode *ip = VTOI(ap->a_vp);
2282
2283	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
2284}
2285
2286/*
2287 * Initialize the vnode associated with a new inode, handle aliased
2288 * vnodes.
2289 */
2290int
2291ufs_vinit(mntp, specops, fifoops, vpp)
2292	struct mount *mntp;
2293	vop_t **specops;
2294	vop_t **fifoops;
2295	struct vnode **vpp;
2296{
2297	struct inode *ip;
2298	struct vnode *vp;
2299	struct timeval tv;
2300
2301	vp = *vpp;
2302	ip = VTOI(vp);
2303	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2304	case VCHR:
2305	case VBLK:
2306		vp->v_op = specops;
2307		vp = addaliasu(vp, DIP(ip, i_rdev));
2308		ip->i_vnode = vp;
2309		break;
2310	case VFIFO:
2311		vp->v_op = fifoops;
2312		break;
2313	default:
2314		break;
2315
2316	}
2317	ASSERT_VOP_LOCKED(vp, "ufs_vinit");
2318	if (ip->i_number == ROOTINO)
2319		vp->v_vflag |= VV_ROOT;
2320	/*
2321	 * Initialize modrev times
2322	 */
2323	getmicrouptime(&tv);
2324	SETHIGH(ip->i_modrev, tv.tv_sec);
2325	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
2326	*vpp = vp;
2327	return (0);
2328}
2329
2330/*
2331 * Allocate a new inode.
2332 * Vnode dvp must be locked.
2333 */
2334static int
2335ufs_makeinode(mode, dvp, vpp, cnp)
2336	int mode;
2337	struct vnode *dvp;
2338	struct vnode **vpp;
2339	struct componentname *cnp;
2340{
2341	struct inode *ip, *pdir;
2342	struct direct newdir;
2343	struct vnode *tvp;
2344#ifdef UFS_ACL
2345	struct acl *acl;
2346#endif
2347	int error;
2348
2349	pdir = VTOI(dvp);
2350#ifdef DIAGNOSTIC
2351	if ((cnp->cn_flags & HASBUF) == 0)
2352		panic("ufs_makeinode: no name");
2353#endif
2354	*vpp = NULL;
2355	if ((mode & IFMT) == 0)
2356		mode |= IFREG;
2357
2358	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2359	if (error)
2360		return (error);
2361	ip = VTOI(tvp);
2362	ip->i_gid = pdir->i_gid;
2363	DIP(ip, i_gid) = pdir->i_gid;
2364#ifdef SUIDDIR
2365	{
2366#ifdef QUOTA
2367		struct ucred ucred, *ucp;
2368		ucp = cnp->cn_cred;
2369#endif
2370		/*
2371		 * If we are not the owner of the directory,
2372		 * and we are hacking owners here, (only do this where told to)
2373		 * and we are not giving it TO root, (would subvert quotas)
2374		 * then go ahead and give it to the other user.
2375		 * Note that this drops off the execute bits for security.
2376		 */
2377		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2378		    (pdir->i_mode & ISUID) &&
2379		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2380			ip->i_uid = pdir->i_uid;
2381			DIP(ip, i_uid) = ip->i_uid;
2382			mode &= ~07111;
2383#ifdef QUOTA
2384			/*
2385			 * Make sure the correct user gets charged
2386			 * for the space.
2387			 * Quickly knock up a dummy credential for the victim.
2388			 * XXX This seems to never be accessed out of our
2389			 * context so a stack variable is ok.
2390			 */
2391			ucred.cr_ref = 1;
2392			ucred.cr_uid = ip->i_uid;
2393			ucred.cr_ngroups = 1;
2394			ucred.cr_groups[0] = pdir->i_gid;
2395			ucp = &ucred;
2396#endif
2397		} else {
2398			ip->i_uid = cnp->cn_cred->cr_uid;
2399			DIP(ip, i_uid) = ip->i_uid;
2400		}
2401
2402#ifdef QUOTA
2403		if ((error = getinoquota(ip)) ||
2404	    	    (error = chkiq(ip, 1, ucp, 0))) {
2405			UFS_VFREE(tvp, ip->i_number, mode);
2406			vput(tvp);
2407			return (error);
2408		}
2409#endif
2410	}
2411#else	/* !SUIDDIR */
2412	ip->i_uid = cnp->cn_cred->cr_uid;
2413	DIP(ip, i_uid) = ip->i_uid;
2414#ifdef QUOTA
2415	if ((error = getinoquota(ip)) ||
2416	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2417		UFS_VFREE(tvp, ip->i_number, mode);
2418		vput(tvp);
2419		return (error);
2420	}
2421#endif
2422#endif	/* !SUIDDIR */
2423	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2424#ifdef UFS_ACL
2425	acl = NULL;
2426	if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) {
2427		MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, M_WAITOK);
2428
2429		/*
2430		 * Retrieve default ACL for parent, if any.
2431		 */
2432		error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred,
2433		    cnp->cn_thread);
2434		switch (error) {
2435		case 0:
2436			/*
2437			 * Retrieved a default ACL, so merge mode and ACL if
2438			 * necessary.
2439			 */
2440			if (acl->acl_cnt != 0) {
2441				/*
2442				 * Two possible ways for default ACL to not
2443				 * be present.  First, the EA can be
2444				 * undefined, or second, the default ACL can
2445				 * be blank.  If it's blank, fall through to
2446				 * the it's not defined case.
2447				 */
2448				mode = acl_posix1e_newfilemode(mode, acl);
2449				ip->i_mode = mode;
2450				DIP(ip, i_mode) = mode;
2451				ufs_sync_acl_from_inode(ip, acl);
2452				break;
2453			}
2454			/* FALLTHROUGH */
2455
2456		case EOPNOTSUPP:
2457			/*
2458			 * Just use the mode as-is.
2459			 */
2460			ip->i_mode = mode;
2461			DIP(ip, i_mode) = mode;
2462			FREE(acl, M_ACL);
2463			acl = NULL;
2464			break;
2465
2466		default:
2467			UFS_VFREE(tvp, ip->i_number, mode);
2468			vput(tvp);
2469			FREE(acl, M_ACL);
2470			acl = NULL;
2471			return (error);
2472		}
2473	} else {
2474#endif
2475		ip->i_mode = mode;
2476		DIP(ip, i_mode) = mode;
2477#ifdef UFS_ACL
2478	}
2479#endif
2480	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2481	ip->i_effnlink = 1;
2482	ip->i_nlink = 1;
2483	DIP(ip, i_nlink) = 1;
2484	if (DOINGSOFTDEP(tvp))
2485		softdep_change_linkcnt(ip);
2486	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2487	    suser_cred(cnp->cn_cred, PRISON_ROOT)) {
2488		ip->i_mode &= ~ISGID;
2489		DIP(ip, i_mode) = ip->i_mode;
2490	}
2491
2492	if (cnp->cn_flags & ISWHITEOUT) {
2493		ip->i_flags |= UF_OPAQUE;
2494		DIP(ip, i_flags) = ip->i_flags;
2495	}
2496
2497	/*
2498	 * Make sure inode goes to disk before directory entry.
2499	 */
2500	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2501	if (error)
2502		goto bad;
2503#ifdef MAC
2504	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
2505		error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount,
2506		    dvp, tvp, cnp);
2507		if (error)
2508			goto bad;
2509	}
2510#endif
2511#ifdef UFS_ACL
2512	if (acl != NULL) {
2513		/*
2514		 * XXX: If we abort now, will Soft Updates notify the extattr
2515		 * code that the EAs for the file need to be released?
2516		 */
2517		error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred,
2518		    cnp->cn_thread);
2519		switch (error) {
2520		case 0:
2521			break;
2522
2523		case EOPNOTSUPP:
2524			/*
2525			 * XXX: This should not happen, as EOPNOTSUPP above was
2526			 * supposed to free acl.
2527			 */
2528			printf("ufs_makeinode: VOP_GETACL() but no "
2529			    "VOP_SETACL()\n");
2530			/* panic("ufs_makeinode: VOP_GETACL() but no "
2531			    "VOP_SETACL()"); */
2532			break;
2533
2534		default:
2535			FREE(acl, M_ACL);
2536			goto bad;
2537		}
2538		FREE(acl, M_ACL);
2539	}
2540#endif /* !UFS_ACL */
2541	ufs_makedirentry(ip, cnp, &newdir);
2542	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
2543	if (error)
2544		goto bad;
2545	*vpp = tvp;
2546	return (0);
2547
2548bad:
2549	/*
2550	 * Write error occurred trying to update the inode
2551	 * or the directory so must deallocate the inode.
2552	 */
2553	ip->i_effnlink = 0;
2554	ip->i_nlink = 0;
2555	DIP(ip, i_nlink) = 0;
2556	ip->i_flag |= IN_CHANGE;
2557	if (DOINGSOFTDEP(tvp))
2558		softdep_change_linkcnt(ip);
2559	vput(tvp);
2560	return (error);
2561}
2562
2563static struct filterops ufsread_filtops =
2564	{ 1, NULL, filt_ufsdetach, filt_ufsread };
2565static struct filterops ufswrite_filtops =
2566	{ 1, NULL, filt_ufsdetach, filt_ufswrite };
2567static struct filterops ufsvnode_filtops =
2568	{ 1, NULL, filt_ufsdetach, filt_ufsvnode };
2569
2570static int
2571ufs_kqfilter(ap)
2572	struct vop_kqfilter_args /* {
2573		struct vnode *a_vp;
2574		struct knote *a_kn;
2575	} */ *ap;
2576{
2577	struct vnode *vp = ap->a_vp;
2578	struct knote *kn = ap->a_kn;
2579
2580	switch (kn->kn_filter) {
2581	case EVFILT_READ:
2582		kn->kn_fop = &ufsread_filtops;
2583		break;
2584	case EVFILT_WRITE:
2585		kn->kn_fop = &ufswrite_filtops;
2586		break;
2587	case EVFILT_VNODE:
2588		kn->kn_fop = &ufsvnode_filtops;
2589		break;
2590	default:
2591		return (1);
2592	}
2593
2594	kn->kn_hook = (caddr_t)vp;
2595
2596	if (vp->v_pollinfo == NULL)
2597		v_addpollinfo(vp);
2598	mtx_lock(&vp->v_pollinfo->vpi_lock);
2599	SLIST_INSERT_HEAD(&vp->v_pollinfo->vpi_selinfo.si_note, kn, kn_selnext);
2600	mtx_unlock(&vp->v_pollinfo->vpi_lock);
2601
2602	return (0);
2603}
2604
2605static void
2606filt_ufsdetach(struct knote *kn)
2607{
2608	struct vnode *vp = (struct vnode *)kn->kn_hook;
2609
2610	KASSERT(vp->v_pollinfo != NULL, ("Mising v_pollinfo"));
2611	mtx_lock(&vp->v_pollinfo->vpi_lock);
2612	SLIST_REMOVE(&vp->v_pollinfo->vpi_selinfo.si_note,
2613	    kn, knote, kn_selnext);
2614	mtx_unlock(&vp->v_pollinfo->vpi_lock);
2615}
2616
2617/*ARGSUSED*/
2618static int
2619filt_ufsread(struct knote *kn, long hint)
2620{
2621	struct vnode *vp = (struct vnode *)kn->kn_hook;
2622	struct inode *ip = VTOI(vp);
2623
2624	/*
2625	 * filesystem is gone, so set the EOF flag and schedule
2626	 * the knote for deletion.
2627	 */
2628	if (hint == NOTE_REVOKE) {
2629		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2630		return (1);
2631	}
2632
2633        kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
2634        return (kn->kn_data != 0);
2635}
2636
2637/*ARGSUSED*/
2638static int
2639filt_ufswrite(struct knote *kn, long hint)
2640{
2641
2642	/*
2643	 * filesystem is gone, so set the EOF flag and schedule
2644	 * the knote for deletion.
2645	 */
2646	if (hint == NOTE_REVOKE)
2647		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2648
2649        kn->kn_data = 0;
2650        return (1);
2651}
2652
2653static int
2654filt_ufsvnode(struct knote *kn, long hint)
2655{
2656
2657	if (kn->kn_sfflags & hint)
2658		kn->kn_fflags |= hint;
2659	if (hint == NOTE_REVOKE) {
2660		kn->kn_flags |= EV_EOF;
2661		return (1);
2662	}
2663	return (kn->kn_fflags != 0);
2664}
2665
2666/* Global vfs data structures for ufs. */
2667static vop_t **ufs_vnodeop_p;
2668static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
2669	{ &vop_default_desc,		(vop_t *) vop_defaultop },
2670	{ &vop_fsync_desc,		(vop_t *) vop_panic },
2671	{ &vop_read_desc,		(vop_t *) vop_panic },
2672	{ &vop_reallocblks_desc,	(vop_t *) vop_panic },
2673	{ &vop_write_desc,		(vop_t *) vop_panic },
2674	{ &vop_access_desc,		(vop_t *) ufs_access },
2675	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
2676	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
2677	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
2678	{ &vop_close_desc,		(vop_t *) ufs_close },
2679	{ &vop_create_desc,		(vop_t *) ufs_create },
2680	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2681	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2682	{ &vop_link_desc,		(vop_t *) ufs_link },
2683	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
2684	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
2685	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
2686	{ &vop_open_desc,		(vop_t *) ufs_open },
2687	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
2688	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
2689	{ &vop_kqfilter_desc,		(vop_t *) ufs_kqfilter },
2690	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
2691	{ &vop_print_desc,		(vop_t *) ufs_print },
2692	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
2693	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
2694	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2695	{ &vop_remove_desc,		(vop_t *) ufs_remove },
2696	{ &vop_rename_desc,		(vop_t *) ufs_rename },
2697	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
2698	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2699#ifdef MAC
2700	{ &vop_setlabel_desc,		(vop_t *) vop_stdsetlabel_ea },
2701#endif
2702	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
2703	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
2704	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
2705#ifdef UFS_EXTATTR
2706	{ &vop_getextattr_desc,		(vop_t *) ufs_getextattr },
2707	{ &vop_deleteextattr_desc,		(vop_t *) ufs_deleteextattr },
2708	{ &vop_setextattr_desc,		(vop_t *) ufs_setextattr },
2709#endif
2710#ifdef UFS_ACL
2711	{ &vop_getacl_desc,		(vop_t *) ufs_getacl },
2712	{ &vop_setacl_desc,		(vop_t *) ufs_setacl },
2713	{ &vop_aclcheck_desc,		(vop_t *) ufs_aclcheck },
2714#endif
2715	{ NULL, NULL }
2716};
2717static struct vnodeopv_desc ufs_vnodeop_opv_desc =
2718	{ &ufs_vnodeop_p, ufs_vnodeop_entries };
2719
2720static vop_t **ufs_specop_p;
2721static struct vnodeopv_entry_desc ufs_specop_entries[] = {
2722	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
2723	{ &vop_fsync_desc,		(vop_t *) vop_panic },
2724	{ &vop_access_desc,		(vop_t *) ufs_access },
2725	{ &vop_close_desc,		(vop_t *) ufsspec_close },
2726	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2727	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2728	{ &vop_print_desc,		(vop_t *) ufs_print },
2729	{ &vop_read_desc,		(vop_t *) ufsspec_read },
2730	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2731	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2732#ifdef MAC
2733	{ &vop_setlabel_desc,		(vop_t *) vop_stdsetlabel_ea },
2734#endif
2735	{ &vop_write_desc,		(vop_t *) ufsspec_write },
2736#ifdef UFS_EXTATTR
2737	{ &vop_getextattr_desc,		(vop_t *) ufs_getextattr },
2738	{ &vop_deleteextattr_desc,		(vop_t *) ufs_deleteextattr },
2739	{ &vop_setextattr_desc,		(vop_t *) ufs_setextattr },
2740#endif
2741#ifdef UFS_ACL
2742	{ &vop_getacl_desc,		(vop_t *) ufs_getacl },
2743	{ &vop_setacl_desc,		(vop_t *) ufs_setacl },
2744	{ &vop_aclcheck_desc,		(vop_t *) ufs_aclcheck },
2745#endif
2746	{NULL, NULL}
2747};
2748static struct vnodeopv_desc ufs_specop_opv_desc =
2749	{ &ufs_specop_p, ufs_specop_entries };
2750
2751static vop_t **ufs_fifoop_p;
2752static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
2753	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
2754	{ &vop_fsync_desc,		(vop_t *) vop_panic },
2755	{ &vop_access_desc,		(vop_t *) ufs_access },
2756	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
2757	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2758	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2759	{ &vop_kqfilter_desc,		(vop_t *) ufsfifo_kqfilter },
2760	{ &vop_print_desc,		(vop_t *) ufs_print },
2761	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
2762	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2763	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2764#ifdef MAC
2765	{ &vop_setlabel_desc,		(vop_t *) vop_stdsetlabel_ea },
2766#endif
2767	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
2768#ifdef UFS_EXTATTR
2769	{ &vop_getextattr_desc,		(vop_t *) ufs_getextattr },
2770	{ &vop_deleteextattr_desc,		(vop_t *) ufs_deleteextattr },
2771	{ &vop_setextattr_desc,		(vop_t *) ufs_setextattr },
2772#endif
2773#ifdef UFS_ACL
2774	{ &vop_getacl_desc,		(vop_t *) ufs_getacl },
2775	{ &vop_setacl_desc,		(vop_t *) ufs_setacl },
2776	{ &vop_aclcheck_desc,		(vop_t *) ufs_aclcheck },
2777#endif
2778	{ NULL, NULL }
2779};
2780static struct vnodeopv_desc ufs_fifoop_opv_desc =
2781	{ &ufs_fifoop_p, ufs_fifoop_entries };
2782
/*
 * Hand the three operation vector descriptions defined in this file
 * (regular, device special and fifo) to VNODEOP_SET().
 */
VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);
2786
2787int
2788ufs_vnoperate(ap)
2789	struct vop_generic_args /* {
2790		struct vnodeop_desc *a_desc;
2791	} */ *ap;
2792{
2793	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2794}
2795
2796int
2797ufs_vnoperatefifo(ap)
2798	struct vop_generic_args /* {
2799		struct vnodeop_desc *a_desc;
2800	} */ *ap;
2801{
2802	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
2803}
2804
2805int
2806ufs_vnoperatespec(ap)
2807	struct vop_generic_args /* {
2808		struct vnodeop_desc *a_desc;
2809	} */ *ap;
2810{
2811	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
2812}
2813