ufs_vnops.c revision 109623
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
39 * $FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 109623 2003-01-21 08:56:16Z alfred $
40 */
41
42#include "opt_mac.h"
43#include "opt_quota.h"
44#include "opt_suiddir.h"
45#include "opt_ufs.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/malloc.h>
50#include <sys/namei.h>
51#include <sys/kernel.h>
52#include <sys/fcntl.h>
53#include <sys/stat.h>
54#include <sys/bio.h>
55#include <sys/buf.h>
56#include <sys/mount.h>
57#include <sys/unistd.h>
58#include <sys/vnode.h>
59#include <sys/dirent.h>
60#include <sys/lockf.h>
61#include <sys/event.h>
62#include <sys/conf.h>
63#include <sys/acl.h>
64#include <sys/mac.h>
65
66#include <machine/mutex.h>
67
68#include <sys/file.h>		/* XXX */
69
70#include <vm/vm.h>
71#include <vm/vm_extern.h>
72
73#include <fs/fifofs/fifo.h>
74
75#include <ufs/ufs/acl.h>
76#include <ufs/ufs/extattr.h>
77#include <ufs/ufs/quota.h>
78#include <ufs/ufs/inode.h>
79#include <ufs/ufs/dir.h>
80#include <ufs/ufs/ufsmount.h>
81#include <ufs/ufs/ufs_extern.h>
82#ifdef UFS_DIRHASH
83#include <ufs/ufs/dirhash.h>
84#endif
85
/*
 * Prototypes for the file-local (static) routines: the ufs_* vnode
 * operations and their helpers, the ufsfifo_* and ufsspec_* variants,
 * and the filt_ufs* knote filter routines.
 */
static int ufs_access(struct vop_access_args *);
static int ufs_advlock(struct vop_advlock_args *);
static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
static int ufs_close(struct vop_close_args *);
static int ufs_create(struct vop_create_args *);
static int ufs_getattr(struct vop_getattr_args *);
static int ufs_link(struct vop_link_args *);
static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
static int ufs_mkdir(struct vop_mkdir_args *);
static int ufs_mknod(struct vop_mknod_args *);
static int ufs_open(struct vop_open_args *);
static int ufs_pathconf(struct vop_pathconf_args *);
static int ufs_print(struct vop_print_args *);
static int ufs_readlink(struct vop_readlink_args *);
static int ufs_remove(struct vop_remove_args *);
static int ufs_rename(struct vop_rename_args *);
static int ufs_rmdir(struct vop_rmdir_args *);
static int ufs_setattr(struct vop_setattr_args *);
static int ufs_strategy(struct vop_strategy_args *);
static int ufs_symlink(struct vop_symlink_args *);
static int ufs_whiteout(struct vop_whiteout_args *);
static int ufsfifo_close(struct vop_close_args *);
static int ufsfifo_kqfilter(struct vop_kqfilter_args *);
static int ufsfifo_read(struct vop_read_args *);
static int ufsfifo_write(struct vop_write_args *);
static int ufsspec_close(struct vop_close_args *);
static int ufsspec_read(struct vop_read_args *);
static int ufsspec_write(struct vop_write_args *);
static int filt_ufsread(struct knote *kn, long hint);
static int filt_ufswrite(struct knote *kn, long hint);
static int filt_ufsvnode(struct knote *kn, long hint);
static void filt_ufsdetach(struct knote *kn);
static int ufs_kqfilter(struct vop_kqfilter_args *ap);
120
/*
 * Overlay of a 64-bit integer with its two 32-bit halves.  The SETHIGH
 * and SETLOW macros below use it to replace one half of a 64-bit value
 * while leaving the other half untouched.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};
/*
 * SETHIGH(q, h): store h into the val[_QUAD_HIGHWORD] half of the 64-bit
 * lvalue q.  q is evaluated twice (once read, once written).  The
 * expansion is a bare brace block rather than a do { } while (0)
 * statement, so take care when using it as the body of an if/else.
 */
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}
/*
 * SETLOW(q, l): as SETHIGH, but stores l into the val[_QUAD_LOWWORD]
 * half of q.
 */
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}
137
/*
 * A virgin directory (no blushing please).
 *
 * Initializer templates holding the "." and ".." entries.  The two
 * length values 12 and DIRBLKSIZ - 12 add up to DIRBLKSIZ.
 * NOTE(review): the positional initializers presumably map to inode
 * number, record length, [type,] name length and name for each entry --
 * confirm against the struct dirtemplate / struct odirtemplate
 * declarations, which are not in this file.
 */
/* Template whose entries carry a DT_DIR type value. */
static struct dirtemplate mastertemplate = {
	0, 12, DT_DIR, 1, ".",
	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
};
/* Template without the per-entry type value (presumably the older format). */
static struct odirtemplate omastertemplate = {
	0, 12, 1, ".",
	0, DIRBLKSIZ - 12, 2, ".."
};
149
150void
151ufs_itimes(vp)
152	struct vnode *vp;
153{
154	struct inode *ip;
155	struct timespec ts;
156
157	ip = VTOI(vp);
158	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
159		return;
160	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
161		ip->i_flag |= IN_LAZYMOD;
162	else
163		ip->i_flag |= IN_MODIFIED;
164	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
165		vfs_timestamp(&ts);
166		if (ip->i_flag & IN_ACCESS) {
167			DIP(ip, i_atime) = ts.tv_sec;
168			DIP(ip, i_atimensec) = ts.tv_nsec;
169		}
170		if (ip->i_flag & IN_UPDATE) {
171			DIP(ip, i_mtime) = ts.tv_sec;
172			DIP(ip, i_mtimensec) = ts.tv_nsec;
173			ip->i_modrev++;
174		}
175		if (ip->i_flag & IN_CHANGE) {
176			DIP(ip, i_ctime) = ts.tv_sec;
177			DIP(ip, i_ctimensec) = ts.tv_nsec;
178		}
179	}
180	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
181}
182
183/*
184 * Create a regular file
185 */
186static int
187ufs_create(ap)
188	struct vop_create_args /* {
189		struct vnode *a_dvp;
190		struct vnode **a_vpp;
191		struct componentname *a_cnp;
192		struct vattr *a_vap;
193	} */ *ap;
194{
195	int error;
196
197	error =
198	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
199	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
200	if (error)
201		return (error);
202	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
203	return (0);
204}
205
/*
 * Mknod vnode call
 *
 * Creates the inode through ufs_makeinode(), posts NOTE_WRITE on the
 * parent directory, marks all three timestamps for update and, when
 * a_vap->va_rdev is not VNOVAL, records it as the inode's i_rdev.
 * The freshly created vnode is then discarded and looked up again with
 * VFS_VGET(); *a_vpp ends up pointing at the re-fetched vnode.
 *
 * Returns 0 on success, or the error from ufs_makeinode() or VFS_VGET().
 * If VFS_VGET() fails, *a_vpp is set to NULL.
 */
/* ARGSUSED */
static int
ufs_mknod(ap)
	struct vop_mknod_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct vattr *vap = ap->a_vap;
	struct vnode **vpp = ap->a_vpp;
	struct inode *ip;
	ino_t ino;
	int error;

	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
	    ap->a_dvp, vpp, ap->a_cnp);
	if (error)
		return (error);
	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
	ip = VTOI(*vpp);
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	if (vap->va_rdev != VNOVAL) {
		/*
		 * Want to be able to use this to make badblock
		 * inodes, so don't truncate the dev number.
		 */
		DIP(ip, i_rdev) = vap->va_rdev;
	}
	/*
	 * Remove inode, then reload it through VFS_VGET so it is
	 * checked to see if it is an alias of an existing entry in
	 * the inode cache.
	 */
	vput(*vpp);
	(*vpp)->v_type = VNON;
	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
	vgone(*vpp);
	/* Look the inode up again by number on the parent's mount. */
	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		/* Do not hand a stale vnode pointer back to the caller. */
		*vpp = NULL;
		return (error);
	}
	return (0);
}
255
256/*
257 * Open called.
258 *
259 * Nothing to do.
260 */
261/* ARGSUSED */
262static int
263ufs_open(ap)
264	struct vop_open_args /* {
265		struct vnode *a_vp;
266		int  a_mode;
267		struct ucred *a_cred;
268		struct thread *a_td;
269	} */ *ap;
270{
271
272	/*
273	 * Files marked append-only must be opened for appending.
274	 */
275	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
276	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
277		return (EPERM);
278	return (0);
279}
280
281/*
282 * Close called.
283 *
284 * Update the times on the inode.
285 */
286/* ARGSUSED */
287static int
288ufs_close(ap)
289	struct vop_close_args /* {
290		struct vnode *a_vp;
291		int  a_fflag;
292		struct ucred *a_cred;
293		struct thread *a_td;
294	} */ *ap;
295{
296	struct vnode *vp = ap->a_vp;
297	struct mount *mp;
298
299	VI_LOCK(vp);
300	if (vp->v_usecount > 1) {
301		ufs_itimes(vp);
302		VI_UNLOCK(vp);
303	} else {
304		VI_UNLOCK(vp);
305		/*
306		 * If we are closing the last reference to an unlinked
307		 * file, then it will be freed by the inactive routine.
308		 * Because the freeing causes a the filesystem to be
309		 * modified, it must be held up during periods when the
310		 * filesystem is suspended.
311		 *
312		 * XXX - EAGAIN is returned to prevent vn_close from
313		 * repeating the vrele operation.
314		 */
315		if (vp->v_type == VREG && VTOI(vp)->i_effnlink == 0) {
316			(void) vn_start_write(vp, &mp, V_WAIT);
317			vrele(vp);
318			vn_finished_write(mp);
319			return (EAGAIN);
320		}
321	}
322	return (0);
323}
324
325static int
326ufs_access(ap)
327	struct vop_access_args /* {
328		struct vnode *a_vp;
329		int  a_mode;
330		struct ucred *a_cred;
331		struct thread *a_td;
332	} */ *ap;
333{
334	struct vnode *vp = ap->a_vp;
335	struct inode *ip = VTOI(vp);
336	mode_t mode = ap->a_mode;
337	int error;
338#ifdef UFS_ACL
339	struct acl *acl;
340	size_t len;
341#endif
342
343	/*
344	 * Disallow write attempts on read-only filesystems;
345	 * unless the file is a socket, fifo, or a block or
346	 * character device resident on the filesystem.
347	 */
348	if (mode & VWRITE) {
349		switch (vp->v_type) {
350		case VDIR:
351		case VLNK:
352		case VREG:
353			if (vp->v_mount->mnt_flag & MNT_RDONLY)
354				return (EROFS);
355#ifdef QUOTA
356			if ((error = getinoquota(ip)) != 0)
357				return (error);
358#endif
359			break;
360		default:
361			break;
362		}
363	}
364
365	/* If immutable bit set, nobody gets to write it. */
366	if ((mode & VWRITE) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
367		return (EPERM);
368
369#ifdef UFS_ACL
370	if ((vp->v_mount->mnt_flag & MNT_ACLS) != 0) {
371		MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, 0);
372		len = sizeof(*acl);
373		error = VOP_GETACL(vp, ACL_TYPE_ACCESS, acl, ap->a_cred,
374		    ap->a_td);
375		switch (error) {
376		case EOPNOTSUPP:
377			error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
378			    ip->i_gid, ap->a_mode, ap->a_cred, NULL);
379			break;
380		case 0:
381			error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
382			    ip->i_gid, acl, ap->a_mode, ap->a_cred, NULL);
383			break;
384		default:
385			printf(
386"ufs_access(): Error retrieving ACL on object (%d).\n",
387			    error);
388			/*
389			 * XXX: Fall back until debugged.  Should
390			 * eventually possibly log an error, and return
391			 * EPERM for safety.
392			 */
393			error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
394			    ip->i_gid, ap->a_mode, ap->a_cred, NULL);
395		}
396		FREE(acl, M_ACL);
397	} else
398#endif /* !UFS_ACL */
399		error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
400		    ap->a_mode, ap->a_cred, NULL);
401	return (error);
402}
403
404/* ARGSUSED */
405static int
406ufs_getattr(ap)
407	struct vop_getattr_args /* {
408		struct vnode *a_vp;
409		struct vattr *a_vap;
410		struct ucred *a_cred;
411		struct thread *a_td;
412	} */ *ap;
413{
414	struct vnode *vp = ap->a_vp;
415	struct inode *ip = VTOI(vp);
416	struct vattr *vap = ap->a_vap;
417
418	ufs_itimes(vp);
419	/*
420	 * Copy from inode table
421	 */
422	vap->va_fsid = dev2udev(ip->i_dev);
423	vap->va_fileid = ip->i_number;
424	vap->va_mode = ip->i_mode & ~IFMT;
425	vap->va_nlink = ip->i_effnlink;
426	vap->va_uid = ip->i_uid;
427	vap->va_gid = ip->i_gid;
428	if (ip->i_ump->um_fstype == UFS1) {
429		vap->va_rdev = ip->i_din1->di_rdev;
430		vap->va_size = ip->i_din1->di_size;
431		vap->va_atime.tv_sec = ip->i_din1->di_atime;
432		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
433		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
434		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
435		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
436		vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
437		vap->va_birthtime.tv_sec = 0;
438		vap->va_birthtime.tv_nsec = 0;
439		vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
440	} else {
441		vap->va_rdev = ip->i_din2->di_rdev;
442		vap->va_size = ip->i_din2->di_size;
443		vap->va_atime.tv_sec = ip->i_din2->di_atime;
444		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
445		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
446		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
447		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
448		vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
449		vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
450		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
451		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
452	}
453	vap->va_flags = ip->i_flags;
454	vap->va_gen = ip->i_gen;
455	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
456	vap->va_type = IFTOVT(ip->i_mode);
457	vap->va_filerev = ip->i_modrev;
458	return (0);
459}
460
/*
 * Set attribute vnode op. called from several syscalls
 *
 * Each field of a_vap that is not VNOVAL is applied in turn: file flags,
 * owner/group, size, access/modification/birth times, and finally the
 * mode.  The first failing step returns its error immediately.
 *
 * Returns 0 on success, EINVAL when an unsettable attribute is supplied,
 * EROFS on a read-only mount, EPERM when flags or snapshot status forbid
 * the change, EISDIR for a size change on a directory, or an error from
 * VOP_ACCESS(), securelevel_gt(), ufs_chown(), UFS_TRUNCATE(),
 * UFS_UPDATE() or ufs_chmod().
 *
 * NOTE_ATTRIB is posted only when the end of the function is reached;
 * the early error returns skip it, but a ufs_chmod() failure does not.
 */
static int
ufs_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct ucred *cred = ap->a_cred;
	struct thread *td = ap->a_td;
	int error;

	/*
	 * Check for unsettable attributes.
	 */
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}
	/* File flags (chflags). */
	if (vap->va_flags != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/*
		 * Callers may only modify the file flags on objects they
		 * have VADMIN rights for.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
			return (error);
		/*
		 * Unprivileged processes and privileged processes in
		 * jail() are not permitted to unset system flags, or
		 * modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 */
		if (!suser_cred(cred, PRISON_ROOT)) {
			/* suser_cred() returned 0: the privileged branch. */
			if (ip->i_flags
			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
				error = securelevel_gt(cred, 0);
				if (error)
					return (error);
			}
			/* Snapshot flag cannot be set or cleared */
			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
			     (ip->i_flags & SF_SNAPSHOT) == 0) ||
			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
			     (ip->i_flags & SF_SNAPSHOT) != 0))
				return (EPERM);
			ip->i_flags = vap->va_flags;
			DIP(ip, i_flags) = vap->va_flags;
		} else {
			/*
			 * Otherwise: refuse if any of the three system flags
			 * is currently set or if anything outside UF_SETTABLE
			 * is requested; then replace only the UF_SETTABLE
			 * bits, keeping the existing SF_SETTABLE ones.
			 */
			if (ip->i_flags
			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
				return (EPERM);
			ip->i_flags &= SF_SETTABLE;
			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
			DIP(ip, i_flags) = ip->i_flags;
		}
		ip->i_flag |= IN_CHANGE;
		/*
		 * If the new flags make the file immutable or append-only,
		 * stop here; the remaining attributes are not applied.
		 */
		if (vap->va_flags & (IMMUTABLE | APPEND))
			return (0);
	}
	/* No other attribute may change on an immutable/append-only file. */
	if (ip->i_flags & (IMMUTABLE | APPEND))
		return (EPERM);
	/*
	 * Go through the fields and update iff not VNOVAL.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
		    td)) != 0)
			return (error);
	}
	if (vap->va_size != VNOVAL) {
		/*
		 * Disallow write attempts on read-only filesystems;
		 * unless the file is a socket, fifo, or a block or
		 * character device resident on the filesystem.
		 */
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			if ((ip->i_flags & SF_SNAPSHOT) != 0)
				return (EPERM);
			break;
		default:
			break;
		}
		if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
		    cred, td)) != 0)
			return (error);
	}
	if (vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL ||
	    vap->va_birthtime.tv_sec != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if ((ip->i_flags & SF_SNAPSHOT) != 0)
			return (EPERM);
		/*
		 * From utimes(2):
		 * If times is NULL, ... The caller must be the owner of
		 * the file, have permission to write the file, or be the
		 * super-user.
		 * If times is non-NULL, ... The caller must be the owner of
		 * the file or be the super-user.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) &&
		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
		    (error = VOP_ACCESS(vp, VWRITE, cred, td))))
			return (error);
		/*
		 * Mark the affected times, let ufs_itimes() process the
		 * marks, then overwrite with the caller-supplied values.
		 */
		if (vap->va_atime.tv_sec != VNOVAL)
			ip->i_flag |= IN_ACCESS;
		if (vap->va_mtime.tv_sec != VNOVAL)
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (vap->va_birthtime.tv_sec != VNOVAL &&
		    ip->i_ump->um_fstype == UFS2)
			ip->i_flag |= IN_MODIFIED;
		ufs_itimes(vp);
		if (vap->va_atime.tv_sec != VNOVAL) {
			DIP(ip, i_atime) = vap->va_atime.tv_sec;
			DIP(ip, i_atimensec) = vap->va_atime.tv_nsec;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			DIP(ip, i_mtime) = vap->va_mtime.tv_sec;
			DIP(ip, i_mtimensec) = vap->va_mtime.tv_nsec;
		}
		/* The birth time is stored only for UFS2 inodes. */
		if (vap->va_birthtime.tv_sec != VNOVAL &&
		    ip->i_ump->um_fstype == UFS2) {
			ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
			ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
		}
		error = UFS_UPDATE(vp, 0);
		if (error)
			return (error);
	}
	error = 0;
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/* A snapshot may not be given any write or execute bits. */
		if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
		   (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
			return (EPERM);
		error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
	}
	VN_KNOTE(vp, NOTE_ATTRIB);
	return (error);
}
624
625/*
626 * Change the mode on a file.
627 * Inode must be locked before calling.
628 */
629static int
630ufs_chmod(vp, mode, cred, td)
631	struct vnode *vp;
632	int mode;
633	struct ucred *cred;
634	struct thread *td;
635{
636	struct inode *ip = VTOI(vp);
637	int error;
638
639	/*
640	 * To modify the permissions on a file, must possess VADMIN
641	 * for that file.
642	 */
643	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
644		return (error);
645	/*
646	 * Privileged processes may set the sticky bit on non-directories,
647	 * as well as set the setgid bit on a file with a group that the
648	 * process is not a member of.
649	 */
650	if (suser_cred(cred, PRISON_ROOT)) {
651		if (vp->v_type != VDIR && (mode & S_ISTXT))
652			return (EFTYPE);
653		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
654			return (EPERM);
655	}
656	ip->i_mode &= ~ALLPERMS;
657	ip->i_mode |= (mode & ALLPERMS);
658	DIP(ip, i_mode) = ip->i_mode;
659	ip->i_flag |= IN_CHANGE;
660	return (0);
661}
662
/*
 * Perform chown operation on inode ip;
 * inode must be locked prior to call.
 *
 * A uid or gid equal to VNOVAL means "leave unchanged".  The caller
 * must hold VADMIN on the file, and must pass suser_cred() to change
 * the owner or to change the group to one it is not a member of.
 *
 * With QUOTA configured, the block and inode charges are moved from the
 * old ids to the new ones; if the new ids cannot be charged, the old
 * ownership and charges are restored and the error is returned.
 *
 * On success IN_CHANGE is set and, when suser_cred() fails for the
 * caller and the ownership actually changed, ISUID and ISGID are
 * cleared.  Returns 0 or an errno value.
 */
static int
ufs_chown(vp, uid, gid, cred, td)
	struct vnode *vp;
	uid_t uid;
	gid_t gid;
	struct ucred *cred;
	struct thread *td;
{
	struct inode *ip = VTOI(vp);
	uid_t ouid;
	gid_t ogid;
	int error = 0;
#ifdef QUOTA
	int i;
	ufs2_daddr_t change;
#endif

	if (uid == (uid_t)VNOVAL)
		uid = ip->i_uid;
	if (gid == (gid_t)VNOVAL)
		gid = ip->i_gid;
	/*
	 * To modify the ownership of a file, must possess VADMIN
	 * for that file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);
	/*
	 * To change the owner of a file, or change the group of a file
	 * to a group of which we are not a member, the caller must
	 * have privilege.
	 */
	if ((uid != ip->i_uid ||
	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
	    (error = suser_cred(cred, PRISON_ROOT)))
		return (error);
	/* Remember the old ids for the quota rollback and setid check. */
	ogid = ip->i_gid;
	ouid = ip->i_uid;
#ifdef QUOTA
	/*
	 * Take the file's blocks and inode off the old ids' quotas.  A
	 * quota whose id is not changing is dropped first, so it is not
	 * adjusted.
	 */
	if ((error = getinoquota(ip)) != 0)
		return (error);
	if (ouid == uid) {
		dqrele(vp, ip->i_dquot[USRQUOTA]);
		ip->i_dquot[USRQUOTA] = NODQUOT;
	}
	if (ogid == gid) {
		dqrele(vp, ip->i_dquot[GRPQUOTA]);
		ip->i_dquot[GRPQUOTA] = NODQUOT;
	}
	change = DIP(ip, i_blocks);
	(void) chkdq(ip, -change, cred, CHOWN);
	(void) chkiq(ip, -1, cred, CHOWN);
	for (i = 0; i < MAXQUOTAS; i++) {
		dqrele(vp, ip->i_dquot[i]);
		ip->i_dquot[i] = NODQUOT;
	}
#endif
	/* Install the new ownership in the in-core inode and the dinode. */
	ip->i_gid = gid;
	DIP(ip, i_gid) = gid;
	ip->i_uid = uid;
	DIP(ip, i_uid) = uid;
#ifdef QUOTA
	/* Charge the new ids; jump to "good" only if both charges succeed. */
	if ((error = getinoquota(ip)) == 0) {
		if (ouid == uid) {
			dqrele(vp, ip->i_dquot[USRQUOTA]);
			ip->i_dquot[USRQUOTA] = NODQUOT;
		}
		if (ogid == gid) {
			dqrele(vp, ip->i_dquot[GRPQUOTA]);
			ip->i_dquot[GRPQUOTA] = NODQUOT;
		}
		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
				goto good;
			else
				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
		}
		for (i = 0; i < MAXQUOTAS; i++) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
	/*
	 * Failure: put the old ownership back and re-charge the old ids
	 * with FORCE, then return the error that got us here.
	 */
	ip->i_gid = ogid;
	DIP(ip, i_gid) = ogid;
	ip->i_uid = ouid;
	DIP(ip, i_uid) = ouid;
	if (getinoquota(ip) == 0) {
		if (ouid == uid) {
			dqrele(vp, ip->i_dquot[USRQUOTA]);
			ip->i_dquot[USRQUOTA] = NODQUOT;
		}
		if (ogid == gid) {
			dqrele(vp, ip->i_dquot[GRPQUOTA]);
			ip->i_dquot[GRPQUOTA] = NODQUOT;
		}
		(void) chkdq(ip, change, cred, FORCE|CHOWN);
		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
		(void) getinoquota(ip);
	}
	return (error);
good:
	if (getinoquota(ip))
		panic("ufs_chown: lost quota");
#endif /* QUOTA */
	ip->i_flag |= IN_CHANGE;
	/* A caller that fails suser_cred() loses setuid/setgid on a change. */
	if (suser_cred(cred, PRISON_ROOT) && (ouid != uid || ogid != gid)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP(ip, i_mode) = ip->i_mode;
	}
	return (0);
}
778
779static int
780ufs_remove(ap)
781	struct vop_remove_args /* {
782		struct vnode *a_dvp;
783		struct vnode *a_vp;
784		struct componentname *a_cnp;
785	} */ *ap;
786{
787	struct inode *ip;
788	struct vnode *vp = ap->a_vp;
789	struct vnode *dvp = ap->a_dvp;
790	int error;
791
792	ip = VTOI(vp);
793	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
794	    (VTOI(dvp)->i_flags & APPEND)) {
795		error = EPERM;
796		goto out;
797	}
798	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
799	if (ip->i_nlink <= 0)
800		vp->v_vflag |= VV_NOSYNC;
801	VN_KNOTE(vp, NOTE_DELETE);
802	VN_KNOTE(dvp, NOTE_WRITE);
803out:
804	return (error);
805}
806
807/*
808 * link vnode call
809 */
810static int
811ufs_link(ap)
812	struct vop_link_args /* {
813		struct vnode *a_tdvp;
814		struct vnode *a_vp;
815		struct componentname *a_cnp;
816	} */ *ap;
817{
818	struct vnode *vp = ap->a_vp;
819	struct vnode *tdvp = ap->a_tdvp;
820	struct componentname *cnp = ap->a_cnp;
821	struct inode *ip;
822	struct direct newdir;
823	int error;
824
825#ifdef DIAGNOSTIC
826	if ((cnp->cn_flags & HASBUF) == 0)
827		panic("ufs_link: no name");
828#endif
829	if (tdvp->v_mount != vp->v_mount) {
830		error = EXDEV;
831		goto out;
832	}
833	ip = VTOI(vp);
834	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
835		error = EMLINK;
836		goto out;
837	}
838	if (ip->i_flags & (IMMUTABLE | APPEND)) {
839		error = EPERM;
840		goto out;
841	}
842	ip->i_effnlink++;
843	ip->i_nlink++;
844	DIP(ip, i_nlink) = ip->i_nlink;
845	ip->i_flag |= IN_CHANGE;
846	if (DOINGSOFTDEP(vp))
847		softdep_change_linkcnt(ip);
848	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
849	if (!error) {
850		ufs_makedirentry(ip, cnp, &newdir);
851		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
852	}
853
854	if (error) {
855		ip->i_effnlink--;
856		ip->i_nlink--;
857		DIP(ip, i_nlink) = ip->i_nlink;
858		ip->i_flag |= IN_CHANGE;
859		if (DOINGSOFTDEP(vp))
860			softdep_change_linkcnt(ip);
861	}
862out:
863	VN_KNOTE(vp, NOTE_LINK);
864	VN_KNOTE(tdvp, NOTE_WRITE);
865	return (error);
866}
867
868/*
869 * whiteout vnode call
870 */
871static int
872ufs_whiteout(ap)
873	struct vop_whiteout_args /* {
874		struct vnode *a_dvp;
875		struct componentname *a_cnp;
876		int a_flags;
877	} */ *ap;
878{
879	struct vnode *dvp = ap->a_dvp;
880	struct componentname *cnp = ap->a_cnp;
881	struct direct newdir;
882	int error = 0;
883
884	switch (ap->a_flags) {
885	case LOOKUP:
886		/* 4.4 format directories support whiteout operations */
887		if (dvp->v_mount->mnt_maxsymlinklen > 0)
888			return (0);
889		return (EOPNOTSUPP);
890
891	case CREATE:
892		/* create a new directory whiteout */
893#ifdef DIAGNOSTIC
894		if ((cnp->cn_flags & SAVENAME) == 0)
895			panic("ufs_whiteout: missing name");
896		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
897			panic("ufs_whiteout: old format filesystem");
898#endif
899
900		newdir.d_ino = WINO;
901		newdir.d_namlen = cnp->cn_namelen;
902		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
903		newdir.d_type = DT_WHT;
904		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
905		break;
906
907	case DELETE:
908		/* remove an existing directory whiteout */
909#ifdef DIAGNOSTIC
910		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
911			panic("ufs_whiteout: old format filesystem");
912#endif
913
914		cnp->cn_flags &= ~DOWHITEOUT;
915		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
916		break;
917	default:
918		panic("ufs_whiteout: unknown op");
919	}
920	return (error);
921}
922
923/*
924 * Rename system call.
925 * 	rename("foo", "bar");
926 * is essentially
927 *	unlink("bar");
928 *	link("foo", "bar");
929 *	unlink("foo");
930 * but ``atomically''.  Can't do full commit without saving state in the
931 * inode on disk which isn't feasible at this time.  Best we can do is
932 * always guarantee the target exists.
933 *
934 * Basic algorithm is:
935 *
936 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensures the inode won't be deleted out
938 *    from underneath us while we work (it may be truncated by
939 *    a concurrent `trunc' or `open' for creation).
940 * 2) Link source to destination.  If destination already exists,
941 *    delete it first.
942 * 3) Unlink source reference to inode if still around. If a
943 *    directory was moved and the parent of the destination
944 *    is different from the source, patch the ".." entry in the
945 *    directory.
946 */
947static int
948ufs_rename(ap)
949	struct vop_rename_args  /* {
950		struct vnode *a_fdvp;
951		struct vnode *a_fvp;
952		struct componentname *a_fcnp;
953		struct vnode *a_tdvp;
954		struct vnode *a_tvp;
955		struct componentname *a_tcnp;
956	} */ *ap;
957{
958	struct vnode *tvp = ap->a_tvp;
959	struct vnode *tdvp = ap->a_tdvp;
960	struct vnode *fvp = ap->a_fvp;
961	struct vnode *fdvp = ap->a_fdvp;
962	struct componentname *tcnp = ap->a_tcnp;
963	struct componentname *fcnp = ap->a_fcnp;
964	struct thread *td = fcnp->cn_thread;
965	struct inode *ip, *xp, *dp;
966	struct direct newdir;
967	int doingdirectory = 0, oldparent = 0, newparent = 0;
968	int error = 0, ioflag;
969
970#ifdef DIAGNOSTIC
971	if ((tcnp->cn_flags & HASBUF) == 0 ||
972	    (fcnp->cn_flags & HASBUF) == 0)
973		panic("ufs_rename: no name");
974#endif
975	/*
976	 * Check for cross-device rename.
977	 */
978	if ((fvp->v_mount != tdvp->v_mount) ||
979	    (tvp && (fvp->v_mount != tvp->v_mount))) {
980		error = EXDEV;
981abortit:
982		if (tdvp == tvp)
983			vrele(tdvp);
984		else
985			vput(tdvp);
986		if (tvp)
987			vput(tvp);
988		vrele(fdvp);
989		vrele(fvp);
990		return (error);
991	}
992
993	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
994	    (VTOI(tdvp)->i_flags & APPEND))) {
995		error = EPERM;
996		goto abortit;
997	}
998
999	/*
1000	 * Renaming a file to itself has no effect.  The upper layers should
1001	 * not call us in that case.  Temporarily just warn if they do.
1002	 */
1003	if (fvp == tvp) {
1004		printf("ufs_rename: fvp == tvp (can't happen)\n");
1005		error = 0;
1006		goto abortit;
1007	}
1008
1009	if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0)
1010		goto abortit;
1011	dp = VTOI(fdvp);
1012	ip = VTOI(fvp);
1013	if (ip->i_nlink >= LINK_MAX) {
1014		VOP_UNLOCK(fvp, 0, td);
1015		error = EMLINK;
1016		goto abortit;
1017	}
1018	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
1019	    || (dp->i_flags & APPEND)) {
1020		VOP_UNLOCK(fvp, 0, td);
1021		error = EPERM;
1022		goto abortit;
1023	}
1024	if ((ip->i_mode & IFMT) == IFDIR) {
1025		/*
1026		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1027		 */
1028		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1029		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
1030		    (ip->i_flag & IN_RENAME)) {
1031			VOP_UNLOCK(fvp, 0, td);
1032			error = EINVAL;
1033			goto abortit;
1034		}
1035		ip->i_flag |= IN_RENAME;
1036		oldparent = dp->i_number;
1037		doingdirectory = 1;
1038	}
1039	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXX right place? */
1040	vrele(fdvp);
1041
1042	/*
1043	 * When the target exists, both the directory
1044	 * and target vnodes are returned locked.
1045	 */
1046	dp = VTOI(tdvp);
1047	xp = NULL;
1048	if (tvp)
1049		xp = VTOI(tvp);
1050
1051	/*
1052	 * 1) Bump link count while we're moving stuff
1053	 *    around.  If we crash somewhere before
1054	 *    completing our work, the link count
1055	 *    may be wrong, but correctable.
1056	 */
1057	ip->i_effnlink++;
1058	ip->i_nlink++;
1059	DIP(ip, i_nlink) = ip->i_nlink;
1060	ip->i_flag |= IN_CHANGE;
1061	if (DOINGSOFTDEP(fvp))
1062		softdep_change_linkcnt(ip);
1063	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
1064				       DOINGASYNC(fvp)))) != 0) {
1065		VOP_UNLOCK(fvp, 0, td);
1066		goto bad;
1067	}
1068
1069	/*
1070	 * If ".." must be changed (ie the directory gets a new
1071	 * parent) then the source directory must not be in the
1072	 * directory heirarchy above the target, as this would
1073	 * orphan everything below the source directory. Also
1074	 * the user must have write permission in the source so
1075	 * as to be able to change "..". We must repeat the call
1076	 * to namei, as the parent directory is unlocked by the
1077	 * call to checkpath().
1078	 */
1079	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
1080	VOP_UNLOCK(fvp, 0, td);
1081	if (oldparent != dp->i_number)
1082		newparent = dp->i_number;
1083	if (doingdirectory && newparent) {
1084		if (error)	/* write access check above */
1085			goto bad;
1086		if (xp != NULL)
1087			vput(tvp);
1088		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
1089		if (error)
1090			goto out;
1091		if ((tcnp->cn_flags & SAVESTART) == 0)
1092			panic("ufs_rename: lost to startdir");
1093		VREF(tdvp);
1094		error = relookup(tdvp, &tvp, tcnp);
1095		if (error)
1096			goto out;
1097		vrele(tdvp);
1098		dp = VTOI(tdvp);
1099		xp = NULL;
1100		if (tvp)
1101			xp = VTOI(tvp);
1102	}
1103	/*
1104	 * 2) If target doesn't exist, link the target
1105	 *    to the source and unlink the source.
1106	 *    Otherwise, rewrite the target directory
1107	 *    entry to reference the source inode and
1108	 *    expunge the original entry's existence.
1109	 */
1110	if (xp == NULL) {
1111		if (dp->i_dev != ip->i_dev)
1112			panic("ufs_rename: EXDEV");
1113		/*
1114		 * Account for ".." in new directory.
1115		 * When source and destination have the same
1116		 * parent we don't fool with the link count.
1117		 */
1118		if (doingdirectory && newparent) {
1119			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1120				error = EMLINK;
1121				goto bad;
1122			}
1123			dp->i_effnlink++;
1124			dp->i_nlink++;
1125			DIP(dp, i_nlink) = dp->i_nlink;
1126			dp->i_flag |= IN_CHANGE;
1127			if (DOINGSOFTDEP(tdvp))
1128				softdep_change_linkcnt(dp);
1129			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1130						   DOINGASYNC(tdvp)));
1131			if (error)
1132				goto bad;
1133		}
1134		ufs_makedirentry(ip, tcnp, &newdir);
1135		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
1136		if (error) {
1137			if (doingdirectory && newparent) {
1138				dp->i_effnlink--;
1139				dp->i_nlink--;
1140				DIP(dp, i_nlink) = dp->i_nlink;
1141				dp->i_flag |= IN_CHANGE;
1142				if (DOINGSOFTDEP(tdvp))
1143					softdep_change_linkcnt(dp);
1144				(void)UFS_UPDATE(tdvp, 1);
1145			}
1146			goto bad;
1147		}
1148		VN_KNOTE(tdvp, NOTE_WRITE);
1149		vput(tdvp);
1150	} else {
1151		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1152			panic("ufs_rename: EXDEV");
1153		/*
1154		 * Short circuit rename(foo, foo).
1155		 */
1156		if (xp->i_number == ip->i_number)
1157			panic("ufs_rename: same file");
1158		/*
1159		 * If the parent directory is "sticky", then the caller
1160		 * must possess VADMIN for the parent directory, or the
1161		 * destination of the rename.  This implements append-only
1162		 * directories.
1163		 */
1164		if ((dp->i_mode & S_ISTXT) &&
1165		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
1166		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
1167			error = EPERM;
1168			goto bad;
1169		}
1170		/*
1171		 * Target must be empty if a directory and have no links
1172		 * to it. Also, ensure source and target are compatible
1173		 * (both directories, or both not directories).
1174		 */
1175		if ((xp->i_mode&IFMT) == IFDIR) {
1176			if ((xp->i_effnlink > 2) ||
1177			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
1178				error = ENOTEMPTY;
1179				goto bad;
1180			}
1181			if (!doingdirectory) {
1182				error = ENOTDIR;
1183				goto bad;
1184			}
1185			cache_purge(tdvp);
1186		} else if (doingdirectory) {
1187			error = EISDIR;
1188			goto bad;
1189		}
1190		error = ufs_dirrewrite(dp, xp, ip->i_number,
1191		    IFTODT(ip->i_mode),
1192		    (doingdirectory && newparent) ? newparent : doingdirectory);
1193		if (error)
1194			goto bad;
1195		if (doingdirectory) {
1196			if (!newparent) {
1197				dp->i_effnlink--;
1198				if (DOINGSOFTDEP(tdvp))
1199					softdep_change_linkcnt(dp);
1200			}
1201			xp->i_effnlink--;
1202			if (DOINGSOFTDEP(tvp))
1203				softdep_change_linkcnt(xp);
1204		}
1205		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1206			/*
1207			 * Truncate inode. The only stuff left in the directory
1208			 * is "." and "..". The "." reference is inconsequential
1209			 * since we are quashing it. We have removed the "."
1210			 * reference and the reference in the parent directory,
1211			 * but there may be other hard links. The soft
1212			 * dependency code will arrange to do these operations
1213			 * after the parent directory entry has been deleted on
1214			 * disk, so when running with that code we avoid doing
1215			 * them now.
1216			 */
1217			if (!newparent) {
1218				dp->i_nlink--;
1219				DIP(dp, i_nlink) = dp->i_nlink;
1220				dp->i_flag |= IN_CHANGE;
1221			}
1222			xp->i_nlink--;
1223			DIP(xp, i_nlink) = xp->i_nlink;
1224			xp->i_flag |= IN_CHANGE;
1225			ioflag = IO_NORMAL;
1226			if (DOINGASYNC(tvp))
1227				ioflag |= IO_SYNC;
1228			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
1229			    tcnp->cn_cred, tcnp->cn_thread)) != 0)
1230				goto bad;
1231		}
1232		VN_KNOTE(tdvp, NOTE_WRITE);
1233		vput(tdvp);
1234		VN_KNOTE(tvp, NOTE_DELETE);
1235		vput(tvp);
1236		xp = NULL;
1237	}
1238
1239	/*
1240	 * 3) Unlink the source.
1241	 */
1242	fcnp->cn_flags &= ~MODMASK;
1243	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1244	if ((fcnp->cn_flags & SAVESTART) == 0)
1245		panic("ufs_rename: lost from startdir");
1246	VREF(fdvp);
1247	error = relookup(fdvp, &fvp, fcnp);
1248	if (error == 0)
1249		vrele(fdvp);
1250	if (fvp != NULL) {
1251		xp = VTOI(fvp);
1252		dp = VTOI(fdvp);
1253	} else {
1254		/*
1255		 * From name has disappeared.  IN_RENAME is not sufficient
1256		 * to protect against directory races due to timing windows,
1257		 * so we have to remove the panic.  XXX the only real way
1258		 * to solve this issue is at a much higher level.  By the
1259		 * time we hit ufs_rename() it's too late.
1260		 */
1261#if 0
1262		if (doingdirectory)
1263			panic("ufs_rename: lost dir entry");
1264#endif
1265		vrele(ap->a_fvp);
1266		return (0);
1267	}
1268	/*
1269	 * Ensure that the directory entry still exists and has not
1270	 * changed while the new name has been entered. If the source is
1271	 * a file then the entry may have been unlinked or renamed. In
1272	 * either case there is no further work to be done. If the source
1273	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
1274	 * flag ensures that it cannot be moved by another rename or removed
1275	 * by a rmdir.
1276	 */
1277	if (xp != ip) {
1278		/*
1279		 * From name resolves to a different inode.  IN_RENAME is
1280		 * not sufficient protection against timing window races
1281		 * so we can't panic here.  XXX the only real way
1282		 * to solve this issue is at a much higher level.  By the
1283		 * time we hit ufs_rename() it's too late.
1284		 */
1285#if 0
1286		if (doingdirectory)
1287			panic("ufs_rename: lost dir entry");
1288#endif
1289	} else {
1290		/*
1291		 * If the source is a directory with a
1292		 * new parent, the link count of the old
1293		 * parent directory must be decremented
1294		 * and ".." set to point to the new parent.
1295		 */
1296		if (doingdirectory && newparent) {
1297			xp->i_offset = mastertemplate.dot_reclen;
1298			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
1299			cache_purge(fdvp);
1300		}
1301		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
1302		xp->i_flag &= ~IN_RENAME;
1303	}
1304	VN_KNOTE(fvp, NOTE_RENAME);
1305	if (dp)
1306		vput(fdvp);
1307	if (xp)
1308		vput(fvp);
1309	vrele(ap->a_fvp);
1310	return (error);
1311
1312bad:
1313	if (xp)
1314		vput(ITOV(xp));
1315	vput(ITOV(dp));
1316out:
1317	if (doingdirectory)
1318		ip->i_flag &= ~IN_RENAME;
1319	if (vn_lock(fvp, LK_EXCLUSIVE, td) == 0) {
1320		ip->i_effnlink--;
1321		ip->i_nlink--;
1322		DIP(ip, i_nlink) = ip->i_nlink;
1323		ip->i_flag |= IN_CHANGE;
1324		ip->i_flag &= ~IN_RENAME;
1325		if (DOINGSOFTDEP(fvp))
1326			softdep_change_linkcnt(ip);
1327		vput(fvp);
1328	} else
1329		vrele(fvp);
1330	return (error);
1331}
1332
1333/*
1334 * Mkdir system call
1335 */
1336static int
1337ufs_mkdir(ap)
1338	struct vop_mkdir_args /* {
1339		struct vnode *a_dvp;
1340		struct vnode **a_vpp;
1341		struct componentname *a_cnp;
1342		struct vattr *a_vap;
1343	} */ *ap;
1344{
1345	struct vnode *dvp = ap->a_dvp;
1346	struct vattr *vap = ap->a_vap;
1347	struct componentname *cnp = ap->a_cnp;
1348	struct inode *ip, *dp;
1349	struct vnode *tvp;
1350	struct buf *bp;
1351	struct dirtemplate dirtemplate, *dtp;
1352	struct direct newdir;
1353#ifdef UFS_ACL
1354	struct acl *acl, *dacl;
1355#endif
1356	int error, dmode;
1357	long blkoff;
1358
1359#ifdef DIAGNOSTIC
1360	if ((cnp->cn_flags & HASBUF) == 0)
1361		panic("ufs_mkdir: no name");
1362#endif
1363	dp = VTOI(dvp);
1364	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1365		error = EMLINK;
1366		goto out;
1367	}
1368	dmode = vap->va_mode & 0777;
1369	dmode |= IFDIR;
1370	/*
1371	 * Must simulate part of ufs_makeinode here to acquire the inode,
1372	 * but not have it entered in the parent directory. The entry is
1373	 * made later after writing "." and ".." entries.
1374	 */
1375	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1376	if (error)
1377		goto out;
1378	ip = VTOI(tvp);
1379	ip->i_gid = dp->i_gid;
1380	DIP(ip, i_gid) = dp->i_gid;
1381#ifdef SUIDDIR
1382	{
1383#ifdef QUOTA
1384		struct ucred ucred, *ucp;
1385		ucp = cnp->cn_cred;
1386#endif
1387		/*
1388		 * If we are hacking owners here, (only do this where told to)
1389		 * and we are not giving it TO root, (would subvert quotas)
1390		 * then go ahead and give it to the other user.
1391		 * The new directory also inherits the SUID bit.
1392		 * If user's UID and dir UID are the same,
1393		 * 'give it away' so that the SUID is still forced on.
1394		 */
1395		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1396		    (dp->i_mode & ISUID) && dp->i_uid) {
1397			dmode |= ISUID;
1398			ip->i_uid = dp->i_uid;
1399			DIP(ip, i_uid) = dp->i_uid;
1400#ifdef QUOTA
1401			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1402				/*
1403				 * Make sure the correct user gets charged
1404				 * for the space.
1405				 * Make a dummy credential for the victim.
1406				 * XXX This seems to never be accessed out of
1407				 * our context so a stack variable is ok.
1408				 */
1409				ucred.cr_ref = 1;
1410				ucred.cr_uid = ip->i_uid;
1411				ucred.cr_ngroups = 1;
1412				ucred.cr_groups[0] = dp->i_gid;
1413				ucp = &ucred;
1414			}
1415#endif
1416		} else {
1417			ip->i_uid = cnp->cn_cred->cr_uid;
1418			DIP(ip, i_uid) = ip->i_uid;
1419		}
1420#ifdef QUOTA
1421		if ((error = getinoquota(ip)) ||
1422	    	    (error = chkiq(ip, 1, ucp, 0))) {
1423			UFS_VFREE(tvp, ip->i_number, dmode);
1424			vput(tvp);
1425			return (error);
1426		}
1427#endif
1428	}
1429#else	/* !SUIDDIR */
1430	ip->i_uid = cnp->cn_cred->cr_uid;
1431	DIP(ip, i_uid) = ip->i_uid;
1432#ifdef QUOTA
1433	if ((error = getinoquota(ip)) ||
1434	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1435		UFS_VFREE(tvp, ip->i_number, dmode);
1436		vput(tvp);
1437		return (error);
1438	}
1439#endif
1440#endif	/* !SUIDDIR */
1441	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1442#ifdef UFS_ACL
1443	acl = dacl = NULL;
1444	if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) {
1445		MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, 0);
1446		MALLOC(dacl, struct acl *, sizeof(*dacl), M_ACL, 0);
1447
1448		/*
1449		 * Retrieve default ACL from parent, if any.
1450		 */
1451		error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred,
1452		    cnp->cn_thread);
1453		switch (error) {
1454		case 0:
1455			/*
1456			 * Retrieved a default ACL, so merge mode and ACL if
1457			 * necessary.
1458			 */
1459			if (acl->acl_cnt != 0) {
1460				/*
1461				 * Two possible ways for default ACL to not
1462				 * be present.  First, the EA can be
1463				 * undefined, or second, the default ACL can
1464				 * be blank.  If it's blank, fall through to
1465				 * the it's not defined case.
1466				 */
1467				ip->i_mode = dmode;
1468				DIP(ip, i_mode) = dmode;
1469				*dacl = *acl;
1470				ufs_sync_acl_from_inode(ip, acl);
1471				break;
1472			}
1473			/* FALLTHROUGH */
1474
1475		case EOPNOTSUPP:
1476			/*
1477			 * Just use the mode as-is.
1478			 */
1479			ip->i_mode = dmode;
1480			DIP(ip, i_mode) = dmode;
1481			FREE(acl, M_ACL);
1482			FREE(dacl, M_ACL);
1483			dacl = acl = NULL;
1484			break;
1485
1486		default:
1487			UFS_VFREE(tvp, ip->i_number, dmode);
1488			vput(tvp);
1489			FREE(acl, M_ACL);
1490				FREE(dacl, M_ACL);
1491			return (error);
1492		}
1493	} else {
1494#endif /* !UFS_ACL */
1495		ip->i_mode = dmode;
1496		DIP(ip, i_mode) = dmode;
1497#ifdef UFS_ACL
1498	}
1499#endif
1500	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1501	ip->i_effnlink = 2;
1502	ip->i_nlink = 2;
1503	DIP(ip, i_nlink) = 2;
1504	if (DOINGSOFTDEP(tvp))
1505		softdep_change_linkcnt(ip);
1506	if (cnp->cn_flags & ISWHITEOUT) {
1507		ip->i_flags |= UF_OPAQUE;
1508		DIP(ip, i_flags) = ip->i_flags;
1509	}
1510
1511	/*
1512	 * Bump link count in parent directory to reflect work done below.
1513	 * Should be done before reference is created so cleanup is
1514	 * possible if we crash.
1515	 */
1516	dp->i_effnlink++;
1517	dp->i_nlink++;
1518	DIP(dp, i_nlink) = dp->i_nlink;
1519	dp->i_flag |= IN_CHANGE;
1520	if (DOINGSOFTDEP(dvp))
1521		softdep_change_linkcnt(dp);
1522	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1523	if (error)
1524		goto bad;
1525#ifdef MAC
1526	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
1527		error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount,
1528		    dvp, tvp, cnp);
1529		if (error)
1530			goto bad;
1531	}
1532#endif
1533#ifdef UFS_ACL
1534	if (acl != NULL) {
1535		/*
1536		 * XXX: If we abort now, will Soft Updates notify the extattr
1537		 * code that the EAs for the file need to be released?
1538		 */
1539		error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred,
1540		    cnp->cn_thread);
1541		if (error == 0)
1542			error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl,
1543			    cnp->cn_cred, cnp->cn_thread);
1544		switch (error) {
1545		case 0:
1546			break;
1547
1548		case EOPNOTSUPP:
1549			/*
1550			 * XXX: This should not happen, as EOPNOTSUPP above
1551			 * was supposed to free acl.
1552			 */
1553			printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
1554			/*
1555			panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
1556			 */
1557			break;
1558
1559		default:
1560			FREE(acl, M_ACL);
1561			FREE(dacl, M_ACL);
1562			goto bad;
1563		}
1564		FREE(acl, M_ACL);
1565		FREE(dacl, M_ACL);
1566	}
1567#endif /* !UFS_ACL */
1568
1569	/*
1570	 * Initialize directory with "." and ".." from static template.
1571	 */
1572	if (dvp->v_mount->mnt_maxsymlinklen > 0
1573	)
1574		dtp = &mastertemplate;
1575	else
1576		dtp = (struct dirtemplate *)&omastertemplate;
1577	dirtemplate = *dtp;
1578	dirtemplate.dot_ino = ip->i_number;
1579	dirtemplate.dotdot_ino = dp->i_number;
1580	if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1581	    BA_CLRBUF, &bp)) != 0)
1582		goto bad;
1583	ip->i_size = DIRBLKSIZ;
1584	DIP(ip, i_size) = DIRBLKSIZ;
1585	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1586	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1587	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1588	if (DOINGSOFTDEP(tvp)) {
1589		/*
1590		 * Ensure that the entire newly allocated block is a
1591		 * valid directory so that future growth within the
1592		 * block does not have to ensure that the block is
1593		 * written before the inode.
1594		 */
1595		blkoff = DIRBLKSIZ;
1596		while (blkoff < bp->b_bcount) {
1597			((struct direct *)
1598			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1599			blkoff += DIRBLKSIZ;
1600		}
1601	}
1602	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1603				       DOINGASYNC(tvp)))) != 0) {
1604		(void)BUF_WRITE(bp);
1605		goto bad;
1606	}
1607	/*
1608	 * Directory set up, now install its entry in the parent directory.
1609	 *
1610	 * If we are not doing soft dependencies, then we must write out the
1611	 * buffer containing the new directory body before entering the new
1612	 * name in the parent. If we are doing soft dependencies, then the
1613	 * buffer containing the new directory body will be passed to and
1614	 * released in the soft dependency code after the code has attached
1615	 * an appropriate ordering dependency to the buffer which ensures that
1616	 * the buffer is written before the new name is written in the parent.
1617	 */
1618	if (DOINGASYNC(dvp))
1619		bdwrite(bp);
1620	else if (!DOINGSOFTDEP(dvp) && ((error = BUF_WRITE(bp))))
1621		goto bad;
1622	ufs_makedirentry(ip, cnp, &newdir);
1623	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
1624
1625bad:
1626	if (error == 0) {
1627		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1628		*ap->a_vpp = tvp;
1629	} else {
1630		dp->i_effnlink--;
1631		dp->i_nlink--;
1632		DIP(dp, i_nlink) = dp->i_nlink;
1633		dp->i_flag |= IN_CHANGE;
1634		if (DOINGSOFTDEP(dvp))
1635			softdep_change_linkcnt(dp);
1636		/*
1637		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1638		 * do this for us because we set the link count to 0.
1639		 */
1640		ip->i_effnlink = 0;
1641		ip->i_nlink = 0;
1642		DIP(ip, i_nlink) = 0;
1643		ip->i_flag |= IN_CHANGE;
1644		if (DOINGSOFTDEP(tvp))
1645			softdep_change_linkcnt(ip);
1646		vput(tvp);
1647	}
1648out:
1649	return (error);
1650}
1651
1652/*
1653 * Rmdir system call.
1654 */
1655static int
1656ufs_rmdir(ap)
1657	struct vop_rmdir_args /* {
1658		struct vnode *a_dvp;
1659		struct vnode *a_vp;
1660		struct componentname *a_cnp;
1661	} */ *ap;
1662{
1663	struct vnode *vp = ap->a_vp;
1664	struct vnode *dvp = ap->a_dvp;
1665	struct componentname *cnp = ap->a_cnp;
1666	struct inode *ip, *dp;
1667	int error, ioflag;
1668
1669	ip = VTOI(vp);
1670	dp = VTOI(dvp);
1671
1672	/*
1673	 * Do not remove a directory that is in the process of being renamed.
1674	 * Verify the directory is empty (and valid). Rmdir ".." will not be
1675	 * valid since ".." will contain a reference to the current directory
1676	 * and thus be non-empty. Do not allow the removal of mounted on
1677	 * directories (this can happen when an NFS exported filesystem
1678	 * tries to remove a locally mounted on directory).
1679	 */
1680	error = 0;
1681	if (ip->i_flag & IN_RENAME) {
1682		error = EINVAL;
1683		goto out;
1684	}
1685	if (ip->i_effnlink != 2 ||
1686	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1687		error = ENOTEMPTY;
1688		goto out;
1689	}
1690	if ((dp->i_flags & APPEND)
1691	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1692		error = EPERM;
1693		goto out;
1694	}
1695	if (vp->v_mountedhere != 0) {
1696		error = EINVAL;
1697		goto out;
1698	}
1699	/*
1700	 * Delete reference to directory before purging
1701	 * inode.  If we crash in between, the directory
1702	 * will be reattached to lost+found,
1703	 */
1704	dp->i_effnlink--;
1705	ip->i_effnlink--;
1706	if (DOINGSOFTDEP(vp)) {
1707		softdep_change_linkcnt(dp);
1708		softdep_change_linkcnt(ip);
1709	}
1710	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
1711	if (error) {
1712		dp->i_effnlink++;
1713		ip->i_effnlink++;
1714		if (DOINGSOFTDEP(vp)) {
1715			softdep_change_linkcnt(dp);
1716			softdep_change_linkcnt(ip);
1717		}
1718		goto out;
1719	}
1720	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1721	cache_purge(dvp);
1722	/*
1723	 * Truncate inode. The only stuff left in the directory is "." and
1724	 * "..". The "." reference is inconsequential since we are quashing
1725	 * it. The soft dependency code will arrange to do these operations
1726	 * after the parent directory entry has been deleted on disk, so
1727	 * when running with that code we avoid doing them now.
1728	 */
1729	if (!DOINGSOFTDEP(vp)) {
1730		dp->i_nlink--;
1731		DIP(dp, i_nlink) = dp->i_nlink;
1732		dp->i_flag |= IN_CHANGE;
1733		ip->i_nlink--;
1734		DIP(ip, i_nlink) = ip->i_nlink;
1735		ip->i_flag |= IN_CHANGE;
1736		ioflag = IO_NORMAL;
1737		if (DOINGASYNC(vp))
1738			ioflag |= IO_SYNC;
1739		error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
1740		    cnp->cn_thread);
1741	}
1742	cache_purge(vp);
1743#ifdef UFS_DIRHASH
1744	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
1745	if (ip->i_dirhash != NULL)
1746		ufsdirhash_free(ip);
1747#endif
1748out:
1749	VN_KNOTE(vp, NOTE_DELETE);
1750	return (error);
1751}
1752
1753/*
1754 * symlink -- make a symbolic link
1755 */
1756static int
1757ufs_symlink(ap)
1758	struct vop_symlink_args /* {
1759		struct vnode *a_dvp;
1760		struct vnode **a_vpp;
1761		struct componentname *a_cnp;
1762		struct vattr *a_vap;
1763		char *a_target;
1764	} */ *ap;
1765{
1766	struct vnode *vp, **vpp = ap->a_vpp;
1767	struct inode *ip;
1768	int len, error;
1769
1770	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1771	    vpp, ap->a_cnp);
1772	if (error)
1773		return (error);
1774	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1775	vp = *vpp;
1776	len = strlen(ap->a_target);
1777	if (len < vp->v_mount->mnt_maxsymlinklen) {
1778		ip = VTOI(vp);
1779		bcopy(ap->a_target, SHORTLINK(ip), len);
1780		ip->i_size = len;
1781		DIP(ip, i_size) = len;
1782		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1783	} else
1784		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1785		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1786		    ap->a_cnp->cn_cred, NOCRED, (int *)0, (struct thread *)0);
1787	if (error)
1788		vput(vp);
1789	return (error);
1790}
1791
1792/*
1793 * Vnode op for reading directories.
1794 *
1795 * The routine below assumes that the on-disk format of a directory
1796 * is the same as that defined by <sys/dirent.h>. If the on-disk
1797 * format changes, then it will be necessary to do a conversion
1798 * from the on-disk format that read returns to the format defined
1799 * by <sys/dirent.h>.
1800 */
1801int
1802ufs_readdir(ap)
1803	struct vop_readdir_args /* {
1804		struct vnode *a_vp;
1805		struct uio *a_uio;
1806		struct ucred *a_cred;
1807		int *a_eofflag;
1808		int *ncookies;
1809		u_long **a_cookies;
1810	} */ *ap;
1811{
1812	struct uio *uio = ap->a_uio;
1813	int error;
1814	size_t count, lost;
1815	off_t off;
1816
1817	if (ap->a_ncookies != NULL)
1818		/*
1819		 * Ensure that the block is aligned.  The caller can use
1820		 * the cookies to determine where in the block to start.
1821		 */
1822		uio->uio_offset &= ~(DIRBLKSIZ - 1);
1823	off = uio->uio_offset;
1824	count = uio->uio_resid;
1825	/* Make sure we don't return partial entries. */
1826	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
1827		return (EINVAL);
1828	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
1829	lost = uio->uio_resid - count;
1830	uio->uio_resid = count;
1831	uio->uio_iov->iov_len = count;
1832#	if (BYTE_ORDER == LITTLE_ENDIAN)
1833		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
1834			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1835		} else {
1836			struct dirent *dp, *edp;
1837			struct uio auio;
1838			struct iovec aiov;
1839			caddr_t dirbuf;
1840			int readcnt;
1841			u_char tmp;
1842
1843			auio = *uio;
1844			auio.uio_iov = &aiov;
1845			auio.uio_iovcnt = 1;
1846			auio.uio_segflg = UIO_SYSSPACE;
1847			aiov.iov_len = count;
1848			MALLOC(dirbuf, caddr_t, count, M_TEMP, 0);
1849			aiov.iov_base = dirbuf;
1850			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
1851			if (error == 0) {
1852				readcnt = count - auio.uio_resid;
1853				edp = (struct dirent *)&dirbuf[readcnt];
1854				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
1855					tmp = dp->d_namlen;
1856					dp->d_namlen = dp->d_type;
1857					dp->d_type = tmp;
1858					if (dp->d_reclen > 0) {
1859						dp = (struct dirent *)
1860						    ((char *)dp + dp->d_reclen);
1861					} else {
1862						error = EIO;
1863						break;
1864					}
1865				}
1866				if (dp >= edp)
1867					error = uiomove(dirbuf, readcnt, uio);
1868			}
1869			FREE(dirbuf, M_TEMP);
1870		}
1871#	else
1872		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1873#	endif
1874	if (!error && ap->a_ncookies != NULL) {
1875		struct dirent* dpStart;
1876		struct dirent* dpEnd;
1877		struct dirent* dp;
1878		int ncookies;
1879		u_long *cookies;
1880		u_long *cookiep;
1881
1882		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1883			panic("ufs_readdir: unexpected uio from NFS server");
1884		dpStart = (struct dirent *)
1885		    ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
1886		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
1887		for (dp = dpStart, ncookies = 0;
1888		     dp < dpEnd;
1889		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
1890			ncookies++;
1891		MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
1892		    0);
1893		for (dp = dpStart, cookiep = cookies;
1894		     dp < dpEnd;
1895		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
1896			off += dp->d_reclen;
1897			*cookiep++ = (u_long) off;
1898		}
1899		*ap->a_ncookies = ncookies;
1900		*ap->a_cookies = cookies;
1901	}
1902	uio->uio_resid += lost;
1903	if (ap->a_eofflag)
1904	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
1905	return (error);
1906}
1907
1908/*
1909 * Return target name of a symbolic link
1910 */
1911static int
1912ufs_readlink(ap)
1913	struct vop_readlink_args /* {
1914		struct vnode *a_vp;
1915		struct uio *a_uio;
1916		struct ucred *a_cred;
1917	} */ *ap;
1918{
1919	struct vnode *vp = ap->a_vp;
1920	struct inode *ip = VTOI(vp);
1921	doff_t isize;
1922
1923	isize = ip->i_size;
1924	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1925	    DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
1926		uiomove(SHORTLINK(ip), isize, ap->a_uio);
1927		return (0);
1928	}
1929	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1930}
1931
1932/*
1933 * Calculate the logical to physical mapping if not done already,
1934 * then call the device strategy routine.
1935 *
1936 * In order to be able to swap to a file, the ufs_bmaparray() operation may not
1937 * deadlock on memory.  See ufs_bmap() for details.
1938 */
1939static int
1940ufs_strategy(ap)
1941	struct vop_strategy_args /* {
1942		struct vnode *a_vp;
1943		struct buf *a_bp;
1944	} */ *ap;
1945{
1946	struct buf *bp = ap->a_bp;
1947	struct vnode *vp = ap->a_vp;
1948	struct inode *ip;
1949	ufs2_daddr_t blkno;
1950	int error;
1951
1952	ip = VTOI(vp);
1953	if (bp->b_blkno == bp->b_lblkno) {
1954		error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
1955		bp->b_blkno = blkno;
1956		if (error) {
1957			bp->b_error = error;
1958			bp->b_ioflags |= BIO_ERROR;
1959			bufdone(bp);
1960			return (error);
1961		}
1962		if ((long)bp->b_blkno == -1)
1963			vfs_bio_clrbuf(bp);
1964	}
1965	if ((long)bp->b_blkno == -1) {
1966		bufdone(bp);
1967		return (0);
1968	}
1969	vp = ip->i_devvp;
1970	bp->b_dev = vp->v_rdev;
1971	VOP_SPECSTRATEGY(vp, bp);
1972	return (0);
1973}
1974
1975/*
1976 * Print out the contents of an inode.
1977 */
1978static int
1979ufs_print(ap)
1980	struct vop_print_args /* {
1981		struct vnode *a_vp;
1982	} */ *ap;
1983{
1984	struct vnode *vp = ap->a_vp;
1985	struct inode *ip = VTOI(vp);
1986
1987	printf("ino %lu, on dev %s (%d, %d)", (u_long)ip->i_number,
1988	    devtoname(ip->i_dev), major(ip->i_dev), minor(ip->i_dev));
1989	if (vp->v_type == VFIFO)
1990		fifo_printinfo(vp);
1991	printf("\n");
1992	return (0);
1993}
1994
1995/*
1996 * Read wrapper for special devices.
1997 */
1998static int
1999ufsspec_read(ap)
2000	struct vop_read_args /* {
2001		struct vnode *a_vp;
2002		struct uio *a_uio;
2003		int  a_ioflag;
2004		struct ucred *a_cred;
2005	} */ *ap;
2006{
2007	int error, resid;
2008	struct inode *ip;
2009	struct uio *uio;
2010
2011	uio = ap->a_uio;
2012	resid = uio->uio_resid;
2013	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap);
2014	/*
2015	 * The inode may have been revoked during the call, so it must not
2016	 * be accessed blindly here or in the other wrapper functions.
2017	 */
2018	ip = VTOI(ap->a_vp);
2019	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
2020		ip->i_flag |= IN_ACCESS;
2021	return (error);
2022}
2023
2024/*
2025 * Write wrapper for special devices.
2026 */
2027static int
2028ufsspec_write(ap)
2029	struct vop_write_args /* {
2030		struct vnode *a_vp;
2031		struct uio *a_uio;
2032		int  a_ioflag;
2033		struct ucred *a_cred;
2034	} */ *ap;
2035{
2036	int error, resid;
2037	struct inode *ip;
2038	struct uio *uio;
2039
2040	uio = ap->a_uio;
2041	resid = uio->uio_resid;
2042	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap);
2043	ip = VTOI(ap->a_vp);
2044	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
2045		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
2046	return (error);
2047}
2048
2049/*
2050 * Close wrapper for special devices.
2051 *
2052 * Update the times on the inode then do device close.
2053 */
2054static int
2055ufsspec_close(ap)
2056	struct vop_close_args /* {
2057		struct vnode *a_vp;
2058		int  a_fflag;
2059		struct ucred *a_cred;
2060		struct thread *a_td;
2061	} */ *ap;
2062{
2063	struct vnode *vp = ap->a_vp;
2064
2065	VI_LOCK(vp);
2066	if (vp->v_usecount > 1)
2067		ufs_itimes(vp);
2068	VI_UNLOCK(vp);
2069	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
2070}
2071
2072/*
2073 * Read wrapper for fifos.
2074 */
2075static int
2076ufsfifo_read(ap)
2077	struct vop_read_args /* {
2078		struct vnode *a_vp;
2079		struct uio *a_uio;
2080		int  a_ioflag;
2081		struct ucred *a_cred;
2082	} */ *ap;
2083{
2084	int error, resid;
2085	struct inode *ip;
2086	struct uio *uio;
2087
2088	uio = ap->a_uio;
2089	resid = uio->uio_resid;
2090	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap);
2091	ip = VTOI(ap->a_vp);
2092	if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
2093	    (uio->uio_resid != resid || (error == 0 && resid != 0)))
2094		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
2095	return (error);
2096}
2097
2098/*
2099 * Write wrapper for fifos.
2100 */
2101static int
2102ufsfifo_write(ap)
2103	struct vop_write_args /* {
2104		struct vnode *a_vp;
2105		struct uio *a_uio;
2106		int  a_ioflag;
2107		struct ucred *a_cred;
2108	} */ *ap;
2109{
2110	int error, resid;
2111	struct inode *ip;
2112	struct uio *uio;
2113
2114	uio = ap->a_uio;
2115	resid = uio->uio_resid;
2116	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap);
2117	ip = VTOI(ap->a_vp);
2118	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
2119		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
2120	return (error);
2121}
2122
2123/*
2124 * Close wrapper for fifos.
2125 *
2126 * Update the times on the inode then do device close.
2127 */
2128static int
2129ufsfifo_close(ap)
2130	struct vop_close_args /* {
2131		struct vnode *a_vp;
2132		int  a_fflag;
2133		struct ucred *a_cred;
2134		struct thread *a_td;
2135	} */ *ap;
2136{
2137	struct vnode *vp = ap->a_vp;
2138
2139	VI_LOCK(vp);
2140	if (vp->v_usecount > 1)
2141		ufs_itimes(vp);
2142	VI_UNLOCK(vp);
2143	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
2144}
2145
2146/*
2147 * Kqfilter wrapper for fifos.
2148 *
2149 * Fall through to ufs kqfilter routines if needed
2150 */
2151static int
2152ufsfifo_kqfilter(ap)
2153	struct vop_kqfilter_args *ap;
2154{
2155	int error;
2156
2157	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilter), ap);
2158	if (error)
2159		error = ufs_kqfilter(ap);
2160	return (error);
2161}
2162
2163/*
2164 * Return POSIX pathconf information applicable to ufs filesystems.
2165 */
2166static int
2167ufs_pathconf(ap)
2168	struct vop_pathconf_args /* {
2169		struct vnode *a_vp;
2170		int a_name;
2171		int *a_retval;
2172	} */ *ap;
2173{
2174	int error;
2175
2176	error = 0;
2177	switch (ap->a_name) {
2178	case _PC_LINK_MAX:
2179		*ap->a_retval = LINK_MAX;
2180		break;
2181	case _PC_NAME_MAX:
2182		*ap->a_retval = NAME_MAX;
2183		break;
2184	case _PC_PATH_MAX:
2185		*ap->a_retval = PATH_MAX;
2186		break;
2187	case _PC_PIPE_BUF:
2188		*ap->a_retval = PIPE_BUF;
2189		break;
2190	case _PC_CHOWN_RESTRICTED:
2191		*ap->a_retval = 1;
2192		break;
2193	case _PC_NO_TRUNC:
2194		*ap->a_retval = 1;
2195		break;
2196	case _PC_ACL_EXTENDED:
2197#ifdef UFS_ACL
2198		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
2199			*ap->a_retval = 1;
2200		else
2201			*ap->a_retval = 0;
2202#else
2203		*ap->a_retval = 0;
2204#endif
2205		break;
2206	case _PC_ACL_PATH_MAX:
2207#ifdef UFS_ACL
2208		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
2209			*ap->a_retval = ACL_MAX_ENTRIES;
2210		else
2211			*ap->a_retval = 3;
2212#else
2213		*ap->a_retval = 3;
2214#endif
2215		break;
2216	case _PC_MAC_PRESENT:
2217#ifdef MAC
2218		if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
2219			*ap->a_retval = 1;
2220		else
2221			*ap->a_retval = 0;
2222#else
2223		*ap->a_retval = 0;
2224#endif
2225		break;
2226	case _PC_ASYNC_IO:
2227		/* _PC_ASYNC_IO should have been handled by upper layers. */
2228		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
2229		error = EINVAL;
2230		break;
2231	case _PC_PRIO_IO:
2232		*ap->a_retval = 0;
2233		break;
2234	case _PC_SYNC_IO:
2235		*ap->a_retval = 0;
2236		break;
2237	case _PC_ALLOC_SIZE_MIN:
2238		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2239		break;
2240	case _PC_FILESIZEBITS:
2241		*ap->a_retval = 64;
2242		break;
2243	case _PC_REC_INCR_XFER_SIZE:
2244		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2245		break;
2246	case _PC_REC_MAX_XFER_SIZE:
2247		*ap->a_retval = -1; /* means ``unlimited'' */
2248		break;
2249	case _PC_REC_MIN_XFER_SIZE:
2250		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2251		break;
2252	case _PC_REC_XFER_ALIGN:
2253		*ap->a_retval = PAGE_SIZE;
2254		break;
2255	case _PC_SYMLINK_MAX:
2256		*ap->a_retval = MAXPATHLEN;
2257		break;
2258
2259	default:
2260		error = EINVAL;
2261		break;
2262	}
2263	return (error);
2264}
2265
2266/*
2267 * Advisory record locking support
2268 */
2269static int
2270ufs_advlock(ap)
2271	struct vop_advlock_args /* {
2272		struct vnode *a_vp;
2273		caddr_t  a_id;
2274		int  a_op;
2275		struct flock *a_fl;
2276		int  a_flags;
2277	} */ *ap;
2278{
2279	struct inode *ip = VTOI(ap->a_vp);
2280
2281	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
2282}
2283
2284/*
2285 * Initialize the vnode associated with a new inode, handle aliased
2286 * vnodes.
2287 */
2288int
2289ufs_vinit(mntp, specops, fifoops, vpp)
2290	struct mount *mntp;
2291	vop_t **specops;
2292	vop_t **fifoops;
2293	struct vnode **vpp;
2294{
2295	struct inode *ip;
2296	struct vnode *vp;
2297	struct timeval tv;
2298
2299	vp = *vpp;
2300	ip = VTOI(vp);
2301	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2302	case VCHR:
2303	case VBLK:
2304		vp->v_op = specops;
2305		vp = addaliasu(vp, DIP(ip, i_rdev));
2306		ip->i_vnode = vp;
2307		break;
2308	case VFIFO:
2309		vp->v_op = fifoops;
2310		break;
2311	default:
2312		break;
2313
2314	}
2315	ASSERT_VOP_LOCKED(vp, "ufs_vinit");
2316	if (ip->i_number == ROOTINO)
2317		vp->v_vflag |= VV_ROOT;
2318	/*
2319	 * Initialize modrev times
2320	 */
2321	getmicrouptime(&tv);
2322	SETHIGH(ip->i_modrev, tv.tv_sec);
2323	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
2324	*vpp = vp;
2325	return (0);
2326}
2327
2328/*
2329 * Allocate a new inode.
2330 * Vnode dvp must be locked.
2331 */
2332static int
2333ufs_makeinode(mode, dvp, vpp, cnp)
2334	int mode;
2335	struct vnode *dvp;
2336	struct vnode **vpp;
2337	struct componentname *cnp;
2338{
2339	struct inode *ip, *pdir;
2340	struct direct newdir;
2341	struct vnode *tvp;
2342#ifdef UFS_ACL
2343	struct acl *acl;
2344#endif
2345	int error;
2346
2347	pdir = VTOI(dvp);
2348#ifdef DIAGNOSTIC
2349	if ((cnp->cn_flags & HASBUF) == 0)
2350		panic("ufs_makeinode: no name");
2351#endif
2352	*vpp = NULL;
2353	if ((mode & IFMT) == 0)
2354		mode |= IFREG;
2355
2356	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2357	if (error)
2358		return (error);
2359	ip = VTOI(tvp);
2360	ip->i_gid = pdir->i_gid;
2361	DIP(ip, i_gid) = pdir->i_gid;
2362#ifdef SUIDDIR
2363	{
2364#ifdef QUOTA
2365		struct ucred ucred, *ucp;
2366		ucp = cnp->cn_cred;
2367#endif
2368		/*
2369		 * If we are not the owner of the directory,
2370		 * and we are hacking owners here, (only do this where told to)
2371		 * and we are not giving it TO root, (would subvert quotas)
2372		 * then go ahead and give it to the other user.
2373		 * Note that this drops off the execute bits for security.
2374		 */
2375		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2376		    (pdir->i_mode & ISUID) &&
2377		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2378			ip->i_uid = pdir->i_uid;
2379			DIP(ip, i_uid) = ip->i_uid;
2380			mode &= ~07111;
2381#ifdef QUOTA
2382			/*
2383			 * Make sure the correct user gets charged
2384			 * for the space.
2385			 * Quickly knock up a dummy credential for the victim.
2386			 * XXX This seems to never be accessed out of our
2387			 * context so a stack variable is ok.
2388			 */
2389			ucred.cr_ref = 1;
2390			ucred.cr_uid = ip->i_uid;
2391			ucred.cr_ngroups = 1;
2392			ucred.cr_groups[0] = pdir->i_gid;
2393			ucp = &ucred;
2394#endif
2395		} else {
2396			ip->i_uid = cnp->cn_cred->cr_uid;
2397			DIP(ip, i_uid) = ip->i_uid;
2398		}
2399
2400#ifdef QUOTA
2401		if ((error = getinoquota(ip)) ||
2402	    	    (error = chkiq(ip, 1, ucp, 0))) {
2403			UFS_VFREE(tvp, ip->i_number, mode);
2404			vput(tvp);
2405			return (error);
2406		}
2407#endif
2408	}
2409#else	/* !SUIDDIR */
2410	ip->i_uid = cnp->cn_cred->cr_uid;
2411	DIP(ip, i_uid) = ip->i_uid;
2412#ifdef QUOTA
2413	if ((error = getinoquota(ip)) ||
2414	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2415		UFS_VFREE(tvp, ip->i_number, mode);
2416		vput(tvp);
2417		return (error);
2418	}
2419#endif
2420#endif	/* !SUIDDIR */
2421	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2422#ifdef UFS_ACL
2423	acl = NULL;
2424	if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) {
2425		MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, 0);
2426
2427		/*
2428		 * Retrieve default ACL for parent, if any.
2429		 */
2430		error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred,
2431		    cnp->cn_thread);
2432		switch (error) {
2433		case 0:
2434			/*
2435			 * Retrieved a default ACL, so merge mode and ACL if
2436			 * necessary.
2437			 */
2438			if (acl->acl_cnt != 0) {
2439				/*
2440				 * Two possible ways for default ACL to not
2441				 * be present.  First, the EA can be
2442				 * undefined, or second, the default ACL can
2443				 * be blank.  If it's blank, fall through to
2444				 * the it's not defined case.
2445				 */
2446				ip->i_mode = mode;
2447				DIP(ip, i_mode) = mode;
2448				ufs_sync_acl_from_inode(ip, acl);
2449				break;
2450			}
2451			/* FALLTHROUGH */
2452
2453		case EOPNOTSUPP:
2454			/*
2455			 * Just use the mode as-is.
2456			 */
2457			ip->i_mode = mode;
2458			DIP(ip, i_mode) = mode;
2459			FREE(acl, M_ACL);
2460			acl = NULL;
2461			break;
2462
2463		default:
2464			UFS_VFREE(tvp, ip->i_number, mode);
2465			vput(tvp);
2466			FREE(acl, M_ACL);
2467			acl = NULL;
2468			return (error);
2469		}
2470	} else {
2471#endif
2472		ip->i_mode = mode;
2473		DIP(ip, i_mode) = mode;
2474#ifdef UFS_ACL
2475	}
2476#endif
2477	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2478	ip->i_effnlink = 1;
2479	ip->i_nlink = 1;
2480	DIP(ip, i_nlink) = 1;
2481	if (DOINGSOFTDEP(tvp))
2482		softdep_change_linkcnt(ip);
2483	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2484	    suser_cred(cnp->cn_cred, PRISON_ROOT)) {
2485		ip->i_mode &= ~ISGID;
2486		DIP(ip, i_mode) = ip->i_mode;
2487	}
2488
2489	if (cnp->cn_flags & ISWHITEOUT) {
2490		ip->i_flags |= UF_OPAQUE;
2491		DIP(ip, i_flags) = ip->i_flags;
2492	}
2493
2494	/*
2495	 * Make sure inode goes to disk before directory entry.
2496	 */
2497	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2498	if (error)
2499		goto bad;
2500#ifdef MAC
2501	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
2502		error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount,
2503		    dvp, tvp, cnp);
2504		if (error)
2505			goto bad;
2506	}
2507#endif
2508#ifdef UFS_ACL
2509	if (acl != NULL) {
2510		/*
2511		 * XXX: If we abort now, will Soft Updates notify the extattr
2512		 * code that the EAs for the file need to be released?
2513		 */
2514		error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred,
2515		    cnp->cn_thread);
2516		switch (error) {
2517		case 0:
2518			break;
2519
2520		case EOPNOTSUPP:
2521			/*
2522			 * XXX: This should not happen, as EOPNOTSUPP above was
2523			 * supposed to free acl.
2524			 */
2525			printf("ufs_makeinode: VOP_GETACL() but no "
2526			    "VOP_SETACL()\n");
2527			/* panic("ufs_makeinode: VOP_GETACL() but no "
2528			    "VOP_SETACL()"); */
2529			break;
2530
2531		default:
2532			FREE(acl, M_ACL);
2533			goto bad;
2534		}
2535		FREE(acl, M_ACL);
2536	}
2537#endif /* !UFS_ACL */
2538	ufs_makedirentry(ip, cnp, &newdir);
2539	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
2540	if (error)
2541		goto bad;
2542	*vpp = tvp;
2543	return (0);
2544
2545bad:
2546	/*
2547	 * Write error occurred trying to update the inode
2548	 * or the directory so must deallocate the inode.
2549	 */
2550	ip->i_effnlink = 0;
2551	ip->i_nlink = 0;
2552	DIP(ip, i_nlink) = 0;
2553	ip->i_flag |= IN_CHANGE;
2554	if (DOINGSOFTDEP(tvp))
2555		softdep_change_linkcnt(ip);
2556	vput(tvp);
2557	return (error);
2558}
2559
/*
 * kqueue filter tables installed by ufs_kqfilter() for EVFILT_READ,
 * EVFILT_WRITE and EVFILT_VNODE respectively.  All three share
 * filt_ufsdetach and differ only in the last (event) routine.
 * NOTE(review): the initializers are positional; the leading 1 and the
 * NULL are presumably the "is a file descriptor" flag and an unused
 * attach hook of struct filterops -- confirm against <sys/event.h>.
 */
static struct filterops ufsread_filtops =
	{ 1, NULL, filt_ufsdetach, filt_ufsread };
static struct filterops ufswrite_filtops =
	{ 1, NULL, filt_ufsdetach, filt_ufswrite };
static struct filterops ufsvnode_filtops =
	{ 1, NULL, filt_ufsdetach, filt_ufsvnode };
2566
2567static int
2568ufs_kqfilter(ap)
2569	struct vop_kqfilter_args /* {
2570		struct vnode *a_vp;
2571		struct knote *a_kn;
2572	} */ *ap;
2573{
2574	struct vnode *vp = ap->a_vp;
2575	struct knote *kn = ap->a_kn;
2576
2577	switch (kn->kn_filter) {
2578	case EVFILT_READ:
2579		kn->kn_fop = &ufsread_filtops;
2580		break;
2581	case EVFILT_WRITE:
2582		kn->kn_fop = &ufswrite_filtops;
2583		break;
2584	case EVFILT_VNODE:
2585		kn->kn_fop = &ufsvnode_filtops;
2586		break;
2587	default:
2588		return (1);
2589	}
2590
2591	kn->kn_hook = (caddr_t)vp;
2592
2593	if (vp->v_pollinfo == NULL)
2594		v_addpollinfo(vp);
2595	mtx_lock(&vp->v_pollinfo->vpi_lock);
2596	SLIST_INSERT_HEAD(&vp->v_pollinfo->vpi_selinfo.si_note, kn, kn_selnext);
2597	mtx_unlock(&vp->v_pollinfo->vpi_lock);
2598
2599	return (0);
2600}
2601
2602static void
2603filt_ufsdetach(struct knote *kn)
2604{
2605	struct vnode *vp = (struct vnode *)kn->kn_hook;
2606
2607	KASSERT(vp->v_pollinfo != NULL, ("Mising v_pollinfo"));
2608	mtx_lock(&vp->v_pollinfo->vpi_lock);
2609	SLIST_REMOVE(&vp->v_pollinfo->vpi_selinfo.si_note,
2610	    kn, knote, kn_selnext);
2611	mtx_unlock(&vp->v_pollinfo->vpi_lock);
2612}
2613
2614/*ARGSUSED*/
2615static int
2616filt_ufsread(struct knote *kn, long hint)
2617{
2618	struct vnode *vp = (struct vnode *)kn->kn_hook;
2619	struct inode *ip = VTOI(vp);
2620
2621	/*
2622	 * filesystem is gone, so set the EOF flag and schedule
2623	 * the knote for deletion.
2624	 */
2625	if (hint == NOTE_REVOKE) {
2626		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2627		return (1);
2628	}
2629
2630        kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
2631        return (kn->kn_data != 0);
2632}
2633
2634/*ARGSUSED*/
2635static int
2636filt_ufswrite(struct knote *kn, long hint)
2637{
2638
2639	/*
2640	 * filesystem is gone, so set the EOF flag and schedule
2641	 * the knote for deletion.
2642	 */
2643	if (hint == NOTE_REVOKE)
2644		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2645
2646        kn->kn_data = 0;
2647        return (1);
2648}
2649
2650static int
2651filt_ufsvnode(struct knote *kn, long hint)
2652{
2653
2654	if (kn->kn_sfflags & hint)
2655		kn->kn_fflags |= hint;
2656	if (hint == NOTE_REVOKE) {
2657		kn->kn_flags |= EV_EOF;
2658		return (1);
2659	}
2660	return (kn->kn_fflags != 0);
2661}
2662
/* Global vfs data structures for ufs. */

/*
 * Vnode operations vector for ufs vnodes that get neither the special
 * device table nor the fifo table.  Each entry pairs an operation
 * descriptor with its implementation; the list ends with a
 * { NULL, NULL } sentinel.
 */
static vop_t **ufs_vnodeop_p;
static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	/*
	 * fsync, read, reallocblks and write are bound to vop_panic here.
	 * NOTE(review): presumably a filesystem layered on ufs supplies the
	 * real implementations -- confirm against the ffs vnode tables.
	 */
	{ &vop_fsync_desc,		(vop_t *) vop_panic },
	{ &vop_read_desc,		(vop_t *) vop_panic },
	{ &vop_reallocblks_desc,	(vop_t *) vop_panic },
	{ &vop_write_desc,		(vop_t *) vop_panic },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
	{ &vop_close_desc,		(vop_t *) ufs_close },
	{ &vop_create_desc,		(vop_t *) ufs_create },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_link_desc,		(vop_t *) ufs_link },
	/* Plain lookups go to vfs_cache_lookup; ufs_lookup is the cached-lookup op. */
	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
	{ &vop_open_desc,		(vop_t *) ufs_open },
	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
	{ &vop_kqfilter_desc,		(vop_t *) ufs_kqfilter },
	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_remove_desc,		(vop_t *) ufs_remove },
	{ &vop_rename_desc,		(vop_t *) ufs_rename },
	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
#ifdef MAC
	{ &vop_setlabel_desc,		(vop_t *) vop_stdsetlabel_ea },
#endif
	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
#ifdef UFS_EXTATTR
	{ &vop_getextattr_desc,		(vop_t *) ufs_getextattr },
	{ &vop_setextattr_desc,		(vop_t *) ufs_setextattr },
#endif
#ifdef UFS_ACL
	{ &vop_getacl_desc,		(vop_t *) ufs_getacl },
	{ &vop_setacl_desc,		(vop_t *) ufs_setacl },
	{ &vop_aclcheck_desc,		(vop_t *) ufs_aclcheck },
#endif
	{ NULL, NULL }
};
/* Ties the entry list to the vector pointer that is filled in from it. */
static struct vnodeopv_desc ufs_vnodeop_opv_desc =
	{ &ufs_vnodeop_p, ufs_vnodeop_entries };
2715
/*
 * Vnode operations vector for special device vnodes on ufs.
 * Operations not listed fall to spec_vnoperate (the default entry);
 * close, read and write go through the ufsspec_* wrappers, while the
 * attribute, inactive/reclaim and print operations use the common ufs
 * routines.
 */
static vop_t **ufs_specop_p;
static struct vnodeopv_entry_desc ufs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) vop_panic },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsspec_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsspec_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
#ifdef MAC
	{ &vop_setlabel_desc,		(vop_t *) vop_stdsetlabel_ea },
#endif
	{ &vop_write_desc,		(vop_t *) ufsspec_write },
#ifdef UFS_EXTATTR
	{ &vop_getextattr_desc,		(vop_t *) ufs_getextattr },
	{ &vop_setextattr_desc,		(vop_t *) ufs_setextattr },
#endif
#ifdef UFS_ACL
	{ &vop_getacl_desc,		(vop_t *) ufs_getacl },
	{ &vop_setacl_desc,		(vop_t *) ufs_setacl },
	{ &vop_aclcheck_desc,		(vop_t *) ufs_aclcheck },
#endif
	{NULL, NULL}
};
/* Ties the entry list to the vector pointer that is filled in from it. */
static struct vnodeopv_desc ufs_specop_opv_desc =
	{ &ufs_specop_p, ufs_specop_entries };
2745
/*
 * Vnode operations vector for fifo vnodes on ufs.
 * Operations not listed fall to fifo_vnoperate (the default entry);
 * close, kqfilter, read and write go through the ufsfifo_* wrappers,
 * while the attribute, inactive/reclaim and print operations use the
 * common ufs routines.
 */
static vop_t **ufs_fifoop_p;
static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) vop_panic },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_kqfilter_desc,		(vop_t *) ufsfifo_kqfilter },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
#ifdef MAC
	{ &vop_setlabel_desc,		(vop_t *) vop_stdsetlabel_ea },
#endif
	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
#ifdef UFS_EXTATTR
	{ &vop_getextattr_desc,		(vop_t *) ufs_getextattr },
	{ &vop_setextattr_desc,		(vop_t *) ufs_setextattr },
#endif
#ifdef UFS_ACL
	{ &vop_getacl_desc,		(vop_t *) ufs_getacl },
	{ &vop_setacl_desc,		(vop_t *) ufs_setacl },
	{ &vop_aclcheck_desc,		(vop_t *) ufs_aclcheck },
#endif
	{ NULL, NULL }
};
/* Ties the entry list to the vector pointer that is filled in from it. */
static struct vnodeopv_desc ufs_fifoop_opv_desc =
	{ &ufs_fifoop_p, ufs_fifoop_entries };
2776
/*
 * Hand the three ufs operation-vector descriptors to VNODEOP_SET.
 * NOTE(review): presumably this registers them so the vectors are
 * built during vnode-operation initialization -- confirm against the
 * macro's definition.
 */
VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);
2780
2781int
2782ufs_vnoperate(ap)
2783	struct vop_generic_args /* {
2784		struct vnodeop_desc *a_desc;
2785	} */ *ap;
2786{
2787	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2788}
2789
2790int
2791ufs_vnoperatefifo(ap)
2792	struct vop_generic_args /* {
2793		struct vnodeop_desc *a_desc;
2794	} */ *ap;
2795{
2796	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
2797}
2798
2799int
2800ufs_vnoperatespec(ap)
2801	struct vop_generic_args /* {
2802		struct vnodeop_desc *a_desc;
2803	} */ *ap;
2804{
2805	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
2806}
2807