1/*	$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $	*/
2
3/*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1982, 1986, 1989, 1993, 1995
34 *	The Regents of the University of California.  All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)ufs_vnops.c	8.28 (Berkeley) 7/31/95
66 */
67
68#include <sys/cdefs.h>
69__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $");
70
71#if defined(_KERNEL_OPT)
72#include "opt_ffs.h"
73#include "opt_quota.h"
74#endif
75
76#include <sys/param.h>
77#include <sys/systm.h>
78#include <sys/namei.h>
79#include <sys/resourcevar.h>
80#include <sys/kernel.h>
81#include <sys/file.h>
82#include <sys/stat.h>
83#include <sys/buf.h>
84#include <sys/proc.h>
85#include <sys/mount.h>
86#include <sys/vnode.h>
87#include <sys/kmem.h>
88#include <sys/malloc.h>
89#include <sys/dirent.h>
90#include <sys/lockf.h>
91#include <sys/kauth.h>
92#include <sys/wapbl.h>
93#include <sys/fstrans.h>
94
95#include <miscfs/specfs/specdev.h>
96#include <miscfs/fifofs/fifo.h>
97#include <miscfs/genfs/genfs.h>
98
99#include <ufs/ufs/inode.h>
100#include <ufs/ufs/dir.h>
101#include <ufs/ufs/ufsmount.h>
102#include <ufs/ufs/ufs_bswap.h>
103#include <ufs/ufs/ufs_extern.h>
104#include <ufs/ufs/ufs_wapbl.h>
105#ifdef UFS_DIRHASH
106#include <ufs/ufs/dirhash.h>
107#endif
108#include <ufs/ext2fs/ext2fs_extern.h>
109#include <ufs/ext2fs/ext2fs_dir.h>
110#include <ufs/ffs/ffs_extern.h>
111#include <ufs/lfs/lfs_extern.h>
112#include <ufs/lfs/lfs.h>
113
114#include <uvm/uvm.h>
115
/*
 * Compile-time checks that the ext2fs and LFS maximum name lengths are
 * equal to the FFS maximum name length.
 * NOTE(review): presumably code built on this file relies on one common
 * limit for all three -- confirm.
 */
__CTASSERT(EXT2FS_MAXNAMLEN == FFS_MAXNAMLEN);
__CTASSERT(LFS_MAXNAMLEN == FFS_MAXNAMLEN);

/*
 * Attribute-change helpers defined further down in this file; both are
 * called from ufs_setattr().
 */
static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct lwp *);
static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t,
    struct lwp *);
122
/*
 * A virgin directory (no blushing please).
 *
 * Constant template holding the two entries "." and "..".  Both rows
 * start with 0 and are typed DT_DIR; the first row carries the sizes 12
 * and 1, the second DIRBLKSIZ - 12 and 2.
 * NOTE(review): the initializer order is assumed to be inode number,
 * record length, type, name length, name -- confirm against the
 * declaration of struct dirtemplate.
 */
static const struct dirtemplate mastertemplate = {
	0,	12,		DT_DIR,	1,	".",
	0,	DIRBLKSIZ - 12,	DT_DIR,	2,	".."
};
130
131/*
132 * Create a regular file
133 */
134int
135ufs_create(void *v)
136{
137	struct vop_create_args /* {
138		struct vnode		*a_dvp;
139		struct vnode		**a_vpp;
140		struct componentname	*a_cnp;
141		struct vattr		*a_vap;
142	} */ *ap = v;
143	int	error;
144	struct vnode *dvp = ap->a_dvp;
145	struct ufs_lookup_results *ulr;
146
147	/* XXX should handle this material another way */
148	ulr = &VTOI(dvp)->i_crap;
149	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
150
151	/*
152	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
153	 * ufs_makeinode
154	 */
155	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
156	error =
157	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
158			  dvp, ulr, ap->a_vpp, ap->a_cnp);
159	if (error) {
160		fstrans_done(dvp->v_mount);
161		return (error);
162	}
163	UFS_WAPBL_END1(dvp->v_mount, dvp);
164	fstrans_done(dvp->v_mount);
165	VN_KNOTE(dvp, NOTE_WRITE);
166	return (0);
167}
168
169/*
170 * Mknod vnode call
171 */
172/* ARGSUSED */
173int
174ufs_mknod(void *v)
175{
176	struct vop_mknod_args /* {
177		struct vnode		*a_dvp;
178		struct vnode		**a_vpp;
179		struct componentname	*a_cnp;
180		struct vattr		*a_vap;
181	} */ *ap = v;
182	struct vattr	*vap;
183	struct vnode	**vpp;
184	struct inode	*ip;
185	int		error;
186	struct mount	*mp;
187	ino_t		ino;
188	struct ufs_lookup_results *ulr;
189
190	vap = ap->a_vap;
191	vpp = ap->a_vpp;
192
193	/* XXX should handle this material another way */
194	ulr = &VTOI(ap->a_dvp)->i_crap;
195	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
196
197	/*
198	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
199	 * ufs_makeinode
200	 */
201	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
202	if ((error =
203	    ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
204	    ap->a_dvp, ulr, vpp, ap->a_cnp)) != 0)
205		goto out;
206	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
207	ip = VTOI(*vpp);
208	mp  = (*vpp)->v_mount;
209	ino = ip->i_number;
210	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
211	if (vap->va_rdev != VNOVAL) {
212		struct ufsmount *ump = ip->i_ump;
213		/*
214		 * Want to be able to use this to make badblock
215		 * inodes, so don't truncate the dev number.
216		 */
217		if (ump->um_fstype == UFS1)
218			ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev,
219			    UFS_MPNEEDSWAP(ump));
220		else
221			ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev,
222			    UFS_MPNEEDSWAP(ump));
223	}
224	UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0);
225	UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
226	/*
227	 * Remove inode so that it will be reloaded by VFS_VGET and
228	 * checked to see if it is an alias of an existing entry in
229	 * the inode cache.
230	 */
231	(*vpp)->v_type = VNON;
232	VOP_UNLOCK(*vpp);
233	vgone(*vpp);
234	error = VFS_VGET(mp, ino, vpp);
235out:
236	fstrans_done(ap->a_dvp->v_mount);
237	if (error != 0) {
238		*vpp = NULL;
239		return (error);
240	}
241	return (0);
242}
243
244/*
245 * Open called.
246 *
247 * Nothing to do.
248 */
249/* ARGSUSED */
250int
251ufs_open(void *v)
252{
253	struct vop_open_args /* {
254		struct vnode	*a_vp;
255		int		a_mode;
256		kauth_cred_t	a_cred;
257	} */ *ap = v;
258
259	/*
260	 * Files marked append-only must be opened for appending.
261	 */
262	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
263	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
264		return (EPERM);
265	return (0);
266}
267
268/*
269 * Close called.
270 *
271 * Update the times on the inode.
272 */
273/* ARGSUSED */
274int
275ufs_close(void *v)
276{
277	struct vop_close_args /* {
278		struct vnode	*a_vp;
279		int		a_fflag;
280		kauth_cred_t	a_cred;
281	} */ *ap = v;
282	struct vnode	*vp;
283	struct inode	*ip;
284
285	vp = ap->a_vp;
286	ip = VTOI(vp);
287	fstrans_start(vp->v_mount, FSTRANS_SHARED);
288	if (vp->v_usecount > 1)
289		UFS_ITIMES(vp, NULL, NULL, NULL);
290	fstrans_done(vp->v_mount);
291	return (0);
292}
293
294static int
295ufs_check_possible(struct vnode *vp, struct inode *ip, mode_t mode,
296    kauth_cred_t cred)
297{
298#if defined(QUOTA) || defined(QUOTA2)
299	int error;
300#endif
301
302	/*
303	 * Disallow write attempts on read-only file systems;
304	 * unless the file is a socket, fifo, or a block or
305	 * character device resident on the file system.
306	 */
307	if (mode & VWRITE) {
308		switch (vp->v_type) {
309		case VDIR:
310		case VLNK:
311		case VREG:
312			if (vp->v_mount->mnt_flag & MNT_RDONLY)
313				return (EROFS);
314#if defined(QUOTA) || defined(QUOTA2)
315			fstrans_start(vp->v_mount, FSTRANS_SHARED);
316			error = chkdq(ip, 0, cred, 0);
317			fstrans_done(vp->v_mount);
318			if (error != 0)
319				return error;
320#endif
321			break;
322		case VBAD:
323		case VBLK:
324		case VCHR:
325		case VSOCK:
326		case VFIFO:
327		case VNON:
328		default:
329			break;
330		}
331	}
332
333	/* If it is a snapshot, nobody gets access to it. */
334	if ((ip->i_flags & SF_SNAPSHOT))
335		return (EPERM);
336	/* If immutable bit set, nobody gets to write it. */
337	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
338		return (EPERM);
339
340	return 0;
341}
342
343static int
344ufs_check_permitted(struct vnode *vp, struct inode *ip, mode_t mode,
345    kauth_cred_t cred)
346{
347
348	return genfs_can_access(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid,
349	    ip->i_gid, mode, cred);
350}
351
352int
353ufs_access(void *v)
354{
355	struct vop_access_args /* {
356		struct vnode	*a_vp;
357		int		a_mode;
358		kauth_cred_t	a_cred;
359	} */ *ap = v;
360	struct vnode	*vp;
361	struct inode	*ip;
362	mode_t		mode;
363	int		error;
364
365	vp = ap->a_vp;
366	ip = VTOI(vp);
367	mode = ap->a_mode;
368
369	error = ufs_check_possible(vp, ip, mode, ap->a_cred);
370	if (error)
371		return error;
372
373	error = ufs_check_permitted(vp, ip, mode, ap->a_cred);
374
375	return error;
376}
377
378/* ARGSUSED */
379int
380ufs_getattr(void *v)
381{
382	struct vop_getattr_args /* {
383		struct vnode	*a_vp;
384		struct vattr	*a_vap;
385		kauth_cred_t	a_cred;
386	} */ *ap = v;
387	struct vnode	*vp;
388	struct inode	*ip;
389	struct vattr	*vap;
390
391	vp = ap->a_vp;
392	ip = VTOI(vp);
393	vap = ap->a_vap;
394	fstrans_start(vp->v_mount, FSTRANS_SHARED);
395	UFS_ITIMES(vp, NULL, NULL, NULL);
396
397	/*
398	 * Copy from inode table
399	 */
400	vap->va_fsid = ip->i_dev;
401	vap->va_fileid = ip->i_number;
402	vap->va_mode = ip->i_mode & ALLPERMS;
403	vap->va_nlink = ip->i_nlink;
404	vap->va_uid = ip->i_uid;
405	vap->va_gid = ip->i_gid;
406	vap->va_size = vp->v_size;
407	if (ip->i_ump->um_fstype == UFS1) {
408		vap->va_rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev,
409		    UFS_MPNEEDSWAP(ip->i_ump));
410		vap->va_atime.tv_sec = ip->i_ffs1_atime;
411		vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
412		vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
413		vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
414		vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
415		vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
416		vap->va_birthtime.tv_sec = 0;
417		vap->va_birthtime.tv_nsec = 0;
418		vap->va_bytes = dbtob((u_quad_t)ip->i_ffs1_blocks);
419	} else {
420		vap->va_rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev,
421		    UFS_MPNEEDSWAP(ip->i_ump));
422		vap->va_atime.tv_sec = ip->i_ffs2_atime;
423		vap->va_atime.tv_nsec = ip->i_ffs2_atimensec;
424		vap->va_mtime.tv_sec = ip->i_ffs2_mtime;
425		vap->va_mtime.tv_nsec = ip->i_ffs2_mtimensec;
426		vap->va_ctime.tv_sec = ip->i_ffs2_ctime;
427		vap->va_ctime.tv_nsec = ip->i_ffs2_ctimensec;
428		vap->va_birthtime.tv_sec = ip->i_ffs2_birthtime;
429		vap->va_birthtime.tv_nsec = ip->i_ffs2_birthnsec;
430		vap->va_bytes = dbtob(ip->i_ffs2_blocks);
431	}
432	vap->va_gen = ip->i_gen;
433	vap->va_flags = ip->i_flags;
434
435	/* this doesn't belong here */
436	if (vp->v_type == VBLK)
437		vap->va_blocksize = BLKDEV_IOSIZE;
438	else if (vp->v_type == VCHR)
439		vap->va_blocksize = MAXBSIZE;
440	else
441		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
442	vap->va_type = vp->v_type;
443	vap->va_filerev = ip->i_modrev;
444	fstrans_done(vp->v_mount);
445	return (0);
446}
447
448/*
449 * Set attribute vnode op. called from several syscalls
450 */
451int
452ufs_setattr(void *v)
453{
454	struct vop_setattr_args /* {
455		struct vnode	*a_vp;
456		struct vattr	*a_vap;
457		kauth_cred_t	a_cred;
458	} */ *ap = v;
459	struct vattr	*vap;
460	struct vnode	*vp;
461	struct inode	*ip;
462	kauth_cred_t	cred;
463	struct lwp	*l;
464	int		error;
465
466	vap = ap->a_vap;
467	vp = ap->a_vp;
468	ip = VTOI(vp);
469	cred = ap->a_cred;
470	l = curlwp;
471
472	/*
473	 * Check for unsettable attributes.
474	 */
475	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
476	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
477	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
478	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
479		return (EINVAL);
480	}
481
482	fstrans_start(vp->v_mount, FSTRANS_SHARED);
483
484	if (vap->va_flags != VNOVAL) {
485		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
486			error = EROFS;
487			goto out;
488		}
489		if (kauth_cred_geteuid(cred) != ip->i_uid &&
490		    (error = kauth_authorize_generic(cred,
491		    KAUTH_GENERIC_ISSUSER, NULL)))
492			goto out;
493		if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
494		    NULL) == 0) {
495			if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) &&
496			    kauth_authorize_system(l->l_cred,
497			     KAUTH_SYSTEM_CHSYSFLAGS, 0, NULL, NULL, NULL)) {
498				error = EPERM;
499				goto out;
500			}
501			/* Snapshot flag cannot be set or cleared */
502			if ((vap->va_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) !=
503			    (ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))) {
504				error = EPERM;
505				goto out;
506			}
507			error = UFS_WAPBL_BEGIN(vp->v_mount);
508			if (error)
509				goto out;
510			ip->i_flags = vap->va_flags;
511			DIP_ASSIGN(ip, flags, ip->i_flags);
512		} else {
513			if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) ||
514			    (vap->va_flags & UF_SETTABLE) != vap->va_flags) {
515				error = EPERM;
516				goto out;
517			}
518			if ((ip->i_flags & SF_SETTABLE) !=
519			    (vap->va_flags & SF_SETTABLE)) {
520				error = EPERM;
521				goto out;
522			}
523			error = UFS_WAPBL_BEGIN(vp->v_mount);
524			if (error)
525				goto out;
526			ip->i_flags &= SF_SETTABLE;
527			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
528			DIP_ASSIGN(ip, flags, ip->i_flags);
529		}
530		ip->i_flag |= IN_CHANGE;
531		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
532		UFS_WAPBL_END(vp->v_mount);
533		if (vap->va_flags & (IMMUTABLE | APPEND)) {
534			error = 0;
535			goto out;
536		}
537	}
538	if (ip->i_flags & (IMMUTABLE | APPEND)) {
539		error = EPERM;
540		goto out;
541	}
542	/*
543	 * Go through the fields and update iff not VNOVAL.
544	 */
545	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
546		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
547			error = EROFS;
548			goto out;
549		}
550		error = UFS_WAPBL_BEGIN(vp->v_mount);
551		if (error)
552			goto out;
553		error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
554		UFS_WAPBL_END(vp->v_mount);
555		if (error)
556			goto out;
557	}
558	if (vap->va_size != VNOVAL) {
559		/*
560		 * Disallow write attempts on read-only file systems;
561		 * unless the file is a socket, fifo, or a block or
562		 * character device resident on the file system.
563		 */
564		switch (vp->v_type) {
565		case VDIR:
566			error = EISDIR;
567			goto out;
568		case VCHR:
569		case VBLK:
570		case VFIFO:
571			break;
572		case VREG:
573			if (vp->v_mount->mnt_flag & MNT_RDONLY) {
574				error = EROFS;
575				goto out;
576			}
577			if ((ip->i_flags & SF_SNAPSHOT) != 0) {
578				error = EPERM;
579				goto out;
580			}
581			error = UFS_WAPBL_BEGIN(vp->v_mount);
582			if (error)
583				goto out;
584			/*
585			 * When journaling, only truncate one indirect block
586			 * at a time.
587			 */
588			if (vp->v_mount->mnt_wapbl) {
589				uint64_t incr = MNINDIR(ip->i_ump) <<
590				    vp->v_mount->mnt_fs_bshift; /* Power of 2 */
591				uint64_t base = NDADDR <<
592				    vp->v_mount->mnt_fs_bshift;
593				while (!error && ip->i_size > base + incr &&
594				    ip->i_size > vap->va_size + incr) {
595					/*
596					 * round down to next full indirect
597					 * block boundary.
598					 */
599					uint64_t nsize = base +
600					    ((ip->i_size - base - 1) &
601					    ~(incr - 1));
602					error = UFS_TRUNCATE(vp, nsize, 0,
603					    cred);
604					if (error == 0) {
605						UFS_WAPBL_END(vp->v_mount);
606						error =
607						   UFS_WAPBL_BEGIN(vp->v_mount);
608					}
609				}
610			}
611			if (!error)
612				error = UFS_TRUNCATE(vp, vap->va_size, 0, cred);
613			UFS_WAPBL_END(vp->v_mount);
614			if (error)
615				goto out;
616			break;
617		default:
618			error = EOPNOTSUPP;
619			goto out;
620		}
621	}
622	ip = VTOI(vp);
623	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
624	    vap->va_birthtime.tv_sec != VNOVAL) {
625		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
626			error = EROFS;
627			goto out;
628		}
629		if ((ip->i_flags & SF_SNAPSHOT) != 0) {
630			error = EPERM;
631			goto out;
632		}
633		error = genfs_can_chtimes(vp, vap->va_vaflags, ip->i_uid, cred);
634		if (error)
635			goto out;
636		error = UFS_WAPBL_BEGIN(vp->v_mount);
637		if (error)
638			goto out;
639		if (vap->va_atime.tv_sec != VNOVAL)
640			if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
641				ip->i_flag |= IN_ACCESS;
642		if (vap->va_mtime.tv_sec != VNOVAL) {
643			ip->i_flag |= IN_CHANGE | IN_UPDATE;
644			if (vp->v_mount->mnt_flag & MNT_RELATIME)
645				ip->i_flag |= IN_ACCESS;
646		}
647		if (vap->va_birthtime.tv_sec != VNOVAL &&
648		    ip->i_ump->um_fstype == UFS2) {
649			ip->i_ffs2_birthtime = vap->va_birthtime.tv_sec;
650			ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec;
651		}
652		error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
653		UFS_WAPBL_END(vp->v_mount);
654		if (error)
655			goto out;
656	}
657	error = 0;
658	if (vap->va_mode != (mode_t)VNOVAL) {
659		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
660			error = EROFS;
661			goto out;
662		}
663		if ((ip->i_flags & SF_SNAPSHOT) != 0 &&
664		    (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP |
665		     S_IXOTH | S_IWOTH))) {
666			error = EPERM;
667			goto out;
668		}
669		error = UFS_WAPBL_BEGIN(vp->v_mount);
670		if (error)
671			goto out;
672		error = ufs_chmod(vp, (int)vap->va_mode, cred, l);
673		UFS_WAPBL_END(vp->v_mount);
674	}
675	VN_KNOTE(vp, NOTE_ATTRIB);
676out:
677	fstrans_done(vp->v_mount);
678	return (error);
679}
680
681/*
682 * Change the mode on a file.
683 * Inode must be locked before calling.
684 */
685static int
686ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
687{
688	struct inode	*ip;
689	int		error;
690
691	UFS_WAPBL_JLOCK_ASSERT(vp->v_mount);
692
693	ip = VTOI(vp);
694
695	error = genfs_can_chmod(vp, cred, ip->i_uid, ip->i_gid, mode);
696	if (error)
697		return (error);
698
699	fstrans_start(vp->v_mount, FSTRANS_SHARED);
700	ip->i_mode &= ~ALLPERMS;
701	ip->i_mode |= (mode & ALLPERMS);
702	ip->i_flag |= IN_CHANGE;
703	DIP_ASSIGN(ip, mode, ip->i_mode);
704	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
705	fstrans_done(vp->v_mount);
706	return (0);
707}
708
709/*
710 * Perform chown operation on inode ip;
711 * inode must be locked prior to call.
712 */
713static int
714ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
715    	struct lwp *l)
716{
717	struct inode	*ip;
718	int		error = 0;
719#if defined(QUOTA) || defined(QUOTA2)
720	uid_t		ouid;
721	gid_t		ogid;
722	int64_t		change;
723#endif
724	ip = VTOI(vp);
725	error = 0;
726
727	if (uid == (uid_t)VNOVAL)
728		uid = ip->i_uid;
729	if (gid == (gid_t)VNOVAL)
730		gid = ip->i_gid;
731
732	error = genfs_can_chown(vp, cred, ip->i_uid, ip->i_gid, uid, gid);
733	if (error)
734		return (error);
735
736	fstrans_start(vp->v_mount, FSTRANS_SHARED);
737#if defined(QUOTA) || defined(QUOTA2)
738	ogid = ip->i_gid;
739	ouid = ip->i_uid;
740	change = DIP(ip, blocks);
741	(void) chkdq(ip, -change, cred, 0);
742	(void) chkiq(ip, -1, cred, 0);
743#endif
744	ip->i_gid = gid;
745	DIP_ASSIGN(ip, gid, gid);
746	ip->i_uid = uid;
747	DIP_ASSIGN(ip, uid, uid);
748#if defined(QUOTA) || defined(QUOTA2)
749	if ((error = chkdq(ip, change, cred, 0)) == 0) {
750		if ((error = chkiq(ip, 1, cred, 0)) == 0)
751			goto good;
752		else
753			(void) chkdq(ip, -change, cred, FORCE);
754	}
755	ip->i_gid = ogid;
756	DIP_ASSIGN(ip, gid, ogid);
757	ip->i_uid = ouid;
758	DIP_ASSIGN(ip, uid, ouid);
759	(void) chkdq(ip, change, cred, FORCE);
760	(void) chkiq(ip, 1, cred, FORCE);
761	fstrans_done(vp->v_mount);
762	return (error);
763 good:
764#endif /* QUOTA || QUOTA2 */
765	ip->i_flag |= IN_CHANGE;
766	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
767	fstrans_done(vp->v_mount);
768	return (0);
769}
770
771int
772ufs_remove(void *v)
773{
774	struct vop_remove_args /* {
775		struct vnode		*a_dvp;
776		struct vnode		*a_vp;
777		struct componentname	*a_cnp;
778	} */ *ap = v;
779	struct vnode	*vp, *dvp;
780	struct inode	*ip;
781	int		error;
782	struct ufs_lookup_results *ulr;
783
784	vp = ap->a_vp;
785	dvp = ap->a_dvp;
786	ip = VTOI(vp);
787
788	/* XXX should handle this material another way */
789	ulr = &VTOI(dvp)->i_crap;
790	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
791
792	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
793	if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) ||
794	    (VTOI(dvp)->i_flags & APPEND))
795		error = EPERM;
796	else {
797		error = UFS_WAPBL_BEGIN(dvp->v_mount);
798		if (error == 0) {
799			error = ufs_dirremove(dvp, ulr,
800					      ip, ap->a_cnp->cn_flags, 0);
801			UFS_WAPBL_END(dvp->v_mount);
802		}
803	}
804	VN_KNOTE(vp, NOTE_DELETE);
805	VN_KNOTE(dvp, NOTE_WRITE);
806	if (dvp == vp)
807		vrele(vp);
808	else
809		vput(vp);
810	vput(dvp);
811	fstrans_done(dvp->v_mount);
812	return (error);
813}
814
815/*
816 * ufs_link: create hard link.
817 */
818int
819ufs_link(void *v)
820{
821	struct vop_link_args /* {
822		struct vnode *a_dvp;
823		struct vnode *a_vp;
824		struct componentname *a_cnp;
825	} */ *ap = v;
826	struct vnode *dvp = ap->a_dvp;
827	struct vnode *vp = ap->a_vp;
828	struct componentname *cnp = ap->a_cnp;
829	struct inode *ip;
830	struct direct *newdir;
831	int error;
832	struct ufs_lookup_results *ulr;
833
834	KASSERT(dvp != vp);
835	KASSERT(vp->v_type != VDIR);
836	KASSERT(dvp->v_mount == vp->v_mount);
837
838	/* XXX should handle this material another way */
839	ulr = &VTOI(dvp)->i_crap;
840	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
841
842	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
843	error = vn_lock(vp, LK_EXCLUSIVE);
844	if (error) {
845		VOP_ABORTOP(dvp, cnp);
846		goto out2;
847	}
848	ip = VTOI(vp);
849	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
850		VOP_ABORTOP(dvp, cnp);
851		error = EMLINK;
852		goto out1;
853	}
854	if (ip->i_flags & (IMMUTABLE | APPEND)) {
855		VOP_ABORTOP(dvp, cnp);
856		error = EPERM;
857		goto out1;
858	}
859	error = UFS_WAPBL_BEGIN(vp->v_mount);
860	if (error) {
861		VOP_ABORTOP(dvp, cnp);
862		goto out1;
863	}
864	ip->i_nlink++;
865	DIP_ASSIGN(ip, nlink, ip->i_nlink);
866	ip->i_flag |= IN_CHANGE;
867	error = UFS_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
868	if (!error) {
869		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
870		ufs_makedirentry(ip, cnp, newdir);
871		error = ufs_direnter(dvp, ulr, vp, newdir, cnp, NULL);
872		pool_cache_put(ufs_direct_cache, newdir);
873	}
874	if (error) {
875		ip->i_nlink--;
876		DIP_ASSIGN(ip, nlink, ip->i_nlink);
877		ip->i_flag |= IN_CHANGE;
878		UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
879	}
880	UFS_WAPBL_END(vp->v_mount);
881 out1:
882	VOP_UNLOCK(vp);
883 out2:
884	VN_KNOTE(vp, NOTE_LINK);
885	VN_KNOTE(dvp, NOTE_WRITE);
886	vput(dvp);
887	fstrans_done(dvp->v_mount);
888	return (error);
889}
890
891/*
892 * whiteout vnode call
893 */
894int
895ufs_whiteout(void *v)
896{
897	struct vop_whiteout_args /* {
898		struct vnode		*a_dvp;
899		struct componentname	*a_cnp;
900		int			a_flags;
901	} */ *ap = v;
902	struct vnode		*dvp = ap->a_dvp;
903	struct componentname	*cnp = ap->a_cnp;
904	struct direct		*newdir;
905	int			error;
906	struct ufsmount		*ump = VFSTOUFS(dvp->v_mount);
907	struct ufs_lookup_results *ulr;
908
909	/* XXX should handle this material another way */
910	ulr = &VTOI(dvp)->i_crap;
911	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
912
913	error = 0;
914	switch (ap->a_flags) {
915	case LOOKUP:
916		/* 4.4 format directories support whiteout operations */
917		if (ump->um_maxsymlinklen > 0)
918			return (0);
919		return (EOPNOTSUPP);
920
921	case CREATE:
922		/* create a new directory whiteout */
923		fstrans_start(dvp->v_mount, FSTRANS_SHARED);
924		error = UFS_WAPBL_BEGIN(dvp->v_mount);
925		if (error)
926			break;
927#ifdef DIAGNOSTIC
928		if (ump->um_maxsymlinklen <= 0)
929			panic("ufs_whiteout: old format filesystem");
930#endif
931
932		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
933		newdir->d_ino = WINO;
934		newdir->d_namlen = cnp->cn_namelen;
935		memcpy(newdir->d_name, cnp->cn_nameptr,
936		    (size_t)cnp->cn_namelen);
937		newdir->d_name[cnp->cn_namelen] = '\0';
938		newdir->d_type = DT_WHT;
939		error = ufs_direnter(dvp, ulr, NULL, newdir, cnp, NULL);
940		pool_cache_put(ufs_direct_cache, newdir);
941		break;
942
943	case DELETE:
944		/* remove an existing directory whiteout */
945		fstrans_start(dvp->v_mount, FSTRANS_SHARED);
946		error = UFS_WAPBL_BEGIN(dvp->v_mount);
947		if (error)
948			break;
949#ifdef DIAGNOSTIC
950		if (ump->um_maxsymlinklen <= 0)
951			panic("ufs_whiteout: old format filesystem");
952#endif
953
954		cnp->cn_flags &= ~DOWHITEOUT;
955		error = ufs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0);
956		break;
957	default:
958		panic("ufs_whiteout: unknown op");
959		/* NOTREACHED */
960	}
961	UFS_WAPBL_END(dvp->v_mount);
962	fstrans_done(dvp->v_mount);
963	return (error);
964}
965
966
967/*
968 * Rename vnode operation
969 * 	rename("foo", "bar");
970 * is essentially
971 *	unlink("bar");
972 *	link("foo", "bar");
973 *	unlink("foo");
974 * but ``atomically''.  Can't do full commit without saving state in the
975 * inode on disk which isn't feasible at this time.  Best we can do is
976 * always guarantee the target exists.
977 *
978 * Basic algorithm is:
979 *
980 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensures the inode won't be deleted out
982 *    from underneath us while we work (it may be truncated by
983 *    a concurrent `trunc' or `open' for creation).
984 * 2) Link source to destination.  If destination already exists,
985 *    delete it first.
986 * 3) Unlink source reference to inode if still around. If a
987 *    directory was moved and the parent of the destination
988 *    is different from the source, patch the ".." entry in the
989 *    directory.
990 */
991
992/*
993 * Notes on rename locking:
994 *
995 * We lock parent vnodes before child vnodes. This means in particular
996 * that if A is above B in the directory tree then A must be locked
997 * before B. (This is true regardless of how many steps appear in
998 * between, because an arbitrary number of other processes could lock
999 * parent/child in between and establish a lock cycle and deadlock.)
1000 *
1001 * Therefore, if tdvp is above fdvp we must lock tdvp first; if fdvp
1002 * is above tdvp we must lock fdvp first; and if they're
1003 * incommensurate it doesn't matter. (But, we rely on the fact that
1004 * there's a whole-volume rename lock to prevent deadlock among groups
1005 * of renames upon overlapping sets of incommensurate vnodes.)
1006 *
1007 * In addition to establishing lock ordering the parent check also
1008 * serves to rule out cases where someone tries to move a directory
1009 * underneath itself, e.g. rename("a/b", "a/b/c"). If allowed to
1010 * proceed such renames would detach portions of the directory tree
1011 * and make fsck very unhappy.
1012 *
1013 * Note that it is an error for *fvp* to be above tdvp; however,
1014 * *fdvp* can be above tdvp, as in rename("a/b", "a/c/d").
1015 *
1016 * The parent check searches up the tree from tdvp until it either
1017 * finds fdvp or the root of the volume. It also returns the vnode it
1018 * saw immediately before fdvp, if any. Later on (after looking up
1019 * fvp) we will check to see if this *is* fvp and if so fail.
1020 *
1021 * If the parent check finds fdvp, it means fdvp is above tdvp, so we
1022 * lock fdvp first and then tdvp. Otherwise, either tdvp is above fdvp
1023 * or they're incommensurate and we lock tdvp first.
1024 *
1025 * In either case each of the child vnodes has to be looked up and
1026 * locked immediately after its parent. The cases
1027 *
1028 *       fdvp/fvp/[.../]tdvp/tvp
1029 *       tdvp/tvp/[.../]fdvp/fvp
1030 *
1031 * can cause deadlock otherwise. Note that both of these are error
1032 * cases; the first fails the parent check and the second fails
1033 * because tvp isn't empty. The parent check case is handled before
1034 * we start locking; however, the nonempty case requires locking tvp
1035 * to find out safely that it's nonempty.
1036 *
1037 * Therefore the procedure is either
1038 *
1039 *   lock fdvp
1040 *   lookup fvp
1041 *   lock fvp
1042 *   lock tdvp
1043 *   lookup tvp
1044 *   lock tvp
1045 *
1046 * or
1047 *
1048 *   lock tdvp
1049 *   lookup tvp
1050 *   lock tvp
1051 *   lock fdvp
1052 *   lookup fvp
1053 *   lock fvp
1054 *
1055 * This could in principle be simplified by always looking up fvp
1056 * last; because of the parent check we know by the time we start
1057 * locking that fvp cannot be directly above tdvp, so (given the
1058 * whole-volume rename lock and other assumptions) it's safe to lock
1059 * tdvp before fvp. This would allow the following scheme:
1060 *
1061 *   lock fdvp
1062 *   lock tdvp
1063 * or
1064 *   lock tdvp
1065 *   lock fdvp
1066 *
1067 * then
1068 *   lookup tvp
1069 *   lock tvp
1070 *   lookup fvp
 *   check if fvp is above tdvp, fail if so
1072 *   lock fvp
1073 *
1074 * which is much, much simpler.
1075 *
1076 * However, current levels of vfs namei/lookup sanity do not permit
1077 * this. It is impossible currently to look up fvp without locking it.
1078 * (It gets locked regardless of whether LOCKLEAF is set; without
1079 * LOCKLEAF it just gets unlocked again, which doesn't help.)
1080 *
1081 * Therefore, because we must look up fvp to know if it's above tdvp,
1082 * which locks fvp, we must, at least in the case where fdvp is above
1083 * tdvp, do that before locking tdvp. The longer scheme does that; the
1084 * simpler scheme is not safe.
1085 *
1086 * Note that for now we aren't doing lookup() but relookup(); however,
1087 * the differences are minor.
1088 *
1089 * On top of all the above, just to make everything more
1090 * exciting, any two of the vnodes might end up being the same.
1091 *
1092 * FROMPARENT == FROMCHILD	mv a/. foo	is an error.
1093 * FROMPARENT == TOPARENT	mv a/b a/c	is ok.
1094 * FROMPARENT == TOCHILD	mv a/b/c a/b	will give ENOTEMPTY.
1095 * FROMCHILD == TOPARENT	mv a/b a/b/c	fails the parent check.
1096 * FROMCHILD == TOCHILD		mv a/b a/b	is ok.
1097 * TOPARENT == TOCHILD		mv foo a/.	is an error.
1098 *
1099 * This introduces more cases in the locking, because each distinct
1100 * vnode must be locked exactly once.
1101 *
1102 * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we assume it
1103 * doesn't matter what order the children are locked in, because the
1104 * per-volume rename lock excludes other renames and no other
1105 * operation locks two files in the same directory at once. (Note: if
1106 * it turns out that link() does, link() is wrong.)
1107 *
1108 * Until such time as we can do lookups without the namei and lookup
1109 * machinery "helpfully" locking the result vnode for us, we can't
1110 * avoid tripping on cases where FROMCHILD == TOCHILD. Currently for
1111 * non-directories we unlock the first one we lock while looking up
1112 * the second, then relock it if necessary. This is more or less
1113 * harmless since not much of interest can happen to the objects in
1114 * that window while we have the containing directory locked; but it's
1115 * not desirable and should be cleaned up when that becomes possible.
1116 * The right way to do it is to check after looking the second one up
1117 * and only lock it if it's different. (Note: for directories we don't
1118 * do this dance because the same directory can't appear more than
1119 * once.)
1120 */
1121
/*
 * XXX following lifted from ufs_lookup.c
 *
 * True when the mount that vp belongs to does not have IMNT_DTYPE set
 * in mnt_iflag. ufs_rename() below consults this when deciding which
 * field of a struct direct holds the name length.
 */
#define	FSFMT(vp)	(((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0)
1124
1125/*
1126 * Check if either entry referred to by FROM_ULR is within the range
1127 * of entries named by TO_ULR.
1128 */
1129static int
1130ulr_overlap(const struct ufs_lookup_results *from_ulr,
1131	    const struct ufs_lookup_results *to_ulr)
1132{
1133	doff_t from_start, from_prevstart;
1134	doff_t to_start, to_end;
1135
1136	/*
1137	 * FROM is a DELETE result; offset points to the entry to
1138	 * remove and subtracting count gives the previous entry.
1139	 */
1140	from_start = from_ulr->ulr_offset - from_ulr->ulr_count;
1141	from_prevstart = from_ulr->ulr_offset;
1142
1143	/*
1144	 * TO is a RENAME (thus non-DELETE) result; offset points
1145	 * to the beginning of a region to write in, and adding
1146	 * count gives the end of the region.
1147	 */
1148	to_start = to_ulr->ulr_offset;
1149	to_end = to_ulr->ulr_offset + to_ulr->ulr_count;
1150
1151	if (from_prevstart >= to_start && from_prevstart < to_end) {
1152		return 1;
1153	}
1154	if (from_start >= to_start && from_start < to_end) {
1155		return 1;
1156	}
1157	return 0;
1158}
1159
1160/*
1161 * Wrapper for relookup that also updates the supplemental results.
1162 */
1163static int
1164do_relookup(struct vnode *dvp, struct ufs_lookup_results *ulr,
1165	    struct vnode **vp, struct componentname *cnp)
1166{
1167	int error;
1168
1169	error = relookup(dvp, vp, cnp, 0);
1170	if (error) {
1171		return error;
1172	}
1173	/* update the supplemental reasults */
1174	*ulr = VTOI(dvp)->i_crap;
1175	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
1176	return 0;
1177}
1178
1179/*
1180 * Lock and relookup a sequence of two directories and two children.
1181 *
1182 */
1183static int
1184lock_vnode_sequence(struct vnode *d1, struct ufs_lookup_results *ulr1,
1185		    struct vnode **v1_ret, struct componentname *cn1,
1186		    int v1_missing_ok,
1187		    int overlap_error,
1188		    struct vnode *d2, struct ufs_lookup_results *ulr2,
1189		    struct vnode **v2_ret, struct componentname *cn2,
1190		    int v2_missing_ok)
1191{
1192	struct vnode *v1, *v2;
1193	int error;
1194
1195	KASSERT(d1 != d2);
1196
1197	vn_lock(d1, LK_EXCLUSIVE | LK_RETRY);
1198	if (VTOI(d1)->i_size == 0) {
1199		/* d1 has been rmdir'd */
1200		VOP_UNLOCK(d1);
1201		return ENOENT;
1202	}
1203	error = do_relookup(d1, ulr1, &v1, cn1);
1204	if (v1_missing_ok) {
1205		if (error == ENOENT) {
1206			/*
1207			 * Note: currently if the name doesn't exist,
1208			 * relookup succeeds (it intercepts the
1209			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
1210			 * to NULL. Therefore, we will never get
1211			 * ENOENT and this branch is not needed.
1212			 * However, in a saner future the EJUSTRETURN
1213			 * garbage will go away, so let's DTRT.
1214			 */
1215			v1 = NULL;
1216			error = 0;
1217		}
1218	} else {
1219		if (error == 0 && v1 == NULL) {
1220			/* This is what relookup sets if v1 disappeared. */
1221			error = ENOENT;
1222		}
1223	}
1224	if (error) {
1225		VOP_UNLOCK(d1);
1226		return error;
1227	}
1228	if (v1 && v1 == d2) {
1229		VOP_UNLOCK(d1);
1230		VOP_UNLOCK(v1);
1231		vrele(v1);
1232		return overlap_error;
1233	}
1234
1235	/*
1236	 * The right way to do this is to do lookups without locking
1237	 * the results, and lock the results afterwards; then at the
1238	 * end we can avoid trying to lock v2 if v2 == v1.
1239	 *
1240	 * However, for the reasons described in the fdvp == tdvp case
1241	 * in rename below, we can't do that safely. So, in the case
1242	 * where v1 is not a directory, unlock it and lock it again
1243	 * afterwards. This is safe in locking order because a
1244	 * non-directory can't be above anything else in the tree. If
1245	 * v1 *is* a directory, that's not true, but then because d1
1246	 * != d2, v1 != v2.
1247	 */
1248	if (v1 && v1->v_type != VDIR) {
1249		VOP_UNLOCK(v1);
1250	}
1251	vn_lock(d2, LK_EXCLUSIVE | LK_RETRY);
1252	if (VTOI(d2)->i_size == 0) {
1253		/* d2 has been rmdir'd */
1254		VOP_UNLOCK(d2);
1255		if (v1 && v1->v_type == VDIR) {
1256			VOP_UNLOCK(v1);
1257		}
1258		VOP_UNLOCK(d1);
1259		if (v1) {
1260			vrele(v1);
1261		}
1262		return ENOENT;
1263	}
1264	error = do_relookup(d2, ulr2, &v2, cn2);
1265	if (v2_missing_ok) {
1266		if (error == ENOENT) {
1267			/* as above */
1268			v2 = NULL;
1269			error = 0;
1270		}
1271	} else {
1272		if (error == 0 && v2 == NULL) {
1273			/* This is what relookup sets if v2 disappeared. */
1274			error = ENOENT;
1275		}
1276	}
1277	if (error) {
1278		VOP_UNLOCK(d2);
1279		if (v1 && v1->v_type == VDIR) {
1280			VOP_UNLOCK(v1);
1281		}
1282		VOP_UNLOCK(d1);
1283		if (v1) {
1284			vrele(v1);
1285		}
1286		return error;
1287	}
1288	if (v1 && v1->v_type != VDIR && v1 != v2) {
1289		vn_lock(v1, LK_EXCLUSIVE | LK_RETRY);
1290	}
1291	*v1_ret = v1;
1292	*v2_ret = v2;
1293	return 0;
1294}
1295
/*
 * Rename vnode operation
 * 	rename("foo", "bar");
 * is essentially
 *	unlink("bar");
 *	link("foo", "bar");
 *	unlink("foo");
 * but ``atomically''.  Can't do full commit without saving state in the
 * inode on disk which isn't feasible at this time.  Best we can do is
 * always guarantee the target exists.
 *
 * Basic algorithm is:
 *
 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensure the inode won't be deleted out
 *    from underneath us while we work (it may be truncated by
 *    a concurrent `trunc' or `open' for creation).
 * 2) Link source to destination.  If destination already exists,
 *    delete it first.
 * 3) Unlink source reference to inode if still around. If a
 *    directory was moved and the parent of the destination
 *    is different from the source, patch the ".." entry in the
 *    directory.
 *
 * Arguments (struct vop_rename_args):
 *	a_fdvp, a_fvp, a_fcnp	source directory, source vnode, source name
 *	a_tdvp, a_tvp, a_tcnp	target directory, target vnode (may be
 *				NULL), target name
 *
 * The fvp and tvp passed in are released immediately and looked up
 * again under locks taken here. Every return path below drops the
 * references on fdvp and tdvp (and on the re-looked-up fvp/tvp), or
 * passes fdvp/fvp on to VOP_REMOVE in the "same file" case.
 *
 * Returns 0 on success or an errno value.
 */
int
ufs_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode		*a_fdvp;
		struct vnode		*a_fvp;
		struct componentname	*a_fcnp;
		struct vnode		*a_tdvp;
		struct vnode		*a_tvp;
		struct componentname	*a_tcnp;
	} */ *ap = v;
	struct vnode		*tvp, *tdvp, *fvp, *fdvp;
	struct componentname	*tcnp, *fcnp;
	struct inode		*ip, *txp, *fxp, *tdp, *fdp;
	struct mount		*mp;
	struct direct		*newdir;
	int			doingdirectory, error;
	ino_t			oldparent, newparent;

	/* private copies of each directory's supplemental lookup results */
	struct ufs_lookup_results from_ulr, to_ulr;

	tvp = ap->a_tvp;
	tdvp = ap->a_tdvp;
	fvp = ap->a_fvp;
	fdvp = ap->a_fdvp;
	tcnp = ap->a_tcnp;
	fcnp = ap->a_fcnp;
	doingdirectory = error = 0;
	oldparent = newparent = 0;

	/* save the supplemental lookup results as they currently exist */
	from_ulr = VTOI(fdvp)->i_crap;
	to_ulr = VTOI(tdvp)->i_crap;
	UFS_CHECK_CRAPCOUNTER(VTOI(fdvp));
	UFS_CHECK_CRAPCOUNTER(VTOI(tdvp));

	/*
	 * Owing to VFS oddities we are currently called with tdvp/tvp
	 * locked and not fdvp/fvp. In a sane world we'd be passed
	 * tdvp and fdvp only, unlocked, and two name strings. Pretend
	 * we have a sane world and unlock tdvp and tvp.
	 */
	VOP_UNLOCK(tdvp);
	if (tvp && tvp != tdvp) {
		VOP_UNLOCK(tvp);
	}

	/*
	 * Also pretend we have a sane world and vrele fvp/tvp.
	 * From here until the relookups below, fvp and tvp are NULL, so
	 * the "abort" exit knows there is nothing to release for them.
	 */
	vrele(fvp);
	fvp = NULL;
	if (tvp) {
		vrele(tvp);
		tvp = NULL;
	}

	/*
	 * Check for cross-device rename.
	 */
	if (fdvp->v_mount != tdvp->v_mount) {
		error = EXDEV;
		goto abort;
	}

	/*
	 * Reject "." and ".."
	 */
	if ((fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) ||
	    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
	    (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')) {
		error = EINVAL;
		goto abort;
	}

	/*
	 * Get locks.
	 */

	/* paranoia */
	fcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
	tcnp->cn_flags |= LOCKPARENT|LOCKLEAF;

	if (fdvp == tdvp) {
		/* One directory. Lock it and relookup both children. */
		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);

		if (VTOI(fdvp)->i_size == 0) {
			/* directory has been rmdir'd */
			VOP_UNLOCK(fdvp);
			error = ENOENT;
			goto abort;
		}

		error = do_relookup(fdvp, &from_ulr, &fvp, fcnp);
		if (error == 0 && fvp == NULL) {
			/* relookup may produce this if fvp disappears */
			error = ENOENT;
		}
		if (error) {
			VOP_UNLOCK(fdvp);
			goto abort;
		}

		/*
		 * The right way to do this is to look up both children
		 * without locking either, and then lock both unless they
		 * turn out to be the same. However, due to deep-seated
		 * VFS-level issues all lookups lock the child regardless
		 * of whether LOCKLEAF is set (if LOCKLEAF is not set,
		 * the child is locked during lookup and then unlocked)
		 * so it is not safe to look up tvp while fvp is locked.
		 *
		 * Unlocking fvp here temporarily is more or less safe,
		 * because with the directory locked there's not much
		 * that can happen to it. However, ideally it wouldn't
		 * be necessary. XXX.
		 */
		VOP_UNLOCK(fvp);
		/* remember fdvp == tdvp so tdvp is locked */
		error = do_relookup(tdvp, &to_ulr, &tvp, tcnp);
		if (error && error != ENOENT) {
			/* fvp is unlocked but still referenced; abort vrele's it */
			VOP_UNLOCK(fdvp);
			goto abort;
		}
		if (error == ENOENT) {
			/*
			 * Note: currently if the name doesn't exist,
			 * relookup succeeds (it intercepts the
			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
			 * to NULL. Therefore, we will never get
			 * ENOENT and this branch is not needed.
			 * However, in a saner future the EJUSTRETURN
			 * garbage will go away, so let's DTRT.
			 *
			 * NOTE(review): unlike lock_vnode_sequence(),
			 * error is not reset to 0 here. On this path
			 * (same directory, tvp NULL) it appears to be
			 * overwritten by UFS_WAPBL_BEGIN() before it
			 * can be returned -- confirm if this branch
			 * ever becomes reachable.
			 */
			tvp = NULL;
		}

		/* tvp is locked; lock fvp if necessary */
		if (!tvp || tvp != fvp) {
			vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
		}
	} else {
		/* set by ufs_parentcheck(): was fdvp found above tdvp? */
		int found_fdvp;
		/* set by ufs_parentcheck(): the child of fdvp that fvp must not be */
		struct vnode *illegal_fvp;

		/*
		 * The source must not be above the destination. (If
		 * it were, the rename would detach a section of the
		 * tree.)
		 *
		 * Look up the tree from tdvp to see if we find fdvp,
		 * and if so, return the immediate child of fdvp we're
		 * under; that must not turn out to be the same as
		 * fvp.
		 *
		 * The per-volume rename lock guarantees that the
		 * result of this check remains true until we finish
		 * looking up and locking.
		 */
		error = ufs_parentcheck(fdvp, tdvp, fcnp->cn_cred,
					&found_fdvp, &illegal_fvp);
		if (error) {
			goto abort;
		}

		/* Must lock in tree order. */

		if (found_fdvp) {
			/* fdvp -> fvp -> tdvp -> tvp */
			error = lock_vnode_sequence(fdvp, &from_ulr,
						    &fvp, fcnp, 0,
						    EINVAL,
						    tdvp, &to_ulr,
						    &tvp, tcnp, 1);
		} else {
			/* tdvp -> tvp -> fdvp -> fvp */
			error = lock_vnode_sequence(tdvp, &to_ulr,
						    &tvp, tcnp, 1,
						    ENOTEMPTY,
						    fdvp, &from_ulr,
						    &fvp, fcnp, 0);
		}
		if (error) {
			/*
			 * lock_vnode_sequence() leaves nothing locked and
			 * does not store fvp/tvp on failure, so only the
			 * reference on illegal_fvp needs dropping here.
			 */
			if (illegal_fvp) {
				vrele(illegal_fvp);
			}
			goto abort;
		}
		KASSERT(fvp != NULL);

		/* fvp is the ancestor of tdvp directly under fdvp: refuse */
		if (illegal_fvp && fvp == illegal_fvp) {
			vrele(illegal_fvp);
			error = EINVAL;
			goto abort_withlocks;
		}

		if (illegal_fvp) {
			vrele(illegal_fvp);
		}
	}

	KASSERT(fdvp && VOP_ISLOCKED(fdvp));
	KASSERT(fvp && VOP_ISLOCKED(fvp));
	KASSERT(tdvp && VOP_ISLOCKED(tdvp));
	KASSERT(tvp == NULL || VOP_ISLOCKED(tvp));

	/* --- everything is now locked --- */

	/*
	 * An existing target that is immutable or append-only, or one
	 * that lives in an append-only directory, may not be replaced.
	 */
	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
	    (VTOI(tdvp)->i_flags & APPEND))) {
		error = EPERM;
		goto abort_withlocks;
	}

	/*
	 * Check if just deleting a link name.
	 * (Source and target names refer to the same vnode.)
	 */
	if (fvp == tvp) {
		if (fvp->v_type == VDIR) {
			error = EINVAL;
			goto abort_withlocks;
		}

		/* Release destination completely. Leave fdvp locked. */
		VOP_ABORTOP(tdvp, tcnp);
		if (fdvp != tdvp) {
			VOP_UNLOCK(tdvp);
		}
		VOP_UNLOCK(tvp);
		vrele(tdvp);
		vrele(tvp);

		/* Delete source. */
		/* XXX: do we really need to relookup again? */

		/*
		 * fdvp is still locked, but we just unlocked fvp
		 * (because fvp == tvp) so just decref fvp
		 */
		vrele(fvp);
		/* redo the source lookup as a DELETE so VOP_REMOVE can use it */
		fcnp->cn_flags &= ~(MODMASK);
		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
		fcnp->cn_nameiop = DELETE;
		if ((error = relookup(fdvp, &fvp, fcnp, 0))) {
			vput(fdvp);
			return (error);
		}
		/* fdvp and fvp are handed over to VOP_REMOVE from here */
		return (VOP_REMOVE(fdvp, fvp, fcnp));
	}
	fdp = VTOI(fdvp);
	ip = VTOI(fvp);
	/* step 1 below adds a link to the source; make sure there is room */
	if ((nlink_t) ip->i_nlink >= LINK_MAX) {
		error = EMLINK;
		goto abort_withlocks;
	}
	if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
		(fdp->i_flags & APPEND)) {
		error = EPERM;
		goto abort_withlocks;
	}
	if ((ip->i_mode & IFMT) == IFDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 * A directory already marked IN_RENAME is refused as well.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    fdp == ip ||
		    (fcnp->cn_flags & ISDOTDOT) ||
		    (tcnp->cn_flags & ISDOTDOT) ||
		    (ip->i_flag & IN_RENAME)) {
			error = EINVAL;
			goto abort_withlocks;
		}
		/*
		 * From here on every exit must clear IN_RENAME again;
		 * the out2 label takes care of that.
		 */
		ip->i_flag |= IN_RENAME;
		doingdirectory = 1;
	}
	oldparent = fdp->i_number;
	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXXLUKEM/XXX: right place? */

	/*
	 * Both the directory
	 * and target vnodes are locked.
	 */
	tdp = VTOI(tdvp);
	txp = NULL;
	if (tvp)
		txp = VTOI(tvp);

	mp = fdvp->v_mount;
	fstrans_start(mp, FSTRANS_SHARED);

	/* newparent stays 0 when source and target directory are the same */
	if (oldparent != tdp->i_number)
		newparent = tdp->i_number;

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) the user must have write permission in the source
	 * so as to be able to change "..".
	 */
	if (doingdirectory && newparent) {
		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
		if (error)
			goto out;
	}

	KASSERT(fdvp != tvp);

	/*
	 * NOTE(review): the assertion just above makes the test below
	 * unreachable in kernels where KASSERT is active; it presumably
	 * remains as a fallback for builds without assertions.
	 */
	if (newparent) {
		/* Check for the rename("foo/foo", "foo") case. */
		if (fdvp == tvp) {
			error = doingdirectory ? ENOTEMPTY : EISDIR;
			goto out;
		}
	}

	fxp = VTOI(fvp);
	fdp = VTOI(fdvp);

	/* nothing has been modified yet, so failure needs no rollback */
	error = UFS_WAPBL_BEGIN(fdvp->v_mount);
	if (error)
		goto out2;

	/*
	 * 1) Bump link count while we're moving stuff
	 *    around.  If we crash somewhere before
	 *    completing our work, the link count
	 *    may be wrong, but correctable.
	 */
	ip->i_nlink++;
	DIP_ASSIGN(ip, nlink, ip->i_nlink);
	ip->i_flag |= IN_CHANGE;
	if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
		goto bad;
	}

	/*
	 * 2) If target doesn't exist, link the target
	 *    to the source and unlink the source.
	 *    Otherwise, rewrite the target directory
	 *    entry to reference the source inode and
	 *    expunge the original entry's existence.
	 */
	if (txp == NULL) {
		if (tdp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Account for ".." in new directory.
		 * When source and destination have the same
		 * parent we don't fool with the link count.
		 */
		if (doingdirectory && newparent) {
			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
				error = EMLINK;
				goto bad;
			}
			tdp->i_nlink++;
			DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
			tdp->i_flag |= IN_CHANGE;
			if ((error = UFS_UPDATE(tdvp, NULL, NULL,
			    UPDATE_DIROP)) != 0) {
				/* undo the in-core bump made just above */
				tdp->i_nlink--;
				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
				tdp->i_flag |= IN_CHANGE;
				goto bad;
			}
		}
		/* build the new entry and insert it into the target directory */
		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
		ufs_makedirentry(ip, tcnp, newdir);
		error = ufs_direnter(tdvp, &to_ulr,
				     NULL, newdir, tcnp, NULL);
		pool_cache_put(ufs_direct_cache, newdir);
		if (error != 0) {
			if (doingdirectory && newparent) {
				/* take back the ".." link accounted for above */
				tdp->i_nlink--;
				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
				tdp->i_flag |= IN_CHANGE;
				(void)UFS_UPDATE(tdvp, NULL, NULL,
						 UPDATE_WAIT | UPDATE_DIROP);
			}
			goto bad;
		}
		VN_KNOTE(tdvp, NOTE_WRITE);
	} else {
		if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Short circuit rename(foo, foo).
		 * The fvp == tvp case already returned earlier, so
		 * matching inode numbers here are treated as fatal.
		 */
		if (txp->i_number == ip->i_number)
			panic("rename: same file");
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((tdp->i_mode & S_ISTXT) &&
		    kauth_authorize_generic(tcnp->cn_cred,
		     KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
		    kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
		    txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if ((txp->i_mode & IFMT) == IFDIR) {
			if (txp->i_nlink > 2 ||
			    !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}
		/* point the existing target entry at the source inode */
		if ((error = ufs_dirrewrite(tdp, to_ulr.ulr_offset,
		    txp, ip->i_number,
		    IFTODT(ip->i_mode), doingdirectory && newparent ?
		    newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
			goto bad;
		if (doingdirectory) {
			/*
			 * Truncate inode. The only stuff left in the directory
			 * is "." and "..". The "." reference is inconsequential
			 * since we are quashing it. We have removed the "."
			 * reference and the reference in the parent directory,
			 * but there may be other hard links.
			 */
			if (!newparent) {
				tdp->i_nlink--;
				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
				tdp->i_flag |= IN_CHANGE;
				UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
			}
			txp->i_nlink--;
			DIP_ASSIGN(txp, nlink, txp->i_nlink);
			txp->i_flag |= IN_CHANGE;
			if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
			    tcnp->cn_cred)))
				goto bad;
		}
		VN_KNOTE(tdvp, NOTE_WRITE);
		VN_KNOTE(tvp, NOTE_DELETE);
	}

	/*
	 * Handle case where the directory entry we need to remove,
	 * which is/was at from_ulr.ulr_offset, or the one before it,
	 * which is/was at from_ulr.ulr_offset - from_ulr.ulr_count,
	 * may have been moved when the directory insertion above
	 * performed compaction.
	 *
	 * If so, rescan the directory from the start of the compacted
	 * region for the source name and refresh from_ulr to match.
	 */
	if (tdp->i_number == fdp->i_number &&
	    ulr_overlap(&from_ulr, &to_ulr)) {

		struct buf *bp;
		struct direct *ep;
		struct ufsmount *ump = fdp->i_ump;
		doff_t curpos;
		doff_t endsearch;	/* offset to end directory search */
		uint32_t prev_reclen;
		int dirblksiz = ump->um_dirblksiz;
		const int needswap = UFS_MPNEEDSWAP(ump);
		u_long bmask;
		int namlen, entryoffsetinblock;
		char *dirbuf;

		/* mask for "curpos is at the start of an I/O-sized block" */
		bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;

		/*
		 * The fcnp entry will be somewhere between the start of
		 * compaction (to_ulr.ulr_offset) and the original location
		 * (from_ulr.ulr_offset).
		 */
		curpos = to_ulr.ulr_offset;
		endsearch = from_ulr.ulr_offset + from_ulr.ulr_reclen;
		entryoffsetinblock = 0;

		/*
		 * Get the directory block containing the start of
		 * compaction.
		 */
		error = ufs_blkatoff(fdvp, (off_t)to_ulr.ulr_offset, &dirbuf,
		    &bp, false);
		if (error)
			goto bad;

		/*
		 * Keep existing ulr_count (length of previous record)
		 * for the case where compaction did not include the
		 * previous entry but started at the from-entry.
		 */
		prev_reclen = from_ulr.ulr_count;

		while (curpos < endsearch) {
			uint32_t reclen;

			/*
			 * If necessary, get the next directory block.
			 *
			 * dholland 7/13/11 to the best of my understanding
			 * this should never happen; compaction occurs only
			 * within single blocks. I think.
			 */
			if ((curpos & bmask) == 0) {
				if (bp != NULL)
					brelse(bp, 0);
				error = ufs_blkatoff(fdvp, (off_t)curpos,
				    &dirbuf, &bp, false);
				if (error)
					goto bad;
				entryoffsetinblock = 0;
			}

			KASSERT(bp != NULL);
			ep = (struct direct *)(dirbuf + entryoffsetinblock);
			reclen = ufs_rw16(ep->d_reclen, needswap);

			/*
			 * Depending on FSFMT, host byte order and needswap,
			 * the name length is read from d_type instead of
			 * d_namlen.
			 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (FSFMT(fdvp) && needswap == 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#else
			if (FSFMT(fdvp) && needswap != 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#endif
			/* in-use, non-whiteout entry whose name matches fcnp? */
			if ((ep->d_ino != 0) &&
			    (ufs_rw32(ep->d_ino, needswap) != WINO) &&
			    (namlen == fcnp->cn_namelen) &&
			    memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
				from_ulr.ulr_reclen = reclen;
				break;
			}
			curpos += reclen;
			entryoffsetinblock += reclen;
			prev_reclen = reclen;
		}

		/* record where the scan stopped and the length of the entry before it */
		from_ulr.ulr_offset = curpos;
		from_ulr.ulr_count = prev_reclen;

		KASSERT(curpos <= endsearch);

		/*
		 * If ulr_offset points to start of a directory block,
		 * clear ulr_count so ufs_dirremove() doesn't try to
		 * merge free space over a directory block boundary.
		 */
		if ((from_ulr.ulr_offset & (dirblksiz - 1)) == 0)
			from_ulr.ulr_count = 0;

		brelse(bp, 0);
	}

	/*
	 * 3) Unlink the source.
	 */

#if 0
	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; The IRENAME
	 * flag ensures that it cannot be moved by another rename or removed
	 * by a rmdir.
	 */
#endif
	KASSERT(fxp == ip);

	/*
	 * If the source is a directory with a new parent, the link
	 * count of the old parent directory must be decremented and
	 * ".." set to point to the new parent.
	 *
	 * NOTE(review): the return value of this ufs_dirrewrite()
	 * call is not checked.
	 */
	if (doingdirectory && newparent) {
		KASSERT(fdp != NULL);
		ufs_dirrewrite(fxp, mastertemplate.dot_reclen,
			       fdp, newparent, DT_DIR, 0, IN_CHANGE);
		cache_purge(fdvp);
	}
	error = ufs_dirremove(fdvp, &from_ulr,
			      fxp, fcnp->cn_flags, 0);
	fxp->i_flag &= ~IN_RENAME;

	VN_KNOTE(fvp, NOTE_RENAME);
	goto done;

	/*
	 * Exit labels:
	 *   out              - failure after fstrans_start() but before the
	 *                      WAPBL transaction; simply forwards to out2
	 *   bad              - failure during steps 1 and 2; takes back the
	 *                      link count bump on the source, then ends the
	 *                      WAPBL transaction
	 *   done             - ends the WAPBL transaction
	 *   out2             - clears IN_RENAME, unlocks and releases all
	 *                      four vnodes, ends the fstrans section
	 *   abort_withlocks  - failure with all locks held but before
	 *                      fstrans_start(); unlocks, then aborts
	 *   abort            - failure with nothing locked; aborts both
	 *                      name operations and releases the vnodes
	 */
 out:
	goto out2;

	/* exit routines from steps 1 & 2 */
 bad:
	/*
	 * NOTE(review): IN_RENAME is cleared here conditionally, again
	 * unconditionally a few lines down, and once more at out2; the
	 * repeats look redundant but harmless.
	 */
	if (doingdirectory)
		ip->i_flag &= ~IN_RENAME;
	ip->i_nlink--;
	DIP_ASSIGN(ip, nlink, ip->i_nlink);
	ip->i_flag |= IN_CHANGE;
	ip->i_flag &= ~IN_RENAME;
	UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
 done:
	UFS_WAPBL_END(fdvp->v_mount);
 out2:
	/*
	 * clear IN_RENAME - some exit paths happen too early to go
	 * through the cleanup done in the "bad" case above, so we
	 * always do this mini-cleanup here.
	 */
	ip->i_flag &= ~IN_RENAME;

	/* each distinct vnode was locked exactly once; unlock it once */
	VOP_UNLOCK(fdvp);
	if (tdvp != fdvp) {
		VOP_UNLOCK(tdvp);
	}
	VOP_UNLOCK(fvp);
	if (tvp && tvp != fvp) {
		VOP_UNLOCK(tvp);
	}

	vrele(fdvp);
	vrele(tdvp);
	vrele(fvp);
	if (tvp) {
		vrele(tvp);
	}

	fstrans_done(mp);
	return (error);

 abort_withlocks:
	VOP_UNLOCK(fdvp);
	if (tdvp != fdvp) {
		VOP_UNLOCK(tdvp);
	}
	VOP_UNLOCK(fvp);
	if (tvp && tvp != fvp) {
		VOP_UNLOCK(tvp);
	}

 abort:
	VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
	VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
	vrele(tdvp);
	if (tvp) {
		vrele(tvp);
	}
	vrele(fdvp);
	if (fvp) {
		vrele(fvp);
	}
	return (error);
}
1983
1984int
1985ufs_mkdir(void *v)
1986{
1987	struct vop_mkdir_args /* {
1988		struct vnode		*a_dvp;
1989		struct vnode		**a_vpp;
1990		struct componentname	*a_cnp;
1991		struct vattr		*a_vap;
1992	} */ *ap = v;
1993	struct vnode		*dvp = ap->a_dvp, *tvp;
1994	struct vattr		*vap = ap->a_vap;
1995	struct componentname	*cnp = ap->a_cnp;
1996	struct inode		*ip, *dp = VTOI(dvp);
1997	struct buf		*bp;
1998	struct dirtemplate	dirtemplate;
1999	struct direct		*newdir;
2000	int			error, dmode;
2001	struct ufsmount		*ump = dp->i_ump;
2002	int			dirblksiz = ump->um_dirblksiz;
2003	struct ufs_lookup_results *ulr;
2004
2005	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
2006
2007	/* XXX should handle this material another way */
2008	ulr = &dp->i_crap;
2009	UFS_CHECK_CRAPCOUNTER(dp);
2010
2011	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
2012		error = EMLINK;
2013		goto out;
2014	}
2015	dmode = vap->va_mode & ACCESSPERMS;
2016	dmode |= IFDIR;
2017	/*
2018	 * Must simulate part of ufs_makeinode here to acquire the inode,
2019	 * but not have it entered in the parent directory. The entry is
2020	 * made later after writing "." and ".." entries.
2021	 */
2022	if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0)
2023		goto out;
2024
2025	tvp = *ap->a_vpp;
2026	ip = VTOI(tvp);
2027
2028	error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount);
2029	if (error) {
2030		UFS_VFREE(tvp, ip->i_number, dmode);
2031		vput(tvp);
2032		goto out;
2033	}
2034	ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
2035	DIP_ASSIGN(ip, uid, ip->i_uid);
2036	ip->i_gid = dp->i_gid;
2037	DIP_ASSIGN(ip, gid, ip->i_gid);
2038#if defined(QUOTA) || defined(QUOTA2)
2039	if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2040		UFS_VFREE(tvp, ip->i_number, dmode);
2041		UFS_WAPBL_END(dvp->v_mount);
2042		fstrans_done(dvp->v_mount);
2043		vput(tvp);
2044		vput(dvp);
2045		return (error);
2046	}
2047#endif
2048	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2049	ip->i_mode = dmode;
2050	DIP_ASSIGN(ip, mode, dmode);
2051	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
2052	ip->i_nlink = 2;
2053	DIP_ASSIGN(ip, nlink, 2);
2054	if (cnp->cn_flags & ISWHITEOUT) {
2055		ip->i_flags |= UF_OPAQUE;
2056		DIP_ASSIGN(ip, flags, ip->i_flags);
2057	}
2058
2059	/*
2060	 * Bump link count in parent directory to reflect work done below.
2061	 * Should be done before reference is created so cleanup is
2062	 * possible if we crash.
2063	 */
2064	dp->i_nlink++;
2065	DIP_ASSIGN(dp, nlink, dp->i_nlink);
2066	dp->i_flag |= IN_CHANGE;
2067	if ((error = UFS_UPDATE(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
2068		goto bad;
2069
2070	/*
2071	 * Initialize directory with "." and ".." from static template.
2072	 */
2073	dirtemplate = mastertemplate;
2074	dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
2075	dirtemplate.dot_ino = ufs_rw32(ip->i_number, UFS_MPNEEDSWAP(ump));
2076	dirtemplate.dotdot_ino = ufs_rw32(dp->i_number, UFS_MPNEEDSWAP(ump));
2077	dirtemplate.dot_reclen = ufs_rw16(dirtemplate.dot_reclen,
2078	    UFS_MPNEEDSWAP(ump));
2079	dirtemplate.dotdot_reclen = ufs_rw16(dirtemplate.dotdot_reclen,
2080	    UFS_MPNEEDSWAP(ump));
2081	if (ump->um_maxsymlinklen <= 0) {
2082#if BYTE_ORDER == LITTLE_ENDIAN
2083		if (UFS_MPNEEDSWAP(ump) == 0)
2084#else
2085		if (UFS_MPNEEDSWAP(ump) != 0)
2086#endif
2087		{
2088			dirtemplate.dot_type = dirtemplate.dot_namlen;
2089			dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
2090			dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
2091		} else
2092			dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
2093	}
2094	if ((error = UFS_BALLOC(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
2095	    B_CLRBUF, &bp)) != 0)
2096		goto bad;
2097	ip->i_size = dirblksiz;
2098	DIP_ASSIGN(ip, size, dirblksiz);
2099	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2100	uvm_vnp_setsize(tvp, ip->i_size);
2101	memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);
2102
2103	/*
2104	 * Directory set up, now install it's entry in the parent directory.
2105	 * We must write out the buffer containing the new directory body
2106	 * before entering the new name in the parent.
2107	 */
2108	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
2109		goto bad;
2110	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
2111		goto bad;
2112	}
2113	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
2114	ufs_makedirentry(ip, cnp, newdir);
2115	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
2116	pool_cache_put(ufs_direct_cache, newdir);
2117 bad:
2118	if (error == 0) {
2119		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
2120		UFS_WAPBL_END(dvp->v_mount);
2121	} else {
2122		dp->i_nlink--;
2123		DIP_ASSIGN(dp, nlink, dp->i_nlink);
2124		dp->i_flag |= IN_CHANGE;
2125		UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
2126		/*
2127		 * No need to do an explicit UFS_TRUNCATE here, vrele will
2128		 * do this for us because we set the link count to 0.
2129		 */
2130		ip->i_nlink = 0;
2131		DIP_ASSIGN(ip, nlink, 0);
2132		ip->i_flag |= IN_CHANGE;
2133		/* If IN_ADIROP, account for it */
2134		UFS_UNMARK_VNODE(tvp);
2135		UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
2136		UFS_WAPBL_END(dvp->v_mount);
2137		vput(tvp);
2138	}
2139 out:
2140	fstrans_done(dvp->v_mount);
2141	vput(dvp);
2142	return (error);
2143}
2144
/*
 * rmdir -- remove the directory a_vp, named by a_cnp, from its parent
 * directory a_dvp.
 *
 * Both vnodes are released (vput/vrele) on every return path.
 *
 * Returns 0 on success, otherwise EINVAL (removing "." or a mount point,
 * or the target is being renamed), ENOTEMPTY, EPERM (append-only parent
 * or immutable/append-only target), or the error reported by
 * UFS_WAPBL_BEGIN(), ufs_dirremove() or UFS_TRUNCATE().
 */
int
ufs_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode		*a_dvp;
		struct vnode		*a_vp;
		struct componentname	*a_cnp;
	} */ *ap = v;
	struct vnode		*vp, *dvp;
	struct componentname	*cnp;
	struct inode		*ip, *dp;
	int			error;
	struct ufs_lookup_results *ulr;

	vp = ap->a_vp;
	dvp = ap->a_dvp;
	cnp = ap->a_cnp;
	ip = VTOI(vp);
	dp = VTOI(dvp);

	/* XXX should handle this material another way */
	ulr = &dp->i_crap;
	UFS_CHECK_CRAPCOUNTER(dp);

	/*
	 * No rmdir "." or of mounted directories please.
	 */
	if (dp == ip || vp->v_mountedhere != NULL) {
		/*
		 * When dvp and vp are the same vnode, dvp is only
		 * dereferenced (vrele); the single vput(vp) below both
		 * unlocks and releases it.
		 */
		if (dp == ip)
			vrele(dvp);
		else
			vput(dvp);
		vput(vp);
		return (EINVAL);
	}

	fstrans_start(dvp->v_mount, FSTRANS_SHARED);

	/*
	 * Do not remove a directory that is in the process of being renamed.
	 * Verify that the directory is empty (and valid). (Rmdir ".." won't
	 * be valid since ".." will contain a reference to the current
	 * directory and thus be non-empty.)
	 */
	error = 0;
	if (ip->i_flag & IN_RENAME) {
		error = EINVAL;
		goto out;
	}
	/* An empty directory has exactly two links. */
	if (ip->i_nlink != 2 ||
	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
		error = ENOTEMPTY;
		goto out;
	}
	/* Honour the append-only / immutable file flags. */
	if ((dp->i_flags & APPEND) ||
		(ip->i_flags & (IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}
	/* All checks passed; everything below runs inside the journal. */
	error = UFS_WAPBL_BEGIN(dvp->v_mount);
	if (error)
		goto out;
	/*
	 * Delete reference to directory before purging
	 * inode.  If we crash in between, the directory
	 * will be reattached to lost+found.
	 */
	error = ufs_dirremove(dvp, ulr, ip, cnp->cn_flags, 1);
	if (error) {
		UFS_WAPBL_END(dvp->v_mount);
		goto out;
	}
	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
	cache_purge(dvp);
	/*
	 * Truncate inode.  The only stuff left in the directory is "." and
	 * "..".  The "." reference is inconsequential since we're quashing
	 * it.
	 */
	/* Drop one link on the parent ... */
	dp->i_nlink--;
	DIP_ASSIGN(dp, nlink, dp->i_nlink);
	dp->i_flag |= IN_CHANGE;
	UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
	/* ... and one link on the directory being removed. */
	ip->i_nlink--;
	DIP_ASSIGN(ip, nlink, ip->i_nlink);
	ip->i_flag |= IN_CHANGE;
	/* A truncate failure is still reported to the caller via error. */
	error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
	cache_purge(vp);
	/*
	 * Unlock the log while we still have reference to unlinked
	 * directory vp so that it will not get locked for recycling
	 */
	UFS_WAPBL_END(dvp->v_mount);
#ifdef UFS_DIRHASH
	if (ip->i_dirhash != NULL)
		ufsdirhash_free(ip);
#endif
 out:
	/*
	 * NOTE(review): this label is also reached by the early failure
	 * paths above, so NOTE_DELETE is posted even when nothing was
	 * removed -- confirm whether that is intended.
	 */
	VN_KNOTE(vp, NOTE_DELETE);
	vput(vp);
	fstrans_done(dvp->v_mount);
	vput(dvp);
	return (error);
}
2249
2250/*
2251 * symlink -- make a symbolic link
2252 */
2253int
2254ufs_symlink(void *v)
2255{
2256	struct vop_symlink_args /* {
2257		struct vnode		*a_dvp;
2258		struct vnode		**a_vpp;
2259		struct componentname	*a_cnp;
2260		struct vattr		*a_vap;
2261		char			*a_target;
2262	} */ *ap = v;
2263	struct vnode	*vp, **vpp;
2264	struct inode	*ip;
2265	int		len, error;
2266	struct ufs_lookup_results *ulr;
2267
2268	vpp = ap->a_vpp;
2269
2270	/* XXX should handle this material another way */
2271	ulr = &VTOI(ap->a_dvp)->i_crap;
2272	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
2273
2274	/*
2275	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
2276	 * ufs_makeinode
2277	 */
2278	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
2279	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, ulr,
2280			      vpp, ap->a_cnp);
2281	if (error)
2282		goto out;
2283	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
2284	vp = *vpp;
2285	len = strlen(ap->a_target);
2286	ip = VTOI(vp);
2287	if (len < ip->i_ump->um_maxsymlinklen) {
2288		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
2289		ip->i_size = len;
2290		DIP_ASSIGN(ip, size, len);
2291		uvm_vnp_setsize(vp, ip->i_size);
2292		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2293		if (vp->v_mount->mnt_flag & MNT_RELATIME)
2294			ip->i_flag |= IN_ACCESS;
2295		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
2296	} else
2297		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
2298		    UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED,
2299		    ap->a_cnp->cn_cred, NULL, NULL);
2300	UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
2301	if (error)
2302		vput(vp);
2303out:
2304	fstrans_done(ap->a_dvp->v_mount);
2305	return (error);
2306}
2307
2308/*
2309 * Vnode op for reading directories.
2310 *
2311 * This routine handles converting from the on-disk directory format
2312 * "struct direct" to the in-memory format "struct dirent" as well as
2313 * byte swapping the entries if necessary.
2314 */
2315int
2316ufs_readdir(void *v)
2317{
2318	struct vop_readdir_args /* {
2319		struct vnode	*a_vp;
2320		struct uio	*a_uio;
2321		kauth_cred_t	a_cred;
2322		int		*a_eofflag;
2323		off_t		**a_cookies;
2324		int		*ncookies;
2325	} */ *ap = v;
2326	struct vnode	*vp = ap->a_vp;
2327	struct direct	*cdp, *ecdp;
2328	struct dirent	*ndp;
2329	char		*cdbuf, *ndbuf, *endp;
2330	struct uio	auio, *uio;
2331	struct iovec	aiov;
2332	int		error;
2333	size_t		count, ccount, rcount, cdbufsz, ndbufsz;
2334	off_t		off, *ccp;
2335	off_t		startoff;
2336	size_t		skipbytes;
2337	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
2338	int nswap = UFS_MPNEEDSWAP(ump);
2339#if BYTE_ORDER == LITTLE_ENDIAN
2340	int needswap = ump->um_maxsymlinklen <= 0 && nswap == 0;
2341#else
2342	int needswap = ump->um_maxsymlinklen <= 0 && nswap != 0;
2343#endif
2344	uio = ap->a_uio;
2345	count = uio->uio_resid;
2346	rcount = count - ((uio->uio_offset + count) & (ump->um_dirblksiz - 1));
2347
2348	if (rcount < _DIRENT_MINSIZE(cdp) || count < _DIRENT_MINSIZE(ndp))
2349		return EINVAL;
2350
2351	startoff = uio->uio_offset & ~(ump->um_dirblksiz - 1);
2352	skipbytes = uio->uio_offset - startoff;
2353	rcount += skipbytes;
2354
2355	auio.uio_iov = &aiov;
2356	auio.uio_iovcnt = 1;
2357	auio.uio_offset = startoff;
2358	auio.uio_resid = rcount;
2359	UIO_SETUP_SYSSPACE(&auio);
2360	auio.uio_rw = UIO_READ;
2361	cdbufsz = rcount;
2362	cdbuf = kmem_alloc(cdbufsz, KM_SLEEP);
2363	aiov.iov_base = cdbuf;
2364	aiov.iov_len = rcount;
2365	error = VOP_READ(vp, &auio, 0, ap->a_cred);
2366	if (error != 0) {
2367		kmem_free(cdbuf, cdbufsz);
2368		return error;
2369	}
2370
2371	rcount -= auio.uio_resid;
2372
2373	cdp = (struct direct *)(void *)cdbuf;
2374	ecdp = (struct direct *)(void *)&cdbuf[rcount];
2375
2376	ndbufsz = count;
2377	ndbuf = kmem_alloc(ndbufsz, KM_SLEEP);
2378	ndp = (struct dirent *)(void *)ndbuf;
2379	endp = &ndbuf[count];
2380
2381	off = uio->uio_offset;
2382	if (ap->a_cookies) {
2383		ccount = rcount / _DIRENT_RECLEN(cdp, 1);
2384		ccp = *(ap->a_cookies) = malloc(ccount * sizeof(*ccp),
2385		    M_TEMP, M_WAITOK);
2386	} else {
2387		/* XXX: GCC */
2388		ccount = 0;
2389		ccp = NULL;
2390	}
2391
2392	while (cdp < ecdp) {
2393		cdp->d_reclen = ufs_rw16(cdp->d_reclen, nswap);
2394		if (skipbytes > 0) {
2395			if (cdp->d_reclen <= skipbytes) {
2396				skipbytes -= cdp->d_reclen;
2397				cdp = _DIRENT_NEXT(cdp);
2398				continue;
2399			}
2400			/*
2401			 * invalid cookie.
2402			 */
2403			error = EINVAL;
2404			goto out;
2405		}
2406		if (cdp->d_reclen == 0) {
2407			struct dirent *ondp = ndp;
2408			ndp->d_reclen = _DIRENT_MINSIZE(ndp);
2409			ndp = _DIRENT_NEXT(ndp);
2410			ondp->d_reclen = 0;
2411			cdp = ecdp;
2412			break;
2413		}
2414		if (needswap) {
2415			ndp->d_type = cdp->d_namlen;
2416			ndp->d_namlen = cdp->d_type;
2417		} else {
2418			ndp->d_type = cdp->d_type;
2419			ndp->d_namlen = cdp->d_namlen;
2420		}
2421		ndp->d_reclen = _DIRENT_RECLEN(ndp, ndp->d_namlen);
2422		if ((char *)(void *)ndp + ndp->d_reclen +
2423		    _DIRENT_MINSIZE(ndp) > endp)
2424			break;
2425		ndp->d_fileno = ufs_rw32(cdp->d_ino, nswap);
2426		(void)memcpy(ndp->d_name, cdp->d_name, ndp->d_namlen);
2427		memset(&ndp->d_name[ndp->d_namlen], 0,
2428		    ndp->d_reclen - _DIRENT_NAMEOFF(ndp) - ndp->d_namlen);
2429		off += cdp->d_reclen;
2430		if (ap->a_cookies) {
2431			KASSERT(ccp - *(ap->a_cookies) < ccount);
2432			*(ccp++) = off;
2433		}
2434		ndp = _DIRENT_NEXT(ndp);
2435		cdp = _DIRENT_NEXT(cdp);
2436	}
2437
2438	count = ((char *)(void *)ndp - ndbuf);
2439	error = uiomove(ndbuf, count, uio);
2440out:
2441	if (ap->a_cookies) {
2442		if (error) {
2443			free(*(ap->a_cookies), M_TEMP);
2444			*(ap->a_cookies) = NULL;
2445			*(ap->a_ncookies) = 0;
2446		} else {
2447			*ap->a_ncookies = ccp - *(ap->a_cookies);
2448		}
2449	}
2450	uio->uio_offset = off;
2451	kmem_free(ndbuf, ndbufsz);
2452	kmem_free(cdbuf, cdbufsz);
2453	*ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset;
2454	return error;
2455}
2456
2457/*
2458 * Return target name of a symbolic link
2459 */
2460int
2461ufs_readlink(void *v)
2462{
2463	struct vop_readlink_args /* {
2464		struct vnode	*a_vp;
2465		struct uio	*a_uio;
2466		kauth_cred_t	a_cred;
2467	} */ *ap = v;
2468	struct vnode	*vp = ap->a_vp;
2469	struct inode	*ip = VTOI(vp);
2470	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
2471	int		isize;
2472
2473	isize = ip->i_size;
2474	if (isize < ump->um_maxsymlinklen ||
2475	    (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) {
2476		uiomove((char *)SHORTLINK(ip), isize, ap->a_uio);
2477		return (0);
2478	}
2479	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
2480}
2481
2482/*
2483 * Calculate the logical to physical mapping if not done already,
2484 * then call the device strategy routine.
2485 */
2486int
2487ufs_strategy(void *v)
2488{
2489	struct vop_strategy_args /* {
2490		struct vnode *a_vp;
2491		struct buf *a_bp;
2492	} */ *ap = v;
2493	struct buf	*bp;
2494	struct vnode	*vp;
2495	struct inode	*ip;
2496	struct mount	*mp;
2497	int		error;
2498
2499	bp = ap->a_bp;
2500	vp = ap->a_vp;
2501	ip = VTOI(vp);
2502	if (vp->v_type == VBLK || vp->v_type == VCHR)
2503		panic("ufs_strategy: spec");
2504	KASSERT(bp->b_bcount != 0);
2505	if (bp->b_blkno == bp->b_lblkno) {
2506		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
2507				 NULL);
2508		if (error) {
2509			bp->b_error = error;
2510			biodone(bp);
2511			return (error);
2512		}
2513		if (bp->b_blkno == -1) /* no valid data */
2514			clrbuf(bp);
2515	}
2516	if (bp->b_blkno < 0) { /* block is not on disk */
2517		biodone(bp);
2518		return (0);
2519	}
2520	vp = ip->i_devvp;
2521
2522	error = VOP_STRATEGY(vp, bp);
2523	if (error)
2524		return error;
2525
2526	if (!BUF_ISREAD(bp))
2527		return 0;
2528
2529	mp = wapbl_vptomp(vp);
2530	if (mp == NULL || mp->mnt_wapbl_replay == NULL ||
2531	    !WAPBL_REPLAY_ISOPEN(mp) ||
2532	    !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount))
2533		return 0;
2534
2535	error = biowait(bp);
2536	if (error)
2537		return error;
2538
2539	error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount);
2540	if (error) {
2541		mutex_enter(&bufcache_lock);
2542		SET(bp->b_cflags, BC_INVAL);
2543		mutex_exit(&bufcache_lock);
2544	}
2545	return error;
2546}
2547
2548/*
2549 * Print out the contents of an inode.
2550 */
2551int
2552ufs_print(void *v)
2553{
2554	struct vop_print_args /* {
2555		struct vnode	*a_vp;
2556	} */ *ap = v;
2557	struct vnode	*vp;
2558	struct inode	*ip;
2559
2560	vp = ap->a_vp;
2561	ip = VTOI(vp);
2562	printf("tag VT_UFS, ino %llu, on dev %llu, %llu",
2563	    (unsigned long long)ip->i_number,
2564	    (unsigned long long)major(ip->i_dev),
2565	    (unsigned long long)minor(ip->i_dev));
2566	printf(" flags 0x%x, nlink %d\n",
2567	    ip->i_flag, ip->i_nlink);
2568	printf("\tmode 0%o, owner %d, group %d, size %qd",
2569	    ip->i_mode, ip->i_uid, ip->i_gid,
2570	    (long long)ip->i_size);
2571	if (vp->v_type == VFIFO)
2572		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
2573	printf("\n");
2574	return (0);
2575}
2576
2577/*
2578 * Read wrapper for special devices.
2579 */
2580int
2581ufsspec_read(void *v)
2582{
2583	struct vop_read_args /* {
2584		struct vnode	*a_vp;
2585		struct uio	*a_uio;
2586		int		a_ioflag;
2587		kauth_cred_t	a_cred;
2588	} */ *ap = v;
2589
2590	/*
2591	 * Set access flag.
2592	 */
2593	if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)
2594		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
2595	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
2596}
2597
2598/*
2599 * Write wrapper for special devices.
2600 */
2601int
2602ufsspec_write(void *v)
2603{
2604	struct vop_write_args /* {
2605		struct vnode	*a_vp;
2606		struct uio	*a_uio;
2607		int		a_ioflag;
2608		kauth_cred_t	a_cred;
2609	} */ *ap = v;
2610
2611	/*
2612	 * Set update and change flags.
2613	 */
2614	if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)
2615		VTOI(ap->a_vp)->i_flag |= IN_MODIFY;
2616	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
2617}
2618
2619/*
2620 * Close wrapper for special devices.
2621 *
2622 * Update the times on the inode then do device close.
2623 */
2624int
2625ufsspec_close(void *v)
2626{
2627	struct vop_close_args /* {
2628		struct vnode	*a_vp;
2629		int		a_fflag;
2630		kauth_cred_t	a_cred;
2631	} */ *ap = v;
2632	struct vnode	*vp;
2633	struct inode	*ip;
2634
2635	vp = ap->a_vp;
2636	ip = VTOI(vp);
2637	if (vp->v_usecount > 1)
2638		UFS_ITIMES(vp, NULL, NULL, NULL);
2639	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
2640}
2641
2642/*
2643 * Read wrapper for fifo's
2644 */
2645int
2646ufsfifo_read(void *v)
2647{
2648	struct vop_read_args /* {
2649		struct vnode	*a_vp;
2650		struct uio	*a_uio;
2651		int		a_ioflag;
2652		kauth_cred_t	a_cred;
2653	} */ *ap = v;
2654
2655	/*
2656	 * Set access flag.
2657	 */
2658	VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
2659	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
2660}
2661
2662/*
2663 * Write wrapper for fifo's.
2664 */
2665int
2666ufsfifo_write(void *v)
2667{
2668	struct vop_write_args /* {
2669		struct vnode	*a_vp;
2670		struct uio	*a_uio;
2671		int		a_ioflag;
2672		kauth_cred_t	a_cred;
2673	} */ *ap = v;
2674
2675	/*
2676	 * Set update and change flags.
2677	 */
2678	VTOI(ap->a_vp)->i_flag |= IN_MODIFY;
2679	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
2680}
2681
2682/*
2683 * Close wrapper for fifo's.
2684 *
2685 * Update the times on the inode then do device close.
2686 */
2687int
2688ufsfifo_close(void *v)
2689{
2690	struct vop_close_args /* {
2691		struct vnode	*a_vp;
2692		int		a_fflag;
2693		kauth_cred_t	a_cred;
2694	} */ *ap = v;
2695	struct vnode	*vp;
2696	struct inode	*ip;
2697
2698	vp = ap->a_vp;
2699	ip = VTOI(vp);
2700	if (ap->a_vp->v_usecount > 1)
2701		UFS_ITIMES(vp, NULL, NULL, NULL);
2702	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
2703}
2704
2705/*
2706 * Return POSIX pathconf information applicable to ufs filesystems.
2707 */
2708int
2709ufs_pathconf(void *v)
2710{
2711	struct vop_pathconf_args /* {
2712		struct vnode	*a_vp;
2713		int		a_name;
2714		register_t	*a_retval;
2715	} */ *ap = v;
2716
2717	switch (ap->a_name) {
2718	case _PC_LINK_MAX:
2719		*ap->a_retval = LINK_MAX;
2720		return (0);
2721	case _PC_NAME_MAX:
2722		*ap->a_retval = FFS_MAXNAMLEN;
2723		return (0);
2724	case _PC_PATH_MAX:
2725		*ap->a_retval = PATH_MAX;
2726		return (0);
2727	case _PC_PIPE_BUF:
2728		*ap->a_retval = PIPE_BUF;
2729		return (0);
2730	case _PC_CHOWN_RESTRICTED:
2731		*ap->a_retval = 1;
2732		return (0);
2733	case _PC_NO_TRUNC:
2734		*ap->a_retval = 1;
2735		return (0);
2736	case _PC_SYNC_IO:
2737		*ap->a_retval = 1;
2738		return (0);
2739	case _PC_FILESIZEBITS:
2740		*ap->a_retval = 42;
2741		return (0);
2742	case _PC_SYMLINK_MAX:
2743		*ap->a_retval = MAXPATHLEN;
2744		return (0);
2745	case _PC_2_SYMLINKS:
2746		*ap->a_retval = 1;
2747		return (0);
2748	default:
2749		return (EINVAL);
2750	}
2751	/* NOTREACHED */
2752}
2753
2754/*
2755 * Advisory record locking support
2756 */
2757int
2758ufs_advlock(void *v)
2759{
2760	struct vop_advlock_args /* {
2761		struct vnode	*a_vp;
2762		void *		a_id;
2763		int		a_op;
2764		struct flock	*a_fl;
2765		int		a_flags;
2766	} */ *ap = v;
2767	struct inode *ip;
2768
2769	ip = VTOI(ap->a_vp);
2770	return lf_advlock(ap, &ip->i_lockf, ip->i_size);
2771}
2772
2773/*
2774 * Initialize the vnode associated with a new inode, handle aliased
2775 * vnodes.
2776 */
2777void
2778ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *),
2779	struct vnode **vpp)
2780{
2781	struct timeval	tv;
2782	struct inode	*ip;
2783	struct vnode	*vp;
2784	dev_t		rdev;
2785	struct ufsmount	*ump;
2786
2787	vp = *vpp;
2788	ip = VTOI(vp);
2789	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2790	case VCHR:
2791	case VBLK:
2792		vp->v_op = specops;
2793		ump = ip->i_ump;
2794		if (ump->um_fstype == UFS1)
2795			rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev,
2796			    UFS_MPNEEDSWAP(ump));
2797		else
2798			rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev,
2799			    UFS_MPNEEDSWAP(ump));
2800		spec_node_init(vp, rdev);
2801		break;
2802	case VFIFO:
2803		vp->v_op = fifoops;
2804		break;
2805	case VNON:
2806	case VBAD:
2807	case VSOCK:
2808	case VLNK:
2809	case VDIR:
2810	case VREG:
2811		break;
2812	}
2813	if (ip->i_number == ROOTINO)
2814                vp->v_vflag |= VV_ROOT;
2815	/*
2816	 * Initialize modrev times
2817	 */
2818	getmicrouptime(&tv);
2819	ip->i_modrev = (uint64_t)(uint)tv.tv_sec << 32
2820			| tv.tv_usec * 4294u;
2821	*vpp = vp;
2822}
2823
/*
 * Allocate a new inode.
 *
 * Allocates an inode of the given mode in the filesystem of directory
 * dvp, initialises its ownership, mode and link count, and enters it in
 * dvp under the name in cnp using the lookup results in ulr.
 *
 * dvp is released with vput() on every return path.  On success 0 is
 * returned, *vpp holds the new vnode, and the journal transaction started
 * with UFS_WAPBL_BEGIN1() is left open for the caller to end.  On failure
 * the new vnode (if any) is released and the error is returned.
 */
int
ufs_makeinode(int mode, struct vnode *dvp, const struct ufs_lookup_results *ulr,
	struct vnode **vpp, struct componentname *cnp)
{
	struct inode	*ip, *pdir;
	struct direct	*newdir;
	struct vnode	*tvp;
	int		error, ismember = 0;

	UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount);

	pdir = VTOI(dvp);

	/* A mode without a file type defaults to a regular file. */
	if ((mode & IFMT) == 0)
		mode |= IFREG;

	if ((error = UFS_VALLOC(dvp, mode, cnp->cn_cred, vpp)) != 0) {
		vput(dvp);
		return (error);
	}
	tvp = *vpp;
	ip = VTOI(tvp);
	/* Group comes from the parent directory, owner from the caller. */
	ip->i_gid = pdir->i_gid;
	DIP_ASSIGN(ip, gid, ip->i_gid);
	ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
	DIP_ASSIGN(ip, uid, ip->i_uid);
	error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp);
	if (error) {
		/*
		 * Note, we can't VOP_VFREE(tvp) here like we should
		 * because we can't write to the disk.  Instead, we leave
		 * the vnode dangling from the journal.
		 */
		vput(tvp);
		vput(dvp);
		return (error);
	}
#if defined(QUOTA) || defined(QUOTA2)
	/* Charge the new inode to the owner's quota; undo on failure. */
	if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
		UFS_VFREE(tvp, ip->i_number, mode);
		UFS_WAPBL_END1(dvp->v_mount, dvp);
		vput(tvp);
		vput(dvp);
		return (error);
	}
#endif
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_mode = mode;
	DIP_ASSIGN(ip, mode, mode);
	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
	ip->i_nlink = 1;
	DIP_ASSIGN(ip, nlink, 1);
	/*
	 * Strip ISGID when the credential is not shown to be a member of
	 * the new file's group and the KAUTH_GENERIC_ISSUSER check
	 * returns non-zero.
	 */
	if ((ip->i_mode & ISGID) && (kauth_cred_ismember_gid(cnp->cn_cred,
	    ip->i_gid, &ismember) != 0 || !ismember) &&
	    kauth_authorize_generic(cnp->cn_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
		ip->i_mode &= ~ISGID;
		DIP_ASSIGN(ip, mode, ip->i_mode);
	}

	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;
	/* Build the directory entry in a pooled buffer and install it. */
	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
	ufs_makedirentry(ip, cnp, newdir);
	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, NULL);
	pool_cache_put(ufs_direct_cache, newdir);
	if (error)
		goto bad;
	/* Success: the transaction stays open for the caller. */
	vput(dvp);
	*vpp = tvp;
	return (0);

 bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	ip->i_nlink = 0;
	DIP_ASSIGN(ip, nlink, 0);
	ip->i_flag |= IN_CHANGE;
	/* If IN_ADIROP, account for it */
	UFS_UNMARK_VNODE(tvp);
	UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0);
	tvp->v_type = VNON;		/* explodes later if VBLK */
	UFS_WAPBL_END1(dvp->v_mount, dvp);
	vput(tvp);
	vput(dvp);
	return (error);
}
2923
2924/*
2925 * Allocate len bytes at offset off.
2926 */
2927int
2928ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
2929    kauth_cred_t cred)
2930{
2931        struct inode *ip = VTOI(vp);
2932        int error, delta, bshift, bsize;
2933        UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist);
2934
2935        error = 0;
2936        bshift = vp->v_mount->mnt_fs_bshift;
2937        bsize = 1 << bshift;
2938
2939        delta = off & (bsize - 1);
2940        off -= delta;
2941        len += delta;
2942
2943        while (len > 0) {
2944                bsize = MIN(bsize, len);
2945
2946                error = UFS_BALLOC(vp, off, bsize, cred, flags, NULL);
2947                if (error) {
2948                        goto out;
2949                }
2950
2951                /*
2952                 * increase file size now, UFS_BALLOC() requires that
2953                 * EOF be up-to-date before each call.
2954                 */
2955
2956                if (ip->i_size < off + bsize) {
2957                        UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
2958                            vp, ip->i_size, off + bsize, 0);
2959                        ip->i_size = off + bsize;
2960			DIP_ASSIGN(ip, size, ip->i_size);
2961                }
2962
2963                off += bsize;
2964                len -= bsize;
2965        }
2966
2967out:
2968	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
2969	return error;
2970}
2971
2972void
2973ufs_gop_markupdate(struct vnode *vp, int flags)
2974{
2975	u_int32_t mask = 0;
2976
2977	if ((flags & GOP_UPDATE_ACCESSED) != 0) {
2978		mask = IN_ACCESS;
2979	}
2980	if ((flags & GOP_UPDATE_MODIFIED) != 0) {
2981		if (vp->v_type == VREG) {
2982			mask |= IN_CHANGE | IN_UPDATE;
2983		} else {
2984			mask |= IN_MODIFY;
2985		}
2986	}
2987	if (mask) {
2988		struct inode *ip = VTOI(vp);
2989
2990		ip->i_flag |= mask;
2991	}
2992}
2993