1139778Simp/*-
212115Sdyson *  modified for EXT2FS support in Lites 1.1
312115Sdyson *
412115Sdyson *  Aug 1995, Godmar Back (gback@cs.utah.edu)
512115Sdyson *  University of Utah, Department of Computer Science
612115Sdyson */
7139778Simp/*-
812115Sdyson * Copyright (c) 1982, 1986, 1989, 1993
912115Sdyson *	The Regents of the University of California.  All rights reserved.
1031495Sphk * (c) UNIX System Laboratories, Inc.
1131495Sphk * All or some portions of this file are derived from material licensed
1231495Sphk * to the University of California by American Telephone and Telegraph
1331495Sphk * Co. or Unix System Laboratories, Inc. and are reproduced herein with
1431495Sphk * the permission of UNIX System Laboratories, Inc.
1512115Sdyson *
1612115Sdyson * Redistribution and use in source and binary forms, with or without
1712115Sdyson * modification, are permitted provided that the following conditions
1812115Sdyson * are met:
1912115Sdyson * 1. Redistributions of source code must retain the above copyright
2012115Sdyson *    notice, this list of conditions and the following disclaimer.
2112115Sdyson * 2. Redistributions in binary form must reproduce the above copyright
2212115Sdyson *    notice, this list of conditions and the following disclaimer in the
2312115Sdyson *    documentation and/or other materials provided with the distribution.
2412115Sdyson * 4. Neither the name of the University nor the names of its contributors
2512115Sdyson *    may be used to endorse or promote products derived from this software
2612115Sdyson *    without specific prior written permission.
2712115Sdyson *
2812115Sdyson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2912115Sdyson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
3012115Sdyson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3112115Sdyson * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3212115Sdyson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3312115Sdyson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3412115Sdyson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3512115Sdyson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3612115Sdyson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3712115Sdyson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3812115Sdyson * SUCH DAMAGE.
3912115Sdyson *
4093015Sbde *	@(#)ufs_vnops.c	8.7 (Berkeley) 2/3/94
4131495Sphk *	@(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
4253101Seivind * $FreeBSD$
4312115Sdyson */
4412115Sdyson
4531749Seivind#include "opt_suiddir.h"
4631398Sbde
4712115Sdyson#include <sys/param.h>
4812115Sdyson#include <sys/systm.h>
4912115Sdyson#include <sys/kernel.h>
5096749Siedowse#include <sys/fcntl.h>
51252956Spfg#include <sys/filio.h>
5212115Sdyson#include <sys/stat.h>
5360041Sphk#include <sys/bio.h>
5412115Sdyson#include <sys/buf.h>
55193377Sstas#include <sys/endian.h>
56164033Srwatson#include <sys/priv.h>
5712115Sdyson#include <sys/mount.h>
5896749Siedowse#include <sys/unistd.h>
5926641Sbde#include <sys/time.h>
6012115Sdyson#include <sys/vnode.h>
6131268Sphk#include <sys/namei.h>
6296749Siedowse#include <sys/lockf.h>
6396749Siedowse#include <sys/event.h>
6496749Siedowse#include <sys/conf.h>
6596749Siedowse#include <sys/file.h>
6612115Sdyson
6712115Sdyson#include <vm/vm.h>
68228507Spfg#include <vm/vm_page.h>
69228507Spfg#include <vm/vm_object.h>
7012726Sbde#include <vm/vm_extern.h>
7133933Smsmith#include <vm/vnode_pager.h>
7212115Sdyson
73228507Spfg#include "opt_directio.h"
74228507Spfg
7531268Sphk#include <ufs/ufs/dir.h>
7612115Sdyson
77221128Sjhb#include <fs/ext2fs/fs.h>
78202283Slulf#include <fs/ext2fs/inode.h>
79202283Slulf#include <fs/ext2fs/ext2_extern.h>
80202283Slulf#include <fs/ext2fs/ext2fs.h>
81221128Sjhb#include <fs/ext2fs/ext2_dinode.h>
82202283Slulf#include <fs/ext2fs/ext2_dir.h>
83221128Sjhb#include <fs/ext2fs/ext2_mount.h>
8412115Sdyson
8592728Salfredstatic int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
86202283Slulfstatic void ext2_itimes_locked(struct vnode *);
87254260Spfgstatic int ext4_ext_read(struct vop_read_args *);
88254260Spfgstatic int ext2_ind_read(struct vop_read_args *);
8931268Sphk
90138270Sphkstatic vop_access_t	ext2_access;
9196749Siedowsestatic int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *);
9296749Siedowsestatic int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *,
9396749Siedowse    struct thread *);
94138270Sphkstatic vop_close_t	ext2_close;
95138270Sphkstatic vop_create_t	ext2_create;
96138270Sphkstatic vop_fsync_t	ext2_fsync;
97138270Sphkstatic vop_getattr_t	ext2_getattr;
98252956Spfgstatic vop_ioctl_t	ext2_ioctl;
99138270Sphkstatic vop_link_t	ext2_link;
100138270Sphkstatic vop_mkdir_t	ext2_mkdir;
101138270Sphkstatic vop_mknod_t	ext2_mknod;
102138270Sphkstatic vop_open_t	ext2_open;
103138270Sphkstatic vop_pathconf_t	ext2_pathconf;
104138270Sphkstatic vop_print_t	ext2_print;
105138270Sphkstatic vop_read_t	ext2_read;
106138270Sphkstatic vop_readlink_t	ext2_readlink;
107138270Sphkstatic vop_remove_t	ext2_remove;
108138270Sphkstatic vop_rename_t	ext2_rename;
109138270Sphkstatic vop_rmdir_t	ext2_rmdir;
110138270Sphkstatic vop_setattr_t	ext2_setattr;
111138270Sphkstatic vop_strategy_t	ext2_strategy;
112138270Sphkstatic vop_symlink_t	ext2_symlink;
113138270Sphkstatic vop_write_t	ext2_write;
114166774Spjdstatic vop_vptofh_t	ext2_vptofh;
115138270Sphkstatic vop_close_t	ext2fifo_close;
116138270Sphkstatic vop_kqfilter_t	ext2fifo_kqfilter;
11712911Sphk
11896749Siedowse/* Global vfs data structures for ext2. */
119138290Sphkstruct vop_vector ext2_vnodeops = {
120138290Sphk	.vop_default =		&default_vnodeops,
121138290Sphk	.vop_access =		ext2_access,
122138290Sphk	.vop_bmap =		ext2_bmap,
123138290Sphk	.vop_cachedlookup =	ext2_lookup,
124138290Sphk	.vop_close =		ext2_close,
125138290Sphk	.vop_create =		ext2_create,
126138290Sphk	.vop_fsync =		ext2_fsync,
127138290Sphk	.vop_getattr =		ext2_getattr,
128138290Sphk	.vop_inactive =		ext2_inactive,
129252956Spfg	.vop_ioctl =		ext2_ioctl,
130138290Sphk	.vop_link =		ext2_link,
131138290Sphk	.vop_lookup =		vfs_cache_lookup,
132138290Sphk	.vop_mkdir =		ext2_mkdir,
133138290Sphk	.vop_mknod =		ext2_mknod,
134138290Sphk	.vop_open =		ext2_open,
135138290Sphk	.vop_pathconf =		ext2_pathconf,
136138290Sphk	.vop_poll =		vop_stdpoll,
137138290Sphk	.vop_print =		ext2_print,
138138290Sphk	.vop_read =		ext2_read,
139138290Sphk	.vop_readdir =		ext2_readdir,
140138290Sphk	.vop_readlink =		ext2_readlink,
141138290Sphk	.vop_reallocblks =	ext2_reallocblks,
142138290Sphk	.vop_reclaim =		ext2_reclaim,
143138290Sphk	.vop_remove =		ext2_remove,
144138290Sphk	.vop_rename =		ext2_rename,
145138290Sphk	.vop_rmdir =		ext2_rmdir,
146138290Sphk	.vop_setattr =		ext2_setattr,
147138290Sphk	.vop_strategy =		ext2_strategy,
148138290Sphk	.vop_symlink =		ext2_symlink,
149138290Sphk	.vop_write =		ext2_write,
150166774Spjd	.vop_vptofh =		ext2_vptofh,
15112115Sdyson};
15212115Sdyson
153138290Sphkstruct vop_vector ext2_fifoops = {
154138290Sphk	.vop_default =		&fifo_specops,
155138290Sphk	.vop_access =		ext2_access,
156138290Sphk	.vop_close =		ext2fifo_close,
157138290Sphk	.vop_fsync =		ext2_fsync,
158138290Sphk	.vop_getattr =		ext2_getattr,
159138290Sphk	.vop_inactive =		ext2_inactive,
160138290Sphk	.vop_kqfilter =		ext2fifo_kqfilter,
161138290Sphk	.vop_print =		ext2_print,
162138868Sphk	.vop_read =		VOP_PANIC,
163138290Sphk	.vop_reclaim =		ext2_reclaim,
164138290Sphk	.vop_setattr =		ext2_setattr,
165138868Sphk	.vop_write =		VOP_PANIC,
166166774Spjd	.vop_vptofh =		ext2_vptofh,
16712115Sdyson};
16812115Sdyson
16912115Sdyson/*
17057710Sbde * A virgin directory (no blushing please).
17196749Siedowse * Note that the type and namlen fields are reversed relative to ext2.
17257710Sbde * Also, we don't use `struct odirtemplate', since it would just cause
17357710Sbde * endianness problems.
17457710Sbde */
17557710Sbdestatic struct dirtemplate mastertemplate = {
17657710Sbde	0, 12, 1, EXT2_FT_DIR, ".",
17757710Sbde	0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".."
17857710Sbde};
17957710Sbdestatic struct dirtemplate omastertemplate = {
18057710Sbde	0, 12, 1, EXT2_FT_UNKNOWN, ".",
18157710Sbde	0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".."
18257710Sbde};
18357710Sbde
184202283Slulfstatic void
185202283Slulfext2_itimes_locked(struct vnode *vp)
18696749Siedowse{
18796749Siedowse	struct inode *ip;
18896749Siedowse	struct timespec ts;
18996749Siedowse
190202283Slulf	ASSERT_VI_LOCKED(vp, __func__);
191202283Slulf
19296749Siedowse	ip = VTOI(vp);
19396749Siedowse	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
19496749Siedowse		return;
19596749Siedowse	if ((vp->v_type == VBLK || vp->v_type == VCHR))
19696749Siedowse		ip->i_flag |= IN_LAZYMOD;
19796749Siedowse	else
19896749Siedowse		ip->i_flag |= IN_MODIFIED;
19996749Siedowse	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
20096749Siedowse		vfs_timestamp(&ts);
20196749Siedowse		if (ip->i_flag & IN_ACCESS) {
20296749Siedowse			ip->i_atime = ts.tv_sec;
20396749Siedowse			ip->i_atimensec = ts.tv_nsec;
20496749Siedowse		}
20596749Siedowse		if (ip->i_flag & IN_UPDATE) {
20696749Siedowse			ip->i_mtime = ts.tv_sec;
20796749Siedowse			ip->i_mtimensec = ts.tv_nsec;
20896749Siedowse			ip->i_modrev++;
20996749Siedowse		}
21096749Siedowse		if (ip->i_flag & IN_CHANGE) {
21196749Siedowse			ip->i_ctime = ts.tv_sec;
21296749Siedowse			ip->i_ctimensec = ts.tv_nsec;
21396749Siedowse		}
21496749Siedowse	}
21596749Siedowse	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
21696749Siedowse}
21796749Siedowse
/*
 * Unlocked entry point: take the vnode interlock around
 * ext2_itimes_locked().
 */
void
ext2_itimes(struct vnode *vp)
{

	VI_LOCK(vp);
	ext2_itimes_locked(vp);
	VI_UNLOCK(vp);
}
226202283Slulf
22757710Sbde/*
22831268Sphk * Create a regular file
22931268Sphk */
23031268Sphkstatic int
231246634Spfgext2_create(struct vop_create_args *ap)
23231268Sphk{
23331268Sphk	int error;
23431268Sphk
23531268Sphk	error =
23631268Sphk	    ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
23731268Sphk	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
23831268Sphk	if (error)
23931268Sphk		return (error);
24031268Sphk	return (0);
24131268Sphk}
24231268Sphk
243105223Sphkstatic int
244246634Spfgext2_open(struct vop_open_args *ap)
24596749Siedowse{
24696749Siedowse
247135864Sphk	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR)
248135864Sphk		return (EOPNOTSUPP);
249135864Sphk
25096749Siedowse	/*
25196749Siedowse	 * Files marked append-only must be opened for appending.
25296749Siedowse	 */
25396749Siedowse	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
25496749Siedowse	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
25596749Siedowse		return (EPERM);
256151811Scracauer
257153858Scracauer	vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td);
258151811Scracauer
25996749Siedowse	return (0);
26096749Siedowse}
26196749Siedowse
26296749Siedowse/*
26396749Siedowse * Close called.
26496749Siedowse *
26596749Siedowse * Update the times on the inode.
26696749Siedowse */
26796749Siedowsestatic int
268246634Spfgext2_close(struct vop_close_args *ap)
26996749Siedowse{
27096749Siedowse	struct vnode *vp = ap->a_vp;
27196749Siedowse
272103938Sjeff	VI_LOCK(vp);
273143509Sjeff	if (vp->v_usecount > 1)
274202283Slulf		ext2_itimes_locked(vp);
275143509Sjeff	VI_UNLOCK(vp);
27696749Siedowse	return (0);
27796749Siedowse}
27896749Siedowse
27996749Siedowsestatic int
280246634Spfgext2_access(struct vop_access_args *ap)
28196749Siedowse{
28296749Siedowse	struct vnode *vp = ap->a_vp;
28396749Siedowse	struct inode *ip = VTOI(vp);
284184413Strasz	accmode_t accmode = ap->a_accmode;
28596749Siedowse	int error;
28696749Siedowse
287135864Sphk	if (vp->v_type == VBLK || vp->v_type == VCHR)
288135864Sphk		return (EOPNOTSUPP);
289135864Sphk
29096749Siedowse	/*
29196749Siedowse	 * Disallow write attempts on read-only file systems;
29296749Siedowse	 * unless the file is a socket, fifo, or a block or
29396749Siedowse	 * character device resident on the file system.
29496749Siedowse	 */
295184413Strasz	if (accmode & VWRITE) {
29696749Siedowse		switch (vp->v_type) {
29796749Siedowse		case VDIR:
29896749Siedowse		case VLNK:
29996749Siedowse		case VREG:
30096749Siedowse			if (vp->v_mount->mnt_flag & MNT_RDONLY)
30196749Siedowse				return (EROFS);
30296749Siedowse			break;
30396749Siedowse		default:
30496749Siedowse			break;
30596749Siedowse		}
30696749Siedowse	}
30796749Siedowse
30896749Siedowse	/* If immutable bit set, nobody gets to write it. */
309202283Slulf	if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT)))
31096749Siedowse		return (EPERM);
31196749Siedowse
31296749Siedowse	error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
313184413Strasz	    ap->a_accmode, ap->a_cred, NULL);
31496749Siedowse	return (error);
31596749Siedowse}
31696749Siedowse
31796749Siedowsestatic int
318246634Spfgext2_getattr(struct vop_getattr_args *ap)
31996749Siedowse{
32096749Siedowse	struct vnode *vp = ap->a_vp;
32196749Siedowse	struct inode *ip = VTOI(vp);
32296749Siedowse	struct vattr *vap = ap->a_vap;
32396749Siedowse
32496749Siedowse	ext2_itimes(vp);
32596749Siedowse	/*
32696749Siedowse	 * Copy from inode table
32796749Siedowse	 */
328147868Scracauer	vap->va_fsid = dev2udev(ip->i_devvp->v_rdev);
32996749Siedowse	vap->va_fileid = ip->i_number;
33096749Siedowse	vap->va_mode = ip->i_mode & ~IFMT;
33196749Siedowse	vap->va_nlink = ip->i_nlink;
33296749Siedowse	vap->va_uid = ip->i_uid;
33396749Siedowse	vap->va_gid = ip->i_gid;
33496749Siedowse	vap->va_rdev = ip->i_rdev;
33596749Siedowse	vap->va_size = ip->i_size;
33696749Siedowse	vap->va_atime.tv_sec = ip->i_atime;
337232703Spfg	vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0;
33896749Siedowse	vap->va_mtime.tv_sec = ip->i_mtime;
339232703Spfg	vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0;
34096749Siedowse	vap->va_ctime.tv_sec = ip->i_ctime;
341232703Spfg	vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0;
342232703Spfg	if E2DI_HAS_XTIME(ip) {
343232703Spfg		vap->va_birthtime.tv_sec = ip->i_birthtime;
344232703Spfg		vap->va_birthtime.tv_nsec = ip->i_birthnsec;
345232703Spfg	}
34696749Siedowse	vap->va_flags = ip->i_flags;
34796749Siedowse	vap->va_gen = ip->i_gen;
34896749Siedowse	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
34996749Siedowse	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
35096749Siedowse	vap->va_type = IFTOVT(ip->i_mode);
35196749Siedowse	vap->va_filerev = ip->i_modrev;
35296749Siedowse	return (0);
35396749Siedowse}
35496749Siedowse
35596749Siedowse/*
35696749Siedowse * Set attribute vnode op. called from several syscalls
35796749Siedowse */
358105223Sphkstatic int
359246634Spfgext2_setattr(struct vop_setattr_args *ap)
36096749Siedowse{
36196749Siedowse	struct vattr *vap = ap->a_vap;
36296749Siedowse	struct vnode *vp = ap->a_vp;
36396749Siedowse	struct inode *ip = VTOI(vp);
36496749Siedowse	struct ucred *cred = ap->a_cred;
365182371Sattilio	struct thread *td = curthread;
36696749Siedowse	int error;
36796749Siedowse
36896749Siedowse	/*
36996749Siedowse	 * Check for unsettable attributes.
37096749Siedowse	 */
37196749Siedowse	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
37296749Siedowse	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
37396749Siedowse	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
37496749Siedowse	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
37596749Siedowse		return (EINVAL);
37696749Siedowse	}
37796749Siedowse	if (vap->va_flags != VNOVAL) {
378202584Slulf		/* Disallow flags not supported by ext2fs. */
379202584Slulf		if(vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP))
380202584Slulf			return (EOPNOTSUPP);
381234139Sjh
38296749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
38396749Siedowse			return (EROFS);
38496749Siedowse		/*
38596749Siedowse		 * Callers may only modify the file flags on objects they
38696749Siedowse		 * have VADMIN rights for.
38796749Siedowse		 */
38896749Siedowse		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
38996749Siedowse			return (error);
39096749Siedowse		/*
39196749Siedowse		 * Unprivileged processes and privileged processes in
39296749Siedowse		 * jail() are not permitted to unset system flags, or
39396749Siedowse		 * modify flags if any system flags are set.
39496749Siedowse		 * Privileged non-jail processes may not modify system flags
39596749Siedowse		 * if securelevel > 0 and any existing system flags are set.
39696749Siedowse		 */
397170587Srwatson		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
398234203Sjh			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) {
39996749Siedowse				error = securelevel_gt(cred, 0);
40096749Siedowse				if (error)
40196749Siedowse					return (error);
40296749Siedowse			}
40396749Siedowse		} else {
404234203Sjh			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) ||
405234203Sjh			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
40696749Siedowse				return (EPERM);
40796749Siedowse		}
408234203Sjh		ip->i_flags = vap->va_flags;
40996749Siedowse		ip->i_flag |= IN_CHANGE;
410234104Sjh		if (ip->i_flags & (IMMUTABLE | APPEND))
41196749Siedowse			return (0);
41296749Siedowse	}
41396749Siedowse	if (ip->i_flags & (IMMUTABLE | APPEND))
41496749Siedowse		return (EPERM);
41596749Siedowse	/*
41696749Siedowse	 * Go through the fields and update iff not VNOVAL.
41796749Siedowse	 */
41896749Siedowse	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
41996749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
42096749Siedowse			return (EROFS);
42196749Siedowse		if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred,
42296749Siedowse		    td)) != 0)
42396749Siedowse			return (error);
42496749Siedowse	}
42596749Siedowse	if (vap->va_size != VNOVAL) {
42696749Siedowse		/*
42796749Siedowse		 * Disallow write attempts on read-only file systems;
42896749Siedowse		 * unless the file is a socket, fifo, or a block or
42996749Siedowse		 * character device resident on the file system.
43096749Siedowse		 */
43196749Siedowse		switch (vp->v_type) {
43296749Siedowse		case VDIR:
43396749Siedowse			return (EISDIR);
43496749Siedowse		case VLNK:
43596749Siedowse		case VREG:
43696749Siedowse			if (vp->v_mount->mnt_flag & MNT_RDONLY)
43796749Siedowse				return (EROFS);
43896749Siedowse			break;
43996749Siedowse		default:
44096749Siedowse			break;
44196749Siedowse		}
44296749Siedowse		if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0)
44396749Siedowse			return (error);
44496749Siedowse	}
44596749Siedowse	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
44696749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
44796749Siedowse			return (EROFS);
44896749Siedowse		/*
44996749Siedowse		 * From utimes(2):
45096749Siedowse		 * If times is NULL, ... The caller must be the owner of
45196749Siedowse		 * the file, have permission to write the file, or be the
45296749Siedowse		 * super-user.
45396749Siedowse		 * If times is non-NULL, ... The caller must be the owner of
45496749Siedowse		 * the file or be the super-user.
45596749Siedowse		 */
45696749Siedowse		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) &&
45796749Siedowse		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
45896749Siedowse		    (error = VOP_ACCESS(vp, VWRITE, cred, td))))
45996749Siedowse			return (error);
46096749Siedowse		if (vap->va_atime.tv_sec != VNOVAL)
46196749Siedowse			ip->i_flag |= IN_ACCESS;
46296749Siedowse		if (vap->va_mtime.tv_sec != VNOVAL)
46396749Siedowse			ip->i_flag |= IN_CHANGE | IN_UPDATE;
46496749Siedowse		ext2_itimes(vp);
46596749Siedowse		if (vap->va_atime.tv_sec != VNOVAL) {
46696749Siedowse			ip->i_atime = vap->va_atime.tv_sec;
46796749Siedowse			ip->i_atimensec = vap->va_atime.tv_nsec;
46896749Siedowse		}
46996749Siedowse		if (vap->va_mtime.tv_sec != VNOVAL) {
47096749Siedowse			ip->i_mtime = vap->va_mtime.tv_sec;
47196749Siedowse			ip->i_mtimensec = vap->va_mtime.tv_nsec;
47296749Siedowse		}
473232703Spfg		ip->i_birthtime = vap->va_birthtime.tv_sec;
474232703Spfg		ip->i_birthnsec = vap->va_birthtime.tv_nsec;
47596749Siedowse		error = ext2_update(vp, 0);
47696749Siedowse		if (error)
47796749Siedowse			return (error);
47896749Siedowse	}
47996749Siedowse	error = 0;
48096749Siedowse	if (vap->va_mode != (mode_t)VNOVAL) {
48196749Siedowse		if (vp->v_mount->mnt_flag & MNT_RDONLY)
48296749Siedowse			return (EROFS);
48396749Siedowse		error = ext2_chmod(vp, (int)vap->va_mode, cred, td);
48496749Siedowse	}
48596749Siedowse	return (error);
48696749Siedowse}
48796749Siedowse
48896749Siedowse/*
48996749Siedowse * Change the mode on a file.
49096749Siedowse * Inode must be locked before calling.
49196749Siedowse */
49296749Siedowsestatic int
493246634Spfgext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
49496749Siedowse{
49596749Siedowse	struct inode *ip = VTOI(vp);
49696749Siedowse	int error;
49796749Siedowse
49896749Siedowse	/*
49996749Siedowse	 * To modify the permissions on a file, must possess VADMIN
50096749Siedowse	 * for that file.
50196749Siedowse	 */
50296749Siedowse	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
50396749Siedowse		return (error);
50496749Siedowse	/*
50596749Siedowse	 * Privileged processes may set the sticky bit on non-directories,
50696749Siedowse	 * as well as set the setgid bit on a file with a group that the
50796749Siedowse	 * process is not a member of.
50896749Siedowse	 */
509164033Srwatson	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
510170587Srwatson		error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0);
511164033Srwatson		if (error)
51296749Siedowse			return (EFTYPE);
51396749Siedowse	}
514164033Srwatson	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
515170587Srwatson		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
516164033Srwatson		if (error)
517164033Srwatson			return (error);
518164033Srwatson	}
51996749Siedowse	ip->i_mode &= ~ALLPERMS;
52096749Siedowse	ip->i_mode |= (mode & ALLPERMS);
52196749Siedowse	ip->i_flag |= IN_CHANGE;
52296749Siedowse	return (0);
52396749Siedowse}
52496749Siedowse
52596749Siedowse/*
52696749Siedowse * Perform chown operation on inode ip;
52796749Siedowse * inode must be locked prior to call.
52896749Siedowse */
52996749Siedowsestatic int
530246634Spfgext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
531246634Spfg    struct thread *td)
53296749Siedowse{
53396749Siedowse	struct inode *ip = VTOI(vp);
53496749Siedowse	uid_t ouid;
53596749Siedowse	gid_t ogid;
53696749Siedowse	int error = 0;
53796749Siedowse
53896749Siedowse	if (uid == (uid_t)VNOVAL)
53996749Siedowse		uid = ip->i_uid;
54096749Siedowse	if (gid == (gid_t)VNOVAL)
54196749Siedowse		gid = ip->i_gid;
54296749Siedowse	/*
54396749Siedowse	 * To modify the ownership of a file, must possess VADMIN
54496749Siedowse	 * for that file.
54596749Siedowse	 */
54696749Siedowse	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
54796749Siedowse		return (error);
54896749Siedowse	/*
54996749Siedowse	 * To change the owner of a file, or change the group of a file
55096749Siedowse	 * to a group of which we are not a member, the caller must
55196749Siedowse	 * have privilege.
55296749Siedowse	 */
553164033Srwatson	if (uid != ip->i_uid || (gid != ip->i_gid &&
554164033Srwatson	    !groupmember(gid, cred))) {
555170587Srwatson		error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
556164033Srwatson		if (error)
557164033Srwatson			return (error);
558164033Srwatson	}
55996749Siedowse	ogid = ip->i_gid;
56096749Siedowse	ouid = ip->i_uid;
56196749Siedowse	ip->i_gid = gid;
56296749Siedowse	ip->i_uid = uid;
56396749Siedowse	ip->i_flag |= IN_CHANGE;
564167151Spjd	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
565170587Srwatson		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0)
566164033Srwatson			ip->i_mode &= ~(ISUID | ISGID);
567164033Srwatson	}
56896749Siedowse	return (0);
56996749Siedowse}
57096749Siedowse
57196749Siedowse/*
57231398Sbde * Synch an open file.
57331398Sbde */
57431398Sbde/* ARGSUSED */
57531398Sbdestatic int
576246634Spfgext2_fsync(struct vop_fsync_args *ap)
57731398Sbde{
57831398Sbde	/*
57931398Sbde	 * Flush all dirty buffers associated with a vnode.
58031398Sbde	 */
58131398Sbde
582110587Sjeff	vop_stdfsync(ap);
583110587Sjeff
58496749Siedowse	return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT));
58531398Sbde}
58631398Sbde
58731398Sbde/*
58831268Sphk * Mknod vnode call
58931268Sphk */
59031268Sphk/* ARGSUSED */
59131268Sphkstatic int
592246634Spfgext2_mknod(struct vop_mknod_args *ap)
59331268Sphk{
59431268Sphk	struct vattr *vap = ap->a_vap;
59531268Sphk	struct vnode **vpp = ap->a_vpp;
59631268Sphk	struct inode *ip;
59768307Sbde	ino_t ino;
59831268Sphk	int error;
59931268Sphk
60031268Sphk	error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
60131268Sphk	    ap->a_dvp, vpp, ap->a_cnp);
60231268Sphk	if (error)
60331268Sphk		return (error);
60431268Sphk	ip = VTOI(*vpp);
60531268Sphk	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
60631268Sphk	if (vap->va_rdev != VNOVAL) {
60731268Sphk		/*
60831268Sphk		 * Want to be able to use this to make badblock
60931268Sphk		 * inodes, so don't truncate the dev number.
61031268Sphk		 */
61131268Sphk		ip->i_rdev = vap->va_rdev;
61231268Sphk	}
61331268Sphk	/*
61453101Seivind	 * Remove inode, then reload it through VFS_VGET so it is
61531268Sphk	 * checked to see if it is an alias of an existing entry in
616143509Sjeff	 * the inode cache.	 XXX I don't believe this is necessary now.
61731268Sphk	 */
61831268Sphk	(*vpp)->v_type = VNON;
61968307Sbde	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
62031268Sphk	vgone(*vpp);
621143509Sjeff	vput(*vpp);
62292462Smckusick	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
62353101Seivind	if (error) {
62453101Seivind		*vpp = NULL;
62553101Seivind		return (error);
62653101Seivind	}
62731268Sphk	return (0);
62831268Sphk}
62931268Sphk
63031268Sphkstatic int
631246634Spfgext2_remove(struct vop_remove_args *ap)
63231268Sphk{
63331398Sbde	struct inode *ip;
63431398Sbde	struct vnode *vp = ap->a_vp;
63531398Sbde	struct vnode *dvp = ap->a_dvp;
63631398Sbde	int error;
63731268Sphk
63831398Sbde	ip = VTOI(vp);
63931398Sbde	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
64031398Sbde	    (VTOI(dvp)->i_flags & APPEND)) {
64131398Sbde		error = EPERM;
64231268Sphk		goto out;
64331268Sphk	}
64431398Sbde	error = ext2_dirremove(dvp, ap->a_cnp);
64531398Sbde	if (error == 0) {
64631398Sbde		ip->i_nlink--;
64731268Sphk		ip->i_flag |= IN_CHANGE;
64831268Sphk	}
64931268Sphkout:
65031268Sphk	return (error);
65131268Sphk}
65231268Sphk
65331268Sphk/*
65431398Sbde * link vnode call
65531268Sphk */
65631268Sphkstatic int
657246634Spfgext2_link(struct vop_link_args *ap)
65831268Sphk{
65931268Sphk	struct vnode *vp = ap->a_vp;
66031398Sbde	struct vnode *tdvp = ap->a_tdvp;
66131268Sphk	struct componentname *cnp = ap->a_cnp;
66231398Sbde	struct inode *ip;
66331268Sphk	int error;
66431268Sphk
665251658Spfg#ifdef INVARIANTS
66631398Sbde	if ((cnp->cn_flags & HASBUF) == 0)
66796749Siedowse		panic("ext2_link: no name");
66831398Sbde#endif
66931398Sbde	if (tdvp->v_mount != vp->v_mount) {
67031398Sbde		error = EXDEV;
671103636Struckman		goto out;
67231398Sbde	}
67331268Sphk	ip = VTOI(vp);
674246347Spfg	if ((nlink_t)ip->i_nlink >= EXT2_LINK_MAX) {
67531398Sbde		error = EMLINK;
676103636Struckman		goto out;
67731268Sphk	}
67831398Sbde	if (ip->i_flags & (IMMUTABLE | APPEND)) {
67931268Sphk		error = EPERM;
680103636Struckman		goto out;
68131268Sphk	}
68231398Sbde	ip->i_nlink++;
68331398Sbde	ip->i_flag |= IN_CHANGE;
684221166Sjhb	error = ext2_update(vp, !DOINGASYNC(vp));
68531398Sbde	if (!error)
68631398Sbde		error = ext2_direnter(ip, tdvp, cnp);
68731398Sbde	if (error) {
68831398Sbde		ip->i_nlink--;
68931398Sbde		ip->i_flag |= IN_CHANGE;
69031398Sbde	}
691103636Struckmanout:
69231268Sphk	return (error);
69331268Sphk}
69431268Sphk
69531268Sphk/*
69631268Sphk * Rename system call.
697202283Slulf * 	rename("foo", "bar");
698202283Slulf * is essentially
699202283Slulf *	unlink("bar");
700202283Slulf *	link("foo", "bar");
701202283Slulf *	unlink("foo");
702202283Slulf * but ``atomically''.  Can't do full commit without saving state in the
703202283Slulf * inode on disk which isn't feasible at this time.  Best we can do is
704202283Slulf * always guarantee the target exists.
705202283Slulf *
706202283Slulf * Basic algorithm is:
707202283Slulf *
708202283Slulf * 1) Bump link count on source while we're linking it to the
709202283Slulf *    target.  This also ensure the inode won't be deleted out
710202283Slulf *    from underneath us while we work (it may be truncated by
711202283Slulf *    a concurrent `trunc' or `open' for creation).
712202283Slulf * 2) Link source to destination.  If destination already exists,
713202283Slulf *    delete it first.
714202283Slulf * 3) Unlink source reference to inode if still around. If a
715202283Slulf *    directory was moved and the parent of the destination
716202283Slulf *    is different from the source, patch the ".." entry in the
717202283Slulf *    directory.
71831268Sphk */
71931268Sphkstatic int
720246634Spfgext2_rename(struct vop_rename_args *ap)
72131268Sphk{
72231268Sphk	struct vnode *tvp = ap->a_tvp;
72396752Siedowse	struct vnode *tdvp = ap->a_tdvp;
72431268Sphk	struct vnode *fvp = ap->a_fvp;
72531268Sphk	struct vnode *fdvp = ap->a_fdvp;
72631268Sphk	struct componentname *tcnp = ap->a_tcnp;
72731268Sphk	struct componentname *fcnp = ap->a_fcnp;
72831268Sphk	struct inode *ip, *xp, *dp;
72931268Sphk	struct dirtemplate dirbuf;
73031268Sphk	int doingdirectory = 0, oldparent = 0, newparent = 0;
73131268Sphk	int error = 0;
73231268Sphk	u_char namlen;
73331268Sphk
734251658Spfg#ifdef INVARIANTS
73531268Sphk	if ((tcnp->cn_flags & HASBUF) == 0 ||
73631268Sphk	    (fcnp->cn_flags & HASBUF) == 0)
73796749Siedowse		panic("ext2_rename: no name");
73831268Sphk#endif
73931268Sphk	/*
74031268Sphk	 * Check for cross-device rename.
74131268Sphk	 */
74231268Sphk	if ((fvp->v_mount != tdvp->v_mount) ||
74331268Sphk	    (tvp && (fvp->v_mount != tvp->v_mount))) {
74431268Sphk		error = EXDEV;
74531268Sphkabortit:
74631268Sphk		if (tdvp == tvp)
74731268Sphk			vrele(tdvp);
74831268Sphk		else
74931268Sphk			vput(tdvp);
75031268Sphk		if (tvp)
75131268Sphk			vput(tvp);
75231268Sphk		vrele(fdvp);
75331268Sphk		vrele(fvp);
75431268Sphk		return (error);
75531268Sphk	}
75631268Sphk
75731268Sphk	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
75831268Sphk	    (VTOI(tdvp)->i_flags & APPEND))) {
75931268Sphk		error = EPERM;
76031268Sphk		goto abortit;
76131268Sphk	}
76231268Sphk
76331268Sphk	/*
764103180Sbde	 * Renaming a file to itself has no effect.  The upper layers should
765103180Sbde	 * not call us in that case.  Temporarily just warn if they do.
76631268Sphk	 */
76731268Sphk	if (fvp == tvp) {
768103180Sbde		printf("ext2_rename: fvp == tvp (can't happen)\n");
769103180Sbde		error = 0;
770103180Sbde		goto abortit;
771103180Sbde	}
77231268Sphk
773175202Sattilio	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
77431268Sphk		goto abortit;
77531268Sphk	dp = VTOI(fdvp);
77631268Sphk	ip = VTOI(fvp);
777246347Spfg	if (ip->i_nlink >= EXT2_LINK_MAX) {
778175294Sattilio 		VOP_UNLOCK(fvp, 0);
77944395Simp 		error = EMLINK;
78044395Simp 		goto abortit;
78144395Simp 	}
78231268Sphk	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
78331268Sphk	    || (dp->i_flags & APPEND)) {
784175294Sattilio		VOP_UNLOCK(fvp, 0);
78531268Sphk		error = EPERM;
78631268Sphk		goto abortit;
78731268Sphk	}
78831268Sphk	if ((ip->i_mode & IFMT) == IFDIR) {
78931268Sphk		/*
79031268Sphk		 * Avoid ".", "..", and aliases of "." for obvious reasons.
79131268Sphk		 */
79231268Sphk		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
79331268Sphk		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
79431268Sphk		    (ip->i_flag & IN_RENAME)) {
795175294Sattilio			VOP_UNLOCK(fvp, 0);
79631268Sphk			error = EINVAL;
79731268Sphk			goto abortit;
79831268Sphk		}
79931268Sphk		ip->i_flag |= IN_RENAME;
80031268Sphk		oldparent = dp->i_number;
80131268Sphk		doingdirectory++;
80231268Sphk	}
80331268Sphk	vrele(fdvp);
80431268Sphk
80531268Sphk	/*
80631268Sphk	 * When the target exists, both the directory
80731268Sphk	 * and target vnodes are returned locked.
80831268Sphk	 */
80931268Sphk	dp = VTOI(tdvp);
81031268Sphk	xp = NULL;
81131268Sphk	if (tvp)
81231268Sphk		xp = VTOI(tvp);
81331268Sphk
81431268Sphk	/*
81531268Sphk	 * 1) Bump link count while we're moving stuff
81631268Sphk	 *    around.  If we crash somewhere before
81731268Sphk	 *    completing our work, the link count
81831268Sphk	 *    may be wrong, but correctable.
81931268Sphk	 */
82031268Sphk	ip->i_nlink++;
82131268Sphk	ip->i_flag |= IN_CHANGE;
822221166Sjhb	if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) {
823175294Sattilio		VOP_UNLOCK(fvp, 0);
82431268Sphk		goto bad;
82531268Sphk	}
82631268Sphk
82731268Sphk	/*
82831268Sphk	 * If ".." must be changed (ie the directory gets a new
82931268Sphk	 * parent) then the source directory must not be in the
830204111Suqs	 * directory hierarchy above the target, as this would
83131268Sphk	 * orphan everything below the source directory. Also
83231268Sphk	 * the user must have write permission in the source so
83331268Sphk	 * as to be able to change "..". We must repeat the call
83431268Sphk	 * to namei, as the parent directory is unlocked by the
83531268Sphk	 * call to checkpath().
83631268Sphk	 */
83783366Sjulian	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
838175294Sattilio	VOP_UNLOCK(fvp, 0);
83931268Sphk	if (oldparent != dp->i_number)
84031268Sphk		newparent = dp->i_number;
84131268Sphk	if (doingdirectory && newparent) {
84231268Sphk		if (error)	/* write access check above */
84331268Sphk			goto bad;
84431268Sphk		if (xp != NULL)
84531268Sphk			vput(tvp);
84631268Sphk		error = ext2_checkpath(ip, dp, tcnp->cn_cred);
84731268Sphk		if (error)
84831268Sphk			goto out;
84931268Sphk		VREF(tdvp);
85031268Sphk		error = relookup(tdvp, &tvp, tcnp);
85131268Sphk		if (error)
85231268Sphk			goto out;
85331268Sphk		vrele(tdvp);
85431268Sphk		dp = VTOI(tdvp);
85531268Sphk		xp = NULL;
85631268Sphk		if (tvp)
85731268Sphk			xp = VTOI(tvp);
85831268Sphk	}
85931268Sphk	/*
86031268Sphk	 * 2) If target doesn't exist, link the target
86131268Sphk	 *    to the source and unlink the source.
86231268Sphk	 *    Otherwise, rewrite the target directory
86331268Sphk	 *    entry to reference the source inode and
86431268Sphk	 *    expunge the original entry's existence.
86531268Sphk	 */
86631268Sphk	if (xp == NULL) {
867143677Sphk		if (dp->i_devvp != ip->i_devvp)
86896749Siedowse			panic("ext2_rename: EXDEV");
86931268Sphk		/*
87031268Sphk		 * Account for ".." in new directory.
87131268Sphk		 * When source and destination have the same
87231268Sphk		 * parent we don't fool with the link count.
87331268Sphk		 */
87431268Sphk		if (doingdirectory && newparent) {
875246347Spfg			if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
87631268Sphk				error = EMLINK;
87731268Sphk				goto bad;
87831268Sphk			}
87931268Sphk			dp->i_nlink++;
88031268Sphk			dp->i_flag |= IN_CHANGE;
881221166Sjhb			error = ext2_update(tdvp, !DOINGASYNC(tdvp));
88231268Sphk			if (error)
88331268Sphk				goto bad;
88431268Sphk		}
88531268Sphk		error = ext2_direnter(ip, tdvp, tcnp);
88631268Sphk		if (error) {
88731268Sphk			if (doingdirectory && newparent) {
88831268Sphk				dp->i_nlink--;
88931268Sphk				dp->i_flag |= IN_CHANGE;
89096749Siedowse				(void)ext2_update(tdvp, 1);
89131268Sphk			}
89231268Sphk			goto bad;
89331268Sphk		}
89431268Sphk		vput(tdvp);
89531268Sphk	} else {
896143677Sphk		if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp)
897105223Sphk		       panic("ext2_rename: EXDEV");
89831268Sphk		/*
89931268Sphk		 * Short circuit rename(foo, foo).
90031268Sphk		 */
90131268Sphk		if (xp->i_number == ip->i_number)
90296749Siedowse			panic("ext2_rename: same file");
90331268Sphk		/*
90431268Sphk		 * If the parent directory is "sticky", then the user must
90531268Sphk		 * own the parent directory, or the destination of the rename,
90631268Sphk		 * otherwise the destination may not be changed (except by
90731268Sphk		 * root). This implements append-only directories.
90831268Sphk		 */
90931268Sphk		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
91031268Sphk		    tcnp->cn_cred->cr_uid != dp->i_uid &&
91131268Sphk		    xp->i_uid != tcnp->cn_cred->cr_uid) {
91231268Sphk			error = EPERM;
91331268Sphk			goto bad;
91431268Sphk		}
91531268Sphk		/*
91631268Sphk		 * Target must be empty if a directory and have no links
91731268Sphk		 * to it. Also, ensure source and target are compatible
91831268Sphk		 * (both directories, or both not directories).
91931268Sphk		 */
92031268Sphk		if ((xp->i_mode&IFMT) == IFDIR) {
92131268Sphk			if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
92231268Sphk			    xp->i_nlink > 2) {
92331268Sphk				error = ENOTEMPTY;
92431268Sphk				goto bad;
92531268Sphk			}
92631268Sphk			if (!doingdirectory) {
92731268Sphk				error = ENOTDIR;
92831268Sphk				goto bad;
92931268Sphk			}
93031268Sphk			cache_purge(tdvp);
93131268Sphk		} else if (doingdirectory) {
93231268Sphk			error = EISDIR;
93331268Sphk			goto bad;
93431268Sphk		}
93531268Sphk		error = ext2_dirrewrite(dp, ip, tcnp);
93631268Sphk		if (error)
93731268Sphk			goto bad;
93831268Sphk		/*
93931268Sphk		 * If the target directory is in the same
94031268Sphk		 * directory as the source directory,
94131268Sphk		 * decrement the link count on the parent
94231268Sphk		 * of the target directory.
94331268Sphk		 */
944105223Sphk		if (doingdirectory && !newparent) {
945105223Sphk		       dp->i_nlink--;
946105223Sphk		       dp->i_flag |= IN_CHANGE;
94731268Sphk		}
94831268Sphk		vput(tdvp);
94931268Sphk		/*
95031268Sphk		 * Adjust the link count of the target to
95131268Sphk		 * reflect the dirrewrite above.  If this is
95231268Sphk		 * a directory it is empty and there are
95331268Sphk		 * no links to it, so we can squash the inode and
95431268Sphk		 * any space associated with it.  We disallowed
95531268Sphk		 * renaming over top of a directory with links to
95631268Sphk		 * it above, as the remaining link would point to
95731268Sphk		 * a directory without "." or ".." entries.
95831268Sphk		 */
95931268Sphk		xp->i_nlink--;
96031268Sphk		if (doingdirectory) {
96131268Sphk			if (--xp->i_nlink != 0)
96296749Siedowse				panic("ext2_rename: linked directory");
96396749Siedowse			error = ext2_truncate(tvp, (off_t)0, IO_SYNC,
96483366Sjulian			    tcnp->cn_cred, tcnp->cn_thread);
96531268Sphk		}
96631268Sphk		xp->i_flag |= IN_CHANGE;
96731268Sphk		vput(tvp);
96831268Sphk		xp = NULL;
96931268Sphk	}
97031268Sphk
97131268Sphk	/*
97231268Sphk	 * 3) Unlink the source.
97331268Sphk	 */
97431268Sphk	fcnp->cn_flags &= ~MODMASK;
97531268Sphk	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
97631268Sphk	VREF(fdvp);
97731268Sphk	error = relookup(fdvp, &fvp, fcnp);
97831268Sphk	if (error == 0)
97931268Sphk		vrele(fdvp);
98031268Sphk	if (fvp != NULL) {
98131268Sphk		xp = VTOI(fvp);
98231268Sphk		dp = VTOI(fdvp);
98331268Sphk	} else {
98431268Sphk		/*
98531268Sphk		 * From name has disappeared.
98631268Sphk		 */
98731268Sphk		if (doingdirectory)
98896749Siedowse			panic("ext2_rename: lost dir entry");
98931268Sphk		vrele(ap->a_fvp);
99031268Sphk		return (0);
99131268Sphk	}
99231268Sphk	/*
99331268Sphk	 * Ensure that the directory entry still exists and has not
99431268Sphk	 * changed while the new name has been entered. If the source is
99531268Sphk	 * a file then the entry may have been unlinked or renamed. In
99631268Sphk	 * either case there is no further work to be done. If the source
99731268Sphk	 * is a directory then it cannot have been rmdir'ed; its link
99831268Sphk	 * count of three would cause a rmdir to fail with ENOTEMPTY.
99931268Sphk	 * The IN_RENAME flag ensures that it cannot be moved by another
100031268Sphk	 * rename.
100131268Sphk	 */
100231268Sphk	if (xp != ip) {
100331268Sphk		if (doingdirectory)
100496749Siedowse			panic("ext2_rename: lost dir entry");
100531268Sphk	} else {
100631268Sphk		/*
100731268Sphk		 * If the source is a directory with a
100831268Sphk		 * new parent, the link count of the old
100931268Sphk		 * parent directory must be decremented
101031268Sphk		 * and ".." set to point to the new parent.
101131268Sphk		 */
101231268Sphk		if (doingdirectory && newparent) {
101331268Sphk			dp->i_nlink--;
101431268Sphk			dp->i_flag |= IN_CHANGE;
101531268Sphk			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
1016228583Spfg				sizeof(struct dirtemplate), (off_t)0,
1017101744Srwatson				UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1018194296Skib				tcnp->cn_cred, NOCRED, NULL, NULL);
101931268Sphk			if (error == 0) {
102057710Sbde				/* Like ufs little-endian: */
102157710Sbde				namlen = dirbuf.dotdot_type;
102231268Sphk				if (namlen != 2 ||
102331268Sphk				    dirbuf.dotdot_name[0] != '.' ||
102431268Sphk				    dirbuf.dotdot_name[1] != '.') {
102596749Siedowse					ext2_dirbad(xp, (doff_t)12,
102631268Sphk					    "rename: mangled dir");
102731268Sphk				} else {
102831268Sphk					dirbuf.dotdot_ino = newparent;
102931268Sphk					(void) vn_rdwr(UIO_WRITE, fvp,
103031268Sphk					    (caddr_t)&dirbuf,
1031228583Spfg					    sizeof(struct dirtemplate),
103231268Sphk					    (off_t)0, UIO_SYSSPACE,
1033101744Srwatson					    IO_NODELOCKED | IO_SYNC |
1034101744Srwatson					    IO_NOMACCHECK, tcnp->cn_cred,
1035194296Skib					    NOCRED, NULL, NULL);
103631268Sphk					cache_purge(fdvp);
103731268Sphk				}
103831268Sphk			}
103931268Sphk		}
104031268Sphk		error = ext2_dirremove(fdvp, fcnp);
104131268Sphk		if (!error) {
104231268Sphk			xp->i_nlink--;
104331268Sphk			xp->i_flag |= IN_CHANGE;
104431268Sphk		}
104531268Sphk		xp->i_flag &= ~IN_RENAME;
104631268Sphk	}
104731268Sphk	if (dp)
104831268Sphk		vput(fdvp);
104931268Sphk	if (xp)
105031268Sphk		vput(fvp);
105131268Sphk	vrele(ap->a_fvp);
105231268Sphk	return (error);
105331268Sphk
105431268Sphkbad:
105531268Sphk	if (xp)
105631268Sphk		vput(ITOV(xp));
105731268Sphk	vput(ITOV(dp));
105831268Sphkout:
105931268Sphk	if (doingdirectory)
106031268Sphk		ip->i_flag &= ~IN_RENAME;
1061175202Sattilio	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
106231268Sphk		ip->i_nlink--;
106331268Sphk		ip->i_flag |= IN_CHANGE;
106431268Sphk		ip->i_flag &= ~IN_RENAME;
106531268Sphk		vput(fvp);
106631268Sphk	} else
106731268Sphk		vrele(fvp);
106831268Sphk	return (error);
106931268Sphk}
107031268Sphk
107131268Sphk/*
107231398Sbde * Mkdir system call
107331398Sbde */
107431268Sphkstatic int
1075246634Spfgext2_mkdir(struct vop_mkdir_args *ap)
107631268Sphk{
107796752Siedowse	struct vnode *dvp = ap->a_dvp;
107896752Siedowse	struct vattr *vap = ap->a_vap;
107996752Siedowse	struct componentname *cnp = ap->a_cnp;
108096752Siedowse	struct inode *ip, *dp;
108131398Sbde	struct vnode *tvp;
108231398Sbde	struct dirtemplate dirtemplate, *dtp;
108331398Sbde	int error, dmode;
108431268Sphk
1085251658Spfg#ifdef INVARIANTS
108631268Sphk	if ((cnp->cn_flags & HASBUF) == 0)
108796749Siedowse		panic("ext2_mkdir: no name");
108831268Sphk#endif
108931398Sbde	dp = VTOI(dvp);
1090246347Spfg	if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
109131398Sbde		error = EMLINK;
109231398Sbde		goto out;
109331268Sphk	}
109431398Sbde	dmode = vap->va_mode & 0777;
109531398Sbde	dmode |= IFDIR;
109631398Sbde	/*
109731398Sbde	 * Must simulate part of ext2_makeinode here to acquire the inode,
109831398Sbde	 * but not have it entered in the parent directory. The entry is
109931398Sbde	 * made later after writing "." and ".." entries.
110031398Sbde	 */
110196749Siedowse	error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp);
110231398Sbde	if (error)
110331398Sbde		goto out;
110431398Sbde	ip = VTOI(tvp);
110531398Sbde	ip->i_gid = dp->i_gid;
110631398Sbde#ifdef SUIDDIR
110731398Sbde	{
110831398Sbde		/*
110931398Sbde		 * if we are hacking owners here, (only do this where told to)
111031398Sbde		 * and we are not giving it TOO root, (would subvert quotas)
111131398Sbde		 * then go ahead and give it to the other user.
111231398Sbde		 * The new directory also inherits the SUID bit.
111333064Seivind		 * If user's UID and dir UID are the same,
111431398Sbde		 * 'give it away' so that the SUID is still forced on.
111531398Sbde		 */
111631398Sbde		if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
111731398Sbde		   (dp->i_mode & ISUID) && dp->i_uid) {
111831398Sbde			dmode |= ISUID;
111931398Sbde			ip->i_uid = dp->i_uid;
112031398Sbde		} else {
112131398Sbde			ip->i_uid = cnp->cn_cred->cr_uid;
112231398Sbde		}
112331268Sphk	}
112431398Sbde#else
112531398Sbde	ip->i_uid = cnp->cn_cred->cr_uid;
112631398Sbde#endif
112731398Sbde	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
112831398Sbde	ip->i_mode = dmode;
112931398Sbde	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
113031398Sbde	ip->i_nlink = 2;
113131398Sbde	if (cnp->cn_flags & ISWHITEOUT)
113231398Sbde		ip->i_flags |= UF_OPAQUE;
113396749Siedowse	error = ext2_update(tvp, 1);
113431398Sbde
113531398Sbde	/*
113631398Sbde	 * Bump link count in parent directory
113731398Sbde	 * to reflect work done below.  Should
113831398Sbde	 * be done before reference is created
113931398Sbde	 * so reparation is possible if we crash.
114031398Sbde	 */
114131398Sbde	dp->i_nlink++;
114231398Sbde	dp->i_flag |= IN_CHANGE;
1143221166Sjhb	error = ext2_update(dvp, !DOINGASYNC(dvp));
114431398Sbde	if (error)
114531398Sbde		goto bad;
114631398Sbde
114731398Sbde	/* Initialize directory with "." and ".." from static template. */
1148193377Sstas	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
1149202283Slulf	    EXT2F_INCOMPAT_FTYPE))
115057710Sbde		dtp = &mastertemplate;
115157710Sbde	else
115257710Sbde		dtp = &omastertemplate;
115331398Sbde	dirtemplate = *dtp;
115431398Sbde	dirtemplate.dot_ino = ip->i_number;
115531398Sbde	dirtemplate.dotdot_ino = dp->i_number;
115631398Sbde	/* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE
115731398Sbde	 * so let's just redefine it - for this function only
115831398Sbde	 */
115931398Sbde#undef  DIRBLKSIZ
1160202283Slulf#define DIRBLKSIZ  VTOI(dvp)->i_e2fs->e2fs_bsize
116131398Sbde	dirtemplate.dotdot_reclen = DIRBLKSIZ - 12;
116231398Sbde	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
1163228583Spfg	    sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE,
1164101941Srwatson	    IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED,
1165194296Skib	    NULL, NULL);
116631398Sbde	if (error) {
116731398Sbde		dp->i_nlink--;
116831398Sbde		dp->i_flag |= IN_CHANGE;
116931398Sbde		goto bad;
117031268Sphk	}
117196749Siedowse	if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
117296749Siedowse		/* XXX should grow with balloc() */
117396749Siedowse		panic("ext2_mkdir: blksize");
117431398Sbde	else {
117531398Sbde		ip->i_size = DIRBLKSIZ;
117631398Sbde		ip->i_flag |= IN_CHANGE;
117731398Sbde	}
117831268Sphk
117935256Sdes	/* Directory set up, now install its entry in the parent directory. */
118031398Sbde	error = ext2_direnter(ip, dvp, cnp);
118131268Sphk	if (error) {
118231398Sbde		dp->i_nlink--;
118331398Sbde		dp->i_flag |= IN_CHANGE;
118431398Sbde	}
118531398Sbdebad:
118631398Sbde	/*
118731398Sbde	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
118831398Sbde	 * for us because we set the link count to 0.
118931398Sbde	 */
119031398Sbde	if (error) {
119131398Sbde		ip->i_nlink = 0;
119231268Sphk		ip->i_flag |= IN_CHANGE;
119331398Sbde		vput(tvp);
119431398Sbde	} else
119531398Sbde		*ap->a_vpp = tvp;
119631398Sbdeout:
119731268Sphk	return (error);
119831398Sbde#undef  DIRBLKSIZ
119931398Sbde#define DIRBLKSIZ  DEV_BSIZE
120031268Sphk}
120131268Sphk
120231398Sbde/*
120331398Sbde * Rmdir system call.
120431398Sbde */
120531268Sphkstatic int
1206246634Spfgext2_rmdir(struct vop_rmdir_args *ap)
120731268Sphk{
120831268Sphk	struct vnode *vp = ap->a_vp;
120931268Sphk	struct vnode *dvp = ap->a_dvp;
121031398Sbde	struct componentname *cnp = ap->a_cnp;
121131398Sbde	struct inode *ip, *dp;
121231268Sphk	int error;
121331268Sphk
121431268Sphk	ip = VTOI(vp);
121531398Sbde	dp = VTOI(dvp);
121631398Sbde
121731398Sbde	/*
121831398Sbde	 * Verify the directory is empty (and valid).
121931398Sbde	 * (Rmdir ".." won't be valid since
122031398Sbde	 *  ".." will contain a reference to
122131398Sbde	 *  the current directory and thus be
122231398Sbde	 *  non-empty.)
122331398Sbde	 */
122431398Sbde	error = 0;
122531398Sbde	if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) {
122631398Sbde		error = ENOTEMPTY;
122731398Sbde		goto out;
122831398Sbde	}
122931398Sbde	if ((dp->i_flags & APPEND)
123031398Sbde	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
123131268Sphk		error = EPERM;
123231268Sphk		goto out;
123331268Sphk	}
123431398Sbde	/*
123531398Sbde	 * Delete reference to directory before purging
123631398Sbde	 * inode.  If we crash in between, the directory
123731398Sbde	 * will be reattached to lost+found,
123831398Sbde	 */
123931398Sbde	error = ext2_dirremove(dvp, cnp);
124031398Sbde	if (error)
124131398Sbde		goto out;
124231398Sbde	dp->i_nlink--;
124331398Sbde	dp->i_flag |= IN_CHANGE;
124431398Sbde	cache_purge(dvp);
1245175294Sattilio	VOP_UNLOCK(dvp, 0);
124631398Sbde	/*
124731398Sbde	 * Truncate inode.  The only stuff left
124831398Sbde	 * in the directory is "." and "..".  The
124931398Sbde	 * "." reference is inconsequential since
125031398Sbde	 * we're quashing it.  The ".." reference
125131398Sbde	 * has already been adjusted above.  We've
125231398Sbde	 * removed the "." reference and the reference
125331398Sbde	 * in the parent directory, but there may be
125431398Sbde	 * other hard links so decrement by 2 and
125531398Sbde	 * worry about them later.
125631398Sbde	 */
125731398Sbde	ip->i_nlink -= 2;
1258175294Sattilio	error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
1259175294Sattilio	    cnp->cn_thread);
126031398Sbde	cache_purge(ITOV(ip));
1261235508Spfg	if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1262235508Spfg		VOP_UNLOCK(vp, 0);
1263235508Spfg		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1264235508Spfg		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1265235508Spfg	}
126631268Sphkout:
126731268Sphk	return (error);
126831268Sphk}
126931268Sphk
127031268Sphk/*
127131398Sbde * symlink -- make a symbolic link
127212115Sdyson */
127312911Sphkstatic int
1274246634Spfgext2_symlink(struct vop_symlink_args *ap)
127512115Sdyson{
127696752Siedowse	struct vnode *vp, **vpp = ap->a_vpp;
127796752Siedowse	struct inode *ip;
127831398Sbde	int len, error;
127931398Sbde
128031398Sbde	error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
128131398Sbde	    vpp, ap->a_cnp);
128231398Sbde	if (error)
128331398Sbde		return (error);
128431398Sbde	vp = *vpp;
128531398Sbde	len = strlen(ap->a_target);
128631398Sbde	if (len < vp->v_mount->mnt_maxsymlinklen) {
128731398Sbde		ip = VTOI(vp);
128831398Sbde		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
128931398Sbde		ip->i_size = len;
129031398Sbde		ip->i_flag |= IN_CHANGE | IN_UPDATE;
129131398Sbde	} else
129231398Sbde		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1293101744Srwatson		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1294194296Skib		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
129553131Seivind	if (error)
129653131Seivind		vput(vp);
129731398Sbde	return (error);
129831398Sbde}
129931398Sbde
130031398Sbde/*
130196749Siedowse * Return target name of a symbolic link
130296749Siedowse */
130396749Siedowsestatic int
1304246634Spfgext2_readlink(struct vop_readlink_args *ap)
130596749Siedowse{
130696749Siedowse	struct vnode *vp = ap->a_vp;
130796749Siedowse	struct inode *ip = VTOI(vp);
130896749Siedowse	int isize;
130996749Siedowse
131096749Siedowse	isize = ip->i_size;
131196749Siedowse	if (isize < vp->v_mount->mnt_maxsymlinklen) {
131296749Siedowse		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
131396749Siedowse		return (0);
131496749Siedowse	}
131596749Siedowse	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
131696749Siedowse}
131796749Siedowse
131896749Siedowse/*
131996749Siedowse * Calculate the logical to physical mapping if not done already,
132096749Siedowse * then call the device strategy routine.
132196749Siedowse *
132296749Siedowse * In order to be able to swap to a file, the ext2_bmaparray() operation may not
132396749Siedowse * deadlock on memory.  See ext2_bmap() for details.
132496749Siedowse */
1325105223Sphkstatic int
1326246634Spfgext2_strategy(struct vop_strategy_args *ap)
132796749Siedowse{
132896749Siedowse	struct buf *bp = ap->a_bp;
132996749Siedowse	struct vnode *vp = ap->a_vp;
133096749Siedowse	struct inode *ip;
1331137039Sphk	struct bufobj *bo;
1332254283Spfg	daddr_t blkno;
133396749Siedowse	int error;
133496749Siedowse
133596749Siedowse	ip = VTOI(vp);
133696749Siedowse	if (vp->v_type == VBLK || vp->v_type == VCHR)
133796749Siedowse		panic("ext2_strategy: spec");
133896749Siedowse	if (bp->b_blkno == bp->b_lblkno) {
133996749Siedowse		error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL);
134096749Siedowse		bp->b_blkno = blkno;
134196749Siedowse		if (error) {
134296749Siedowse			bp->b_error = error;
134396749Siedowse			bp->b_ioflags |= BIO_ERROR;
134496749Siedowse			bufdone(bp);
1345186194Strasz			return (0);
134696749Siedowse		}
134796749Siedowse		if ((long)bp->b_blkno == -1)
134896749Siedowse			vfs_bio_clrbuf(bp);
134996749Siedowse	}
135096749Siedowse	if ((long)bp->b_blkno == -1) {
135196749Siedowse		bufdone(bp);
135296749Siedowse		return (0);
135396749Siedowse	}
1354121205Sphk	bp->b_iooffset = dbtob(bp->b_blkno);
1355137039Sphk	bo = VFSTOEXT2(vp->v_mount)->um_bo;
1356140051Sphk	BO_STRATEGY(bo, bp);
135796749Siedowse	return (0);
135896749Siedowse}
135996749Siedowse
136096749Siedowse/*
136196749Siedowse * Print out the contents of an inode.
136296749Siedowse */
1363105223Sphkstatic int
1364246634Spfgext2_print(struct vop_print_args *ap)
136596749Siedowse{
136696749Siedowse	struct vnode *vp = ap->a_vp;
136796749Siedowse	struct inode *ip = VTOI(vp);
136896749Siedowse
1369143677Sphk	vn_printf(ip->i_devvp, "\tino %lu", (u_long)ip->i_number);
137096749Siedowse	if (vp->v_type == VFIFO)
137196749Siedowse		fifo_printinfo(vp);
137296749Siedowse	printf("\n");
137396749Siedowse	return (0);
137496749Siedowse}
137596749Siedowse
137696749Siedowse/*
137796749Siedowse * Close wrapper for fifos.
137896749Siedowse *
137996749Siedowse * Update the times on the inode then do device close.
138096749Siedowse */
1381105223Sphkstatic int
1382246634Spfgext2fifo_close(struct vop_close_args *ap)
138396749Siedowse{
138496749Siedowse	struct vnode *vp = ap->a_vp;
138596749Siedowse
1386103938Sjeff	VI_LOCK(vp);
138796749Siedowse	if (vp->v_usecount > 1)
1388202283Slulf		ext2_itimes_locked(vp);
1389103938Sjeff	VI_UNLOCK(vp);
1390138693Smarcel	return (fifo_specops.vop_close(ap));
139196749Siedowse}
139296749Siedowse
139396749Siedowse/*
139496749Siedowse * Kqfilter wrapper for fifos.
139596749Siedowse *
139696749Siedowse * Fall through to ext2 kqfilter routines if needed
139796749Siedowse */
1398105223Sphkstatic int
1399246634Spfgext2fifo_kqfilter(struct vop_kqfilter_args *ap)
140096749Siedowse{
140196749Siedowse	int error;
140296749Siedowse
1403138693Smarcel	error = fifo_specops.vop_kqfilter(ap);
140496749Siedowse	if (error)
1405184410Skib		error = vfs_kqfilter(ap);
140696749Siedowse	return (error);
140796749Siedowse}
140896749Siedowse
140996749Siedowse/*
141096749Siedowse * Return POSIX pathconf information applicable to ext2 filesystems.
141196749Siedowse */
1412105223Sphkstatic int
1413246634Spfgext2_pathconf(struct vop_pathconf_args *ap)
141496749Siedowse{
1415253173Spfg	int error = 0;
141696749Siedowse
141796749Siedowse	switch (ap->a_name) {
141896749Siedowse	case _PC_LINK_MAX:
1419246347Spfg		*ap->a_retval = EXT2_LINK_MAX;
1420253173Spfg		break;
142196749Siedowse	case _PC_NAME_MAX:
142296749Siedowse		*ap->a_retval = NAME_MAX;
1423253173Spfg		break;
142496749Siedowse	case _PC_PATH_MAX:
142596749Siedowse		*ap->a_retval = PATH_MAX;
1426253173Spfg		break;
142796749Siedowse	case _PC_PIPE_BUF:
142896749Siedowse		*ap->a_retval = PIPE_BUF;
1429253173Spfg		break;
143096749Siedowse	case _PC_CHOWN_RESTRICTED:
143196749Siedowse		*ap->a_retval = 1;
1432253173Spfg		break;
143396749Siedowse	case _PC_NO_TRUNC:
143496749Siedowse		*ap->a_retval = 1;
1435253173Spfg		break;
1436252956Spfg	case _PC_MIN_HOLE_SIZE:
1437252956Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1438253173Spfg		break;
1439253173Spfg	case _PC_ASYNC_IO:
1440253173Spfg		/* _PC_ASYNC_IO should have been handled by upper layers. */
1441253173Spfg		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
1442253173Spfg		error = EINVAL;
1443253173Spfg		break;
1444253173Spfg	case _PC_PRIO_IO:
1445253173Spfg		*ap->a_retval = 0;
1446253173Spfg		break;
1447253173Spfg	case _PC_SYNC_IO:
1448253173Spfg		*ap->a_retval = 0;
1449253173Spfg		break;
1450253173Spfg	case _PC_ALLOC_SIZE_MIN:
1451253173Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
1452253173Spfg		break;
1453253173Spfg	case _PC_FILESIZEBITS:
1454253173Spfg		*ap->a_retval = 64;
1455253173Spfg		break;
1456253173Spfg	case _PC_REC_INCR_XFER_SIZE:
1457253173Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1458253173Spfg		break;
1459253173Spfg	case _PC_REC_MAX_XFER_SIZE:
1460253173Spfg		*ap->a_retval = -1; /* means ``unlimited'' */
1461253173Spfg		break;
1462253173Spfg	case _PC_REC_MIN_XFER_SIZE:
1463253173Spfg		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1464253173Spfg		break;
1465253173Spfg	case _PC_REC_XFER_ALIGN:
1466253173Spfg		*ap->a_retval = PAGE_SIZE;
1467253173Spfg		break;
1468253173Spfg	case _PC_SYMLINK_MAX:
1469253173Spfg		*ap->a_retval = MAXPATHLEN;
1470253173Spfg		break;
1471253173Spfg
147296749Siedowse	default:
1473253173Spfg		error = EINVAL;
1474253173Spfg		break;
147596749Siedowse	}
1476253173Spfg	return (error);
147796749Siedowse}
147896749Siedowse
147996749Siedowse/*
1480166774Spjd * Vnode pointer to File handle
1481166774Spjd */
1482166774Spjd/* ARGSUSED */
1483166774Spjdstatic int
1484246634Spfgext2_vptofh(struct vop_vptofh_args *ap)
1485166774Spjd{
1486166774Spjd	struct inode *ip;
1487166774Spjd	struct ufid *ufhp;
1488166774Spjd
1489166774Spjd	ip = VTOI(ap->a_vp);
1490166774Spjd	ufhp = (struct ufid *)ap->a_fhp;
1491166774Spjd	ufhp->ufid_len = sizeof(struct ufid);
1492166774Spjd	ufhp->ufid_ino = ip->i_number;
1493166774Spjd	ufhp->ufid_gen = ip->i_gen;
1494166774Spjd	return (0);
1495166774Spjd}
1496166774Spjd
1497166774Spjd/*
149896749Siedowse * Initialize the vnode associated with a new inode, handle aliased
149996749Siedowse * vnodes.
150096749Siedowse */
150196749Siedowseint
1502246634Spfgext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp)
150396749Siedowse{
150496749Siedowse	struct inode *ip;
150596749Siedowse	struct vnode *vp;
150696749Siedowse
150796749Siedowse	vp = *vpp;
150896749Siedowse	ip = VTOI(vp);
1509135864Sphk	vp->v_type = IFTOVT(ip->i_mode);
1510135864Sphk	if (vp->v_type == VFIFO)
151196749Siedowse		vp->v_op = fifoops;
1512137039Sphk
1513221128Sjhb	if (ip->i_number == EXT2_ROOTINO)
1514101308Sjeff		vp->v_vflag |= VV_ROOT;
1515134899Sphk	ip->i_modrev = init_va_filerev();
151696749Siedowse	*vpp = vp;
151796749Siedowse	return (0);
151896749Siedowse}
151996749Siedowse
152096749Siedowse/*
152131398Sbde * Allocate a new inode.
152231398Sbde */
152331398Sbdestatic int
1524246634Spfgext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
1525246634Spfg    struct componentname *cnp)
152631398Sbde{
152796752Siedowse	struct inode *ip, *pdir;
152831398Sbde	struct vnode *tvp;
152931398Sbde	int error;
153012115Sdyson
153131398Sbde	pdir = VTOI(dvp);
1532251658Spfg#ifdef INVARIANTS
153331398Sbde	if ((cnp->cn_flags & HASBUF) == 0)
153431398Sbde		panic("ext2_makeinode: no name");
153512115Sdyson#endif
153631398Sbde	*vpp = NULL;
153731398Sbde	if ((mode & IFMT) == 0)
153831398Sbde		mode |= IFREG;
153912115Sdyson
154096749Siedowse	error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp);
154131398Sbde	if (error) {
154231398Sbde		return (error);
154331398Sbde	}
154431398Sbde	ip = VTOI(tvp);
154531398Sbde	ip->i_gid = pdir->i_gid;
154631398Sbde#ifdef SUIDDIR
154731398Sbde	{
154812115Sdyson		/*
154931398Sbde		 * if we are
155031398Sbde		 * not the owner of the directory,
155131398Sbde		 * and we are hacking owners here, (only do this where told to)
155231398Sbde		 * and we are not giving it TOO root, (would subvert quotas)
155331398Sbde		 * then go ahead and give it to the other user.
155431398Sbde		 * Note that this drops off the execute bits for security.
155512115Sdyson		 */
155631398Sbde		if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
155731398Sbde		     (pdir->i_mode & ISUID) &&
155831398Sbde		     (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
155931398Sbde			ip->i_uid = pdir->i_uid;
156031398Sbde			mode &= ~07111;
156131398Sbde		} else {
156231398Sbde			ip->i_uid = cnp->cn_cred->cr_uid;
156331398Sbde		}
156412115Sdyson	}
156512115Sdyson#else
156631398Sbde	ip->i_uid = cnp->cn_cred->cr_uid;
156712115Sdyson#endif
156831398Sbde	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
156931398Sbde	ip->i_mode = mode;
157031398Sbde	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
157131398Sbde	ip->i_nlink = 1;
1572164033Srwatson	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) {
1573170587Srwatson		if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0))
1574164033Srwatson			ip->i_mode &= ~ISGID;
1575164033Srwatson	}
157631398Sbde
157731398Sbde	if (cnp->cn_flags & ISWHITEOUT)
157831398Sbde		ip->i_flags |= UF_OPAQUE;
157931398Sbde
158031398Sbde	/*
158131398Sbde	 * Make sure inode goes to disk before directory entry.
158231398Sbde	 */
1583221166Sjhb	error = ext2_update(tvp, !DOINGASYNC(tvp));
158431398Sbde	if (error)
158531398Sbde		goto bad;
158631398Sbde	error = ext2_direnter(ip, dvp, cnp);
158731398Sbde	if (error)
158831398Sbde		goto bad;
158931398Sbde
159031398Sbde	*vpp = tvp;
159131398Sbde	return (0);
159231398Sbde
159331398Sbdebad:
159431398Sbde	/*
159531398Sbde	 * Write error occurred trying to update the inode
159631398Sbde	 * or the directory so must deallocate the inode.
159731398Sbde	 */
159831398Sbde	ip->i_nlink = 0;
159931398Sbde	ip->i_flag |= IN_CHANGE;
160031398Sbde	vput(tvp);
160131398Sbde	return (error);
160212115Sdyson}
1603228507Spfg
1604228507Spfg/*
1605228507Spfg * Vnode op for reading.
1606228507Spfg */
1607228507Spfgstatic int
1608246634Spfgext2_read(struct vop_read_args *ap)
1609228507Spfg{
1610228507Spfg	struct vnode *vp;
1611228507Spfg	struct inode *ip;
1612254260Spfg	int error;
1613254260Spfg
1614254260Spfg	vp = ap->a_vp;
1615254260Spfg	ip = VTOI(vp);
1616254260Spfg
1617254260Spfg	/*EXT4_EXT_LOCK(ip);*/
1618254260Spfg	if (ip->i_flags & EXT4_EXTENTS)
1619254260Spfg		error = ext4_ext_read(ap);
1620254260Spfg	else
1621254260Spfg		error = ext2_ind_read(ap);
1622254260Spfg	/*EXT4_EXT_UNLOCK(ip);*/
1623254260Spfg	return (error);
1624254260Spfg}
1625254260Spfg
1626254260Spfg
/*
 * Vnode op for reading inodes that do not have EXT4_EXTENTS set;
 * called from ext2_read().
 */
1630254260Spfgstatic int
1631254260Spfgext2_ind_read(struct vop_read_args *ap)
1632254260Spfg{
1633254260Spfg	struct vnode *vp;
1634254260Spfg	struct inode *ip;
1635228507Spfg	struct uio *uio;
1636228507Spfg	struct m_ext2fs *fs;
1637228507Spfg	struct buf *bp;
1638228507Spfg	daddr_t lbn, nextlbn;
1639228507Spfg	off_t bytesinfile;
1640228507Spfg	long size, xfersize, blkoffset;
1641228507Spfg	int error, orig_resid, seqcount;
1642228507Spfg	int ioflag;
1643228507Spfg
1644228507Spfg	vp = ap->a_vp;
1645228507Spfg	uio = ap->a_uio;
1646228507Spfg	ioflag = ap->a_ioflag;
1647228507Spfg
1648228507Spfg	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
1649228507Spfg	ip = VTOI(vp);
1650228507Spfg
1651228507Spfg#ifdef INVARIANTS
1652228507Spfg	if (uio->uio_rw != UIO_READ)
1653228507Spfg		panic("%s: mode", "ext2_read");
1654228507Spfg
1655228507Spfg	if (vp->v_type == VLNK) {
1656228507Spfg		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
1657228507Spfg			panic("%s: short symlink", "ext2_read");
1658228507Spfg	} else if (vp->v_type != VREG && vp->v_type != VDIR)
1659228507Spfg		panic("%s: type %d", "ext2_read", vp->v_type);
1660228507Spfg#endif
1661228507Spfg	orig_resid = uio->uio_resid;
1662253098Spfg	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
1663228507Spfg	if (orig_resid == 0)
1664228507Spfg		return (0);
1665253098Spfg	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
1666228507Spfg	fs = ip->i_e2fs;
1667228507Spfg	if (uio->uio_offset < ip->i_size &&
1668228507Spfg	    uio->uio_offset >= fs->e2fs_maxfilesize)
1669228507Spfg	    	return (EOVERFLOW);
1670228507Spfg
1671228507Spfg	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
1672228507Spfg		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
1673228507Spfg			break;
1674228507Spfg		lbn = lblkno(fs, uio->uio_offset);
1675228507Spfg		nextlbn = lbn + 1;
1676228507Spfg		size = blksize(fs, ip, lbn);
1677228507Spfg		blkoffset = blkoff(fs, uio->uio_offset);
1678228507Spfg
1679228507Spfg		xfersize = fs->e2fs_fsize - blkoffset;
1680228507Spfg		if (uio->uio_resid < xfersize)
1681228507Spfg			xfersize = uio->uio_resid;
1682228507Spfg		if (bytesinfile < xfersize)
1683228507Spfg			xfersize = bytesinfile;
1684228507Spfg
1685228507Spfg		if (lblktosize(fs, nextlbn) >= ip->i_size)
1686228507Spfg			error = bread(vp, lbn, size, NOCRED, &bp);
1687248282Skib		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
1688228507Spfg			error = cluster_read(vp, ip->i_size, lbn, size,
1689248282Skib			    NOCRED, blkoffset + uio->uio_resid, seqcount,
1690248282Skib			    0, &bp);
1691248282Skib		} else if (seqcount > 1) {
1692228507Spfg			int nextsize = blksize(fs, ip, nextlbn);
1693228507Spfg			error = breadn(vp, lbn,
1694228507Spfg			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
1695228507Spfg		} else
1696228507Spfg			error = bread(vp, lbn, size, NOCRED, &bp);
1697228507Spfg		if (error) {
1698228507Spfg			brelse(bp);
1699228507Spfg			bp = NULL;
1700228507Spfg			break;
1701228507Spfg		}
1702228507Spfg
1703228507Spfg		/*
1704228507Spfg		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
1705228507Spfg		 * will cause us to attempt to release the buffer later on
1706228507Spfg		 * and will cause the buffer cache to attempt to free the
1707228507Spfg		 * underlying pages.
1708228507Spfg		 */
1709228507Spfg		if (ioflag & IO_DIRECT)
1710228507Spfg			bp->b_flags |= B_DIRECT;
1711228507Spfg
1712228507Spfg		/*
1713228507Spfg		 * We should only get non-zero b_resid when an I/O error
1714228507Spfg		 * has occurred, which should cause us to break above.
1715228507Spfg		 * However, if the short read did not cause an error,
1716228507Spfg		 * then we want to ensure that we do not uiomove bad
1717228507Spfg		 * or uninitialized data.
1718228507Spfg		 */
1719228507Spfg		size -= bp->b_resid;
1720228507Spfg		if (size < xfersize) {
1721228507Spfg			if (size == 0)
1722228507Spfg				break;
1723228507Spfg			xfersize = size;
1724228507Spfg		}
1725228507Spfg		error = uiomove((char *)bp->b_data + blkoffset,
1726228507Spfg  			(int)xfersize, uio);
1727228507Spfg		if (error)
1728228507Spfg			break;
1729228507Spfg
1730228507Spfg		if (ioflag & (IO_VMIO|IO_DIRECT)) {
1731228507Spfg			/*
1732228507Spfg			 * If it's VMIO or direct I/O, then we don't
1733228507Spfg			 * need the buf, mark it available for
1734228507Spfg			 * freeing. If it's non-direct VMIO, the VM has
1735228507Spfg			 * the data.
1736228507Spfg			 */
1737228507Spfg			bp->b_flags |= B_RELBUF;
1738228507Spfg			brelse(bp);
1739228507Spfg		} else {
1740228507Spfg			/*
1741228507Spfg			 * Otherwise let whoever
1742228507Spfg			 * made the request take care of
1743228507Spfg			 * freeing it. We just queue
1744228507Spfg			 * it onto another list.
1745228507Spfg			 */
1746228507Spfg			bqrelse(bp);
1747228507Spfg		}
1748228507Spfg	}
1749228507Spfg
1750228507Spfg	/*
1751228507Spfg	 * This can only happen in the case of an error
1752228507Spfg	 * because the loop above resets bp to NULL on each iteration
1753228507Spfg	 * and on normal completion has not set a new value into it.
1754228507Spfg	 * so it must have come from a 'break' statement
1755228507Spfg	 */
1756228507Spfg	if (bp != NULL) {
1757228507Spfg		if (ioflag & (IO_VMIO|IO_DIRECT)) {
1758228507Spfg			bp->b_flags |= B_RELBUF;
1759228507Spfg			brelse(bp);
1760228507Spfg		} else {
1761228507Spfg			bqrelse(bp);
1762228507Spfg		}
1763228507Spfg	}
1764228507Spfg
1765228507Spfg	if ((error == 0 || uio->uio_resid != orig_resid) &&
1766228507Spfg	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1767228507Spfg		ip->i_flag |= IN_ACCESS;
1768228507Spfg	return (error);
1769228507Spfg}
1770228507Spfg
1771252956Spfgstatic int
1772252956Spfgext2_ioctl(struct vop_ioctl_args *ap)
1773252956Spfg{
1774252956Spfg
1775252956Spfg	switch (ap->a_command) {
1776252956Spfg	case FIOSEEKDATA:
1777252956Spfg	case FIOSEEKHOLE:
1778252956Spfg		return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
1779252956Spfg		    (off_t *)ap->a_data, ap->a_cred));
1780252956Spfg	default:
1781252956Spfg		return (ENOTTY);
1782252956Spfg	}
1783252956Spfg}
1784252956Spfg
1785228507Spfg/*
1786254260Spfg * this function handles ext4 extents block mapping
1787254260Spfg */
1788254260Spfgstatic int
1789254260Spfgext4_ext_read(struct vop_read_args *ap)
1790254260Spfg{
1791254260Spfg	struct vnode *vp;
1792254260Spfg	struct inode *ip;
1793254260Spfg	struct uio *uio;
1794254260Spfg	struct m_ext2fs *fs;
1795254260Spfg	struct buf *bp;
1796254260Spfg	struct ext4_extent nex, *ep;
1797254260Spfg	struct ext4_extent_path path;
1798254260Spfg	daddr_t lbn, newblk;
1799254260Spfg	off_t bytesinfile;
1800254260Spfg	int cache_type;
1801254260Spfg	ssize_t orig_resid;
1802254260Spfg	int error;
1803254260Spfg	long size, xfersize, blkoffset;
1804254260Spfg
1805254260Spfg	vp = ap->a_vp;
1806254260Spfg	ip = VTOI(vp);
1807254260Spfg	uio = ap->a_uio;
1808254260Spfg	memset(&path, 0, sizeof(path));
1809254260Spfg
1810254260Spfg	orig_resid = uio->uio_resid;
1811254260Spfg	KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__));
1812254260Spfg	if (orig_resid == 0)
1813254260Spfg		return (0);
1814254260Spfg	KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__));
1815254260Spfg	fs = ip->i_e2fs;
1816254260Spfg	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize)
1817254260Spfg		return (EOVERFLOW);
1818254260Spfg
1819254260Spfg	while (uio->uio_resid > 0) {
1820254260Spfg		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
1821254260Spfg			break;
1822254260Spfg		lbn = lblkno(fs, uio->uio_offset);
1823254260Spfg		size = blksize(fs, ip, lbn);
1824254260Spfg		blkoffset = blkoff(fs, uio->uio_offset);
1825254260Spfg
1826254260Spfg		xfersize = fs->e2fs_fsize - blkoffset;
1827254260Spfg		xfersize = MIN(xfersize, uio->uio_resid);
1828254260Spfg		xfersize = MIN(xfersize, bytesinfile);
1829254260Spfg
1830254260Spfg		/* get block from ext4 extent cache */
1831254260Spfg		cache_type = ext4_ext_in_cache(ip, lbn, &nex);
1832254260Spfg		switch (cache_type) {
1833254260Spfg		case EXT4_EXT_CACHE_NO:
1834254260Spfg			ext4_ext_find_extent(fs, ip, lbn, &path);
1835254260Spfg			ep = path.ep_ext;
1836254260Spfg			if (ep == NULL)
1837254260Spfg				return (EIO);
1838254260Spfg
1839254260Spfg			ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN);
1840254260Spfg
1841254260Spfg			newblk = lbn - ep->e_blk + (ep->e_start_lo |
1842254260Spfg			    (daddr_t)ep->e_start_hi << 32);
1843254260Spfg
1844254260Spfg			if (path.ep_bp != NULL) {
1845254260Spfg				brelse(path.ep_bp);
1846254260Spfg				path.ep_bp = NULL;
1847254260Spfg			}
1848254260Spfg			break;
1849254260Spfg
1850254260Spfg		case EXT4_EXT_CACHE_GAP:
1851254260Spfg			/* block has not been allocated yet */
1852254260Spfg			return (0);
1853254260Spfg
1854254260Spfg		case EXT4_EXT_CACHE_IN:
1855254260Spfg			newblk = lbn - nex.e_blk + (nex.e_start_lo |
1856254260Spfg			    (daddr_t)nex.e_start_hi << 32);
1857254260Spfg			break;
1858254260Spfg
1859254260Spfg		default:
1860254260Spfg			panic("%s: invalid cache type", __func__);
1861254260Spfg		}
1862254260Spfg
1863254260Spfg		error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp);
1864254260Spfg		if (error) {
1865254260Spfg			brelse(bp);
1866254260Spfg			return (error);
1867254260Spfg		}
1868254260Spfg
1869254260Spfg		size -= bp->b_resid;
1870254260Spfg		if (size < xfersize) {
1871254260Spfg			if (size == 0) {
1872254260Spfg				bqrelse(bp);
1873254260Spfg				break;
1874254260Spfg			}
1875254260Spfg			xfersize = size;
1876254260Spfg		}
1877254260Spfg		error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio);
1878254260Spfg		bqrelse(bp);
1879254260Spfg		if (error)
1880254260Spfg			return (error);
1881254260Spfg	}
1882254260Spfg
1883254260Spfg	return (0);
1884254260Spfg}
1885254260Spfg
1886254260Spfg/*
1887228507Spfg * Vnode op for writing.
1888228507Spfg */
1889228507Spfgstatic int
1890246634Spfgext2_write(struct vop_write_args *ap)
1891228507Spfg{
1892228507Spfg	struct vnode *vp;
1893228507Spfg	struct uio *uio;
1894228507Spfg	struct inode *ip;
1895228507Spfg	struct m_ext2fs *fs;
1896228507Spfg	struct buf *bp;
1897228507Spfg	daddr_t lbn;
1898228507Spfg	off_t osize;
1899228507Spfg	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
1900228507Spfg
1901228507Spfg	ioflag = ap->a_ioflag;
1902228507Spfg	uio = ap->a_uio;
1903228507Spfg	vp = ap->a_vp;
1904228507Spfg
1905228507Spfg	seqcount = ioflag >> IO_SEQSHIFT;
1906228507Spfg	ip = VTOI(vp);
1907228507Spfg
1908228507Spfg#ifdef INVARIANTS
1909228507Spfg	if (uio->uio_rw != UIO_WRITE)
1910228507Spfg		panic("%s: mode", "ext2_write");
1911228507Spfg#endif
1912228507Spfg
1913228507Spfg	switch (vp->v_type) {
1914228507Spfg	case VREG:
1915228507Spfg		if (ioflag & IO_APPEND)
1916228507Spfg			uio->uio_offset = ip->i_size;
1917228507Spfg		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
1918228507Spfg			return (EPERM);
1919228507Spfg		/* FALLTHROUGH */
1920228507Spfg	case VLNK:
1921228507Spfg		break;
1922228507Spfg	case VDIR:
1923228507Spfg		/* XXX differs from ffs -- this is called from ext2_mkdir(). */
1924228507Spfg		if ((ioflag & IO_SYNC) == 0)
1925228507Spfg		panic("ext2_write: nonsync dir write");
1926228507Spfg		break;
1927228507Spfg	default:
1928228507Spfg		panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
1929228507Spfg		    vp->v_type, (intmax_t)uio->uio_offset,
1930228507Spfg		    (intmax_t)uio->uio_resid);
1931228507Spfg	}
1932228507Spfg
1933228507Spfg	KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
1934228507Spfg	KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
1935228507Spfg	fs = ip->i_e2fs;
1936228507Spfg	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
1937228507Spfg		return (EFBIG);
1938228507Spfg	/*
1939228507Spfg	 * Maybe this should be above the vnode op call, but so long as
1940228507Spfg	 * file servers have no limits, I don't think it matters.
1941228507Spfg	 */
1942228507Spfg	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
1943228507Spfg		return (EFBIG);
1944228507Spfg
1945228507Spfg	resid = uio->uio_resid;
1946228507Spfg	osize = ip->i_size;
1947228507Spfg	if (seqcount > BA_SEQMAX)
1948228507Spfg		flags = BA_SEQMAX << BA_SEQSHIFT;
1949228507Spfg	else
1950228507Spfg		flags = seqcount << BA_SEQSHIFT;
1951228507Spfg	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1952228507Spfg		flags |= IO_SYNC;
1953228507Spfg
1954228507Spfg	for (error = 0; uio->uio_resid > 0;) {
1955228507Spfg		lbn = lblkno(fs, uio->uio_offset);
1956228507Spfg		blkoffset = blkoff(fs, uio->uio_offset);
1957228507Spfg		xfersize = fs->e2fs_fsize - blkoffset;
1958228507Spfg		if (uio->uio_resid < xfersize)
1959228507Spfg			xfersize = uio->uio_resid;
1960228507Spfg		if (uio->uio_offset + xfersize > ip->i_size)
1961228507Spfg			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
1962228507Spfg
1963228507Spfg                /*
1964228507Spfg		 * We must perform a read-before-write if the transfer size
1965228507Spfg		 * does not cover the entire buffer.
1966228507Spfg                 */
1967228507Spfg		if (fs->e2fs_bsize > xfersize)
1968228507Spfg			flags |= BA_CLRBUF;
1969228507Spfg		else
1970228507Spfg			flags &= ~BA_CLRBUF;
1971228507Spfg		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
1972228507Spfg		    ap->a_cred, &bp, flags);
1973228507Spfg		if (error != 0)
1974228507Spfg			break;
1975228507Spfg
1976228507Spfg		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
1977228507Spfg			bp->b_flags |= B_NOCACHE;
1978228507Spfg		if (uio->uio_offset + xfersize > ip->i_size)
1979228507Spfg			ip->i_size = uio->uio_offset + xfersize;
1980228507Spfg		size = blksize(fs, ip, lbn) - bp->b_resid;
1981228507Spfg		if (size < xfersize)
1982228507Spfg			xfersize = size;
1983228507Spfg
1984228507Spfg		error =
1985228507Spfg		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1986253050Spfg		/*
1987253050Spfg		 * If the buffer is not already filled and we encounter an
1988253050Spfg		 * error while trying to fill it, we have to clear out any
1989253050Spfg		 * garbage data from the pages instantiated for the buffer.
1990253050Spfg		 * If we do not, a failed uiomove() during a write can leave
1991253050Spfg		 * the prior contents of the pages exposed to a userland mmap.
1992253050Spfg		 *
1993253050Spfg		 * Note that we need only clear buffers with a transfer size
1994253050Spfg		 * equal to the block size because buffers with a shorter
1995253050Spfg		 * transfer size were cleared above by the call to ext2_balloc()
1996253050Spfg		 * with the BA_CLRBUF flag set.
1997253050Spfg		 *
1998253050Spfg		 * If the source region for uiomove identically mmaps the
1999253050Spfg		 * buffer, uiomove() performed the NOP copy, and the buffer
2000253050Spfg		 * content remains valid because the page fault handler
2001253050Spfg		 * validated the pages.
2002253050Spfg		 */
2003253050Spfg		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
2004253050Spfg		    fs->e2fs_bsize == xfersize)
2005253050Spfg			vfs_bio_clrbuf(bp);
2006228507Spfg		if (ioflag & (IO_VMIO|IO_DIRECT)) {
2007228507Spfg			bp->b_flags |= B_RELBUF;
2008228507Spfg		}
2009228507Spfg
2010228507Spfg		/*
2011228507Spfg		 * If IO_SYNC each buffer is written synchronously.  Otherwise
2012228507Spfg		 * if we have a severe page deficiency write the buffer
2013228507Spfg		 * asynchronously.  Otherwise try to cluster, and if that
2014228507Spfg		 * doesn't do it then either do an async write (if O_DIRECT),
2015228507Spfg		 * or a delayed write (if not).
2016228507Spfg		 */
2017228507Spfg		if (ioflag & IO_SYNC) {
2018228507Spfg			(void)bwrite(bp);
2019228507Spfg		} else if (vm_page_count_severe() ||
2020228507Spfg		    buf_dirty_count_severe() ||
2021228507Spfg		    (ioflag & IO_ASYNC)) {
2022228507Spfg			bp->b_flags |= B_CLUSTEROK;
2023228507Spfg			bawrite(bp);
2024228507Spfg		} else if (xfersize + blkoffset == fs->e2fs_fsize) {
2025228507Spfg			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
2026228507Spfg				bp->b_flags |= B_CLUSTEROK;
2027248282Skib				cluster_write(vp, bp, ip->i_size, seqcount, 0);
2028228507Spfg			} else {
2029228507Spfg				bawrite(bp);
2030228507Spfg			}
2031228507Spfg		} else if (ioflag & IO_DIRECT) {
2032228507Spfg			bp->b_flags |= B_CLUSTEROK;
2033228507Spfg			bawrite(bp);
2034228507Spfg		} else {
2035228507Spfg			bp->b_flags |= B_CLUSTEROK;
2036228507Spfg			bdwrite(bp);
2037228507Spfg		}
2038228507Spfg		if (error || xfersize == 0)
2039228507Spfg			break;
2040228507Spfg	}
2041228507Spfg	/*
2042228507Spfg	 * If we successfully wrote any data, and we are not the superuser
2043228507Spfg	 * we clear the setuid and setgid bits as a precaution against
2044228507Spfg	 * tampering.
2045228507Spfg	 */
2046228507Spfg	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
2047228507Spfg	    ap->a_cred) {
2048228507Spfg		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
2049228507Spfg			ip->i_mode &= ~(ISUID | ISGID);
2050228507Spfg	}
2051228507Spfg	if (error) {
2052228507Spfg		if (ioflag & IO_UNIT) {
2053228507Spfg			(void)ext2_truncate(vp, osize,
2054228507Spfg			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
2055228507Spfg			uio->uio_offset -= resid - uio->uio_resid;
2056228507Spfg			uio->uio_resid = resid;
2057228507Spfg		}
2058228507Spfg	}
2059228507Spfg	if (uio->uio_resid != resid) {
2060228507Spfg               ip->i_flag |= IN_CHANGE | IN_UPDATE;
2061228507Spfg               if (ioflag & IO_SYNC)
2062228507Spfg                       error = ext2_update(vp, 1);
2063228507Spfg       }
2064228507Spfg	return (error);
2065228507Spfg}
2066