ufs_vnops.c revision 50253
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
39 * $Id: ufs_vnops.c,v 1.120 1999/08/22 00:15:16 jdp Exp $
40 */
41
42#include "opt_quota.h"
43#include "opt_suiddir.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/namei.h>
48#include <sys/kernel.h>
49#include <sys/fcntl.h>
50#include <sys/stat.h>
51#include <sys/buf.h>
52#include <sys/proc.h>
53#include <sys/mount.h>
54#include <sys/unistd.h>
55#include <sys/vnode.h>
56#include <sys/malloc.h>
57#include <sys/dirent.h>
58#include <sys/lockf.h>
59#include <sys/poll.h>
60#include <sys/conf.h>
61
62#include <vm/vm_zone.h>
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65
66#include <miscfs/fifofs/fifo.h>
67
68#include <ufs/ufs/quota.h>
69#include <ufs/ufs/inode.h>
70#include <ufs/ufs/dir.h>
71#include <ufs/ufs/ufsmount.h>
72#include <ufs/ufs/ufs_extern.h>
73
74static int ufs_abortop __P((struct vop_abortop_args *));
75static int ufs_access __P((struct vop_access_args *));
76static int ufs_advlock __P((struct vop_advlock_args *));
77static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *));
78static int ufs_chown __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *));
79static int ufs_close __P((struct vop_close_args *));
80static int ufs_create __P((struct vop_create_args *));
81static int ufs_getattr __P((struct vop_getattr_args *));
82static int ufs_link __P((struct vop_link_args *));
83static int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *));
84static int ufs_missingop __P((struct vop_generic_args *ap));
85static int ufs_mkdir __P((struct vop_mkdir_args *));
86static int ufs_mknod __P((struct vop_mknod_args *));
87static int ufs_mmap __P((struct vop_mmap_args *));
88static int ufs_open __P((struct vop_open_args *));
89static int ufs_pathconf __P((struct vop_pathconf_args *));
90static int ufs_print __P((struct vop_print_args *));
91static int ufs_readdir __P((struct vop_readdir_args *));
92static int ufs_readlink __P((struct vop_readlink_args *));
93static int ufs_remove __P((struct vop_remove_args *));
94static int ufs_rename __P((struct vop_rename_args *));
95static int ufs_rmdir __P((struct vop_rmdir_args *));
96static int ufs_setattr __P((struct vop_setattr_args *));
97static int ufs_strategy __P((struct vop_strategy_args *));
98static int ufs_symlink __P((struct vop_symlink_args *));
99static int ufs_whiteout __P((struct vop_whiteout_args *));
100static int ufsfifo_close __P((struct vop_close_args *));
101static int ufsfifo_read __P((struct vop_read_args *));
102static int ufsfifo_write __P((struct vop_write_args *));
103static int ufsspec_close __P((struct vop_close_args *));
104static int ufsspec_read __P((struct vop_read_args *));
105static int ufsspec_write __P((struct vop_write_args *));
106
/*
 * Overlay used to address the two 32-bit halves of a 64-bit quantity.
 * Which element of val[] is the high or the low word is selected by
 * _QUAD_HIGHWORD and _QUAD_LOWWORD, which this file does not define
 * itself.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};

/*
 * SETHIGH(q, h): replace the high 32-bit word of the lvalue q with h.
 * SETLOW(q, l):  replace the low 32-bit word of the lvalue q with l.
 *
 * q is evaluated twice (once to read, once to write back), so it must
 * be an lvalue without side effects.
 *
 * The scratch union carries a name that is unlikely to clash with a
 * caller's variable: a plain "tmp" would shadow an argument that is
 * itself called tmp and make SETHIGH(tmp, x) fail to compile.
 *
 * The expansion is kept as a bare braced block (not do/while) so that
 * any existing use written without a trailing semicolon still builds.
 */
#define SETHIGH(q, h) { \
	union _qcvt qcvt_tmp_; \
	qcvt_tmp_.qcvt = (q); \
	qcvt_tmp_.val[_QUAD_HIGHWORD] = (h); \
	(q) = qcvt_tmp_.qcvt; \
}
#define SETLOW(q, l) { \
	union _qcvt qcvt_tmp_; \
	qcvt_tmp_.qcvt = (q); \
	qcvt_tmp_.val[_QUAD_LOWWORD] = (l); \
	(q) = qcvt_tmp_.qcvt; \
}
123
124/*
125 * A virgin directory (no blushing please).
126 */
127static struct dirtemplate mastertemplate = {
128	0, 12, DT_DIR, 1, ".",
129	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
130};
131static struct odirtemplate omastertemplate = {
132	0, 12, 1, ".",
133	0, DIRBLKSIZ - 12, 2, ".."
134};
135
136void
137ufs_itimes(vp)
138	struct vnode *vp;
139{
140	struct inode *ip;
141	struct timespec ts;
142
143	ip = VTOI(vp);
144	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
145		return;
146	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
147		vfs_timestamp(&ts);
148		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
149		    !DOINGSOFTDEP(vp))
150			ip->i_flag |= IN_LAZYMOD;
151		else
152			ip->i_flag |= IN_MODIFIED;
153		if (ip->i_flag & IN_ACCESS) {
154			ip->i_atime = ts.tv_sec;
155			ip->i_atimensec = ts.tv_nsec;
156		}
157		if (ip->i_flag & IN_UPDATE) {
158			ip->i_mtime = ts.tv_sec;
159			ip->i_mtimensec = ts.tv_nsec;
160			ip->i_modrev++;
161		}
162		if (ip->i_flag & IN_CHANGE) {
163			ip->i_ctime = ts.tv_sec;
164			ip->i_ctimensec = ts.tv_nsec;
165		}
166	}
167	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
168}
169
170/*
171 * Create a regular file
172 */
173int
174ufs_create(ap)
175	struct vop_create_args /* {
176		struct vnode *a_dvp;
177		struct vnode **a_vpp;
178		struct componentname *a_cnp;
179		struct vattr *a_vap;
180	} */ *ap;
181{
182	int error;
183
184	error =
185	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
186	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
187	if (error)
188		return (error);
189	VN_POLLEVENT(ap->a_dvp, POLLWRITE);
190	return (0);
191}
192
193/*
194 * Mknod vnode call
195 */
196/* ARGSUSED */
197int
198ufs_mknod(ap)
199	struct vop_mknod_args /* {
200		struct vnode *a_dvp;
201		struct vnode **a_vpp;
202		struct componentname *a_cnp;
203		struct vattr *a_vap;
204	} */ *ap;
205{
206	struct vattr *vap = ap->a_vap;
207	struct vnode **vpp = ap->a_vpp;
208	struct inode *ip;
209	int error;
210
211	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
212	    ap->a_dvp, vpp, ap->a_cnp);
213	if (error)
214		return (error);
215	VN_POLLEVENT(ap->a_dvp, POLLWRITE);
216	ip = VTOI(*vpp);
217	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
218	if (vap->va_rdev != VNOVAL) {
219		/*
220		 * Want to be able to use this to make badblock
221		 * inodes, so don't truncate the dev number.
222		 */
223		ip->i_rdev = vap->va_rdev;
224	}
225	/*
226	 * Remove inode so that it will be reloaded by VFS_VGET and
227	 * checked to see if it is an alias of an existing entry in
228	 * the inode cache.
229	 */
230	vput(*vpp);
231	(*vpp)->v_type = VNON;
232	vgone(*vpp);
233	*vpp = 0;
234	return (0);
235}
236
237/*
238 * Open called.
239 *
240 * Nothing to do.
241 */
242/* ARGSUSED */
243int
244ufs_open(ap)
245	struct vop_open_args /* {
246		struct vnode *a_vp;
247		int  a_mode;
248		struct ucred *a_cred;
249		struct proc *a_p;
250	} */ *ap;
251{
252
253	/*
254	 * Files marked append-only must be opened for appending.
255	 */
256	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
257	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
258		return (EPERM);
259	return (0);
260}
261
262/*
263 * Close called.
264 *
265 * Update the times on the inode.
266 */
267/* ARGSUSED */
268int
269ufs_close(ap)
270	struct vop_close_args /* {
271		struct vnode *a_vp;
272		int  a_fflag;
273		struct ucred *a_cred;
274		struct proc *a_p;
275	} */ *ap;
276{
277	register struct vnode *vp = ap->a_vp;
278
279	simple_lock(&vp->v_interlock);
280	if (vp->v_usecount > 1)
281		ufs_itimes(vp);
282	simple_unlock(&vp->v_interlock);
283	return (0);
284}
285
286int
287ufs_access(ap)
288	struct vop_access_args /* {
289		struct vnode *a_vp;
290		int  a_mode;
291		struct ucred *a_cred;
292		struct proc *a_p;
293	} */ *ap;
294{
295	struct vnode *vp = ap->a_vp;
296	struct inode *ip = VTOI(vp);
297	struct ucred *cred = ap->a_cred;
298	mode_t mask, mode = ap->a_mode;
299	register gid_t *gp;
300	int i;
301#ifdef QUOTA
302	int error;
303#endif
304
305	/*
306	 * Disallow write attempts on read-only file systems;
307	 * unless the file is a socket, fifo, or a block or
308	 * character device resident on the file system.
309	 */
310	if (mode & VWRITE) {
311		switch (vp->v_type) {
312		case VDIR:
313		case VLNK:
314		case VREG:
315			if (vp->v_mount->mnt_flag & MNT_RDONLY)
316				return (EROFS);
317#ifdef QUOTA
318			if ((error = getinoquota(ip)) != 0)
319				return (error);
320#endif
321			break;
322		default:
323			break;
324		}
325	}
326
327	/* If immutable bit set, nobody gets to write it. */
328	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
329		return (EPERM);
330
331	/* Otherwise, user id 0 always gets access. */
332	if (cred->cr_uid == 0)
333		return (0);
334
335	mask = 0;
336
337	/* Otherwise, check the owner. */
338	if (cred->cr_uid == ip->i_uid) {
339		if (mode & VEXEC)
340			mask |= S_IXUSR;
341		if (mode & VREAD)
342			mask |= S_IRUSR;
343		if (mode & VWRITE)
344			mask |= S_IWUSR;
345		return ((ip->i_mode & mask) == mask ? 0 : EACCES);
346	}
347
348	/* Otherwise, check the groups. */
349	for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
350		if (ip->i_gid == *gp) {
351			if (mode & VEXEC)
352				mask |= S_IXGRP;
353			if (mode & VREAD)
354				mask |= S_IRGRP;
355			if (mode & VWRITE)
356				mask |= S_IWGRP;
357			return ((ip->i_mode & mask) == mask ? 0 : EACCES);
358		}
359
360	/* Otherwise, check everyone else. */
361	if (mode & VEXEC)
362		mask |= S_IXOTH;
363	if (mode & VREAD)
364		mask |= S_IROTH;
365	if (mode & VWRITE)
366		mask |= S_IWOTH;
367	return ((ip->i_mode & mask) == mask ? 0 : EACCES);
368}
369
370/* ARGSUSED */
371int
372ufs_getattr(ap)
373	struct vop_getattr_args /* {
374		struct vnode *a_vp;
375		struct vattr *a_vap;
376		struct ucred *a_cred;
377		struct proc *a_p;
378	} */ *ap;
379{
380	register struct vnode *vp = ap->a_vp;
381	register struct inode *ip = VTOI(vp);
382	register struct vattr *vap = ap->a_vap;
383
384	ufs_itimes(vp);
385	/*
386	 * Copy from inode table
387	 */
388	vap->va_fsid = dev2udev(ip->i_dev);
389	vap->va_fileid = ip->i_number;
390	vap->va_mode = ip->i_mode & ~IFMT;
391	vap->va_nlink = ip->i_effnlink;
392	vap->va_uid = ip->i_uid;
393	vap->va_gid = ip->i_gid;
394	vap->va_rdev = ip->i_rdev;
395	vap->va_size = ip->i_din.di_size;
396	vap->va_atime.tv_sec = ip->i_atime;
397	vap->va_atime.tv_nsec = ip->i_atimensec;
398	vap->va_mtime.tv_sec = ip->i_mtime;
399	vap->va_mtime.tv_nsec = ip->i_mtimensec;
400	vap->va_ctime.tv_sec = ip->i_ctime;
401	vap->va_ctime.tv_nsec = ip->i_ctimensec;
402	vap->va_flags = ip->i_flags;
403	vap->va_gen = ip->i_gen;
404	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
405	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
406	vap->va_type = IFTOVT(ip->i_mode);
407	vap->va_filerev = ip->i_modrev;
408	return (0);
409}
410
411/*
412 * Set attribute vnode op. called from several syscalls
413 */
414int
415ufs_setattr(ap)
416	struct vop_setattr_args /* {
417		struct vnode *a_vp;
418		struct vattr *a_vap;
419		struct ucred *a_cred;
420		struct proc *a_p;
421	} */ *ap;
422{
423	struct vattr *vap = ap->a_vap;
424	struct vnode *vp = ap->a_vp;
425	struct inode *ip = VTOI(vp);
426	struct ucred *cred = ap->a_cred;
427	struct proc *p = ap->a_p;
428	int error;
429
430	/*
431	 * Check for unsettable attributes.
432	 */
433	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
434	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
435	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
436	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
437		return (EINVAL);
438	}
439	if (vap->va_flags != VNOVAL) {
440		if (vp->v_mount->mnt_flag & MNT_RDONLY)
441			return (EROFS);
442		if (cred->cr_uid != ip->i_uid &&
443		    (error = suser_xxx(cred, p, PRISON_ROOT)))
444			return (error);
445		if (cred->cr_uid == 0) {
446			if ((ip->i_flags
447			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) &&
448			    securelevel > 0)
449				return (EPERM);
450			ip->i_flags = vap->va_flags;
451		} else {
452			if (ip->i_flags
453			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
454			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
455				return (EPERM);
456			ip->i_flags &= SF_SETTABLE;
457			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
458		}
459		ip->i_flag |= IN_CHANGE;
460		if (vap->va_flags & (IMMUTABLE | APPEND))
461			return (0);
462	}
463	if (ip->i_flags & (IMMUTABLE | APPEND))
464		return (EPERM);
465	/*
466	 * Go through the fields and update iff not VNOVAL.
467	 */
468	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
469		if (vp->v_mount->mnt_flag & MNT_RDONLY)
470			return (EROFS);
471		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) != 0)
472			return (error);
473	}
474	if (vap->va_size != VNOVAL) {
475		/*
476		 * Disallow write attempts on read-only file systems;
477		 * unless the file is a socket, fifo, or a block or
478		 * character device resident on the file system.
479		 */
480		switch (vp->v_type) {
481		case VDIR:
482			return (EISDIR);
483		case VLNK:
484		case VREG:
485			if (vp->v_mount->mnt_flag & MNT_RDONLY)
486				return (EROFS);
487			break;
488		default:
489			break;
490		}
491		if ((error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0)
492			return (error);
493	}
494	ip = VTOI(vp);
495	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
496		if (vp->v_mount->mnt_flag & MNT_RDONLY)
497			return (EROFS);
498		if (cred->cr_uid != ip->i_uid &&
499		    (error = suser_xxx(cred, p, PRISON_ROOT)) &&
500		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
501		    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
502			return (error);
503		if (vap->va_atime.tv_sec != VNOVAL)
504			ip->i_flag |= IN_ACCESS;
505		if (vap->va_mtime.tv_sec != VNOVAL)
506			ip->i_flag |= IN_CHANGE | IN_UPDATE;
507		ufs_itimes(vp);
508		if (vap->va_atime.tv_sec != VNOVAL) {
509			ip->i_atime = vap->va_atime.tv_sec;
510			ip->i_atimensec = vap->va_atime.tv_nsec;
511		}
512		if (vap->va_mtime.tv_sec != VNOVAL) {
513			ip->i_mtime = vap->va_mtime.tv_sec;
514			ip->i_mtimensec = vap->va_mtime.tv_nsec;
515		}
516		error = UFS_UPDATE(vp, 0);
517		if (error)
518			return (error);
519	}
520	error = 0;
521	if (vap->va_mode != (mode_t)VNOVAL) {
522		if (vp->v_mount->mnt_flag & MNT_RDONLY)
523			return (EROFS);
524		error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
525	}
526	VN_POLLEVENT(vp, POLLATTRIB);
527	return (error);
528}
529
530/*
531 * Change the mode on a file.
532 * Inode must be locked before calling.
533 */
534static int
535ufs_chmod(vp, mode, cred, p)
536	register struct vnode *vp;
537	register int mode;
538	register struct ucred *cred;
539	struct proc *p;
540{
541	register struct inode *ip = VTOI(vp);
542	int error;
543
544	if (cred->cr_uid != ip->i_uid) {
545	    error = suser_xxx(cred, p, PRISON_ROOT);
546	    if (error)
547		return (error);
548	}
549	if (cred->cr_uid) {
550		if (vp->v_type != VDIR && (mode & S_ISTXT))
551			return (EFTYPE);
552		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
553			return (EPERM);
554	}
555	ip->i_mode &= ~ALLPERMS;
556	ip->i_mode |= (mode & ALLPERMS);
557	ip->i_flag |= IN_CHANGE;
558	return (0);
559}
560
561/*
562 * Perform chown operation on inode ip;
563 * inode must be locked prior to call.
564 */
565static int
566ufs_chown(vp, uid, gid, cred, p)
567	register struct vnode *vp;
568	uid_t uid;
569	gid_t gid;
570	struct ucred *cred;
571	struct proc *p;
572{
573	register struct inode *ip = VTOI(vp);
574	uid_t ouid;
575	gid_t ogid;
576	int error = 0;
577#ifdef QUOTA
578	register int i;
579	long change;
580#endif
581
582	if (uid == (uid_t)VNOVAL)
583		uid = ip->i_uid;
584	if (gid == (gid_t)VNOVAL)
585		gid = ip->i_gid;
586	/*
587	 * If we don't own the file, are trying to change the owner
588	 * of the file, or are not a member of the target group,
589	 * the caller must be superuser or the call fails.
590	 */
591	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
592	    (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) &&
593	    (error = suser_xxx(cred, p, PRISON_ROOT)))
594		return (error);
595	ogid = ip->i_gid;
596	ouid = ip->i_uid;
597#ifdef QUOTA
598	if ((error = getinoquota(ip)) != 0)
599		return (error);
600	if (ouid == uid) {
601		dqrele(vp, ip->i_dquot[USRQUOTA]);
602		ip->i_dquot[USRQUOTA] = NODQUOT;
603	}
604	if (ogid == gid) {
605		dqrele(vp, ip->i_dquot[GRPQUOTA]);
606		ip->i_dquot[GRPQUOTA] = NODQUOT;
607	}
608	change = ip->i_blocks;
609	(void) chkdq(ip, -change, cred, CHOWN);
610	(void) chkiq(ip, -1, cred, CHOWN);
611	for (i = 0; i < MAXQUOTAS; i++) {
612		dqrele(vp, ip->i_dquot[i]);
613		ip->i_dquot[i] = NODQUOT;
614	}
615#endif
616	ip->i_gid = gid;
617	ip->i_uid = uid;
618#ifdef QUOTA
619	if ((error = getinoquota(ip)) == 0) {
620		if (ouid == uid) {
621			dqrele(vp, ip->i_dquot[USRQUOTA]);
622			ip->i_dquot[USRQUOTA] = NODQUOT;
623		}
624		if (ogid == gid) {
625			dqrele(vp, ip->i_dquot[GRPQUOTA]);
626			ip->i_dquot[GRPQUOTA] = NODQUOT;
627		}
628		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
629			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
630				goto good;
631			else
632				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
633		}
634		for (i = 0; i < MAXQUOTAS; i++) {
635			dqrele(vp, ip->i_dquot[i]);
636			ip->i_dquot[i] = NODQUOT;
637		}
638	}
639	ip->i_gid = ogid;
640	ip->i_uid = ouid;
641	if (getinoquota(ip) == 0) {
642		if (ouid == uid) {
643			dqrele(vp, ip->i_dquot[USRQUOTA]);
644			ip->i_dquot[USRQUOTA] = NODQUOT;
645		}
646		if (ogid == gid) {
647			dqrele(vp, ip->i_dquot[GRPQUOTA]);
648			ip->i_dquot[GRPQUOTA] = NODQUOT;
649		}
650		(void) chkdq(ip, change, cred, FORCE|CHOWN);
651		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
652		(void) getinoquota(ip);
653	}
654	return (error);
655good:
656	if (getinoquota(ip))
657		panic("ufs_chown: lost quota");
658#endif /* QUOTA */
659	ip->i_flag |= IN_CHANGE;
660	if (cred->cr_uid != 0 && (ouid != uid || ogid != gid))
661		ip->i_mode &= ~(ISUID | ISGID);
662	return (0);
663}
664
665/*
666 * Mmap a file
667 *
668 * NB Currently unsupported.
669 */
670/* ARGSUSED */
671int
672ufs_mmap(ap)
673	struct vop_mmap_args /* {
674		struct vnode *a_vp;
675		int  a_fflags;
676		struct ucred *a_cred;
677		struct proc *a_p;
678	} */ *ap;
679{
680
681	return (EINVAL);
682}
683
684int
685ufs_remove(ap)
686	struct vop_remove_args /* {
687		struct vnode *a_dvp;
688		struct vnode *a_vp;
689		struct componentname *a_cnp;
690	} */ *ap;
691{
692	struct inode *ip;
693	struct vnode *vp = ap->a_vp;
694	struct vnode *dvp = ap->a_dvp;
695	int error;
696
697	ip = VTOI(vp);
698	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
699	    (VTOI(dvp)->i_flags & APPEND)) {
700		error = EPERM;
701		goto out;
702	}
703	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
704	VN_POLLEVENT(vp, POLLNLINK);
705	VN_POLLEVENT(dvp, POLLWRITE);
706out:
707	return (error);
708}
709
710/*
711 * link vnode call
712 */
713int
714ufs_link(ap)
715	struct vop_link_args /* {
716		struct vnode *a_tdvp;
717		struct vnode *a_vp;
718		struct componentname *a_cnp;
719	} */ *ap;
720{
721	struct vnode *vp = ap->a_vp;
722	struct vnode *tdvp = ap->a_tdvp;
723	struct componentname *cnp = ap->a_cnp;
724	struct proc *p = cnp->cn_proc;
725	struct inode *ip;
726	struct direct newdir;
727	int error;
728
729#ifdef DIAGNOSTIC
730	if ((cnp->cn_flags & HASBUF) == 0)
731		panic("ufs_link: no name");
732#endif
733	if (tdvp->v_mount != vp->v_mount) {
734		VOP_ABORTOP(tdvp, cnp);
735		error = EXDEV;
736		goto out2;
737	}
738	if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) {
739		VOP_ABORTOP(tdvp, cnp);
740		goto out2;
741	}
742	ip = VTOI(vp);
743	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
744		VOP_ABORTOP(tdvp, cnp);
745		error = EMLINK;
746		goto out1;
747	}
748	if (ip->i_flags & (IMMUTABLE | APPEND)) {
749		VOP_ABORTOP(tdvp, cnp);
750		error = EPERM;
751		goto out1;
752	}
753	ip->i_effnlink++;
754	ip->i_nlink++;
755	ip->i_flag |= IN_CHANGE;
756	if (DOINGSOFTDEP(vp))
757		softdep_increase_linkcnt(ip);
758	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
759	if (!error) {
760		ufs_makedirentry(ip, cnp, &newdir);
761		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
762	}
763
764	if (error) {
765		ip->i_effnlink--;
766		ip->i_nlink--;
767		ip->i_flag |= IN_CHANGE;
768	}
769	zfree(namei_zone, cnp->cn_pnbuf);
770out1:
771	if (tdvp != vp)
772		VOP_UNLOCK(vp, 0, p);
773out2:
774	VN_POLLEVENT(vp, POLLNLINK);
775	VN_POLLEVENT(tdvp, POLLWRITE);
776	return (error);
777}
778
779/*
780 * whiteout vnode call
781 */
782int
783ufs_whiteout(ap)
784	struct vop_whiteout_args /* {
785		struct vnode *a_dvp;
786		struct componentname *a_cnp;
787		int a_flags;
788	} */ *ap;
789{
790	struct vnode *dvp = ap->a_dvp;
791	struct componentname *cnp = ap->a_cnp;
792	struct direct newdir;
793	int error = 0;
794
795	switch (ap->a_flags) {
796	case LOOKUP:
797		/* 4.4 format directories support whiteout operations */
798		if (dvp->v_mount->mnt_maxsymlinklen > 0)
799			return (0);
800		return (EOPNOTSUPP);
801
802	case CREATE:
803		/* create a new directory whiteout */
804#ifdef DIAGNOSTIC
805		if ((cnp->cn_flags & SAVENAME) == 0)
806			panic("ufs_whiteout: missing name");
807		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
808			panic("ufs_whiteout: old format filesystem");
809#endif
810
811		newdir.d_ino = WINO;
812		newdir.d_namlen = cnp->cn_namelen;
813		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
814		newdir.d_type = DT_WHT;
815		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
816		break;
817
818	case DELETE:
819		/* remove an existing directory whiteout */
820#ifdef DIAGNOSTIC
821		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
822			panic("ufs_whiteout: old format filesystem");
823#endif
824
825		cnp->cn_flags &= ~DOWHITEOUT;
826		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
827		break;
828	default:
829		panic("ufs_whiteout: unknown op");
830	}
831	if (cnp->cn_flags & HASBUF) {
832		zfree(namei_zone, cnp->cn_pnbuf);
833		cnp->cn_flags &= ~HASBUF;
834	}
835	return (error);
836}
837
838/*
839 * Rename system call.
840 * 	rename("foo", "bar");
841 * is essentially
842 *	unlink("bar");
843 *	link("foo", "bar");
844 *	unlink("foo");
845 * but ``atomically''.  Can't do full commit without saving state in the
846 * inode on disk which isn't feasible at this time.  Best we can do is
847 * always guarantee the target exists.
848 *
849 * Basic algorithm is:
850 *
851 * 1) Bump link count on source while we're linking it to the
852 *    target.  This also ensures the inode won't be deleted out
853 *    from underneath us while we work (it may be truncated by
854 *    a concurrent `trunc' or `open' for creation).
855 * 2) Link source to destination.  If destination already exists,
856 *    delete it first.
857 * 3) Unlink source reference to inode if still around. If a
858 *    directory was moved and the parent of the destination
859 *    is different from the source, patch the ".." entry in the
860 *    directory.
861 */
862int
863ufs_rename(ap)
864	struct vop_rename_args  /* {
865		struct vnode *a_fdvp;
866		struct vnode *a_fvp;
867		struct componentname *a_fcnp;
868		struct vnode *a_tdvp;
869		struct vnode *a_tvp;
870		struct componentname *a_tcnp;
871	} */ *ap;
872{
873	struct vnode *tvp = ap->a_tvp;
874	register struct vnode *tdvp = ap->a_tdvp;
875	struct vnode *fvp = ap->a_fvp;
876	struct vnode *fdvp = ap->a_fdvp;
877	struct componentname *tcnp = ap->a_tcnp;
878	struct componentname *fcnp = ap->a_fcnp;
879	struct proc *p = fcnp->cn_proc;
880	struct inode *ip, *xp, *dp;
881	struct direct newdir;
882	int doingdirectory = 0, oldparent = 0, newparent = 0;
883	int error = 0, ioflag;
884
885#ifdef DIAGNOSTIC
886	if ((tcnp->cn_flags & HASBUF) == 0 ||
887	    (fcnp->cn_flags & HASBUF) == 0)
888		panic("ufs_rename: no name");
889#endif
890	/*
891	 * Check for cross-device rename.
892	 */
893	if ((fvp->v_mount != tdvp->v_mount) ||
894	    (tvp && (fvp->v_mount != tvp->v_mount))) {
895		error = EXDEV;
896abortit:
897		VOP_ABORTOP(tdvp, tcnp);
898		if (tdvp == tvp)
899			vrele(tdvp);
900		else
901			vput(tdvp);
902		if (tvp)
903			vput(tvp);
904		VOP_ABORTOP(fdvp, fcnp);
905		vrele(fdvp);
906		vrele(fvp);
907		return (error);
908	}
909
910	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
911	    (VTOI(tdvp)->i_flags & APPEND))) {
912		error = EPERM;
913		goto abortit;
914	}
915
916	/*
917	 * Check if just deleting a link name or if we've lost a race.
918	 * If another process completes the same rename after we've looked
919	 * up the source and have blocked looking up the target, then the
920	 * source and target inodes may be identical now although the
921	 * names were never linked.
922	 */
923	if (fvp == tvp) {
924		if (fvp->v_type == VDIR) {
925			/*
926			 * Linked directories are impossible, so we must
927			 * have lost the race.  Pretend that the rename
928			 * completed before the lookup.
929			 */
930#ifdef UFS_RENAME_DEBUG
931			printf("ufs_rename: fvp == tvp for directories\n");
932#endif
933			error = ENOENT;
934			goto abortit;
935		}
936
937		/* Release destination completely. */
938		VOP_ABORTOP(tdvp, tcnp);
939		vput(tdvp);
940		vput(tvp);
941
942		/*
943		 * Delete source.  There is another race now that everything
944		 * is unlocked, but this doesn't cause any new complications.
945		 * Relookup() may find a file that is unrelated to the
946		 * original one, or it may fail.  Too bad.
947		 */
948		vrele(fdvp);
949		vrele(fvp);
950		fcnp->cn_flags &= ~MODMASK;
951		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
952		if ((fcnp->cn_flags & SAVESTART) == 0)
953			panic("ufs_rename: lost from startdir");
954		fcnp->cn_nameiop = DELETE;
955		VREF(fdvp);
956		error = relookup(fdvp, &fvp, fcnp);
957		if (error == 0)
958			vrele(fdvp);
959		if (fvp == NULL) {
960#ifdef UFS_RENAME_DEBUG
961			printf("ufs_rename: from name disappeared\n");
962#endif
963			return (ENOENT);
964		}
965		error = VOP_REMOVE(fdvp, fvp, fcnp);
966		if (fdvp == fvp)
967			vrele(fdvp);
968		else
969			vput(fdvp);
970		vput(fvp);
971		return (error);
972	}
973	if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0)
974		goto abortit;
975	dp = VTOI(fdvp);
976	ip = VTOI(fvp);
977	if (ip->i_nlink >= LINK_MAX) {
978		VOP_UNLOCK(fvp, 0, p);
979		error = EMLINK;
980		goto abortit;
981	}
982	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
983	    || (dp->i_flags & APPEND)) {
984		VOP_UNLOCK(fvp, 0, p);
985		error = EPERM;
986		goto abortit;
987	}
988	if ((ip->i_mode & IFMT) == IFDIR) {
989		/*
990		 * Avoid ".", "..", and aliases of "." for obvious reasons.
991		 */
992		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
993		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
994		    (ip->i_flag & IN_RENAME)) {
995			VOP_UNLOCK(fvp, 0, p);
996			error = EINVAL;
997			goto abortit;
998		}
999		ip->i_flag |= IN_RENAME;
1000		oldparent = dp->i_number;
1001		doingdirectory = 1;
1002	}
1003	VN_POLLEVENT(fdvp, POLLWRITE);
1004	vrele(fdvp);
1005
1006	/*
1007	 * When the target exists, both the directory
1008	 * and target vnodes are returned locked.
1009	 */
1010	dp = VTOI(tdvp);
1011	xp = NULL;
1012	if (tvp)
1013		xp = VTOI(tvp);
1014
1015	/*
1016	 * 1) Bump link count while we're moving stuff
1017	 *    around.  If we crash somewhere before
1018	 *    completing our work, the link count
1019	 *    may be wrong, but correctable.
1020	 */
1021	ip->i_effnlink++;
1022	ip->i_nlink++;
1023	ip->i_flag |= IN_CHANGE;
1024	if (DOINGSOFTDEP(fvp))
1025		softdep_increase_linkcnt(ip);
1026	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
1027				       DOINGASYNC(fvp)))) != 0) {
1028		VOP_UNLOCK(fvp, 0, p);
1029		goto bad;
1030	}
1031
1032	/*
1033	 * If ".." must be changed (ie the directory gets a new
1034	 * parent) then the source directory must not be in the
1035	 * directory hierarchy above the target, as this would
1036	 * orphan everything below the source directory. Also
1037	 * the user must have write permission in the source so
1038	 * as to be able to change "..". We must repeat the call
1039	 * to namei, as the parent directory is unlocked by the
1040	 * call to checkpath().
1041	 */
1042	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
1043	VOP_UNLOCK(fvp, 0, p);
1044	if (oldparent != dp->i_number)
1045		newparent = dp->i_number;
1046	if (doingdirectory && newparent) {
1047		if (error)	/* write access check above */
1048			goto bad;
1049		if (xp != NULL)
1050			vput(tvp);
1051		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
1052		if (error)
1053			goto out;
1054		if ((tcnp->cn_flags & SAVESTART) == 0)
1055			panic("ufs_rename: lost to startdir");
1056		VREF(tdvp);
1057		error = relookup(tdvp, &tvp, tcnp);
1058		if (error)
1059			goto out;
1060		vrele(tdvp);
1061		dp = VTOI(tdvp);
1062		xp = NULL;
1063		if (tvp)
1064			xp = VTOI(tvp);
1065	}
1066	/*
1067	 * 2) If target doesn't exist, link the target
1068	 *    to the source and unlink the source.
1069	 *    Otherwise, rewrite the target directory
1070	 *    entry to reference the source inode and
1071	 *    expunge the original entry's existence.
1072	 */
1073	if (xp == NULL) {
1074		if (dp->i_dev != ip->i_dev)
1075			panic("ufs_rename: EXDEV");
1076		/*
1077		 * Account for ".." in new directory.
1078		 * When source and destination have the same
1079		 * parent we don't fool with the link count.
1080		 */
1081		if (doingdirectory && newparent) {
1082			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1083				error = EMLINK;
1084				goto bad;
1085			}
1086			dp->i_effnlink++;
1087			dp->i_nlink++;
1088			dp->i_flag |= IN_CHANGE;
1089			if (DOINGSOFTDEP(tdvp))
1090				softdep_increase_linkcnt(dp);
1091			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1092						   DOINGASYNC(tdvp)));
1093			if (error)
1094				goto bad;
1095		}
1096		ufs_makedirentry(ip, tcnp, &newdir);
1097		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
1098		if (error) {
1099			if (doingdirectory && newparent) {
1100				dp->i_effnlink--;
1101				dp->i_nlink--;
1102				dp->i_flag |= IN_CHANGE;
1103				(void)UFS_UPDATE(tdvp, 1);
1104			}
1105			goto bad;
1106		}
1107		VN_POLLEVENT(tdvp, POLLWRITE);
1108		vput(tdvp);
1109	} else {
1110		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1111			panic("ufs_rename: EXDEV");
1112		/*
1113		 * Short circuit rename(foo, foo).
1114		 */
1115		if (xp->i_number == ip->i_number)
1116			panic("ufs_rename: same file");
1117		/*
1118		 * If the parent directory is "sticky", then the user must
1119		 * own the parent directory, or the destination of the rename,
1120		 * otherwise the destination may not be changed (except by
1121		 * root). This implements append-only directories.
1122		 */
1123		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
1124		    tcnp->cn_cred->cr_uid != dp->i_uid &&
1125		    xp->i_uid != tcnp->cn_cred->cr_uid) {
1126			error = EPERM;
1127			goto bad;
1128		}
1129		/*
1130		 * Target must be empty if a directory and have no links
1131		 * to it. Also, ensure source and target are compatible
1132		 * (both directories, or both not directories).
1133		 */
1134		if ((xp->i_mode&IFMT) == IFDIR) {
1135			if ((xp->i_effnlink > 2) ||
1136			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
1137				error = ENOTEMPTY;
1138				goto bad;
1139			}
1140			if (!doingdirectory) {
1141				error = ENOTDIR;
1142				goto bad;
1143			}
1144			cache_purge(tdvp);
1145		} else if (doingdirectory) {
1146			error = EISDIR;
1147			goto bad;
1148		}
1149		error = ufs_dirrewrite(dp, xp, ip->i_number,
1150		    IFTODT(ip->i_mode),
1151		    (doingdirectory && newparent) ? newparent : doingdirectory);
1152		if (error)
1153			goto bad;
1154		if (doingdirectory) {
1155			if (!newparent) {
1156				dp->i_effnlink--;
1157				dp->i_flag |= IN_CHANGE;
1158			}
1159			xp->i_effnlink--;
1160			xp->i_flag |= IN_CHANGE;
1161		}
1162		VN_POLLEVENT(tdvp, POLLWRITE);
1163		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1164			/*
1165			 * Truncate inode. The only stuff left in the directory
1166			 * is "." and "..". The "." reference is inconsequential
1167			 * since we are quashing it. We have removed the "."
1168			 * reference and the reference in the parent directory,
1169			 * but there may be other hard links. The soft
1170			 * dependency code will arrange to do these operations
1171			 * after the parent directory entry has been deleted on
1172			 * disk, so when running with that code we avoid doing
1173			 * them now.
1174			 */
1175			if (!newparent)
1176				dp->i_nlink--;
1177			xp->i_nlink--;
1178			ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
1179			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
1180			    tcnp->cn_cred, tcnp->cn_proc)) != 0)
1181				goto bad;
1182		}
1183		vput(tdvp);
1184		VN_POLLEVENT(tvp, POLLNLINK); /* XXX this right? */
1185		vput(tvp);
1186		xp = NULL;
1187	}
1188
1189	/*
1190	 * 3) Unlink the source.
1191	 */
1192	fcnp->cn_flags &= ~MODMASK;
1193	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1194	if ((fcnp->cn_flags & SAVESTART) == 0)
1195		panic("ufs_rename: lost from startdir");
1196	VREF(fdvp);
1197	error = relookup(fdvp, &fvp, fcnp);
1198	if (error == 0)
1199		vrele(fdvp);
1200	if (fvp != NULL) {
1201		xp = VTOI(fvp);
1202		dp = VTOI(fdvp);
1203	} else {
1204		/*
1205		 * From name has disappeared.
1206		 */
1207		if (doingdirectory)
1208			panic("ufs_rename: lost dir entry");
1209		vrele(ap->a_fvp);
1210		return (0);
1211	}
1212	/*
1213	 * Ensure that the directory entry still exists and has not
1214	 * changed while the new name has been entered. If the source is
1215	 * a file then the entry may have been unlinked or renamed. In
1216	 * either case there is no further work to be done. If the source
1217	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
1218	 * flag ensures that it cannot be moved by another rename or removed
1219	 * by a rmdir.
1220	 */
1221	if (xp != ip) {
1222		if (doingdirectory)
1223			panic("ufs_rename: lost dir entry");
1224	} else {
1225		/*
1226		 * If the source is a directory with a
1227		 * new parent, the link count of the old
1228		 * parent directory must be decremented
1229		 * and ".." set to point to the new parent.
1230		 */
1231		if (doingdirectory && newparent) {
1232			xp->i_offset = mastertemplate.dot_reclen;
1233			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
1234			cache_purge(fdvp);
1235		}
1236		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
1237		xp->i_flag &= ~IN_RENAME;
1238	}
1239	if (dp)
1240		vput(fdvp);
1241	if (xp)
1242		vput(fvp);
1243	vrele(ap->a_fvp);
1244	return (error);
1245
1246bad:
1247	if (xp)
1248		vput(ITOV(xp));
1249	vput(ITOV(dp));
1250out:
1251	if (doingdirectory)
1252		ip->i_flag &= ~IN_RENAME;
1253	if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
1254		ip->i_effnlink--;
1255		ip->i_nlink--;
1256		ip->i_flag |= IN_CHANGE;
1257		ip->i_flag &= ~IN_RENAME;
1258		vput(fvp);
1259	} else
1260		vrele(fvp);
1261	return (error);
1262}
1263
1264/*
1265 * Mkdir system call
1266 */
1267int
1268ufs_mkdir(ap)
1269	struct vop_mkdir_args /* {
1270		struct vnode *a_dvp;
1271		struct vnode **a_vpp;
1272		struct componentname *a_cnp;
1273		struct vattr *a_vap;
1274	} */ *ap;
1275{
1276	register struct vnode *dvp = ap->a_dvp;
1277	register struct vattr *vap = ap->a_vap;
1278	register struct componentname *cnp = ap->a_cnp;
1279	register struct inode *ip, *dp;
1280	struct vnode *tvp;
1281	struct buf *bp;
1282	struct dirtemplate dirtemplate, *dtp;
1283	struct direct newdir;
1284	int error, dmode;
1285	long blkoff;
1286
1287#ifdef DIAGNOSTIC
1288	if ((cnp->cn_flags & HASBUF) == 0)
1289		panic("ufs_mkdir: no name");
1290#endif
1291	dp = VTOI(dvp);
1292	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1293		error = EMLINK;
1294		goto out;
1295	}
1296	dmode = vap->va_mode & 0777;
1297	dmode |= IFDIR;
1298	/*
1299	 * Must simulate part of ufs_makeinode here to acquire the inode,
1300	 * but not have it entered in the parent directory. The entry is
1301	 * made later after writing "." and ".." entries.
1302	 */
1303	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1304	if (error)
1305		goto out;
1306	ip = VTOI(tvp);
1307	ip->i_gid = dp->i_gid;
1308#ifdef SUIDDIR
1309	{
1310#ifdef QUOTA
1311		struct ucred ucred, *ucp;
1312		ucp = cnp->cn_cred;
1313#endif			I
1314		/*
1315		 * If we are hacking owners here, (only do this where told to)
1316		 * and we are not giving it TOO root, (would subvert quotas)
1317		 * then go ahead and give it to the other user.
1318		 * The new directory also inherits the SUID bit.
1319		 * If user's UID and dir UID are the same,
1320		 * 'give it away' so that the SUID is still forced on.
1321		 */
1322		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1323		    (dp->i_mode & ISUID) && dp->i_uid) {
1324			dmode |= ISUID;
1325			ip->i_uid = dp->i_uid;
1326#ifdef QUOTA
1327			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1328				/*
1329				 * Make sure the correct user gets charged
1330				 * for the space.
1331				 * Make a dummy credential for the victim.
1332				 * XXX This seems to never be accessed out of
1333				 * our context so a stack variable is ok.
1334				 */
1335				ucred.cr_ref = 1;
1336				ucred.cr_uid = ip->i_uid;
1337				ucred.cr_ngroups = 1;
1338				ucred.cr_groups[0] = dp->i_gid;
1339				ucp = &ucred;
1340			}
1341#endif
1342		} else
1343			ip->i_uid = cnp->cn_cred->cr_uid;
1344#ifdef QUOTA
1345		if ((error = getinoquota(ip)) ||
1346	    	    (error = chkiq(ip, 1, ucp, 0))) {
1347			zfree(namei_zone, cnp->cn_pnbuf);
1348			UFS_VFREE(tvp, ip->i_number, dmode);
1349			vput(tvp);
1350			return (error);
1351		}
1352#endif
1353	}
1354#else	/* !SUIDDIR */
1355	ip->i_uid = cnp->cn_cred->cr_uid;
1356#ifdef QUOTA
1357	if ((error = getinoquota(ip)) ||
1358	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1359		zfree(namei_zone, cnp->cn_pnbuf);
1360		UFS_VFREE(tvp, ip->i_number, dmode);
1361		vput(tvp);
1362		return (error);
1363	}
1364#endif
1365#endif	/* !SUIDDIR */
1366	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1367	ip->i_mode = dmode;
1368	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1369	ip->i_effnlink = 2;
1370	ip->i_nlink = 2;
1371	if (DOINGSOFTDEP(tvp))
1372		softdep_increase_linkcnt(ip);
1373	if (cnp->cn_flags & ISWHITEOUT)
1374		ip->i_flags |= UF_OPAQUE;
1375
1376	/*
1377	 * Bump link count in parent directory to reflect work done below.
1378	 * Should be done before reference is created so cleanup is
1379	 * possible if we crash.
1380	 */
1381	dp->i_effnlink++;
1382	dp->i_nlink++;
1383	dp->i_flag |= IN_CHANGE;
1384	if (DOINGSOFTDEP(dvp))
1385		softdep_increase_linkcnt(dp);
1386	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1387	if (error)
1388		goto bad;
1389
1390	/*
1391	 * Initialize directory with "." and ".." from static template.
1392	 */
1393	if (dvp->v_mount->mnt_maxsymlinklen > 0
1394	)
1395		dtp = &mastertemplate;
1396	else
1397		dtp = (struct dirtemplate *)&omastertemplate;
1398	dirtemplate = *dtp;
1399	dirtemplate.dot_ino = ip->i_number;
1400	dirtemplate.dotdot_ino = dp->i_number;
1401	if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1402	    B_CLRBUF, &bp)) != 0)
1403		goto bad;
1404	ip->i_size = DIRBLKSIZ;
1405	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1406	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1407	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1408	if (DOINGSOFTDEP(tvp)) {
1409		/*
1410		 * Ensure that the entire newly allocated block is a
1411		 * valid directory so that future growth within the
1412		 * block does not have to ensure that the block is
1413		 * written before the inode.
1414		 */
1415		blkoff = DIRBLKSIZ;
1416		while (blkoff < bp->b_bcount) {
1417			((struct direct *)
1418			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1419			blkoff += DIRBLKSIZ;
1420		}
1421	}
1422	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1423				       DOINGASYNC(tvp)))) != 0) {
1424		(void)VOP_BWRITE(bp->b_vp, bp);
1425		goto bad;
1426	}
1427	VN_POLLEVENT(dvp, POLLWRITE); /* XXX right place? */
1428	/*
1429	 * Directory set up, now install its entry in the parent directory.
1430	 *
1431	 * If we are not doing soft dependencies, then we must write out the
1432	 * buffer containing the new directory body before entering the new
1433	 * name in the parent. If we are doing soft dependencies, then the
1434	 * buffer containing the new directory body will be passed to and
1435	 * released in the soft dependency code after the code has attached
1436	 * an appropriate ordering dependency to the buffer which ensures that
1437	 * the buffer is written before the new name is written in the parent.
1438	 */
1439	if (DOINGASYNC(dvp))
1440		bdwrite(bp);
1441	else if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp->b_vp, bp))))
1442		goto bad;
1443	ufs_makedirentry(ip, cnp, &newdir);
1444	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
1445
1446bad:
1447	if (error == 0) {
1448		*ap->a_vpp = tvp;
1449	} else {
1450		dp->i_effnlink--;
1451		dp->i_nlink--;
1452		dp->i_flag |= IN_CHANGE;
1453		/*
1454		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1455		 * do this for us because we set the link count to 0.
1456		 */
1457		ip->i_effnlink = 0;
1458		ip->i_nlink = 0;
1459		ip->i_flag |= IN_CHANGE;
1460		vput(tvp);
1461	}
1462out:
1463	zfree(namei_zone, cnp->cn_pnbuf);
1464	return (error);
1465}
1466
1467/*
1468 * Rmdir system call.
1469 */
1470int
1471ufs_rmdir(ap)
1472	struct vop_rmdir_args /* {
1473		struct vnode *a_dvp;
1474		struct vnode *a_vp;
1475		struct componentname *a_cnp;
1476	} */ *ap;
1477{
1478	struct vnode *vp = ap->a_vp;
1479	struct vnode *dvp = ap->a_dvp;
1480	struct componentname *cnp = ap->a_cnp;
1481	struct inode *ip, *dp;
1482	int error, ioflag;
1483
1484	ip = VTOI(vp);
1485	dp = VTOI(dvp);
1486
1487	/*
1488	 * Do not remove a directory that is in the process of being renamed.
1489	 * Verify the directory is empty (and valid). Rmdir ".." will not be
1490	 * valid since ".." will contain a reference to the current directory
1491	 * and thus be non-empty. Do not allow the removal of mounted on
1492	 * directories (this can happen when an NFS exported filesystem
1493	 * tries to remove a locally mounted on directory).
1494	 */
1495	error = 0;
1496	if (ip->i_flag & IN_RENAME) {
1497		error = EINVAL;
1498		goto out;
1499	}
1500	if (ip->i_effnlink != 2 ||
1501	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1502		error = ENOTEMPTY;
1503		goto out;
1504	}
1505	if ((dp->i_flags & APPEND)
1506	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1507		error = EPERM;
1508		goto out;
1509	}
1510	if (vp->v_mountedhere != 0) {
1511		error = EINVAL;
1512		goto out;
1513	}
1514	/*
1515	 * Delete reference to directory before purging
1516	 * inode.  If we crash in between, the directory
1517	 * will be reattached to lost+found,
1518	 */
1519	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
1520	if (error)
1521		goto out;
1522	VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK);
1523	cache_purge(dvp);
1524	/*
1525	 * Truncate inode. The only stuff left in the directory is "." and
1526	 * "..". The "." reference is inconsequential since we are quashing
1527	 * it. We have removed the "." reference and the reference in the
1528	 * parent directory, but there may be other hard links. So,
1529	 * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no
1530	 * new entries are made. The soft dependency code will arrange to
1531	 * do these operations after the parent directory entry has been
1532	 * deleted on disk, so when running with that code we avoid doing
1533	 * them now.
1534	 */
1535	dp->i_effnlink--;
1536	dp->i_flag |= IN_CHANGE;
1537	ip->i_effnlink--;
1538	ip->i_flag |= IN_CHANGE;
1539	if (!DOINGSOFTDEP(vp)) {
1540		dp->i_nlink--;
1541		ip->i_nlink--;
1542		ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
1543		error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
1544		    cnp->cn_proc);
1545	}
1546	cache_purge(vp);
1547out:
1548	VN_POLLEVENT(vp, POLLNLINK);
1549	return (error);
1550}
1551
1552/*
1553 * symlink -- make a symbolic link
1554 */
1555int
1556ufs_symlink(ap)
1557	struct vop_symlink_args /* {
1558		struct vnode *a_dvp;
1559		struct vnode **a_vpp;
1560		struct componentname *a_cnp;
1561		struct vattr *a_vap;
1562		char *a_target;
1563	} */ *ap;
1564{
1565	register struct vnode *vp, **vpp = ap->a_vpp;
1566	register struct inode *ip;
1567	int len, error;
1568
1569	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1570	    vpp, ap->a_cnp);
1571	if (error)
1572		return (error);
1573	VN_POLLEVENT(ap->a_dvp, POLLWRITE);
1574	vp = *vpp;
1575	len = strlen(ap->a_target);
1576	if (len < vp->v_mount->mnt_maxsymlinklen) {
1577		ip = VTOI(vp);
1578		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
1579		ip->i_size = len;
1580		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1581	} else
1582		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1583		    UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0,
1584		    (struct proc *)0);
1585	vput(vp);
1586	return (error);
1587}
1588
1589/*
1590 * Vnode op for reading directories.
1591 *
1592 * The routine below assumes that the on-disk format of a directory
1593 * is the same as that defined by <sys/dirent.h>. If the on-disk
1594 * format changes, then it will be necessary to do a conversion
1595 * from the on-disk format that read returns to the format defined
1596 * by <sys/dirent.h>.
1597 */
1598int
1599ufs_readdir(ap)
1600	struct vop_readdir_args /* {
1601		struct vnode *a_vp;
1602		struct uio *a_uio;
1603		struct ucred *a_cred;
1604		int *a_eofflag;
1605		int *ncookies;
1606		u_long **a_cookies;
1607	} */ *ap;
1608{
1609	register struct uio *uio = ap->a_uio;
1610	int error;
1611	size_t count, lost;
1612	off_t off;
1613
1614	if (ap->a_ncookies != NULL)
1615		/*
1616		 * Ensure that the block is aligned.  The caller can use
1617		 * the cookies to determine where in the block to start.
1618		 */
1619		uio->uio_offset &= ~(DIRBLKSIZ - 1);
1620	off = uio->uio_offset;
1621	count = uio->uio_resid;
1622	/* Make sure we don't return partial entries. */
1623	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
1624		return (EINVAL);
1625	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
1626	lost = uio->uio_resid - count;
1627	uio->uio_resid = count;
1628	uio->uio_iov->iov_len = count;
1629#	if (BYTE_ORDER == LITTLE_ENDIAN)
1630		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
1631			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1632		} else {
1633			struct dirent *dp, *edp;
1634			struct uio auio;
1635			struct iovec aiov;
1636			caddr_t dirbuf;
1637			int readcnt;
1638			u_char tmp;
1639
1640			auio = *uio;
1641			auio.uio_iov = &aiov;
1642			auio.uio_iovcnt = 1;
1643			auio.uio_segflg = UIO_SYSSPACE;
1644			aiov.iov_len = count;
1645			MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
1646			aiov.iov_base = dirbuf;
1647			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
1648			if (error == 0) {
1649				readcnt = count - auio.uio_resid;
1650				edp = (struct dirent *)&dirbuf[readcnt];
1651				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
1652					tmp = dp->d_namlen;
1653					dp->d_namlen = dp->d_type;
1654					dp->d_type = tmp;
1655					if (dp->d_reclen > 0) {
1656						dp = (struct dirent *)
1657						    ((char *)dp + dp->d_reclen);
1658					} else {
1659						error = EIO;
1660						break;
1661					}
1662				}
1663				if (dp >= edp)
1664					error = uiomove(dirbuf, readcnt, uio);
1665			}
1666			FREE(dirbuf, M_TEMP);
1667		}
1668#	else
1669		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1670#	endif
1671	if (!error && ap->a_ncookies != NULL) {
1672		struct dirent* dpStart;
1673		struct dirent* dpEnd;
1674		struct dirent* dp;
1675		int ncookies;
1676		u_long *cookies;
1677		u_long *cookiep;
1678
1679		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1680			panic("ufs_readdir: unexpected uio from NFS server");
1681		dpStart = (struct dirent *)
1682		     (uio->uio_iov->iov_base - (uio->uio_offset - off));
1683		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
1684		for (dp = dpStart, ncookies = 0;
1685		     dp < dpEnd;
1686		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
1687			ncookies++;
1688		MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
1689		    M_WAITOK);
1690		for (dp = dpStart, cookiep = cookies;
1691		     dp < dpEnd;
1692		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
1693			off += dp->d_reclen;
1694			*cookiep++ = (u_long) off;
1695		}
1696		*ap->a_ncookies = ncookies;
1697		*ap->a_cookies = cookies;
1698	}
1699	uio->uio_resid += lost;
1700	if (ap->a_eofflag)
1701	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
1702	return (error);
1703}
1704
1705/*
1706 * Return target name of a symbolic link
1707 */
1708int
1709ufs_readlink(ap)
1710	struct vop_readlink_args /* {
1711		struct vnode *a_vp;
1712		struct uio *a_uio;
1713		struct ucred *a_cred;
1714	} */ *ap;
1715{
1716	register struct vnode *vp = ap->a_vp;
1717	register struct inode *ip = VTOI(vp);
1718	int isize;
1719
1720	isize = ip->i_size;
1721	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1722	    (ip->i_din.di_blocks == 0)) {	/* XXX - for old fastlink support */
1723		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
1724		return (0);
1725	}
1726	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1727}
1728
1729/*
1730 * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
1731 * done. If a buffer has been saved in anticipation of a CREATE, delete it.
1732 */
1733/* ARGSUSED */
1734int
1735ufs_abortop(ap)
1736	struct vop_abortop_args /* {
1737		struct vnode *a_dvp;
1738		struct componentname *a_cnp;
1739	} */ *ap;
1740{
1741	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1742		zfree(namei_zone, ap->a_cnp->cn_pnbuf);
1743	return (0);
1744}
1745
1746/*
1747 * Calculate the logical to physical mapping if not done already,
1748 * then call the device strategy routine.
1749 *
1750 * In order to be able to swap to a file, the VOP_BMAP operation may not
1751 * deadlock on memory.  See ufs_bmap() for details.
1752 */
1753int
1754ufs_strategy(ap)
1755	struct vop_strategy_args /* {
1756		struct vnode *a_vp;
1757		struct buf *a_bp;
1758	} */ *ap;
1759{
1760	register struct buf *bp = ap->a_bp;
1761	register struct vnode *vp = ap->a_vp;
1762	register struct inode *ip;
1763	int error;
1764
1765	ip = VTOI(vp);
1766	if (vp->v_type == VBLK || vp->v_type == VCHR)
1767		panic("ufs_strategy: spec");
1768	if (bp->b_blkno == bp->b_lblkno) {
1769		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
1770		if (error) {
1771			bp->b_error = error;
1772			bp->b_flags |= B_ERROR;
1773			biodone(bp);
1774			return (error);
1775		}
1776		if ((long)bp->b_blkno == -1)
1777			vfs_bio_clrbuf(bp);
1778	}
1779	if ((long)bp->b_blkno == -1) {
1780		biodone(bp);
1781		return (0);
1782	}
1783	vp = ip->i_devvp;
1784	bp->b_dev = vp->v_rdev;
1785	VOP_STRATEGY(vp, bp);
1786	return (0);
1787}
1788
1789/*
1790 * Print out the contents of an inode.
1791 */
1792int
1793ufs_print(ap)
1794	struct vop_print_args /* {
1795		struct vnode *a_vp;
1796	} */ *ap;
1797{
1798	register struct vnode *vp = ap->a_vp;
1799	register struct inode *ip = VTOI(vp);
1800
1801	printf("tag VT_UFS, ino %lu, on dev %s (%d, %d)",
1802	    (u_long)ip->i_number, devtoname(ip->i_dev), major(ip->i_dev),
1803	    minor(ip->i_dev));
1804	if (vp->v_type == VFIFO)
1805		fifo_printinfo(vp);
1806	lockmgr_printinfo(&ip->i_lock);
1807	printf("\n");
1808	return (0);
1809}
1810
1811/*
1812 * Read wrapper for special devices.
1813 */
1814int
1815ufsspec_read(ap)
1816	struct vop_read_args /* {
1817		struct vnode *a_vp;
1818		struct uio *a_uio;
1819		int  a_ioflag;
1820		struct ucred *a_cred;
1821	} */ *ap;
1822{
1823	int error, resid;
1824	struct inode *ip;
1825	struct uio *uio;
1826
1827	uio = ap->a_uio;
1828	resid = uio->uio_resid;
1829	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap);
1830	/*
1831	 * The inode may have been revoked during the call, so it must not
1832	 * be accessed blindly here or in the other wrapper functions.
1833	 */
1834	ip = VTOI(ap->a_vp);
1835	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1836		ip->i_flag |= IN_ACCESS;
1837	return (error);
1838}
1839
1840/*
1841 * Write wrapper for special devices.
1842 */
1843int
1844ufsspec_write(ap)
1845	struct vop_write_args /* {
1846		struct vnode *a_vp;
1847		struct uio *a_uio;
1848		int  a_ioflag;
1849		struct ucred *a_cred;
1850	} */ *ap;
1851{
1852	int error, resid;
1853	struct inode *ip;
1854	struct uio *uio;
1855
1856	uio = ap->a_uio;
1857	resid = uio->uio_resid;
1858	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap);
1859	ip = VTOI(ap->a_vp);
1860	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1861		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1862	return (error);
1863}
1864
1865/*
1866 * Close wrapper for special devices.
1867 *
1868 * Update the times on the inode then do device close.
1869 */
1870int
1871ufsspec_close(ap)
1872	struct vop_close_args /* {
1873		struct vnode *a_vp;
1874		int  a_fflag;
1875		struct ucred *a_cred;
1876		struct proc *a_p;
1877	} */ *ap;
1878{
1879	struct vnode *vp = ap->a_vp;
1880
1881	simple_lock(&vp->v_interlock);
1882	if (vp->v_usecount > 1)
1883		ufs_itimes(vp);
1884	simple_unlock(&vp->v_interlock);
1885	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
1886}
1887
1888/*
1889 * Read wrapper for fifos.
1890 */
1891int
1892ufsfifo_read(ap)
1893	struct vop_read_args /* {
1894		struct vnode *a_vp;
1895		struct uio *a_uio;
1896		int  a_ioflag;
1897		struct ucred *a_cred;
1898	} */ *ap;
1899{
1900	int error, resid;
1901	struct inode *ip;
1902	struct uio *uio;
1903
1904	uio = ap->a_uio;
1905	resid = uio->uio_resid;
1906	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap);
1907	ip = VTOI(ap->a_vp);
1908	if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
1909	    (uio->uio_resid != resid || (error == 0 && resid != 0)))
1910		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1911	return (error);
1912}
1913
1914/*
1915 * Write wrapper for fifos.
1916 */
1917int
1918ufsfifo_write(ap)
1919	struct vop_write_args /* {
1920		struct vnode *a_vp;
1921		struct uio *a_uio;
1922		int  a_ioflag;
1923		struct ucred *a_cred;
1924	} */ *ap;
1925{
1926	int error, resid;
1927	struct inode *ip;
1928	struct uio *uio;
1929
1930	uio = ap->a_uio;
1931	resid = uio->uio_resid;
1932	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap);
1933	ip = VTOI(ap->a_vp);
1934	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1935		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1936	return (error);
1937}
1938
1939/*
1940 * Close wrapper for fifos.
1941 *
1942 * Update the times on the inode then do device close.
1943 */
1944int
1945ufsfifo_close(ap)
1946	struct vop_close_args /* {
1947		struct vnode *a_vp;
1948		int  a_fflag;
1949		struct ucred *a_cred;
1950		struct proc *a_p;
1951	} */ *ap;
1952{
1953	struct vnode *vp = ap->a_vp;
1954
1955	simple_lock(&vp->v_interlock);
1956	if (vp->v_usecount > 1)
1957		ufs_itimes(vp);
1958	simple_unlock(&vp->v_interlock);
1959	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
1960}
1961
1962/*
1963 * Return POSIX pathconf information applicable to ufs filesystems.
1964 */
1965int
1966ufs_pathconf(ap)
1967	struct vop_pathconf_args /* {
1968		struct vnode *a_vp;
1969		int a_name;
1970		int *a_retval;
1971	} */ *ap;
1972{
1973
1974	switch (ap->a_name) {
1975	case _PC_LINK_MAX:
1976		*ap->a_retval = LINK_MAX;
1977		return (0);
1978	case _PC_NAME_MAX:
1979		*ap->a_retval = NAME_MAX;
1980		return (0);
1981	case _PC_PATH_MAX:
1982		*ap->a_retval = PATH_MAX;
1983		return (0);
1984	case _PC_PIPE_BUF:
1985		*ap->a_retval = PIPE_BUF;
1986		return (0);
1987	case _PC_CHOWN_RESTRICTED:
1988		*ap->a_retval = 1;
1989		return (0);
1990	case _PC_NO_TRUNC:
1991		*ap->a_retval = 1;
1992		return (0);
1993	default:
1994		return (EINVAL);
1995	}
1996	/* NOTREACHED */
1997}
1998
1999/*
2000 * Advisory record locking support
2001 */
2002int
2003ufs_advlock(ap)
2004	struct vop_advlock_args /* {
2005		struct vnode *a_vp;
2006		caddr_t  a_id;
2007		int  a_op;
2008		struct flock *a_fl;
2009		int  a_flags;
2010	} */ *ap;
2011{
2012	register struct inode *ip = VTOI(ap->a_vp);
2013
2014	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
2015}
2016
2017/*
2018 * Initialize the vnode associated with a new inode, handle aliased
2019 * vnodes.
2020 */
2021int
2022ufs_vinit(mntp, specops, fifoops, vpp)
2023	struct mount *mntp;
2024	vop_t **specops;
2025	vop_t **fifoops;
2026	struct vnode **vpp;
2027{
2028	struct inode *ip;
2029	struct vnode *vp, *nvp;
2030	struct timeval tv;
2031
2032	vp = *vpp;
2033	ip = VTOI(vp);
2034	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2035	case VCHR:
2036	case VBLK:
2037		vp->v_op = specops;
2038		nvp = checkalias(vp, ip->i_rdev, mntp);
2039		if (nvp) {
2040			/*
2041			 * Discard unneeded vnode, but save its inode.
2042			 * Note that the lock is carried over in the inode
2043			 * to the replacement vnode.
2044			 */
2045			nvp->v_data = vp->v_data;
2046			vp->v_data = NULL;
2047			vp->v_op = spec_vnodeop_p;
2048			vrele(vp);
2049			vgone(vp);
2050			/*
2051			 * Reinitialize aliased inode.
2052			 */
2053			vp = nvp;
2054			ip->i_vnode = vp;
2055		}
2056		break;
2057	case VFIFO:
2058		vp->v_op = fifoops;
2059		break;
2060	default:
2061		break;
2062
2063	}
2064	if (ip->i_number == ROOTINO)
2065		vp->v_flag |= VROOT;
2066	/*
2067	 * Initialize modrev times
2068	 */
2069	getmicrouptime(&tv);
2070	SETHIGH(ip->i_modrev, tv.tv_sec);
2071	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
2072	*vpp = vp;
2073	return (0);
2074}
2075
2076/*
2077 * Allocate a new inode.
2078 */
2079int
2080ufs_makeinode(mode, dvp, vpp, cnp)
2081	int mode;
2082	struct vnode *dvp;
2083	struct vnode **vpp;
2084	struct componentname *cnp;
2085{
2086	register struct inode *ip, *pdir;
2087	struct direct newdir;
2088	struct vnode *tvp;
2089	int error;
2090
2091	pdir = VTOI(dvp);
2092#ifdef DIAGNOSTIC
2093	if ((cnp->cn_flags & HASBUF) == 0)
2094		panic("ufs_makeinode: no name");
2095#endif
2096	*vpp = NULL;
2097	if ((mode & IFMT) == 0)
2098		mode |= IFREG;
2099
2100	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2101	if (error) {
2102		zfree(namei_zone, cnp->cn_pnbuf);
2103		return (error);
2104	}
2105	ip = VTOI(tvp);
2106	ip->i_gid = pdir->i_gid;
2107#ifdef SUIDDIR
2108	{
2109#ifdef QUOTA
2110		struct ucred ucred, *ucp;
2111		ucp = cnp->cn_cred;
2112#endif			I
2113		/*
2114		 * If we are not the owner of the directory,
2115		 * and we are hacking owners here, (only do this where told to)
2116		 * and we are not giving it TOO root, (would subvert quotas)
2117		 * then go ahead and give it to the other user.
2118		 * Note that this drops off the execute bits for security.
2119		 */
2120		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2121		    (pdir->i_mode & ISUID) &&
2122		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2123			ip->i_uid = pdir->i_uid;
2124			mode &= ~07111;
2125#ifdef QUOTA
2126			/*
2127			 * Make sure the correct user gets charged
2128			 * for the space.
2129			 * Quickly knock up a dummy credential for the victim.
2130			 * XXX This seems to never be accessed out of our
2131			 * context so a stack variable is ok.
2132			 */
2133			ucred.cr_ref = 1;
2134			ucred.cr_uid = ip->i_uid;
2135			ucred.cr_ngroups = 1;
2136			ucred.cr_groups[0] = pdir->i_gid;
2137			ucp = &ucred;
2138#endif
2139		} else
2140			ip->i_uid = cnp->cn_cred->cr_uid;
2141
2142#ifdef QUOTA
2143		if ((error = getinoquota(ip)) ||
2144	    	    (error = chkiq(ip, 1, ucp, 0))) {
2145			zfree(namei_zone, cnp->cn_pnbuf);
2146			UFS_VFREE(tvp, ip->i_number, mode);
2147			vput(tvp);
2148			return (error);
2149		}
2150#endif
2151	}
2152#else	/* !SUIDDIR */
2153	ip->i_uid = cnp->cn_cred->cr_uid;
2154#ifdef QUOTA
2155	if ((error = getinoquota(ip)) ||
2156	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2157		zfree(namei_zone, cnp->cn_pnbuf);
2158		UFS_VFREE(tvp, ip->i_number, mode);
2159		vput(tvp);
2160		return (error);
2161	}
2162#endif
2163#endif	/* !SUIDDIR */
2164	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2165	ip->i_mode = mode;
2166	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2167	ip->i_effnlink = 1;
2168	ip->i_nlink = 1;
2169	if (DOINGSOFTDEP(tvp))
2170		softdep_increase_linkcnt(ip);
2171	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2172	    suser_xxx(cnp->cn_cred, 0, 0))
2173		ip->i_mode &= ~ISGID;
2174
2175	if (cnp->cn_flags & ISWHITEOUT)
2176		ip->i_flags |= UF_OPAQUE;
2177
2178	/*
2179	 * Make sure inode goes to disk before directory entry.
2180	 */
2181	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2182	if (error)
2183		goto bad;
2184	ufs_makedirentry(ip, cnp, &newdir);
2185	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
2186	if (error)
2187		goto bad;
2188
2189	if ((cnp->cn_flags & SAVESTART) == 0)
2190		zfree(namei_zone, cnp->cn_pnbuf);
2191	*vpp = tvp;
2192	return (0);
2193
2194bad:
2195	/*
2196	 * Write error occurred trying to update the inode
2197	 * or the directory so must deallocate the inode.
2198	 */
2199	zfree(namei_zone, cnp->cn_pnbuf);
2200	ip->i_effnlink = 0;
2201	ip->i_nlink = 0;
2202	ip->i_flag |= IN_CHANGE;
2203	vput(tvp);
2204	return (error);
2205}
2206
2207static int
2208ufs_missingop(ap)
2209	struct vop_generic_args *ap;
2210{
2211
2212	panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name);
2213	return (EOPNOTSUPP);
2214}
2215
2216/* Global vfs data structures for ufs. */
2217static vop_t **ufs_vnodeop_p;
2218static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
2219	{ &vop_default_desc,		(vop_t *) vop_defaultop },
2220	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2221	{ &vop_read_desc,		(vop_t *) ufs_missingop },
2222	{ &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
2223	{ &vop_write_desc,		(vop_t *) ufs_missingop },
2224	{ &vop_abortop_desc,		(vop_t *) ufs_abortop },
2225	{ &vop_access_desc,		(vop_t *) ufs_access },
2226	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
2227	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
2228	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
2229	{ &vop_close_desc,		(vop_t *) ufs_close },
2230	{ &vop_create_desc,		(vop_t *) ufs_create },
2231	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2232	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2233	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2234	{ &vop_link_desc,		(vop_t *) ufs_link },
2235	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2236	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
2237	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
2238	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
2239	{ &vop_mmap_desc,		(vop_t *) ufs_mmap },
2240	{ &vop_open_desc,		(vop_t *) ufs_open },
2241	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
2242	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
2243	{ &vop_print_desc,		(vop_t *) ufs_print },
2244	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
2245	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
2246	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2247	{ &vop_remove_desc,		(vop_t *) ufs_remove },
2248	{ &vop_rename_desc,		(vop_t *) ufs_rename },
2249	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
2250	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2251	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
2252	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
2253	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2254	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
2255	{ NULL, NULL }
2256};
2257static struct vnodeopv_desc ufs_vnodeop_opv_desc =
2258	{ &ufs_vnodeop_p, ufs_vnodeop_entries };
2259
2260static vop_t **ufs_specop_p;
2261static struct vnodeopv_entry_desc ufs_specop_entries[] = {
2262	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
2263	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2264	{ &vop_access_desc,		(vop_t *) ufs_access },
2265	{ &vop_close_desc,		(vop_t *) ufsspec_close },
2266	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2267	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2268	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2269	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2270	{ &vop_print_desc,		(vop_t *) ufs_print },
2271	{ &vop_read_desc,		(vop_t *) ufsspec_read },
2272	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2273	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2274	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2275	{ &vop_write_desc,		(vop_t *) ufsspec_write },
2276	{ NULL, NULL }
2277};
2278static struct vnodeopv_desc ufs_specop_opv_desc =
2279	{ &ufs_specop_p, ufs_specop_entries };
2280
2281static vop_t **ufs_fifoop_p;
2282static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
2283	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
2284	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2285	{ &vop_access_desc,		(vop_t *) ufs_access },
2286	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
2287	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2288	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2289	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2290	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2291	{ &vop_print_desc,		(vop_t *) ufs_print },
2292	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
2293	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2294	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2295	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2296	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
2297	{ NULL, NULL }
2298};
2299static struct vnodeopv_desc ufs_fifoop_opv_desc =
2300	{ &ufs_fifoop_p, ufs_fifoop_entries };
2301
/*
 * Hand each of the three vector descriptors defined above to
 * VNODEOP_SET.  NOTE(review): presumably this registers the descriptor
 * so that its vector pointer is filled in during VFS initialization --
 * confirm against the macro definition.
 */
VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);
2305
2306int
2307ufs_vnoperate(ap)
2308	struct vop_generic_args /* {
2309		struct vnodeop_desc *a_desc;
2310	} */ *ap;
2311{
2312	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2313}
2314
2315int
2316ufs_vnoperatefifo(ap)
2317	struct vop_generic_args /* {
2318		struct vnodeop_desc *a_desc;
2319	} */ *ap;
2320{
2321	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
2322}
2323
2324int
2325ufs_vnoperatespec(ap)
2326	struct vop_generic_args /* {
2327		struct vnodeop_desc *a_desc;
2328	} */ *ap;
2329{
2330	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
2331}
2332
2333
2334