ufs_vnops.c revision 3420
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)ufs_vnops.c	8.10 (Berkeley) 4/1/94
39 * $Id: ufs_vnops.c,v 1.8 1994/10/06 21:07:04 davidg Exp $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/namei.h>
45#include <sys/resourcevar.h>
46#include <sys/kernel.h>
47#include <sys/file.h>
48#include <sys/stat.h>
49#include <sys/buf.h>
50#include <sys/proc.h>
51#include <sys/conf.h>
52#include <sys/mount.h>
53#include <sys/vnode.h>
54#include <sys/malloc.h>
55#include <sys/dirent.h>
56#include <sys/lockf.h>
57
58#include <vm/vm.h>
59
60#include <miscfs/specfs/specdev.h>
61
62#include <ufs/ufs/quota.h>
63#include <ufs/ufs/inode.h>
64#include <ufs/ufs/dir.h>
65#include <ufs/ufs/ufsmount.h>
66#include <ufs/ufs/ufs_extern.h>
67
68static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *));
69static int ufs_chown
70	__P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *));
71
72union _qcvt {
73	quad_t qcvt;
74	long val[2];
75};
76#define SETHIGH(q, h) { \
77	union _qcvt tmp; \
78	tmp.qcvt = (q); \
79	tmp.val[_QUAD_HIGHWORD] = (h); \
80	(q) = tmp.qcvt; \
81}
82#define SETLOW(q, l) { \
83	union _qcvt tmp; \
84	tmp.qcvt = (q); \
85	tmp.val[_QUAD_LOWWORD] = (l); \
86	(q) = tmp.qcvt; \
87}
88
89/*
90 * Create a regular file
91 */
92int
93ufs_create(ap)
94	struct vop_create_args /* {
95		struct vnode *a_dvp;
96		struct vnode **a_vpp;
97		struct componentname *a_cnp;
98		struct vattr *a_vap;
99	} */ *ap;
100{
101	int error;
102
103	error =
104	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
105	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
106	if (error)
107		return (error);
108	return (0);
109}
110
111/*
112 * Mknod vnode call
113 */
114/* ARGSUSED */
115int
116ufs_mknod(ap)
117	struct vop_mknod_args /* {
118		struct vnode *a_dvp;
119		struct vnode **a_vpp;
120		struct componentname *a_cnp;
121		struct vattr *a_vap;
122	} */ *ap;
123{
124	register struct vattr *vap = ap->a_vap;
125	register struct vnode **vpp = ap->a_vpp;
126	register struct inode *ip;
127	int error;
128
129	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
130	    ap->a_dvp, vpp, ap->a_cnp);
131	if (error)
132		return (error);
133	ip = VTOI(*vpp);
134	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
135	if (vap->va_rdev != VNOVAL) {
136		/*
137		 * Want to be able to use this to make badblock
138		 * inodes, so don't truncate the dev number.
139		 */
140		ip->i_rdev = vap->va_rdev;
141	}
142	/*
143	 * Remove inode so that it will be reloaded by VFS_VGET and
144	 * checked to see if it is an alias of an existing entry in
145	 * the inode cache.
146	 */
147	vput(*vpp);
148	(*vpp)->v_type = VNON;
149	vgone(*vpp);
150	*vpp = 0;
151	return (0);
152}
153
154/*
155 * Open called.
156 *
157 * Nothing to do.
158 */
159/* ARGSUSED */
160int
161ufs_open(ap)
162	struct vop_open_args /* {
163		struct vnode *a_vp;
164		int  a_mode;
165		struct ucred *a_cred;
166		struct proc *a_p;
167	} */ *ap;
168{
169
170	/*
171	 * Files marked append-only must be opened for appending.
172	 */
173	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
174	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
175		return (EPERM);
176	return (0);
177}
178
179/*
180 * Close called.
181 *
182 * Update the times on the inode.
183 */
184/* ARGSUSED */
185int
186ufs_close(ap)
187	struct vop_close_args /* {
188		struct vnode *a_vp;
189		int  a_fflag;
190		struct ucred *a_cred;
191		struct proc *a_p;
192	} */ *ap;
193{
194	register struct vnode *vp = ap->a_vp;
195	register struct inode *ip = VTOI(vp);
196
197	if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
198		ITIMES(ip, &time, &time);
199	return (0);
200}
201
202int
203ufs_access(ap)
204	struct vop_access_args /* {
205		struct vnode *a_vp;
206		int  a_mode;
207		struct ucred *a_cred;
208		struct proc *a_p;
209	} */ *ap;
210{
211	register struct vnode *vp = ap->a_vp;
212	register struct inode *ip = VTOI(vp);
213	register struct ucred *cred = ap->a_cred;
214	mode_t mask, mode = ap->a_mode;
215	register gid_t *gp;
216	int i;
217
218#ifdef DIAGNOSTIC
219	if (!VOP_ISLOCKED(vp)) {
220		vprint("ufs_access: not locked", vp);
221		panic("ufs_access: not locked");
222	}
223#endif
224#ifdef QUOTA
225	if (mode & VWRITE)
226		switch (vp->v_type) {
227		case VDIR:
228		case VLNK:
229		case VREG:
230			if (error = getinoquota(ip))
231				return (error);
232			break;
233		}
234#endif
235
236	/* If immutable bit set, nobody gets to write it. */
237	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
238		return (EPERM);
239
240	/* Otherwise, user id 0 always gets access. */
241	if (cred->cr_uid == 0)
242		return (0);
243
244	mask = 0;
245
246	/* Otherwise, check the owner. */
247	if (cred->cr_uid == ip->i_uid) {
248		if (mode & VEXEC)
249			mask |= S_IXUSR;
250		if (mode & VREAD)
251			mask |= S_IRUSR;
252		if (mode & VWRITE)
253			mask |= S_IWUSR;
254		return ((ip->i_mode & mask) == mask ? 0 : EACCES);
255	}
256
257	/* Otherwise, check the groups. */
258	for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
259		if (ip->i_gid == *gp) {
260			if (mode & VEXEC)
261				mask |= S_IXGRP;
262			if (mode & VREAD)
263				mask |= S_IRGRP;
264			if (mode & VWRITE)
265				mask |= S_IWGRP;
266			return ((ip->i_mode & mask) == mask ? 0 : EACCES);
267		}
268
269	/* Otherwise, check everyone else. */
270	if (mode & VEXEC)
271		mask |= S_IXOTH;
272	if (mode & VREAD)
273		mask |= S_IROTH;
274	if (mode & VWRITE)
275		mask |= S_IWOTH;
276	return ((ip->i_mode & mask) == mask ? 0 : EACCES);
277}
278
279/* ARGSUSED */
280int
281ufs_getattr(ap)
282	struct vop_getattr_args /* {
283		struct vnode *a_vp;
284		struct vattr *a_vap;
285		struct ucred *a_cred;
286		struct proc *a_p;
287	} */ *ap;
288{
289	register struct vnode *vp = ap->a_vp;
290	register struct inode *ip = VTOI(vp);
291	register struct vattr *vap = ap->a_vap;
292
293	ITIMES(ip, &time, &time);
294	/*
295	 * Copy from inode table
296	 */
297	vap->va_fsid = ip->i_dev;
298	vap->va_fileid = ip->i_number;
299	vap->va_mode = ip->i_mode & ~IFMT;
300	vap->va_nlink = ip->i_nlink;
301	vap->va_uid = ip->i_uid;
302	vap->va_gid = ip->i_gid;
303	vap->va_rdev = (dev_t)ip->i_rdev;
304	vap->va_size = ip->i_din.di_size;
305	vap->va_atime = ip->i_atime;
306	vap->va_mtime = ip->i_mtime;
307	vap->va_ctime = ip->i_ctime;
308	vap->va_flags = ip->i_flags;
309	vap->va_gen = ip->i_gen;
310	/* this doesn't belong here */
311	if (vp->v_type == VBLK)
312		vap->va_blocksize = BLKDEV_IOSIZE;
313	else if (vp->v_type == VCHR)
314		vap->va_blocksize = MAXBSIZE;
315	else
316		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
317	vap->va_bytes = dbtob(ip->i_blocks);
318	vap->va_type = vp->v_type;
319	vap->va_filerev = ip->i_modrev;
320	return (0);
321}
322
323/*
324 * Set attribute vnode op. called from several syscalls
325 */
326int
327ufs_setattr(ap)
328	struct vop_setattr_args /* {
329		struct vnode *a_vp;
330		struct vattr *a_vap;
331		struct ucred *a_cred;
332		struct proc *a_p;
333	} */ *ap;
334{
335	register struct vattr *vap = ap->a_vap;
336	register struct vnode *vp = ap->a_vp;
337	register struct inode *ip = VTOI(vp);
338	register struct ucred *cred = ap->a_cred;
339	register struct proc *p = ap->a_p;
340	struct timeval atimeval, mtimeval;
341	int error;
342
343	/*
344	 * Check for unsettable attributes.
345	 */
346	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
347	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
348	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
349	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
350		return (EINVAL);
351	}
352	if (vap->va_flags != VNOVAL) {
353		if (cred->cr_uid != ip->i_uid &&
354		    (error = suser(cred, &p->p_acflag)))
355			return (error);
356		if (cred->cr_uid == 0) {
357			if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) &&
358			    securelevel > 0)
359				return (EPERM);
360			ip->i_flags = vap->va_flags;
361		} else {
362			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND))
363				return (EPERM);
364			ip->i_flags &= SF_SETTABLE;
365			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
366		}
367		ip->i_flag |= IN_CHANGE;
368		if (vap->va_flags & (IMMUTABLE | APPEND))
369			return (0);
370	}
371	if (ip->i_flags & (IMMUTABLE | APPEND))
372		return (EPERM);
373	/*
374	 * Go through the fields and update iff not VNOVAL.
375	 */
376	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
377		error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
378		if (error)
379			return (error);
380	}
381	if (vap->va_size != VNOVAL) {
382		if (vp->v_type == VDIR)
383			return (EISDIR);
384		error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p);
385		if (error)
386			return (error);
387	}
388	ip = VTOI(vp);
389	if (vap->va_atime.ts_sec != VNOVAL || vap->va_mtime.ts_sec != VNOVAL) {
390		if (cred->cr_uid != ip->i_uid &&
391		    (error = suser(cred, &p->p_acflag)) &&
392		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
393		    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
394			return (error);
395		if (vap->va_atime.ts_sec != VNOVAL)
396			ip->i_flag |= IN_ACCESS;
397		if (vap->va_mtime.ts_sec != VNOVAL)
398			ip->i_flag |= IN_CHANGE | IN_UPDATE;
399		atimeval.tv_sec = vap->va_atime.ts_sec;
400		atimeval.tv_usec = vap->va_atime.ts_nsec / 1000;
401		mtimeval.tv_sec = vap->va_mtime.ts_sec;
402		mtimeval.tv_usec = vap->va_mtime.ts_nsec / 1000;
403		error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1);
404		if (error)
405			return (error);
406	}
407	error = 0;
408	if (vap->va_mode != (mode_t)VNOVAL)
409		error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
410	return (error);
411}
412
413/*
414 * Change the mode on a file.
415 * Inode must be locked before calling.
416 */
417static int
418ufs_chmod(vp, mode, cred, p)
419	register struct vnode *vp;
420	register int mode;
421	register struct ucred *cred;
422	struct proc *p;
423{
424	register struct inode *ip = VTOI(vp);
425	int error;
426
427	if (cred->cr_uid != ip->i_uid) {
428	    error = suser(cred, &p->p_acflag);
429	    if (error)
430		return (error);
431	}
432	if (cred->cr_uid) {
433		if (vp->v_type != VDIR && (mode & S_ISTXT))
434			return (EFTYPE);
435		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
436			return (EPERM);
437	}
438	ip->i_mode &= ~ALLPERMS;
439	ip->i_mode |= (mode & ALLPERMS);
440	ip->i_flag |= IN_CHANGE;
441	if ((vp->v_flag & VTEXT) && (ip->i_mode & S_ISTXT) == 0)
442		(void) vnode_pager_uncache(vp);
443	return (0);
444}
445
446/*
447 * Perform chown operation on inode ip;
448 * inode must be locked prior to call.
449 */
450static int
451ufs_chown(vp, uid, gid, cred, p)
452	register struct vnode *vp;
453	uid_t uid;
454	gid_t gid;
455	struct ucred *cred;
456	struct proc *p;
457{
458	register struct inode *ip = VTOI(vp);
459	uid_t ouid;
460	gid_t ogid;
461	int error = 0;
462#ifdef QUOTA
463	register int i;
464	long change;
465#endif
466
467	if (uid == (uid_t)VNOVAL)
468		uid = ip->i_uid;
469	if (gid == (gid_t)VNOVAL)
470		gid = ip->i_gid;
471	/*
472	 * If we don't own the file, are trying to change the owner
473	 * of the file, or are not a member of the target group,
474	 * the caller must be superuser or the call fails.
475	 */
476	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
477	    !groupmember((gid_t)gid, cred)) &&
478	    (error = suser(cred, &p->p_acflag)))
479		return (error);
480	ogid = ip->i_gid;
481	ouid = ip->i_uid;
482#ifdef QUOTA
483	if (error = getinoquota(ip))
484		return (error);
485	if (ouid == uid) {
486		dqrele(vp, ip->i_dquot[USRQUOTA]);
487		ip->i_dquot[USRQUOTA] = NODQUOT;
488	}
489	if (ogid == gid) {
490		dqrele(vp, ip->i_dquot[GRPQUOTA]);
491		ip->i_dquot[GRPQUOTA] = NODQUOT;
492	}
493	change = ip->i_blocks;
494	(void) chkdq(ip, -change, cred, CHOWN);
495	(void) chkiq(ip, -1, cred, CHOWN);
496	for (i = 0; i < MAXQUOTAS; i++) {
497		dqrele(vp, ip->i_dquot[i]);
498		ip->i_dquot[i] = NODQUOT;
499	}
500#endif
501	ip->i_gid = gid;
502	ip->i_uid = uid;
503#ifdef QUOTA
504	if ((error = getinoquota(ip)) == 0) {
505		if (ouid == uid) {
506			dqrele(vp, ip->i_dquot[USRQUOTA]);
507			ip->i_dquot[USRQUOTA] = NODQUOT;
508		}
509		if (ogid == gid) {
510			dqrele(vp, ip->i_dquot[GRPQUOTA]);
511			ip->i_dquot[GRPQUOTA] = NODQUOT;
512		}
513		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
514			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
515				goto good;
516			else
517				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
518		}
519		for (i = 0; i < MAXQUOTAS; i++) {
520			dqrele(vp, ip->i_dquot[i]);
521			ip->i_dquot[i] = NODQUOT;
522		}
523	}
524	ip->i_gid = ogid;
525	ip->i_uid = ouid;
526	if (getinoquota(ip) == 0) {
527		if (ouid == uid) {
528			dqrele(vp, ip->i_dquot[USRQUOTA]);
529			ip->i_dquot[USRQUOTA] = NODQUOT;
530		}
531		if (ogid == gid) {
532			dqrele(vp, ip->i_dquot[GRPQUOTA]);
533			ip->i_dquot[GRPQUOTA] = NODQUOT;
534		}
535		(void) chkdq(ip, change, cred, FORCE|CHOWN);
536		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
537		(void) getinoquota(ip);
538	}
539	return (error);
540good:
541	if (getinoquota(ip))
542		panic("chown: lost quota");
543#endif /* QUOTA */
544	if (ouid != uid || ogid != gid)
545		ip->i_flag |= IN_CHANGE;
546	if (ouid != uid && cred->cr_uid != 0)
547		ip->i_mode &= ~ISUID;
548	if (ogid != gid && cred->cr_uid != 0)
549		ip->i_mode &= ~ISGID;
550	return (0);
551}
552
/*
 * Ioctl on a file.
 *
 * No ioctl commands are implemented here; every request is answered
 * with ENOTTY and the arguments are not examined.
 */
/* ARGSUSED */
int
ufs_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	return ENOTTY;
}
568
/*
 * Select on a file.
 *
 * Always reports the file as ready (returns 1) without looking at the
 * arguments.  We should really check to see if I/O is possible.
 */
/* ARGSUSED */
int
ufs_select(ap)
	struct vop_select_args /* {
		struct vnode *a_vp;
		int  a_which;
		int  a_fflags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	return 1;
}
586
/*
 * Mmap a file.
 *
 * NB Currently unsupported: every call fails with EINVAL and the
 * arguments are not examined.
 */
/* ARGSUSED */
int
ufs_mmap(ap)
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		int  a_fflags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	return EINVAL;
}
605
/*
 * Seek on a file.
 *
 * There is nothing to do for a seek, so success (0) is reported
 * without looking at the arguments.
 */
/* ARGSUSED */
int
ufs_seek(ap)
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t  a_oldoff;
		off_t  a_newoff;
		struct ucred *a_cred;
	} */ *ap;
{

	return 0;
}
624
625int
626ufs_remove(ap)
627	struct vop_remove_args /* {
628		struct vnode *a_dvp;
629		struct vnode *a_vp;
630		struct componentname *a_cnp;
631	} */ *ap;
632{
633	register struct inode *ip;
634	register struct vnode *vp = ap->a_vp;
635	register struct vnode *dvp = ap->a_dvp;
636	int error;
637
638	ip = VTOI(vp);
639	if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
640	    (VTOI(dvp)->i_flags & APPEND)) {
641		error = EPERM;
642		goto out;
643	}
644	if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) {
645		ip->i_nlink--;
646		ip->i_flag |= IN_CHANGE;
647	}
648out:
649	if (dvp == vp)
650		vrele(vp);
651	else
652		vput(vp);
653	vput(dvp);
654	return (error);
655}
656
657/*
658 * link vnode call
659 */
660int
661ufs_link(ap)
662	struct vop_link_args /* {
663		struct vnode *a_vp;
664		struct vnode *a_tdvp;
665		struct componentname *a_cnp;
666	} */ *ap;
667{
668	register struct vnode *vp = ap->a_vp;
669	register struct vnode *tdvp = ap->a_tdvp;
670	register struct componentname *cnp = ap->a_cnp;
671	register struct inode *ip;
672	struct timeval tv;
673	int error;
674
675#ifdef DIAGNOSTIC
676	if ((cnp->cn_flags & HASBUF) == 0)
677		panic("ufs_link: no name");
678#endif
679	if (vp->v_mount != tdvp->v_mount) {
680		VOP_ABORTOP(vp, cnp);
681		error = EXDEV;
682		goto out2;
683	}
684	if (vp != tdvp && (error = VOP_LOCK(tdvp))) {
685		VOP_ABORTOP(vp, cnp);
686		goto out2;
687	}
688	ip = VTOI(tdvp);
689	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
690		VOP_ABORTOP(vp, cnp);
691		error = EMLINK;
692		goto out1;
693	}
694	if (ip->i_flags & (IMMUTABLE | APPEND)) {
695		VOP_ABORTOP(vp, cnp);
696		error = EPERM;
697		goto out1;
698	}
699	ip->i_nlink++;
700	ip->i_flag |= IN_CHANGE;
701	tv = time;
702	error = VOP_UPDATE(tdvp, &tv, &tv, 1);
703	if (!error)
704		error = ufs_direnter(ip, vp, cnp);
705	if (error) {
706		ip->i_nlink--;
707		ip->i_flag |= IN_CHANGE;
708	}
709	FREE(cnp->cn_pnbuf, M_NAMEI);
710out1:
711	if (vp != tdvp)
712		VOP_UNLOCK(tdvp);
713out2:
714	vput(vp);
715	return (error);
716}
717
718
719/*
720 * Rename system call.
721 * 	rename("foo", "bar");
722 * is essentially
723 *	unlink("bar");
724 *	link("foo", "bar");
725 *	unlink("foo");
726 * but ``atomically''.  Can't do full commit without saving state in the
727 * inode on disk which isn't feasible at this time.  Best we can do is
728 * always guarantee the target exists.
729 *
730 * Basic algorithm is:
731 *
732 * 1) Bump link count on source while we're linking it to the
733 *    target.  This also ensure the inode won't be deleted out
734 *    from underneath us while we work (it may be truncated by
735 *    a concurrent `trunc' or `open' for creation).
736 * 2) Link source to destination.  If destination already exists,
737 *    delete it first.
738 * 3) Unlink source reference to inode if still around. If a
739 *    directory was moved and the parent of the destination
740 *    is different from the source, patch the ".." entry in the
741 *    directory.
742 */
743int
744ufs_rename(ap)
745	struct vop_rename_args  /* {
746		struct vnode *a_fdvp;
747		struct vnode *a_fvp;
748		struct componentname *a_fcnp;
749		struct vnode *a_tdvp;
750		struct vnode *a_tvp;
751		struct componentname *a_tcnp;
752	} */ *ap;
753{
754	struct vnode *tvp = ap->a_tvp;
755	register struct vnode *tdvp = ap->a_tdvp;
756	struct vnode *fvp = ap->a_fvp;
757	register struct vnode *fdvp = ap->a_fdvp;
758	register struct componentname *tcnp = ap->a_tcnp;
759	register struct componentname *fcnp = ap->a_fcnp;
760	register struct inode *ip, *xp, *dp;
761	struct dirtemplate dirbuf;
762	struct timeval tv;
763	int doingdirectory = 0, oldparent = 0, newparent = 0;
764	int error = 0;
765	u_char namlen;
766
767#ifdef DIAGNOSTIC
768	if ((tcnp->cn_flags & HASBUF) == 0 ||
769	    (fcnp->cn_flags & HASBUF) == 0)
770		panic("ufs_rename: no name");
771#endif
772	/*
773	 * Check for cross-device rename.
774	 */
775	if ((fvp->v_mount != tdvp->v_mount) ||
776	    (tvp && (fvp->v_mount != tvp->v_mount))) {
777		error = EXDEV;
778abortit:
779		VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
780		if (tdvp == tvp)
781			vrele(tdvp);
782		else
783			vput(tdvp);
784		if (tvp)
785			vput(tvp);
786		VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
787		vrele(fdvp);
788		vrele(fvp);
789		return (error);
790	}
791
792	/*
793	 * Check if just deleting a link name.
794	 */
795	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
796	    (VTOI(tdvp)->i_flags & APPEND))) {
797		error = EPERM;
798		goto abortit;
799	}
800	if (fvp == tvp) {
801		if (fvp->v_type == VDIR) {
802			error = EINVAL;
803			goto abortit;
804		}
805		VOP_ABORTOP(fdvp, fcnp);
806		vrele(fdvp);
807		vrele(fvp);
808		vput(tdvp);
809		vput(tvp);
810		tcnp->cn_flags &= ~MODMASK;
811		tcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
812		if ((tcnp->cn_flags & SAVESTART) == 0)
813			panic("ufs_rename: lost from startdir");
814		tcnp->cn_nameiop = DELETE;
815		(void) relookup(tdvp, &tvp, tcnp);
816		return (VOP_REMOVE(tdvp, tvp, tcnp));
817	}
818	error = VOP_LOCK(fvp);
819	if (error)
820		goto abortit;
821	dp = VTOI(fdvp);
822	ip = VTOI(fvp);
823	if ((ip->i_flags & (IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) {
824		VOP_UNLOCK(fvp);
825		error = EPERM;
826		goto abortit;
827	}
828	if ((ip->i_mode & IFMT) == IFDIR) {
829		/*
830		 * Avoid ".", "..", and aliases of "." for obvious reasons.
831		 */
832		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
833		    dp == ip || (fcnp->cn_flags&ISDOTDOT) ||
834		    (ip->i_flag & IN_RENAME)) {
835			VOP_UNLOCK(fvp);
836			error = EINVAL;
837			goto abortit;
838		}
839		ip->i_flag |= IN_RENAME;
840		oldparent = dp->i_number;
841		doingdirectory++;
842	}
843	vrele(fdvp);
844
845	/*
846	 * When the target exists, both the directory
847	 * and target vnodes are returned locked.
848	 */
849	dp = VTOI(tdvp);
850	xp = NULL;
851	if (tvp)
852		xp = VTOI(tvp);
853
854	/*
855	 * 1) Bump link count while we're moving stuff
856	 *    around.  If we crash somewhere before
857	 *    completing our work, the link count
858	 *    may be wrong, but correctable.
859	 */
860	ip->i_nlink++;
861	ip->i_flag |= IN_CHANGE;
862	tv = time;
863	error = VOP_UPDATE(fvp, &tv, &tv, 1);
864	if (error) {
865		VOP_UNLOCK(fvp);
866		goto bad;
867	}
868
869	/*
870	 * If ".." must be changed (ie the directory gets a new
871	 * parent) then the source directory must not be in the
872	 * directory heirarchy above the target, as this would
873	 * orphan everything below the source directory. Also
874	 * the user must have write permission in the source so
875	 * as to be able to change "..". We must repeat the call
876	 * to namei, as the parent directory is unlocked by the
877	 * call to checkpath().
878	 */
879	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
880	VOP_UNLOCK(fvp);
881	if (oldparent != dp->i_number)
882		newparent = dp->i_number;
883	if (doingdirectory && newparent) {
884		if (error)	/* write access check above */
885			goto bad;
886		if (xp != NULL)
887			vput(tvp);
888		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
889		if (error)
890			goto out;
891		if ((tcnp->cn_flags & SAVESTART) == 0)
892			panic("ufs_rename: lost to startdir");
893		error = relookup(tdvp, &tvp, tcnp);
894		if (error)
895			goto out;
896		dp = VTOI(tdvp);
897		xp = NULL;
898		if (tvp)
899			xp = VTOI(tvp);
900	}
901	/*
902	 * 2) If target doesn't exist, link the target
903	 *    to the source and unlink the source.
904	 *    Otherwise, rewrite the target directory
905	 *    entry to reference the source inode and
906	 *    expunge the original entry's existence.
907	 */
908	if (xp == NULL) {
909		if (dp->i_dev != ip->i_dev)
910			panic("rename: EXDEV");
911		/*
912		 * Account for ".." in new directory.
913		 * When source and destination have the same
914		 * parent we don't fool with the link count.
915		 */
916		if (doingdirectory && newparent) {
917			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
918				error = EMLINK;
919				goto bad;
920			}
921			dp->i_nlink++;
922			dp->i_flag |= IN_CHANGE;
923			error = VOP_UPDATE(tdvp, &tv, &tv, 1);
924			if (error)
925				goto bad;
926		}
927		error = ufs_direnter(ip, tdvp, tcnp);
928		if (error) {
929			if (doingdirectory && newparent) {
930				dp->i_nlink--;
931				dp->i_flag |= IN_CHANGE;
932				(void)VOP_UPDATE(tdvp, &tv, &tv, 1);
933			}
934			goto bad;
935		}
936		vput(tdvp);
937	} else {
938		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
939			panic("rename: EXDEV");
940		/*
941		 * Short circuit rename(foo, foo).
942		 */
943		if (xp->i_number == ip->i_number)
944			panic("rename: same file");
945		/*
946		 * If the parent directory is "sticky", then the user must
947		 * own the parent directory, or the destination of the rename,
948		 * otherwise the destination may not be changed (except by
949		 * root). This implements append-only directories.
950		 */
951		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
952		    tcnp->cn_cred->cr_uid != dp->i_uid &&
953		    xp->i_uid != tcnp->cn_cred->cr_uid) {
954			error = EPERM;
955			goto bad;
956		}
957		/*
958		 * Target must be empty if a directory and have no links
959		 * to it. Also, ensure source and target are compatible
960		 * (both directories, or both not directories).
961		 */
962		if ((xp->i_mode&IFMT) == IFDIR) {
963			if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
964			    xp->i_nlink > 2) {
965				error = ENOTEMPTY;
966				goto bad;
967			}
968			if (!doingdirectory) {
969				error = ENOTDIR;
970				goto bad;
971			}
972			cache_purge(tdvp);
973		} else if (doingdirectory) {
974			error = EISDIR;
975			goto bad;
976		}
977		error = ufs_dirrewrite(dp, ip, tcnp);
978		if (error)
979			goto bad;
980		/*
981		 * If the target directory is in the same
982		 * directory as the source directory,
983		 * decrement the link count on the parent
984		 * of the target directory.
985		 */
986		 if (doingdirectory && !newparent) {
987			dp->i_nlink--;
988			dp->i_flag |= IN_CHANGE;
989		}
990		vput(tdvp);
991		/*
992		 * Adjust the link count of the target to
993		 * reflect the dirrewrite above.  If this is
994		 * a directory it is empty and there are
995		 * no links to it, so we can squash the inode and
996		 * any space associated with it.  We disallowed
997		 * renaming over top of a directory with links to
998		 * it above, as the remaining link would point to
999		 * a directory without "." or ".." entries.
1000		 */
1001		xp->i_nlink--;
1002		if (doingdirectory) {
1003			if (--xp->i_nlink != 0)
1004				panic("rename: linked directory");
1005			error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
1006			    tcnp->cn_cred, tcnp->cn_proc);
1007		}
1008		xp->i_flag |= IN_CHANGE;
1009		vput(tvp);
1010		xp = NULL;
1011	}
1012
1013	/*
1014	 * 3) Unlink the source.
1015	 */
1016	fcnp->cn_flags &= ~MODMASK;
1017	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1018	if ((fcnp->cn_flags & SAVESTART) == 0)
1019		panic("ufs_rename: lost from startdir");
1020	(void) relookup(fdvp, &fvp, fcnp);
1021	if (fvp != NULL) {
1022		xp = VTOI(fvp);
1023		dp = VTOI(fdvp);
1024	} else {
1025		/*
1026		 * From name has disappeared.
1027		 */
1028		if (doingdirectory)
1029			panic("rename: lost dir entry");
1030		vrele(ap->a_fvp);
1031		return (0);
1032	}
1033	/*
1034	 * Ensure that the directory entry still exists and has not
1035	 * changed while the new name has been entered. If the source is
1036	 * a file then the entry may have been unlinked or renamed. In
1037	 * either case there is no further work to be done. If the source
1038	 * is a directory then it cannot have been rmdir'ed; its link
1039	 * count of three would cause a rmdir to fail with ENOTEMPTY.
1040	 * The IRENAME flag ensures that it cannot be moved by another
1041	 * rename.
1042	 */
1043	if (xp != ip) {
1044		if (doingdirectory)
1045			panic("rename: lost dir entry");
1046	} else {
1047		/*
1048		 * If the source is a directory with a
1049		 * new parent, the link count of the old
1050		 * parent directory must be decremented
1051		 * and ".." set to point to the new parent.
1052		 */
1053		if (doingdirectory && newparent) {
1054			dp->i_nlink--;
1055			dp->i_flag |= IN_CHANGE;
1056			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
1057				sizeof (struct dirtemplate), (off_t)0,
1058				UIO_SYSSPACE, IO_NODELOCKED,
1059				tcnp->cn_cred, (int *)0, (struct proc *)0);
1060			if (error == 0) {
1061#				if (BYTE_ORDER == LITTLE_ENDIAN)
1062					if (fvp->v_mount->mnt_maxsymlinklen <= 0)
1063						namlen = dirbuf.dotdot_type;
1064					else
1065						namlen = dirbuf.dotdot_namlen;
1066#				else
1067					namlen = dirbuf.dotdot_namlen;
1068#				endif
1069				if (namlen != 2 ||
1070				    dirbuf.dotdot_name[0] != '.' ||
1071				    dirbuf.dotdot_name[1] != '.') {
1072					ufs_dirbad(xp, (doff_t)12,
1073					    "rename: mangled dir");
1074				} else {
1075					dirbuf.dotdot_ino = newparent;
1076					(void) vn_rdwr(UIO_WRITE, fvp,
1077					    (caddr_t)&dirbuf,
1078					    sizeof (struct dirtemplate),
1079					    (off_t)0, UIO_SYSSPACE,
1080					    IO_NODELOCKED|IO_SYNC,
1081					    tcnp->cn_cred, (int *)0,
1082					    (struct proc *)0);
1083					cache_purge(fdvp);
1084				}
1085			}
1086		}
1087		error = ufs_dirremove(fdvp, fcnp);
1088		if (!error) {
1089			xp->i_nlink--;
1090			xp->i_flag |= IN_CHANGE;
1091		}
1092		xp->i_flag &= ~IN_RENAME;
1093	}
1094	if (dp)
1095		vput(fdvp);
1096	if (xp)
1097		vput(fvp);
1098	vrele(ap->a_fvp);
1099	return (error);
1100
1101bad:
1102	if (xp)
1103		vput(ITOV(xp));
1104	vput(ITOV(dp));
1105out:
1106	if (VOP_LOCK(fvp) == 0) {
1107		ip->i_nlink--;
1108		ip->i_flag |= IN_CHANGE;
1109		vput(fvp);
1110	} else
1111		vrele(fvp);
1112	return (error);
1113}
1114
1115/*
1116 * A virgin directory (no blushing please).
1117 */
1118static struct dirtemplate mastertemplate = {
1119	0, 12, DT_DIR, 1, ".",
1120	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
1121};
1122static struct odirtemplate omastertemplate = {
1123	0, 12, 1, ".",
1124	0, DIRBLKSIZ - 12, 2, ".."
1125};
1126
1127/*
1128 * Mkdir system call
1129 */
1130int
1131ufs_mkdir(ap)
1132	struct vop_mkdir_args /* {
1133		struct vnode *a_dvp;
1134		struct vnode **a_vpp;
1135		struct componentname *a_cnp;
1136		struct vattr *a_vap;
1137	} */ *ap;
1138{
1139	register struct vnode *dvp = ap->a_dvp;
1140	register struct vattr *vap = ap->a_vap;
1141	register struct componentname *cnp = ap->a_cnp;
1142	register struct inode *ip, *dp;
1143	struct vnode *tvp;
1144	struct dirtemplate dirtemplate, *dtp;
1145	struct timeval tv;
1146	int error, dmode;
1147
1148#ifdef DIAGNOSTIC
1149	if ((cnp->cn_flags & HASBUF) == 0)
1150		panic("ufs_mkdir: no name");
1151#endif
1152	dp = VTOI(dvp);
1153	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1154		error = EMLINK;
1155		goto out;
1156	}
1157	dmode = vap->va_mode & 0777;
1158	dmode |= IFDIR;
1159	/*
1160	 * Must simulate part of ufs_makeinode here to acquire the inode,
1161	 * but not have it entered in the parent directory. The entry is
1162	 * made later after writing "." and ".." entries.
1163	 */
1164	error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1165	if (error)
1166		goto out;
1167	ip = VTOI(tvp);
1168	ip->i_uid = cnp->cn_cred->cr_uid;
1169	ip->i_gid = dp->i_gid;
1170#ifdef QUOTA
1171	if ((error = getinoquota(ip)) ||
1172	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1173		free(cnp->cn_pnbuf, M_NAMEI);
1174		VOP_VFREE(tvp, ip->i_number, dmode);
1175		vput(tvp);
1176		vput(dvp);
1177		return (error);
1178	}
1179#endif
1180	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1181	ip->i_mode = dmode;
1182	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1183	ip->i_nlink = 2;
1184	tv = time;
1185	error = VOP_UPDATE(tvp, &tv, &tv, 1);
1186
1187	/*
1188	 * Bump link count in parent directory
1189	 * to reflect work done below.  Should
1190	 * be done before reference is created
1191	 * so reparation is possible if we crash.
1192	 */
1193	dp->i_nlink++;
1194	dp->i_flag |= IN_CHANGE;
1195	error = VOP_UPDATE(dvp, &tv, &tv, 1);
1196	if (error)
1197		goto bad;
1198
1199	/* Initialize directory with "." and ".." from static template. */
1200	if (dvp->v_mount->mnt_maxsymlinklen > 0)
1201		dtp = &mastertemplate;
1202	else
1203		dtp = (struct dirtemplate *)&omastertemplate;
1204	dirtemplate = *dtp;
1205	dirtemplate.dot_ino = ip->i_number;
1206	dirtemplate.dotdot_ino = dp->i_number;
1207	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
1208	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
1209	    IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0);
1210	if (error) {
1211		dp->i_nlink--;
1212		dp->i_flag |= IN_CHANGE;
1213		goto bad;
1214	}
1215	if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
1216		panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */
1217	else {
1218		ip->i_size = DIRBLKSIZ;
1219		ip->i_flag |= IN_CHANGE;
1220	}
1221
1222	/* Directory set up, now install it's entry in the parent directory. */
1223	error = ufs_direnter(ip, dvp, cnp);
1224	if (error) {
1225		dp->i_nlink--;
1226		dp->i_flag |= IN_CHANGE;
1227	}
1228bad:
1229	/*
1230	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
1231	 * for us because we set the link count to 0.
1232	 */
1233	if (error) {
1234		ip->i_nlink = 0;
1235		ip->i_flag |= IN_CHANGE;
1236		vput(tvp);
1237	} else
1238		*ap->a_vpp = tvp;
1239out:
1240	FREE(cnp->cn_pnbuf, M_NAMEI);
1241	vput(dvp);
1242	return (error);
1243}
1244
1245/*
1246 * Rmdir system call.
1247 */
1248int
1249ufs_rmdir(ap)
1250	struct vop_rmdir_args /* {
1251		struct vnode *a_dvp;
1252		struct vnode *a_vp;
1253		struct componentname *a_cnp;
1254	} */ *ap;
1255{
1256	register struct vnode *vp = ap->a_vp;
1257	register struct vnode *dvp = ap->a_dvp;
1258	register struct componentname *cnp = ap->a_cnp;
1259	register struct inode *ip, *dp;
1260	int error;
1261
1262	ip = VTOI(vp);
1263	dp = VTOI(dvp);
1264	/*
1265	 * No rmdir "." please.
1266	 */
1267	if (dp == ip) {
1268		vrele(dvp);
1269		vput(vp);
1270		return (EINVAL);
1271	}
1272	/*
1273	 * Verify the directory is empty (and valid).
1274	 * (Rmdir ".." won't be valid since
1275	 *  ".." will contain a reference to
1276	 *  the current directory and thus be
1277	 *  non-empty.)
1278	 */
1279	error = 0;
1280	if (ip->i_nlink != 2 ||
1281	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1282		error = ENOTEMPTY;
1283		goto out;
1284	}
1285	if ((dp->i_flags & APPEND) || (ip->i_flags & (IMMUTABLE | APPEND))) {
1286		error = EPERM;
1287		goto out;
1288	}
1289	/*
1290	 * Delete reference to directory before purging
1291	 * inode.  If we crash in between, the directory
1292	 * will be reattached to lost+found,
1293	 */
1294	error = ufs_dirremove(dvp, cnp);
1295	if (error)
1296		goto out;
1297	dp->i_nlink--;
1298	dp->i_flag |= IN_CHANGE;
1299	cache_purge(dvp);
1300	vput(dvp);
1301	dvp = NULL;
1302	/*
1303	 * Truncate inode.  The only stuff left
1304	 * in the directory is "." and "..".  The
1305	 * "." reference is inconsequential since
1306	 * we're quashing it.  The ".." reference
1307	 * has already been adjusted above.  We've
1308	 * removed the "." reference and the reference
1309	 * in the parent directory, but there may be
1310	 * other hard links so decrement by 2 and
1311	 * worry about them later.
1312	 */
1313	ip->i_nlink -= 2;
1314	error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
1315	    cnp->cn_proc);
1316	cache_purge(ITOV(ip));
1317out:
1318	if (dvp)
1319		vput(dvp);
1320	vput(vp);
1321	return (error);
1322}
1323
1324/*
1325 * symlink -- make a symbolic link
1326 */
1327int
1328ufs_symlink(ap)
1329	struct vop_symlink_args /* {
1330		struct vnode *a_dvp;
1331		struct vnode **a_vpp;
1332		struct componentname *a_cnp;
1333		struct vattr *a_vap;
1334		char *a_target;
1335	} */ *ap;
1336{
1337	register struct vnode *vp, **vpp = ap->a_vpp;
1338	register struct inode *ip;
1339	int len, error;
1340
1341	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1342	    vpp, ap->a_cnp);
1343	if (error)
1344		return (error);
1345	vp = *vpp;
1346	len = strlen(ap->a_target);
1347	if (len < vp->v_mount->mnt_maxsymlinklen) {
1348		ip = VTOI(vp);
1349		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
1350		ip->i_size = len;
1351		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1352	} else
1353		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1354		    UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0,
1355		    (struct proc *)0);
1356	vput(vp);
1357	return (error);
1358}
1359
1360/*
1361 * Vnode op for reading directories.
1362 *
1363 * The routine below assumes that the on-disk format of a directory
1364 * is the same as that defined by <sys/dirent.h>. If the on-disk
1365 * format changes, then it will be necessary to do a conversion
1366 * from the on-disk format that read returns to the format defined
1367 * by <sys/dirent.h>.
1368 */
1369int
1370ufs_readdir(ap)
1371	struct vop_readdir_args /* {
1372		struct vnode *a_vp;
1373		struct uio *a_uio;
1374		struct ucred *a_cred;
1375		int *a_ncookies;
1376		u_int **cookies;
1377	} */ *ap;
1378{
1379	register struct uio *uio = ap->a_uio;
1380	off_t off;
1381	int count, lost, error;
1382
1383	if (ap->a_ncookies != NULL)
1384		/*
1385		 * Ensure that the block is aligned.  The caller can use
1386		 * the cookies to determine where in the block to start.
1387		 */
1388		uio->uio_offset &= ~(DIRBLKSIZ - 1);
1389	off = uio->uio_offset;
1390	count = uio->uio_resid;
1391	count &= ~(DIRBLKSIZ - 1);
1392	lost = uio->uio_resid - count;
1393	if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1)))
1394		return (EINVAL);
1395	uio->uio_resid = count;
1396	uio->uio_iov->iov_len = count;
1397#	if (BYTE_ORDER == LITTLE_ENDIAN)
1398		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
1399			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1400		} else {
1401			struct dirent *dp, *edp;
1402			struct uio auio;
1403			struct iovec aiov;
1404			caddr_t dirbuf;
1405			int readcnt;
1406			u_char tmp;
1407
1408			auio = *uio;
1409			auio.uio_iov = &aiov;
1410			auio.uio_iovcnt = 1;
1411			auio.uio_segflg = UIO_SYSSPACE;
1412			aiov.iov_len = count;
1413			MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
1414			aiov.iov_base = dirbuf;
1415			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
1416			if (error == 0) {
1417				readcnt = count - auio.uio_resid;
1418				edp = (struct dirent *)&dirbuf[readcnt];
1419				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
1420					tmp = dp->d_namlen;
1421					dp->d_namlen = dp->d_type;
1422					dp->d_type = tmp;
1423					if (dp->d_reclen > 0) {
1424						dp = (struct dirent *)
1425						    ((char *)dp + dp->d_reclen);
1426					} else {
1427						error = EIO;
1428						break;
1429					}
1430				}
1431				if (dp >= edp)
1432					error = uiomove(dirbuf, readcnt, uio);
1433			}
1434			FREE(dirbuf, M_TEMP);
1435		}
1436#	else
1437		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1438#	endif
1439	if (!error && ap->a_ncookies != NULL) {
1440		struct dirent* dpStart;
1441		struct dirent* dpEnd;
1442		struct dirent* dp;
1443		int ncookies;
1444		u_int *cookies;
1445		u_int *cookiep;
1446
1447		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1448			panic("ufs_readdir: unexpected uio from NFS server");
1449		dpStart = (struct dirent *)
1450		     (uio->uio_iov->iov_base - (uio->uio_offset - off));
1451		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
1452		for (dp = dpStart, ncookies = 0;
1453		     dp < dpEnd;
1454		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
1455			ncookies++;
1456		MALLOC(cookies, u_int *, ncookies * sizeof(u_int),
1457		       M_TEMP, M_WAITOK);
1458		for (dp = dpStart, cookiep = cookies;
1459		     dp < dpEnd;
1460		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
1461			off += dp->d_reclen;
1462			*cookiep++ = (u_int) off;
1463		}
1464		*ap->a_ncookies = ncookies;
1465		*ap->a_cookies = cookies;
1466	}
1467	if (ap->a_eofflag)
1468	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
1469	uio->uio_resid += lost;
1470	return (error);
1471}
1472
1473/*
1474 * Return target name of a symbolic link
1475 */
1476int
1477ufs_readlink(ap)
1478	struct vop_readlink_args /* {
1479		struct vnode *a_vp;
1480		struct uio *a_uio;
1481		struct ucred *a_cred;
1482	} */ *ap;
1483{
1484	register struct vnode *vp = ap->a_vp;
1485	register struct inode *ip = VTOI(vp);
1486	int isize;
1487
1488	isize = ip->i_size;
1489	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1490	    (ip->i_din.di_blocks == 0)) {	/* XXX - for old fastlink support */
1491		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
1492		return (0);
1493	}
1494	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1495}
1496
1497/*
1498 * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
1499 * done. If a buffer has been saved in anticipation of a CREATE, delete it.
1500 */
1501/* ARGSUSED */
1502int
1503ufs_abortop(ap)
1504	struct vop_abortop_args /* {
1505		struct vnode *a_dvp;
1506		struct componentname *a_cnp;
1507	} */ *ap;
1508{
1509	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1510		FREE(ap->a_cnp->cn_pnbuf, M_NAMEI);
1511	return (0);
1512}
1513
1514/*
1515 * Lock an inode. If its already locked, set the WANT bit and sleep.
1516 */
1517int
1518ufs_lock(ap)
1519	struct vop_lock_args /* {
1520		struct vnode *a_vp;
1521	} */ *ap;
1522{
1523	register struct vnode *vp = ap->a_vp;
1524	register struct inode *ip;
1525
1526start:
1527	while (vp->v_flag & VXLOCK) {
1528		vp->v_flag |= VXWANT;
1529		(void) tsleep((caddr_t)vp, PINOD, "ufslk1", 0);
1530	}
1531	if (vp->v_tag == VT_NON)
1532		return (ENOENT);
1533	ip = VTOI(vp);
1534	if (ip->i_flag & IN_LOCKED) {
1535		ip->i_flag |= IN_WANTED;
1536#ifdef DIAGNOSTIC
1537		if (p) {
1538			if (p->p_pid == ip->i_lockholder)
1539				panic("locking against myself");
1540			ip->i_lockwaiter = p->p_pid;
1541		} else
1542			ip->i_lockwaiter = -1;
1543#endif
1544		(void) tsleep((caddr_t)ip, PINOD, "ufslk2", 0);
1545		goto start;
1546	}
1547#ifdef DIAGNOSTIC
1548	ip->i_lockwaiter = 0;
1549	if (ip->i_lockholder != 0)
1550		panic("lockholder (%d) != 0", ip->i_lockholder);
1551	if (p && p->p_pid == 0)
1552		printf("locking by process 0\n");
1553	if (p)
1554		ip->i_lockholder = p->p_pid;
1555	else
1556		ip->i_lockholder = -1;
1557#endif
1558	ip->i_flag |= IN_LOCKED;
1559	return (0);
1560}
1561
1562/*
1563 * Unlock an inode.  If WANT bit is on, wakeup.
1564 */
1565int lockcount = 90;
1566int
1567ufs_unlock(ap)
1568	struct vop_unlock_args /* {
1569		struct vnode *a_vp;
1570	} */ *ap;
1571{
1572	register struct inode *ip = VTOI(ap->a_vp);
1573
1574#ifdef DIAGNOSTIC
1575	if ((ip->i_flag & IN_LOCKED) == 0) {
1576		vprint("ufs_unlock: unlocked inode", ap->a_vp);
1577		panic("ufs_unlock NOT LOCKED");
1578	}
1579	if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 &&
1580	    ip->i_lockholder > -1 && lockcount++ < 100)
1581		panic("unlocker (%d) != lock holder (%d)",
1582		    p->p_pid, ip->i_lockholder);
1583	ip->i_lockholder = 0;
1584#endif
1585	ip->i_flag &= ~IN_LOCKED;
1586	if (ip->i_flag & IN_WANTED) {
1587		ip->i_flag &= ~IN_WANTED;
1588		wakeup((caddr_t)ip);
1589	}
1590	return (0);
1591}
1592
1593/*
1594 * Check for a locked inode.
1595 */
1596int
1597ufs_islocked(ap)
1598	struct vop_islocked_args /* {
1599		struct vnode *a_vp;
1600	} */ *ap;
1601{
1602
1603	if (VTOI(ap->a_vp)->i_flag & IN_LOCKED)
1604		return (1);
1605	return (0);
1606}
1607
1608/*
1609 * Calculate the logical to physical mapping if not done already,
1610 * then call the device strategy routine.
1611 */
1612int
1613ufs_strategy(ap)
1614	struct vop_strategy_args /* {
1615		struct buf *a_bp;
1616	} */ *ap;
1617{
1618	register struct buf *bp = ap->a_bp;
1619	register struct vnode *vp = bp->b_vp;
1620	register struct inode *ip;
1621	int error;
1622
1623	ip = VTOI(vp);
1624	if (vp->v_type == VBLK || vp->v_type == VCHR)
1625		panic("ufs_strategy: spec");
1626	if (bp->b_blkno == bp->b_lblkno) {
1627		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
1628		if (error) {
1629			bp->b_error = error;
1630			bp->b_flags |= B_ERROR;
1631			biodone(bp);
1632			return (error);
1633		}
1634		if ((long)bp->b_blkno == -1)
1635			clrbuf(bp);
1636	}
1637	if ((long)bp->b_blkno == -1) {
1638		biodone(bp);
1639		return (0);
1640	}
1641	vp = ip->i_devvp;
1642	bp->b_dev = vp->v_rdev;
1643	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
1644	return (0);
1645}
1646
1647/*
1648 * Print out the contents of an inode.
1649 */
1650int
1651ufs_print(ap)
1652	struct vop_print_args /* {
1653		struct vnode *a_vp;
1654	} */ *ap;
1655{
1656	register struct vnode *vp = ap->a_vp;
1657	register struct inode *ip = VTOI(vp);
1658
1659	printf("tag VT_UFS, ino %ld, on dev %d, %d", ip->i_number,
1660		major(ip->i_dev), minor(ip->i_dev));
1661	if (vp->v_type == VFIFO)
1662		fifo_printinfo(vp);
1663	printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : "");
1664	if (ip->i_lockholder == 0)
1665		return (0);
1666	printf("\towner pid %lu", (u_long)ip->i_lockholder);
1667	if (ip->i_lockwaiter)
1668		printf(" waiting pid %lu", (u_long)ip->i_lockwaiter);
1669	printf("\n");
1670	return (0);
1671}
1672
1673/*
1674 * Read wrapper for special devices.
1675 */
1676int
1677ufsspec_read(ap)
1678	struct vop_read_args /* {
1679		struct vnode *a_vp;
1680		struct uio *a_uio;
1681		int  a_ioflag;
1682		struct ucred *a_cred;
1683	} */ *ap;
1684{
1685
1686	/*
1687	 * Set access flag.
1688	 */
1689	VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1690	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
1691}
1692
1693/*
1694 * Write wrapper for special devices.
1695 */
1696int
1697ufsspec_write(ap)
1698	struct vop_write_args /* {
1699		struct vnode *a_vp;
1700		struct uio *a_uio;
1701		int  a_ioflag;
1702		struct ucred *a_cred;
1703	} */ *ap;
1704{
1705
1706	/*
1707	 * Set update and change flags.
1708	 */
1709	VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1710	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
1711}
1712
1713/*
1714 * Close wrapper for special devices.
1715 *
1716 * Update the times on the inode then do device close.
1717 */
1718int
1719ufsspec_close(ap)
1720	struct vop_close_args /* {
1721		struct vnode *a_vp;
1722		int  a_fflag;
1723		struct ucred *a_cred;
1724		struct proc *a_p;
1725	} */ *ap;
1726{
1727	register struct inode *ip = VTOI(ap->a_vp);
1728
1729	if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
1730		ITIMES(ip, &time, &time);
1731	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1732}
1733
1734/*
1735 * Read wrapper for fifo's
1736 */
1737int
1738ufsfifo_read(ap)
1739	struct vop_read_args /* {
1740		struct vnode *a_vp;
1741		struct uio *a_uio;
1742		int  a_ioflag;
1743		struct ucred *a_cred;
1744	} */ *ap;
1745{
1746	extern int (**fifo_vnodeop_p)();
1747
1748	/*
1749	 * Set access flag.
1750	 */
1751	VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1752	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
1753}
1754
1755/*
1756 * Write wrapper for fifo's.
1757 */
1758int
1759ufsfifo_write(ap)
1760	struct vop_write_args /* {
1761		struct vnode *a_vp;
1762		struct uio *a_uio;
1763		int  a_ioflag;
1764		struct ucred *a_cred;
1765	} */ *ap;
1766{
1767	extern int (**fifo_vnodeop_p)();
1768
1769	/*
1770	 * Set update and change flags.
1771	 */
1772	VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1773	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
1774}
1775
1776/*
1777 * Close wrapper for fifo's.
1778 *
1779 * Update the times on the inode then do device close.
1780 */
1781int
1782ufsfifo_close(ap)
1783	struct vop_close_args /* {
1784		struct vnode *a_vp;
1785		int  a_fflag;
1786		struct ucred *a_cred;
1787		struct proc *a_p;
1788	} */ *ap;
1789{
1790	extern int (**fifo_vnodeop_p)();
1791	register struct inode *ip = VTOI(ap->a_vp);
1792
1793	if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
1794		ITIMES(ip, &time, &time);
1795	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1796}
1797
1798/*
1799 * Return POSIX pathconf information applicable to ufs filesystems.
1800 */
1801int
1802ufs_pathconf(ap)
1803	struct vop_pathconf_args /* {
1804		struct vnode *a_vp;
1805		int a_name;
1806		int *a_retval;
1807	} */ *ap;
1808{
1809
1810	switch (ap->a_name) {
1811	case _PC_LINK_MAX:
1812		*ap->a_retval = LINK_MAX;
1813		return (0);
1814	case _PC_NAME_MAX:
1815		*ap->a_retval = NAME_MAX;
1816		return (0);
1817	case _PC_PATH_MAX:
1818		*ap->a_retval = PATH_MAX;
1819		return (0);
1820	case _PC_PIPE_BUF:
1821		*ap->a_retval = PIPE_BUF;
1822		return (0);
1823	case _PC_CHOWN_RESTRICTED:
1824		*ap->a_retval = 1;
1825		return (0);
1826	case _PC_NO_TRUNC:
1827		*ap->a_retval = 1;
1828		return (0);
1829	default:
1830		return (EINVAL);
1831	}
1832	/* NOTREACHED */
1833}
1834
1835/*
1836 * Advisory record locking support
1837 */
1838int
1839ufs_advlock(ap)
1840	struct vop_advlock_args /* {
1841		struct vnode *a_vp;
1842		caddr_t  a_id;
1843		int  a_op;
1844		struct flock *a_fl;
1845		int  a_flags;
1846	} */ *ap;
1847{
1848	register struct inode *ip = VTOI(ap->a_vp);
1849
1850	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
1851}
1852
1853/*
1854 * Initialize the vnode associated with a new inode, handle aliased
1855 * vnodes.
1856 */
1857int
1858ufs_vinit(mntp, specops, fifoops, vpp)
1859	struct mount *mntp;
1860	int (**specops)();
1861	int (**fifoops)();
1862	struct vnode **vpp;
1863{
1864	struct inode *ip;
1865	struct vnode *vp, *nvp;
1866
1867	vp = *vpp;
1868	ip = VTOI(vp);
1869	switch(vp->v_type = IFTOVT(ip->i_mode)) {
1870	case VCHR:
1871	case VBLK:
1872		vp->v_op = specops;
1873		nvp = checkalias(vp, ip->i_rdev, mntp);
1874		if (nvp) {
1875			/*
1876			 * Discard unneeded vnode, but save its inode.
1877			 */
1878			ufs_ihashrem(ip);
1879			VOP_UNLOCK(vp);
1880			nvp->v_data = vp->v_data;
1881			vp->v_data = NULL;
1882			vp->v_op = spec_vnodeop_p;
1883			vrele(vp);
1884			vgone(vp);
1885			/*
1886			 * Reinitialize aliased inode.
1887			 */
1888			vp = nvp;
1889			ip->i_vnode = vp;
1890			ufs_ihashins(ip);
1891		}
1892		break;
1893	case VFIFO:
1894		vp->v_op = fifoops;
1895		break;
1896	}
1897	if (ip->i_number == ROOTINO)
1898                vp->v_flag |= VROOT;
1899	/*
1900	 * Initialize modrev times
1901	 */
1902	SETHIGH(ip->i_modrev, mono_time.tv_sec);
1903	SETLOW(ip->i_modrev, mono_time.tv_usec * 4294);
1904	*vpp = vp;
1905	return (0);
1906}
1907
1908/*
1909 * Allocate a new inode.
1910 */
1911int
1912ufs_makeinode(mode, dvp, vpp, cnp)
1913	int mode;
1914	struct vnode *dvp;
1915	struct vnode **vpp;
1916	struct componentname *cnp;
1917{
1918	register struct inode *ip, *pdir;
1919	struct timeval tv;
1920	struct vnode *tvp;
1921	int error;
1922
1923	pdir = VTOI(dvp);
1924#ifdef DIAGNOSTIC
1925	if ((cnp->cn_flags & HASBUF) == 0)
1926		panic("ufs_makeinode: no name");
1927#endif
1928	*vpp = NULL;
1929	if ((mode & IFMT) == 0)
1930		mode |= IFREG;
1931
1932	error = VOP_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
1933	if (error) {
1934		free(cnp->cn_pnbuf, M_NAMEI);
1935		vput(dvp);
1936		return (error);
1937	}
1938	ip = VTOI(tvp);
1939	ip->i_gid = pdir->i_gid;
1940	if ((mode & IFMT) == IFLNK)
1941		ip->i_uid = pdir->i_uid;
1942	else
1943		ip->i_uid = cnp->cn_cred->cr_uid;
1944#ifdef QUOTA
1945	if ((error = getinoquota(ip)) ||
1946	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1947		free(cnp->cn_pnbuf, M_NAMEI);
1948		VOP_VFREE(tvp, ip->i_number, mode);
1949		vput(tvp);
1950		vput(dvp);
1951		return (error);
1952	}
1953#endif
1954	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1955	ip->i_mode = mode;
1956	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
1957	ip->i_nlink = 1;
1958	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
1959	    suser(cnp->cn_cred, NULL))
1960		ip->i_mode &= ~ISGID;
1961
1962	/*
1963	 * Make sure inode goes to disk before directory entry.
1964	 */
1965	tv = time;
1966	error = VOP_UPDATE(tvp, &tv, &tv, 1);
1967	if (error)
1968		goto bad;
1969	error = ufs_direnter(ip, dvp, cnp);
1970	if (error)
1971		goto bad;
1972	if ((cnp->cn_flags & SAVESTART) == 0)
1973		FREE(cnp->cn_pnbuf, M_NAMEI);
1974	vput(dvp);
1975	*vpp = tvp;
1976	return (0);
1977
1978bad:
1979	/*
1980	 * Write error occurred trying to update the inode
1981	 * or the directory so must deallocate the inode.
1982	 */
1983	free(cnp->cn_pnbuf, M_NAMEI);
1984	vput(dvp);
1985	ip->i_nlink = 0;
1986	ip->i_flag |= IN_CHANGE;
1987	vput(tvp);
1988	return (error);
1989}
1990