ext2fs_lookup.c revision 1.11
1/*	$OpenBSD: ext2fs_lookup.c,v 1.11 2001/09/18 01:21:55 art Exp $	*/
2/*	$NetBSD: ext2fs_lookup.c,v 1.16 2000/08/03 20:29:26 thorpej Exp $	*/
3
4/*
5 * Modified for NetBSD 1.2E
6 * May 1997, Manuel Bouyer
7 * Laboratoire d'informatique de Paris VI
8 */
9/*
10 *  modified for Lites 1.1
11 *
12 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
13 *  University of Utah, Department of Computer Science
14 */
15/*
16 * Copyright (c) 1989, 1993
17 *	The Regents of the University of California.  All rights reserved.
18 * (c) UNIX System Laboratories, Inc.
19 * All or some portions of this file are derived from material licensed
20 * to the University of California by American Telephone and Telegraph
21 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
22 * the permission of UNIX System Laboratories, Inc.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 *    notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 *    notice, this list of conditions and the following disclaimer in the
31 *    documentation and/or other materials provided with the distribution.
32 * 3. All advertising materials mentioning features or use of this software
33 *    must display the following acknowledgement:
34 *	This product includes software developed by the University of
35 *	California, Berkeley and its contributors.
36 * 4. Neither the name of the University nor the names of its contributors
37 *    may be used to endorse or promote products derived from this software
38 *    without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
53 */
54
55#include <sys/param.h>
56#include <sys/systm.h>
57#include <sys/namei.h>
58#include <sys/buf.h>
59#include <sys/file.h>
60#include <sys/mount.h>
61#include <sys/vnode.h>
62#include <sys/malloc.h>
63#include <sys/dirent.h>
64
65#include <ufs/ufs/quota.h>
66#include <ufs/ufs/inode.h>
67#include <ufs/ufs/ufsmount.h>
68#include <ufs/ufs/ufs_extern.h>
69
70#include <ufs/ext2fs/ext2fs_extern.h>
71#include <ufs/ext2fs/ext2fs_dir.h>
72#include <ufs/ext2fs/ext2fs.h>
73
74extern	int dirchk;
75
76static void	ext2fs_dirconv2ffs __P((struct ext2fs_direct *e2dir,
77					  struct dirent *ffsdir));
78static int	ext2fs_dirbadentry __P((struct vnode *dp,
79					  struct ext2fs_direct *de,
80					  int entryoffsetinblock));
81
82/*
83 * the problem that is tackled below is the fact that FFS
84 * includes the terminating zero on disk while EXT2FS doesn't
85 * this implies that we need to introduce some padding.
86 * For instance, a filename "sbin" has normally a reclen 12
87 * in EXT2, but 16 in FFS.
88 * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...'
89 * If it wasn't for that, the complete ufs code for directories would
90 * have worked w/o changes (except for the difference in DIRBLKSIZ)
91 */
92static void
93ext2fs_dirconv2ffs( e2dir, ffsdir)
94	struct ext2fs_direct	*e2dir;
95	struct dirent 		*ffsdir;
96{
97	memset(ffsdir, 0, sizeof(struct dirent));
98	ffsdir->d_fileno = fs2h32(e2dir->e2d_ino);
99	ffsdir->d_namlen = e2dir->e2d_namlen;
100
101	ffsdir->d_type = DT_UNKNOWN;		/* don't know more here */
102#ifdef DIAGNOSTIC
103	/*
104	 * XXX Rigth now this can't happen, but if one day
105	 * MAXNAMLEN != E2FS_MAXNAMLEN we should handle this more gracefully !
106	 */
107	if (e2dir->e2d_namlen > MAXNAMLEN)
108		panic("ext2fs: e2dir->e2d_namlen\n");
109#endif
110	strncpy(ffsdir->d_name, e2dir->e2d_name, ffsdir->d_namlen);
111
112	/* Godmar thinks: since e2dir->e2d_reclen can be big and means
113	   nothing anyway, we compute our own reclen according to what
114	   we think is right
115	 */
116	ffsdir->d_reclen = DIRENT_SIZE(ffsdir);
117}
118
119/*
120 * Vnode op for reading directories.
121 *
122 * Convert the on-disk entries to <sys/dirent.h> entries.
123 * the problem is that the conversion will blow up some entries by four bytes,
124 * so it can't be done in place. This is too bad. Right now the conversion is
125 * done entry by entry, the converted entry is sent via uiomove.
126 *
127 * XXX allocate a buffer, convert as many entries as possible, then send
128 * the whole buffer to uiomove
129 */
130int
131ext2fs_readdir(v)
132	void *v;
133{
134	struct vop_readdir_args /* {
135		struct vnode *a_vp;
136		struct uio *a_uio;
137		struct ucred *a_cred;
138		int **a_eofflag;
139		off_t **a_cookies;
140		int ncookies;
141	} */ *ap = v;
142	struct uio *uio = ap->a_uio;
143	int error;
144	size_t e2fs_count, readcnt;
145	struct vnode *vp = ap->a_vp;
146	struct m_ext2fs *fs = VTOI(vp)->i_e2fs;
147
148	struct ext2fs_direct *dp;
149	struct dirent dstd;
150	struct uio auio;
151	struct iovec aiov;
152	caddr_t dirbuf;
153	off_t off = uio->uio_offset;
154	u_long *cookies = NULL;
155	int nc = 0, ncookies = 0;
156	int e2d_reclen;
157
158	if (vp->v_type != VDIR)
159		return (ENOTDIR);
160
161	e2fs_count = uio->uio_resid;
162	/* Make sure we don't return partial entries. */
163	e2fs_count -= (uio->uio_offset + e2fs_count) & (fs->e2fs_bsize -1);
164	if (e2fs_count <= 0)
165		return (EINVAL);
166
167	auio = *uio;
168	auio.uio_iov = &aiov;
169	auio.uio_iovcnt = 1;
170	auio.uio_segflg = UIO_SYSSPACE;
171	aiov.iov_len = e2fs_count;
172	auio.uio_resid = e2fs_count;
173	MALLOC(dirbuf, caddr_t, e2fs_count, M_TEMP, M_WAITOK);
174	if (ap->a_ncookies) {
175		nc = ncookies = e2fs_count / 16;
176		cookies = malloc(sizeof (off_t) * ncookies, M_TEMP, M_WAITOK);
177		*ap->a_cookies = cookies;
178	}
179	memset(dirbuf, 0, e2fs_count);
180	aiov.iov_base = dirbuf;
181
182	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
183	if (error == 0) {
184		readcnt = e2fs_count - auio.uio_resid;
185		for (dp = (struct ext2fs_direct *)dirbuf;
186			(char *)dp < (char *)dirbuf + readcnt; ) {
187			e2d_reclen = fs2h16(dp->e2d_reclen);
188			if (e2d_reclen == 0) {
189				error = EIO;
190				break;
191			}
192			ext2fs_dirconv2ffs(dp, &dstd);
193			if(dstd.d_reclen > uio->uio_resid) {
194				break;
195			}
196			if ((error = uiomove((caddr_t)&dstd, dstd.d_reclen, uio)) != 0) {
197				break;
198			}
199			off = off + e2d_reclen;
200			if (cookies != NULL) {
201				*cookies++ = off;
202				if (--ncookies <= 0){
203					break;  /* out of cookies */
204				}
205			}
206			/* advance dp */
207			dp = (struct ext2fs_direct *) ((char *)dp + e2d_reclen);
208		}
209		/* we need to correct uio_offset */
210		uio->uio_offset = off;
211	}
212	FREE(dirbuf, M_TEMP);
213	*ap->a_eofflag = VTOI(ap->a_vp)->i_e2fs_size <= uio->uio_offset;
214	if (ap->a_ncookies) {
215		if (error) {
216			free(*ap->a_cookies, M_TEMP);
217			*ap->a_ncookies = 0;
218			*ap->a_cookies = NULL;
219		} else
220			*ap->a_ncookies = nc - ncookies;
221	}
222	return (error);
223}
224
225/*
226 * Convert a component of a pathname into a pointer to a locked inode.
227 * This is a very central and rather complicated routine.
228 * If the file system is not maintained in a strict tree hierarchy,
229 * this can result in a deadlock situation (see comments in code below).
230 *
231 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
232 * on whether the name is to be looked up, created, renamed, or deleted.
233 * When CREATE, RENAME, or DELETE is specified, information usable in
234 * creating, renaming, or deleting a directory entry may be calculated.
235 * If flag has LOCKPARENT or'ed into it and the target of the pathname
236 * exists, lookup returns both the target and its parent directory locked.
237 * When creating or renaming and LOCKPARENT is specified, the target may
238 * not be ".".  When deleting and LOCKPARENT is specified, the target may
239 * be "."., but the caller must check to ensure it does an vrele and vput
240 * instead of two vputs.
241 *
242 * Overall outline of ext2fs_lookup:
243 *
244 *	check accessibility of directory
245 *	look for name in cache, if found, then if at end of path
246 *	  and deleting or creating, drop it, else return name
247 *	search for name in directory, to found or notfound
248 * notfound:
249 *	if creating, return locked directory, leaving info on available slots
250 *	else return error
251 * found:
252 *	if at end of path and deleting, return information to allow delete
253 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
254 *	  inode and return info to allow rewrite
255 *	if not at end, add name to cache; if at end and neither creating
256 *	  nor deleting, add name to cache
257 */
258int
259ext2fs_lookup(v)
260	void *v;
261{
262	struct vop_lookup_args /* {
263		struct vnode *a_dvp;
264		struct vnode **a_vpp;
265		struct componentname *a_cnp;
266	} */ *ap = v;
267	struct vnode *vdp;	/* vnode for directory being searched */
268	struct inode *dp;	/* inode for directory being searched */
269	struct buf *bp;			/* a buffer of directory entries */
270	struct ext2fs_direct *ep; /* the current directory entry */
271	int entryoffsetinblock;		/* offset of ep in bp's buffer */
272	enum {NONE, COMPACT, FOUND} slotstatus;
273	doff_t slotoffset;		/* offset of area with free space */
274	int slotsize;			/* size of area at slotoffset */
275	int slotfreespace;		/* amount of space free in slot */
276	int slotneeded;			/* size of the entry we're seeking */
277	int numdirpasses;		/* strategy for directory search */
278	doff_t endsearch;		/* offset to end directory search */
279	doff_t prevoff;			/* prev entry dp->i_offset */
280	struct vnode *pdp;		/* saved dp during symlink work */
281	struct vnode *tdp;		/* returned by VFS_VGET */
282	doff_t enduseful;		/* pointer past last used dir slot */
283	u_long bmask;			/* block offset mask */
284	int lockparent;			/* 1 => lockparent flag is set */
285	int wantparent;			/* 1 => wantparent or lockparent flag */
286	int namlen, error;
287	struct vnode **vpp = ap->a_vpp;
288	struct componentname *cnp = ap->a_cnp;
289	struct ucred *cred = cnp->cn_cred;
290	int flags = cnp->cn_flags;
291	int nameiop = cnp->cn_nameiop;
292	struct proc *p = cnp->cn_proc;
293	int	dirblksize = VTOI(ap->a_dvp)->i_e2fs->e2fs_bsize;
294
295	bp = NULL;
296	slotoffset = -1;
297	*vpp = NULL;
298	vdp = ap->a_dvp;
299	dp = VTOI(vdp);
300	lockparent = flags & LOCKPARENT;
301	wantparent = flags & (LOCKPARENT|WANTPARENT);
302	/*
303	 * Check accessiblity of directory.
304	 */
305	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
306		return (error);
307
308	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
309	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
310		return (EROFS);
311
312	/*
313	 * We now have a segment name to search for, and a directory to search.
314	 *
315	 * Before tediously performing a linear scan of the directory,
316	 * check the name cache to see if the directory/name pair
317	 * we are looking for is known already.
318	 */
319	if ((error = cache_lookup(vdp, vpp, cnp)) != 0) {
320		int vpid;	/* capability number of vnode */
321
322		if (error == ENOENT)
323			return (error);
324		/*
325		 * Get the next vnode in the path.
326		 * See comment below starting `Step through' for
327		 * an explaination of the locking protocol.
328		 */
329		pdp = vdp;
330		dp = VTOI(*vpp);
331		vdp = *vpp;
332		vpid = vdp->v_id;
333		if (pdp == vdp) {   /* lookup on "." */
334			VREF(vdp);
335			error = 0;
336		} else if (flags & ISDOTDOT) {
337			VOP_UNLOCK(pdp, 0, p);
338			error = vget(vdp, LK_EXCLUSIVE, p);
339			if (!error && lockparent && (flags & ISLASTCN))
340				error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
341		} else {
342			error = vget(vdp, LK_EXCLUSIVE, p);
343			if (!lockparent || error || !(flags & ISLASTCN))
344				VOP_UNLOCK(pdp, 0, p);
345		}
346		/*
347		 * Check that the capability number did not change
348		 * while we were waiting for the lock.
349		 */
350		if (!error) {
351			if (vpid == vdp->v_id)
352				return (0);
353			vput(vdp);
354			if (lockparent && pdp != vdp && (flags & ISLASTCN))
355				VOP_UNLOCK(pdp, 0, p);
356		}
357		if ((error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0)
358			return (error);
359		vdp = pdp;
360		dp = VTOI(pdp);
361		*vpp = NULL;
362	}
363
364	/*
365	 * Suppress search for slots unless creating
366	 * file and at end of pathname, in which case
367	 * we watch for a place to put the new file in
368	 * case it doesn't already exist.
369	 */
370	slotstatus = FOUND;
371	slotfreespace = slotsize = slotneeded = 0;
372	if ((nameiop == CREATE || nameiop == RENAME) &&
373		(flags & ISLASTCN)) {
374		slotstatus = NONE;
375		slotneeded = EXT2FS_DIRSIZ(cnp->cn_namelen);
376	}
377
378	/*
379	 * If there is cached information on a previous search of
380	 * this directory, pick up where we last left off.
381	 * We cache only lookups as these are the most common
382	 * and have the greatest payoff. Caching CREATE has little
383	 * benefit as it usually must search the entire directory
384	 * to determine that the entry does not exist. Caching the
385	 * location of the last DELETE or RENAME has not reduced
386	 * profiling time and hence has been removed in the interest
387	 * of simplicity.
388	 */
389	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
390	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
391		dp->i_diroff > dp->i_e2fs_size) {
392		entryoffsetinblock = 0;
393		dp->i_offset = 0;
394		numdirpasses = 1;
395	} else {
396		dp->i_offset = dp->i_diroff;
397		if ((entryoffsetinblock = dp->i_offset & bmask) &&
398			(error = ext2fs_bufatoff(dp, (off_t)dp->i_offset,
399			    NULL, &bp)))
400			return (error);
401		numdirpasses = 2;
402	}
403	prevoff = dp->i_offset;
404	endsearch = roundup(dp->i_e2fs_size, dirblksize);
405	enduseful = 0;
406
407searchloop:
408	while (dp->i_offset < endsearch) {
409		/*
410		 * If necessary, get the next directory block.
411		 */
412		if ((dp->i_offset & bmask) == 0) {
413			if (bp != NULL)
414				brelse(bp);
415			error = ext2fs_bufatoff(dp, (off_t)dp->i_offset,
416			    NULL, &bp);
417			if (error != 0)
418				return (error);
419			entryoffsetinblock = 0;
420		}
421		/*
422		 * If still looking for a slot, and at a dirblksize
423		 * boundary, have to start looking for free space again.
424		 */
425		if (slotstatus == NONE &&
426			(entryoffsetinblock & (dirblksize - 1)) == 0) {
427			slotoffset = -1;
428			slotfreespace = 0;
429		}
430		/*
431		 * Get pointer to next entry.
432		 * Full validation checks are slow, so we only check
433		 * enough to insure forward progress through the
434		 * directory. Complete checks can be run by patching
435		 * "dirchk" to be true.
436		 */
437		ep = (struct ext2fs_direct *)
438			((char *)bp->b_data + entryoffsetinblock);
439		if (ep->e2d_reclen == 0 ||
440		    (dirchk &&
441		    ext2fs_dirbadentry(vdp, ep, entryoffsetinblock))) {
442			int i;
443			ufs_dirbad(dp, dp->i_offset, "mangled entry");
444			i = dirblksize -
445			    (entryoffsetinblock & (dirblksize - 1));
446			dp->i_offset += i;
447			entryoffsetinblock += i;
448			continue;
449		}
450
451		/*
452		 * If an appropriate sized slot has not yet been found,
453		 * check to see if one is available. Also accumulate space
454		 * in the current block so that we can determine if
455		 * compaction is viable.
456		 */
457		if (slotstatus != FOUND) {
458			int size = fs2h16(ep->e2d_reclen);
459
460			if (ep->e2d_ino != 0)
461				size -= EXT2FS_DIRSIZ(ep->e2d_namlen);
462			if (size > 0) {
463				if (size >= slotneeded) {
464					slotstatus = FOUND;
465					slotoffset = dp->i_offset;
466					slotsize = fs2h16(ep->e2d_reclen);
467				} else if (slotstatus == NONE) {
468					slotfreespace += size;
469					if (slotoffset == -1)
470						slotoffset = dp->i_offset;
471					if (slotfreespace >= slotneeded) {
472						slotstatus = COMPACT;
473						slotsize = dp->i_offset +
474							  fs2h16(ep->e2d_reclen) - slotoffset;
475					}
476				}
477			}
478		}
479
480		/*
481		 * Check for a name match.
482		 */
483		if (ep->e2d_ino) {
484			namlen = ep->e2d_namlen;
485			if (namlen == cnp->cn_namelen &&
486				!memcmp(cnp->cn_nameptr, ep->e2d_name,
487				(unsigned)namlen)) {
488				/*
489				 * Save directory entry's inode number and
490				 * reclen in ndp->ni_ufs area, and release
491				 * directory buffer.
492				 */
493				dp->i_ino = fs2h32(ep->e2d_ino);
494				dp->i_reclen = fs2h16(ep->e2d_reclen);
495				brelse(bp);
496				goto found;
497			}
498		}
499		prevoff = dp->i_offset;
500		dp->i_offset += fs2h16(ep->e2d_reclen);
501		entryoffsetinblock += fs2h16(ep->e2d_reclen);
502		if (ep->e2d_ino)
503			enduseful = dp->i_offset;
504	}
505/* notfound: */
506	/*
507	 * If we started in the middle of the directory and failed
508	 * to find our target, we must check the beginning as well.
509	 */
510	if (numdirpasses == 2) {
511		numdirpasses--;
512		dp->i_offset = 0;
513		endsearch = dp->i_diroff;
514		goto searchloop;
515	}
516	if (bp != NULL)
517		brelse(bp);
518	/*
519	 * If creating, and at end of pathname and current
520	 * directory has not been removed, then can consider
521	 * allowing file to be created.
522	 */
523	if ((nameiop == CREATE || nameiop == RENAME) &&
524		(flags & ISLASTCN) && dp->i_e2fs_nlink != 0) {
525		/*
526		 * Creation of files on a read-only mounted file system
527		 * is pointless, so don't proceed any further.
528		 */
529		if (vdp->v_mount->mnt_flag & MNT_RDONLY)
530					return (EROFS);
531		/*
532		 * Access for write is interpreted as allowing
533		 * creation of files in the directory.
534		 */
535		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
536			return (error);
537		/*
538		 * Return an indication of where the new directory
539		 * entry should be put.  If we didn't find a slot,
540		 * then set dp->i_count to 0 indicating
541		 * that the new slot belongs at the end of the
542		 * directory. If we found a slot, then the new entry
543		 * can be put in the range from dp->i_offset to
544		 * dp->i_offset + dp->i_count.
545		 */
546		if (slotstatus == NONE) {
547			dp->i_offset = roundup(dp->i_e2fs_size, dirblksize);
548			dp->i_count = 0;
549			enduseful = dp->i_offset;
550		} else {
551			dp->i_offset = slotoffset;
552			dp->i_count = slotsize;
553			if (enduseful < slotoffset + slotsize)
554				enduseful = slotoffset + slotsize;
555		}
556		dp->i_endoff = roundup(enduseful, dirblksize);
557		dp->i_flag |= IN_CHANGE | IN_UPDATE;
558		/*
559		 * We return with the directory locked, so that
560		 * the parameters we set up above will still be
561		 * valid if we actually decide to do a direnter().
562		 * We return ni_vp == NULL to indicate that the entry
563		 * does not currently exist; we leave a pointer to
564		 * the (locked) directory inode in ndp->ni_dvp.
565		 * The pathname buffer is saved so that the name
566		 * can be obtained later.
567		 *
568		 * NB - if the directory is unlocked, then this
569		 * information cannot be used.
570		 */
571		cnp->cn_flags |= SAVENAME;
572		if (!lockparent)
573			VOP_UNLOCK(vdp, 0, p);
574		return (EJUSTRETURN);
575	}
576	/*
577	 * Insert name into cache (as non-existent) if appropriate.
578	 */
579	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
580		cache_enter(vdp, *vpp, cnp);
581	return (ENOENT);
582
583found:
584	/*
585	 * Check that directory length properly reflects presence
586	 * of this entry.
587	 */
588	if (entryoffsetinblock + EXT2FS_DIRSIZ(ep->e2d_namlen)
589	    > dp->i_e2fs_size) {
590		ufs_dirbad(dp, dp->i_offset, "i_size too small");
591		dp->i_e2fs_size = entryoffsetinblock +
592			EXT2FS_DIRSIZ(ep->e2d_namlen);
593		dp->i_flag |= IN_CHANGE | IN_UPDATE;
594	}
595
596	/*
597	 * Found component in pathname.
598	 * If the final component of path name, save information
599	 * in the cache as to where the entry was found.
600	 */
601	if ((flags & ISLASTCN) && nameiop == LOOKUP)
602		dp->i_diroff = dp->i_offset &~ (dirblksize - 1);
603
604	/*
605	 * If deleting, and at end of pathname, return
606	 * parameters which can be used to remove file.
607	 * If the wantparent flag isn't set, we return only
608	 * the directory (in ndp->ni_dvp), otherwise we go
609	 * on and lock the inode, being careful with ".".
610	 */
611	if (nameiop == DELETE && (flags & ISLASTCN)) {
612		/*
613		 * Write access to directory required to delete files.
614		 */
615		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
616			return (error);
617		/*
618		 * Return pointer to current entry in dp->i_offset,
619		 * and distance past previous entry (if there
620		 * is a previous entry in this block) in dp->i_count.
621		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
622		 */
623		if ((dp->i_offset & (dirblksize - 1)) == 0)
624			dp->i_count = 0;
625		else
626			dp->i_count = dp->i_offset - prevoff;
627		if (dp->i_number == dp->i_ino) {
628			VREF(vdp);
629			*vpp = vdp;
630			return (0);
631		}
632		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
633			return (error);
634		/*
635		 * If directory is "sticky", then user must own
636		 * the directory, or the file in it, else she
637		 * may not delete it (unless she's root). This
638		 * implements append-only directories.
639		 */
640		if ((dp->i_e2fs_mode & ISVTX) &&
641			cred->cr_uid != 0 &&
642			cred->cr_uid != dp->i_e2fs_uid &&
643			VTOI(tdp)->i_e2fs_uid != cred->cr_uid) {
644			vput(tdp);
645			return (EPERM);
646		}
647		*vpp = tdp;
648		if (!lockparent)
649			VOP_UNLOCK(vdp, 0, p);
650		return (0);
651	}
652
653	/*
654	 * If rewriting (RENAME), return the inode and the
655	 * information required to rewrite the present directory
656	 * Must get inode of directory entry to verify it's a
657	 * regular file, or empty directory.
658	 */
659	if (nameiop == RENAME && wantparent &&
660		(flags & ISLASTCN)) {
661		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
662			return (error);
663		/*
664		 * Careful about locking second inode.
665		 * This can only occur if the target is ".".
666		 */
667		if (dp->i_number == dp->i_ino)
668			return (EISDIR);
669		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
670			return (error);
671		*vpp = tdp;
672		cnp->cn_flags |= SAVENAME;
673		if (!lockparent)
674			VOP_UNLOCK(vdp, 0, p);
675		return (0);
676	}
677
678	/*
679	 * Step through the translation in the name.  We do not `vput' the
680	 * directory because we may need it again if a symbolic link
681	 * is relative to the current directory.  Instead we save it
682	 * unlocked as "pdp".  We must get the target inode before unlocking
683	 * the directory to insure that the inode will not be removed
684	 * before we get it.  We prevent deadlock by always fetching
685	 * inodes from the root, moving down the directory tree. Thus
686	 * when following backward pointers ".." we must unlock the
687	 * parent directory before getting the requested directory.
688	 * There is a potential race condition here if both the current
689	 * and parent directories are removed before the VFS_VGET for the
690	 * inode associated with ".." returns.  We hope that this occurs
691	 * infrequently since we cannot avoid this race condition without
692	 * implementing a sophisticated deadlock detection algorithm.
693	 * Note also that this simple deadlock detection scheme will not
694	 * work if the file system has any hard links other than ".."
695	 * that point backwards in the directory structure.
696	 */
697	pdp = vdp;
698	if (flags & ISDOTDOT) {
699		VOP_UNLOCK(pdp, 0, p);	/* race to get the inode */
700		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) {
701			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
702			return (error);
703		}
704		if (lockparent && (flags & ISLASTCN) &&
705			(error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0) {
706			vput(tdp);
707			return (error);
708		}
709		*vpp = tdp;
710	} else if (dp->i_number == dp->i_ino) {
711		VREF(vdp);	/* we want ourself, ie "." */
712		*vpp = vdp;
713	} else {
714		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
715			return (error);
716		if (!lockparent || !(flags & ISLASTCN))
717			VOP_UNLOCK(pdp, 0, p);
718		*vpp = tdp;
719	}
720
721	/*
722	 * Insert name into cache if appropriate.
723	 */
724	if (cnp->cn_flags & MAKEENTRY)
725		cache_enter(vdp, *vpp, cnp);
726	return (0);
727}
728
729/*
730 * Do consistency checking on a directory entry:
731 *	record length must be multiple of 4
732 *	entry must fit in rest of its dirblksize block
733 *	record must be large enough to contain entry
734 *	name is not longer than MAXNAMLEN
735 *	name must be as long as advertised, and null terminated
736 */
737/*
738 *	changed so that it confirms to ext2fs_check_dir_entry
739 */
740static int
741ext2fs_dirbadentry(dp, de, entryoffsetinblock)
742	struct vnode *dp;
743	struct ext2fs_direct *de;
744	int entryoffsetinblock;
745{
746	int	dirblksize = VTOI(dp)->i_e2fs->e2fs_bsize;
747
748		char * error_msg = NULL;
749		int reclen = fs2h16(de->e2d_reclen);
750		int namlen = de->e2d_namlen;
751
752		if (reclen < EXT2FS_DIRSIZ(1)) /* e2d_namlen = 1 */
753				error_msg = "rec_len is smaller than minimal";
754		else if (reclen % 4 != 0)
755				error_msg = "rec_len % 4 != 0";
756		else if (reclen < EXT2FS_DIRSIZ(namlen))
757				error_msg = "reclen is too small for name_len";
758		else if (entryoffsetinblock + reclen > dirblksize)
759				error_msg = "directory entry across blocks";
760		else if (fs2h32(de->e2d_ino) >
761		    VTOI(dp)->i_e2fs->e2fs.e2fs_icount)
762				error_msg = "inode out of bounds";
763
764		if (error_msg != NULL) {
765			printf( "bad directory entry: %s\n"
766			    "offset=%d, inode=%lu, rec_len=%d, name_len=%d \n",
767			    error_msg, entryoffsetinblock,
768			    (unsigned long) fs2h32(de->e2d_ino),
769			    reclen, namlen);
770			panic("ext2fs_dirbadentry");
771		}
772		return error_msg == NULL ? 0 : 1;
773}
774
775/*
776 * Write a directory entry after a call to namei, using the parameters
777 * that it left in nameidata.  The argument ip is the inode which the new
778 * directory entry will refer to.  Dvp is a pointer to the directory to
779 * be written, which was left locked by namei. Remaining parameters
780 * (dp->i_offset, dp->i_count) indicate how the space for the new
781 * entry is to be obtained.
782 */
783int
784ext2fs_direnter(ip, dvp, cnp)
785	struct inode *ip;
786	struct vnode *dvp;
787	struct componentname *cnp;
788{
789	struct ext2fs_direct *ep, *nep;
790	struct inode *dp;
791	struct buf *bp;
792	struct ext2fs_direct newdir;
793	struct iovec aiov;
794	struct uio auio;
795	u_int dsize;
796	int error, loc, newentrysize, spacefree;
797	char *dirbuf;
798	int dirblksize = ip->i_e2fs->e2fs_bsize;
799
800
801#ifdef DIAGNOSTIC
802	if ((cnp->cn_flags & SAVENAME) == 0)
803		panic("direnter: missing name");
804#endif
805	dp = VTOI(dvp);
806	newdir.e2d_ino = h2fs32(ip->i_number);
807	newdir.e2d_namlen = cnp->cn_namelen;
808	if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 &&
809	    (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) {
810		newdir.e2d_type = inot2ext2dt(IFTODT(ip->i_ffs_mode));
811	} else {
812		newdir.e2d_type = 0;
813	};
814	memcpy(newdir.e2d_name, cnp->cn_nameptr, (unsigned)cnp->cn_namelen + 1);
815	newentrysize = EXT2FS_DIRSIZ(cnp->cn_namelen);
816	if (dp->i_count == 0) {
817		/*
818		 * If dp->i_count is 0, then namei could find no
819		 * space in the directory. Here, dp->i_offset will
820		 * be on a directory block boundary and we will write the
821		 * new entry into a fresh block.
822		 */
823		if (dp->i_offset & (dirblksize - 1))
824			panic("ext2fs_direnter: newblk");
825		auio.uio_offset = dp->i_offset;
826		newdir.e2d_reclen = h2fs16(dirblksize);
827		auio.uio_resid = newentrysize;
828		aiov.iov_len = newentrysize;
829		aiov.iov_base = (caddr_t)&newdir;
830		auio.uio_iov = &aiov;
831		auio.uio_iovcnt = 1;
832		auio.uio_rw = UIO_WRITE;
833		auio.uio_segflg = UIO_SYSSPACE;
834		auio.uio_procp = (struct proc *)0;
835		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
836		if (dirblksize >
837			VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
838			/* XXX should grow with balloc() */
839			panic("ext2fs_direnter: frag size");
840		else if (!error) {
841			dp->i_e2fs_size = roundup(dp->i_e2fs_size, dirblksize);
842			dp->i_flag |= IN_CHANGE;
843		}
844		return (error);
845	}
846
847	/*
848	 * If dp->i_count is non-zero, then namei found space
849	 * for the new entry in the range dp->i_offset to
850	 * dp->i_offset + dp->i_count in the directory.
851	 * To use this space, we may have to compact the entries located
852	 * there, by copying them together towards the beginning of the
853	 * block, leaving the free space in one usable chunk at the end.
854	 */
855
856	/*
857	 * Get the block containing the space for the new directory entry.
858	 */
859	if ((error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, &dirbuf, &bp))
860	    != 0)
861		return (error);
862	/*
863	 * Find space for the new entry. In the simple case, the entry at
864	 * offset base will have the space. If it does not, then namei
865	 * arranged that compacting the region dp->i_offset to
866	 * dp->i_offset + dp->i_count would yield the
867	 * space.
868	 */
869	ep = (struct ext2fs_direct *)dirbuf;
870	dsize = EXT2FS_DIRSIZ(ep->e2d_namlen);
871	spacefree = fs2h16(ep->e2d_reclen) - dsize;
872	for (loc = fs2h16(ep->e2d_reclen); loc < dp->i_count; ) {
873		nep = (struct ext2fs_direct *)(dirbuf + loc);
874		if (ep->e2d_ino) {
875			/* trim the existing slot */
876			ep->e2d_reclen = h2fs16(dsize);
877			ep = (struct ext2fs_direct *)((char *)ep + dsize);
878		} else {
879			/* overwrite; nothing there; header is ours */
880			spacefree += dsize;
881		}
882		dsize = EXT2FS_DIRSIZ(nep->e2d_namlen);
883		spacefree += fs2h16(nep->e2d_reclen) - dsize;
884		loc += fs2h16(nep->e2d_reclen);
885		memcpy((caddr_t)ep, (caddr_t)nep, dsize);
886	}
887	/*
888	 * Update the pointer fields in the previous entry (if any),
889	 * copy in the new entry, and write out the block.
890	 */
891	if (ep->e2d_ino == 0) {
892#ifdef DIAGNOSTIC
893		if (spacefree + dsize < newentrysize)
894			panic("ext2fs_direnter: compact1");
895#endif
896		newdir.e2d_reclen = h2fs16(spacefree + dsize);
897	} else {
898#ifdef DIAGNOSTIC
899		if (spacefree < newentrysize) {
900			printf("ext2fs_direnter: compact2 %u %u",
901			    (u_int)spacefree, (u_int)newentrysize);
902			panic("ext2fs_direnter: compact2");
903		}
904#endif
905		newdir.e2d_reclen = h2fs16(spacefree);
906		ep->e2d_reclen = h2fs16(dsize);
907		ep = (struct ext2fs_direct *)((char *)ep + dsize);
908	}
909	memcpy((caddr_t)ep, (caddr_t)&newdir, (u_int)newentrysize);
910	error = VOP_BWRITE(bp);
911	dp->i_flag |= IN_CHANGE | IN_UPDATE;
912	if (!error && dp->i_endoff && dp->i_endoff < dp->i_e2fs_size)
913		error = ext2fs_truncate(dp, (off_t)dp->i_endoff, IO_SYNC,
914		    cnp->cn_cred);
915	return (error);
916}
917
918/*
919 * Remove a directory entry after a call to namei, using
920 * the parameters which it left in nameidata. The entry
921 * dp->i_offset contains the offset into the directory of the
922 * entry to be eliminated.  The dp->i_count field contains the
923 * size of the previous record in the directory.  If this
924 * is 0, the first entry is being deleted, so we need only
925 * zero the inode number to mark the entry as free.  If the
926 * entry is not the first in the directory, we must reclaim
927 * the space of the now empty record by adding the record size
928 * to the size of the previous entry.
929 */
930int
931ext2fs_dirremove(dvp, cnp)
932	struct vnode *dvp;
933	struct componentname *cnp;
934{
935	struct inode *dp;
936	struct ext2fs_direct *ep;
937	struct buf *bp;
938	int error;
939
940	dp = VTOI(dvp);
941	if (dp->i_count == 0) {
942		/*
943		 * First entry in block: set d_ino to zero.
944		 */
945		error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, (char **)&ep,
946		    &bp);
947		if (error != 0)
948			return (error);
949		ep->e2d_ino = 0;
950		error = VOP_BWRITE(bp);
951		dp->i_flag |= IN_CHANGE | IN_UPDATE;
952		return (error);
953	}
954	/*
955	 * Collapse new free space into previous entry.
956	 */
957	error = ext2fs_bufatoff(dp, (off_t)(dp->i_offset - dp->i_count),
958	    (char **)&ep, &bp);
959	if (error != 0)
960		return (error);
961	ep->e2d_reclen = h2fs16(fs2h16(ep->e2d_reclen) + dp->i_reclen);
962	error = VOP_BWRITE(bp);
963	dp->i_flag |= IN_CHANGE | IN_UPDATE;
964	return (error);
965}
966
967/*
968 * Rewrite an existing directory entry to point at the inode
969 * supplied.  The parameters describing the directory entry are
970 * set up by a call to namei.
971 */
972int
973ext2fs_dirrewrite(dp, ip, cnp)
974	struct inode *dp, *ip;
975	struct componentname *cnp;
976{
977	struct buf *bp;
978	struct ext2fs_direct *ep;
979	int error;
980
981	error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, (char **)&ep, &bp);
982	if (error != 0)
983		return (error);
984	ep->e2d_ino = h2fs32(ip->i_number);
985	if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 &&
986	    (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) {
987		ep->e2d_type = inot2ext2dt(IFTODT(ip->i_ffs_mode));
988	} else {
989		ep->e2d_type = 0;
990	}
991	error = VOP_BWRITE(bp);
992	dp->i_flag |= IN_CHANGE | IN_UPDATE;
993	return (error);
994}
995
996/*
997 * Check if a directory is empty or not.
998 * Inode supplied must be locked.
999 *
1000 * Using a struct dirtemplate here is not precisely
1001 * what we want, but better than using a struct ext2fs_direct.
1002 *
1003 * NB: does not handle corrupted directories.
1004 */
1005int
1006ext2fs_dirempty(ip, parentino, cred)
1007	struct inode *ip;
1008	ino_t parentino;
1009	struct ucred *cred;
1010{
1011	off_t off;
1012	struct ext2fs_dirtemplate dbuf;
1013	struct ext2fs_direct *dp = (struct ext2fs_direct *)&dbuf;
1014	int error, namlen;
1015	size_t count;
1016
1017#define	MINDIRSIZ (sizeof (struct ext2fs_dirtemplate) / 2)
1018
1019	for (off = 0; off < ip->i_e2fs_size; off += fs2h16(dp->e2d_reclen)) {
1020		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
1021		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0);
1022		/*
1023		 * Since we read MINDIRSIZ, residual must
1024		 * be 0 unless we're at end of file.
1025		 */
1026		if (error || count != 0)
1027			return (0);
1028		/* avoid infinite loops */
1029		if (dp->e2d_reclen == 0)
1030			return (0);
1031		/* skip empty entries */
1032		if (dp->e2d_ino == 0)
1033			continue;
1034		/* accept only "." and ".." */
1035		namlen = dp->e2d_namlen;
1036		if (namlen > 2)
1037			return (0);
1038		if (dp->e2d_name[0] != '.')
1039			return (0);
1040		/*
1041		 * At this point namlen must be 1 or 2.
1042		 * 1 implies ".", 2 implies ".." if second
1043		 * char is also "."
1044		 */
1045		if (namlen == 1)
1046			continue;
1047		if (dp->e2d_name[1] == '.' && fs2h32(dp->e2d_ino) == parentino)
1048			continue;
1049		return (0);
1050	}
1051	return (1);
1052}
1053
1054/*
1055 * Check if source directory is in the path of the target directory.
1056 * Target is supplied locked, source is unlocked.
1057 * The target is always vput before returning.
1058 */
1059int
1060ext2fs_checkpath(source, target, cred)
1061	struct inode *source, *target;
1062	struct ucred *cred;
1063{
1064	struct vnode *vp;
1065	int error, rootino, namlen;
1066	struct ext2fs_dirtemplate dirbuf;
1067	u_int32_t ino;
1068
1069	vp = ITOV(target);
1070	if (target->i_number == source->i_number) {
1071		error = EEXIST;
1072		goto out;
1073	}
1074	rootino = ROOTINO;
1075	error = 0;
1076	if (target->i_number == rootino)
1077		goto out;
1078
1079	for (;;) {
1080		if (vp->v_type != VDIR) {
1081			error = ENOTDIR;
1082			break;
1083		}
1084		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1085			sizeof (struct ext2fs_dirtemplate), (off_t)0,
1086			UIO_SYSSPACE, IO_NODELOCKED, cred, (size_t *)0,
1087			(struct proc *)0);
1088		if (error != 0)
1089			break;
1090		namlen = dirbuf.dotdot_namlen;
1091		if (namlen != 2 ||
1092			dirbuf.dotdot_name[0] != '.' ||
1093			dirbuf.dotdot_name[1] != '.') {
1094			error = ENOTDIR;
1095			break;
1096		}
1097		ino = fs2h32(dirbuf.dotdot_ino);
1098		if (ino == source->i_number) {
1099			error = EINVAL;
1100			break;
1101		}
1102		if (ino == rootino)
1103			break;
1104		vput(vp);
1105		error = VFS_VGET(vp->v_mount, ino, &vp);
1106		if (error != 0) {
1107			vp = NULL;
1108			break;
1109		}
1110	}
1111
1112out:
1113	if (error == ENOTDIR) {
1114		printf("checkpath: .. not a directory\n");
1115		panic("checkpath");
1116	}
1117	if (vp != NULL)
1118		vput(vp);
1119	return (error);
1120}
1121