ext2_lookup.c revision 96752
1/*
2 *  modified for Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1993
9 *	The Regents of the University of California.  All rights reserved.
10 * (c) UNIX System Laboratories, Inc.
11 * All or some portions of this file are derived from material licensed
12 * to the University of California by American Telephone and Telegraph
13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
14 * the permission of UNIX System Laboratories, Inc.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 *    notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 *    notice, this list of conditions and the following disclaimer in the
23 *    documentation and/or other materials provided with the distribution.
24 * 3. All advertising materials mentioning features or use of this software
25 *    must display the following acknowledgement:
26 *	This product includes software developed by the University of
27 *	California, Berkeley and its contributors.
28 * 4. Neither the name of the University nor the names of its contributors
29 *    may be used to endorse or promote products derived from this software
30 *    without specific prior written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * SUCH DAMAGE.
43 *
44 *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
45 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_lookup.c 96752 2002-05-16 19:43:28Z iedowse $
46 */
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/namei.h>
51#include <sys/bio.h>
52#include <sys/buf.h>
53#include <sys/mount.h>
54#include <sys/vnode.h>
55#include <sys/malloc.h>
56#include <sys/dirent.h>
57
58#include <ufs/ufs/dir.h>
59
60#include <gnu/ext2fs/inode.h>
61#include <gnu/ext2fs/ext2_mount.h>
62#include <gnu/ext2fs/ext2_extern.h>
63#include <gnu/ext2fs/ext2_fs.h>
64#include <gnu/ext2fs/ext2_fs_sb.h>
65
66/*
67   DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512)
68   while it is the native blocksize in ext2fs - thus, a #define
69   is no longer appropriate
70*/
71#undef  DIRBLKSIZ
72
73extern	int dirchk;
74
75static u_char ext2_ft_to_dt[] = {
76	DT_UNKNOWN,		/* EXT2_FT_UNKNOWN */
77	DT_REG,			/* EXT2_FT_REG_FILE */
78	DT_DIR,			/* EXT2_FT_DIR */
79	DT_CHR,			/* EXT2_FT_CHRDEV */
80	DT_BLK,			/* EXT2_FT_BLKDEV */
81	DT_FIFO,		/* EXT2_FT_FIFO */
82	DT_SOCK,		/* EXT2_FT_SOCK */
83	DT_LNK,			/* EXT2_FT_SYMLINK */
84};
85#define	FTTODT(ft)						\
86    ((ft) > sizeof(ext2_ft_to_dt) / sizeof(ext2_ft_to_dt[0]) ?	\
87    DT_UNKNOWN : ext2_ft_to_dt[(ft)])
88
89static u_char dt_to_ext2_ft[] = {
90	EXT2_FT_UNKNOWN,	/* DT_UNKNOWN */
91	EXT2_FT_FIFO,		/* DT_FIFO */
92	EXT2_FT_CHRDEV,		/* DT_CHR */
93	EXT2_FT_UNKNOWN,	/* unused */
94	EXT2_FT_DIR,		/* DT_DIR */
95	EXT2_FT_UNKNOWN,	/* unused */
96	EXT2_FT_BLKDEV,		/* DT_BLK */
97	EXT2_FT_UNKNOWN,	/* unused */
98	EXT2_FT_REG_FILE,	/* DT_REG */
99	EXT2_FT_UNKNOWN,	/* unused */
100	EXT2_FT_SYMLINK,	/* DT_LNK */
101	EXT2_FT_UNKNOWN,	/* unused */
102	EXT2_FT_SOCK,		/* DT_SOCK */
103	EXT2_FT_UNKNOWN,	/* unused */
104	EXT2_FT_UNKNOWN,	/* DT_WHT */
105};
106#define	DTTOFT(dt)						\
107    ((dt) > sizeof(dt_to_ext2_ft) / sizeof(dt_to_ext2_ft[0]) ?	\
108    EXT2_FT_UNKNOWN : dt_to_ext2_ft[(dt)])
109
110static int	ext2_dirbadentry(struct vnode *dp, struct ext2_dir_entry_2 *de,
111		    int entryoffsetinblock);
112
113/*
114 * Vnode op for reading directories.
115 *
116 * The routine below assumes that the on-disk format of a directory
117 * is the same as that defined by <sys/dirent.h>. If the on-disk
118 * format changes, then it will be necessary to do a conversion
119 * from the on-disk format that read returns to the format defined
120 * by <sys/dirent.h>.
121 */
122/*
123 * this is exactly what we do here - the problem is that the conversion
124 * will blow up some entries by four bytes, so it can't be done in place.
125 * This is too bad. Right now the conversion is done entry by entry, the
126 * converted entry is sent via uiomove.
127 *
128 * XXX allocate a buffer, convert as many entries as possible, then send
129 * the whole buffer to uiomove
130 */
131int
132ext2_readdir(ap)
133        struct vop_readdir_args /* {
134                struct vnode *a_vp;
135                struct uio *a_uio;
136                struct ucred *a_cred;
137        } */ *ap;
138{
139        struct uio *uio = ap->a_uio;
140        int count, error;
141
142	struct ext2_dir_entry_2 *edp, *dp;
143	int ncookies;
144	struct dirent dstdp;
145	struct uio auio;
146	struct iovec aiov;
147	caddr_t dirbuf;
148	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize;
149	int readcnt;
150	off_t startoffset = uio->uio_offset;
151
152	count = uio->uio_resid;
153	/*
154	 * Avoid complications for partial directory entries by adjusting
155	 * the i/o to end at a block boundary.  Don't give up (like ufs
156	 * does) if the initial adjustment gives a negative count, since
157	 * many callers don't supply a large enough buffer.  The correct
158	 * size is a little larger than DIRBLKSIZ to allow for expansion
159	 * of directory entries, but some callers just use 512.
160	 */
161	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
162	if (count <= 0)
163		count += DIRBLKSIZ;
164
165#ifdef EXT2FS_DEBUG
166	printf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n",
167	    uio->uio_offset, uio->uio_resid, count);
168#endif
169
170	auio = *uio;
171	auio.uio_iov = &aiov;
172	auio.uio_iovcnt = 1;
173	auio.uio_resid = count;
174	auio.uio_segflg = UIO_SYSSPACE;
175	aiov.iov_len = count;
176	MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
177	aiov.iov_base = dirbuf;
178	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
179	if (error == 0) {
180		readcnt = count - auio.uio_resid;
181		edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt];
182		ncookies = 0;
183		bzero(&dstdp, offsetof(struct dirent, d_name));
184		for (dp = (struct ext2_dir_entry_2 *)dirbuf;
185		    !error && uio->uio_resid > 0 && dp < edp; ) {
186			/*-
187			 * "New" ext2fs directory entries differ in 3 ways
188			 * from ufs on-disk ones:
189			 * - the name is not necessarily NUL-terminated.
190			 * - the file type field always exists and always
191			 * follows the name length field.
192			 * - the file type is encoded in a different way.
193			 *
194			 * "Old" ext2fs directory entries need no special
195			 * conversions, since they binary compatible with
196			 * "new" entries having a file type of 0 (i.e.,
197			 * EXT2_FT_UNKNOWN).  Splitting the old name length
198			 * field didn't make a mess like it did in ufs,
199			 * because ext2fs uses a machine-dependent disk
200			 * layout.
201			 */
202			dstdp.d_fileno = dp->inode;
203			dstdp.d_type = FTTODT(dp->file_type);
204			dstdp.d_namlen = dp->name_len;
205			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
206			bcopy(dp->name, dstdp.d_name, dstdp.d_namlen);
207			bzero(dstdp.d_name + dstdp.d_namlen,
208			    dstdp.d_reclen - offsetof(struct dirent, d_name) -
209			    dstdp.d_namlen);
210
211			if (dp->rec_len > 0) {
212				if(dstdp.d_reclen <= uio->uio_resid) {
213					/* advance dp */
214					dp = (struct ext2_dir_entry_2 *)
215					    ((char *)dp + dp->rec_len);
216					error =
217					  uiomove((caddr_t)&dstdp,
218						  dstdp.d_reclen, uio);
219					if (!error)
220						ncookies++;
221				} else
222					break;
223			} else {
224				error = EIO;
225				break;
226			}
227		}
228		/* we need to correct uio_offset */
229		uio->uio_offset = startoffset + (caddr_t)dp - dirbuf;
230
231		if (!error && ap->a_ncookies != NULL) {
232			u_long *cookiep, *cookies, *ecookies;
233			off_t off;
234
235			if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
236				panic("ext2fs_readdir: unexpected uio from NFS server");
237			MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
238			       M_WAITOK);
239			off = startoffset;
240			for (dp = (struct ext2_dir_entry_2 *)dirbuf,
241			     cookiep = cookies, ecookies = cookies + ncookies;
242			     cookiep < ecookies;
243			     dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) {
244				off += dp->rec_len;
245				*cookiep++ = (u_long) off;
246			}
247			*ap->a_ncookies = ncookies;
248			*ap->a_cookies = cookies;
249		}
250	}
251	FREE(dirbuf, M_TEMP);
252	if (ap->a_eofflag)
253		*ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
254        return (error);
255}
256
257/*
258 * Convert a component of a pathname into a pointer to a locked inode.
259 * This is a very central and rather complicated routine.
260 * If the file system is not maintained in a strict tree hierarchy,
261 * this can result in a deadlock situation (see comments in code below).
262 *
263 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
264 * on whether the name is to be looked up, created, renamed, or deleted.
265 * When CREATE, RENAME, or DELETE is specified, information usable in
266 * creating, renaming, or deleting a directory entry may be calculated.
267 * If flag has LOCKPARENT or'ed into it and the target of the pathname
268 * exists, lookup returns both the target and its parent directory locked.
269 * When creating or renaming and LOCKPARENT is specified, the target may
270 * not be ".".  When deleting and LOCKPARENT is specified, the target may
271 * be "."., but the caller must check to ensure it does an vrele and vput
272 * instead of two vputs.
273 *
274 * Overall outline of ufs_lookup:
275 *
276 *	search for name in directory, to found or notfound
277 * notfound:
278 *	if creating, return locked directory, leaving info on available slots
279 *	else return error
280 * found:
281 *	if at end of path and deleting, return information to allow delete
282 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
283 *	  inode and return info to allow rewrite
284 *	if not at end, add name to cache; if at end and neither creating
285 *	  nor deleting, add name to cache
286 */
287int
288ext2_lookup(ap)
289	struct vop_cachedlookup_args /* {
290		struct vnode *a_dvp;
291		struct vnode **a_vpp;
292		struct componentname *a_cnp;
293	} */ *ap;
294{
295	struct vnode *vdp;		/* vnode for directory being searched */
296	struct inode *dp;		/* inode for directory being searched */
297	struct buf *bp;			/* a buffer of directory entries */
298	struct ext2_dir_entry_2 *ep;	/* the current directory entry */
299	int entryoffsetinblock;		/* offset of ep in bp's buffer */
300	enum {NONE, COMPACT, FOUND} slotstatus;
301	doff_t slotoffset;		/* offset of area with free space */
302	int slotsize;			/* size of area at slotoffset */
303	int slotfreespace;		/* amount of space free in slot */
304	int slotneeded;			/* size of the entry we're seeking */
305	int numdirpasses;		/* strategy for directory search */
306	doff_t endsearch;		/* offset to end directory search */
307	doff_t prevoff;			/* prev entry dp->i_offset */
308	struct vnode *pdp;		/* saved dp during symlink work */
309	struct vnode *tdp;		/* returned by VFS_VGET */
310	doff_t enduseful;		/* pointer past last used dir slot */
311	u_long bmask;			/* block offset mask */
312	int lockparent;			/* 1 => lockparent flag is set */
313	int wantparent;			/* 1 => wantparent or lockparent flag */
314	int namlen, error;
315	struct vnode **vpp = ap->a_vpp;
316	struct componentname *cnp = ap->a_cnp;
317	struct ucred *cred = cnp->cn_cred;
318	int flags = cnp->cn_flags;
319	int nameiop = cnp->cn_nameiop;
320	struct thread *td = cnp->cn_thread;
321
322	int	DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize;
323
324	bp = NULL;
325	slotoffset = -1;
326	*vpp = NULL;
327	vdp = ap->a_dvp;
328	dp = VTOI(vdp);
329	lockparent = flags & LOCKPARENT;
330	wantparent = flags & (LOCKPARENT|WANTPARENT);
331
332	/*
333	 * We now have a segment name to search for, and a directory to search.
334	 */
335
336	/*
337	 * Suppress search for slots unless creating
338	 * file and at end of pathname, in which case
339	 * we watch for a place to put the new file in
340	 * case it doesn't already exist.
341	 */
342	slotstatus = FOUND;
343	slotfreespace = slotsize = slotneeded = 0;
344	if ((nameiop == CREATE || nameiop == RENAME) &&
345	    (flags & ISLASTCN)) {
346		slotstatus = NONE;
347		slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
348		/* was
349		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
350			cnp->cn_namelen + 3) &~ 3; */
351	}
352
353	/*
354	 * If there is cached information on a previous search of
355	 * this directory, pick up where we last left off.
356	 * We cache only lookups as these are the most common
357	 * and have the greatest payoff. Caching CREATE has little
358	 * benefit as it usually must search the entire directory
359	 * to determine that the entry does not exist. Caching the
360	 * location of the last DELETE or RENAME has not reduced
361	 * profiling time and hence has been removed in the interest
362	 * of simplicity.
363	 */
364	bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
365	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
366	    dp->i_diroff > dp->i_size) {
367		entryoffsetinblock = 0;
368		dp->i_offset = 0;
369		numdirpasses = 1;
370	} else {
371		dp->i_offset = dp->i_diroff;
372		if ((entryoffsetinblock = dp->i_offset & bmask) &&
373		    (error = ext2_blkatoff(vdp, (off_t)dp->i_offset, NULL,
374		    &bp)))
375			return (error);
376		numdirpasses = 2;
377		nchstats.ncs_2passes++;
378	}
379	prevoff = dp->i_offset;
380	endsearch = roundup(dp->i_size, DIRBLKSIZ);
381	enduseful = 0;
382
383searchloop:
384	while (dp->i_offset < endsearch) {
385		/*
386		 * If necessary, get the next directory block.
387		 */
388		if ((dp->i_offset & bmask) == 0) {
389			if (bp != NULL)
390				brelse(bp);
391			if ((error =
392			    ext2_blkatoff(vdp, (off_t)dp->i_offset, NULL,
393			    &bp)) != 0)
394				return (error);
395			entryoffsetinblock = 0;
396		}
397		/*
398		 * If still looking for a slot, and at a DIRBLKSIZE
399		 * boundary, have to start looking for free space again.
400		 */
401		if (slotstatus == NONE &&
402		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
403			slotoffset = -1;
404			slotfreespace = 0;
405		}
406		/*
407		 * Get pointer to next entry.
408		 * Full validation checks are slow, so we only check
409		 * enough to insure forward progress through the
410		 * directory. Complete checks can be run by patching
411		 * "dirchk" to be true.
412		 */
413		ep = (struct ext2_dir_entry_2 *)
414			((char *)bp->b_data + entryoffsetinblock);
415		if (ep->rec_len == 0 ||
416		    (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
417			int i;
418			ext2_dirbad(dp, dp->i_offset, "mangled entry");
419			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
420			dp->i_offset += i;
421			entryoffsetinblock += i;
422			continue;
423		}
424
425		/*
426		 * If an appropriate sized slot has not yet been found,
427		 * check to see if one is available. Also accumulate space
428		 * in the current block so that we can determine if
429		 * compaction is viable.
430		 */
431		if (slotstatus != FOUND) {
432			int size = ep->rec_len;
433
434			if (ep->inode != 0)
435				size -= EXT2_DIR_REC_LEN(ep->name_len);
436			if (size > 0) {
437				if (size >= slotneeded) {
438					slotstatus = FOUND;
439					slotoffset = dp->i_offset;
440					slotsize = ep->rec_len;
441				} else if (slotstatus == NONE) {
442					slotfreespace += size;
443					if (slotoffset == -1)
444						slotoffset = dp->i_offset;
445					if (slotfreespace >= slotneeded) {
446						slotstatus = COMPACT;
447						slotsize = dp->i_offset +
448						      ep->rec_len - slotoffset;
449					}
450				}
451			}
452		}
453
454		/*
455		 * Check for a name match.
456		 */
457		if (ep->inode) {
458			namlen = ep->name_len;
459			if (namlen == cnp->cn_namelen &&
460			    !bcmp(cnp->cn_nameptr, ep->name,
461				(unsigned)namlen)) {
462				/*
463				 * Save directory entry's inode number and
464				 * reclen in ndp->ni_ufs area, and release
465				 * directory buffer.
466				 */
467				dp->i_ino = ep->inode;
468				dp->i_reclen = ep->rec_len;
469				brelse(bp);
470				goto found;
471			}
472		}
473		prevoff = dp->i_offset;
474		dp->i_offset += ep->rec_len;
475		entryoffsetinblock += ep->rec_len;
476		if (ep->inode)
477			enduseful = dp->i_offset;
478	}
479/* notfound: */
480	/*
481	 * If we started in the middle of the directory and failed
482	 * to find our target, we must check the beginning as well.
483	 */
484	if (numdirpasses == 2) {
485		numdirpasses--;
486		dp->i_offset = 0;
487		endsearch = dp->i_diroff;
488		goto searchloop;
489	}
490	if (bp != NULL)
491		brelse(bp);
492	/*
493	 * If creating, and at end of pathname and current
494	 * directory has not been removed, then can consider
495	 * allowing file to be created.
496	 */
497	if ((nameiop == CREATE || nameiop == RENAME) &&
498	    (flags & ISLASTCN) && dp->i_nlink != 0) {
499		/*
500		 * Access for write is interpreted as allowing
501		 * creation of files in the directory.
502		 */
503		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
504			return (error);
505		/*
506		 * Return an indication of where the new directory
507		 * entry should be put.  If we didn't find a slot,
508		 * then set dp->i_count to 0 indicating
509		 * that the new slot belongs at the end of the
510		 * directory. If we found a slot, then the new entry
511		 * can be put in the range from dp->i_offset to
512		 * dp->i_offset + dp->i_count.
513		 */
514		if (slotstatus == NONE) {
515			dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
516			dp->i_count = 0;
517			enduseful = dp->i_offset;
518		} else {
519			dp->i_offset = slotoffset;
520			dp->i_count = slotsize;
521			if (enduseful < slotoffset + slotsize)
522				enduseful = slotoffset + slotsize;
523		}
524		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
525		dp->i_flag |= IN_CHANGE | IN_UPDATE;
526		/*
527		 * We return with the directory locked, so that
528		 * the parameters we set up above will still be
529		 * valid if we actually decide to do a direnter().
530		 * We return ni_vp == NULL to indicate that the entry
531		 * does not currently exist; we leave a pointer to
532		 * the (locked) directory inode in ndp->ni_dvp.
533		 * The pathname buffer is saved so that the name
534		 * can be obtained later.
535		 *
536		 * NB - if the directory is unlocked, then this
537		 * information cannot be used.
538		 */
539		cnp->cn_flags |= SAVENAME;
540		if (!lockparent)
541			VOP_UNLOCK(vdp, 0, td);
542		return (EJUSTRETURN);
543	}
544	/*
545	 * Insert name into cache (as non-existent) if appropriate.
546	 */
547	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
548		cache_enter(vdp, *vpp, cnp);
549	return (ENOENT);
550
551found:
552	if (numdirpasses == 2)
553		nchstats.ncs_pass2++;
554	/*
555	 * Check that directory length properly reflects presence
556	 * of this entry.
557	 */
558	if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len)
559		> dp->i_size) {
560		ext2_dirbad(dp, dp->i_offset, "i_size too small");
561		dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len);
562		dp->i_flag |= IN_CHANGE | IN_UPDATE;
563	}
564
565	/*
566	 * Found component in pathname.
567	 * If the final component of path name, save information
568	 * in the cache as to where the entry was found.
569	 */
570	if ((flags & ISLASTCN) && nameiop == LOOKUP)
571		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
572
573	/*
574	 * If deleting, and at end of pathname, return
575	 * parameters which can be used to remove file.
576	 * If the wantparent flag isn't set, we return only
577	 * the directory (in ndp->ni_dvp), otherwise we go
578	 * on and lock the inode, being careful with ".".
579	 */
580	if (nameiop == DELETE && (flags & ISLASTCN)) {
581		/*
582		 * Write access to directory required to delete files.
583		 */
584		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
585			return (error);
586		/*
587		 * Return pointer to current entry in dp->i_offset,
588		 * and distance past previous entry (if there
589		 * is a previous entry in this block) in dp->i_count.
590		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
591		 */
592		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
593			dp->i_count = 0;
594		else
595			dp->i_count = dp->i_offset - prevoff;
596		if (dp->i_number == dp->i_ino) {
597			VREF(vdp);
598			*vpp = vdp;
599			return (0);
600		}
601		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
602		    &tdp)) != 0)
603			return (error);
604		/*
605		 * If directory is "sticky", then user must own
606		 * the directory, or the file in it, else she
607		 * may not delete it (unless she's root). This
608		 * implements append-only directories.
609		 */
610		if ((dp->i_mode & ISVTX) &&
611		    cred->cr_uid != 0 &&
612		    cred->cr_uid != dp->i_uid &&
613		    VTOI(tdp)->i_uid != cred->cr_uid) {
614			vput(tdp);
615			return (EPERM);
616		}
617		*vpp = tdp;
618		if (!lockparent)
619			VOP_UNLOCK(vdp, 0, td);
620		return (0);
621	}
622
623	/*
624	 * If rewriting (RENAME), return the inode and the
625	 * information required to rewrite the present directory
626	 * Must get inode of directory entry to verify it's a
627	 * regular file, or empty directory.
628	 */
629	if (nameiop == RENAME && wantparent &&
630	    (flags & ISLASTCN)) {
631		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
632			return (error);
633		/*
634		 * Careful about locking second inode.
635		 * This can only occur if the target is ".".
636		 */
637		if (dp->i_number == dp->i_ino)
638			return (EISDIR);
639		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
640		    &tdp)) != 0)
641			return (error);
642		*vpp = tdp;
643		cnp->cn_flags |= SAVENAME;
644		if (!lockparent)
645			VOP_UNLOCK(vdp, 0, td);
646		return (0);
647	}
648
649	/*
650	 * Step through the translation in the name.  We do not `vput' the
651	 * directory because we may need it again if a symbolic link
652	 * is relative to the current directory.  Instead we save it
653	 * unlocked as "pdp".  We must get the target inode before unlocking
654	 * the directory to insure that the inode will not be removed
655	 * before we get it.  We prevent deadlock by always fetching
656	 * inodes from the root, moving down the directory tree. Thus
657	 * when following backward pointers ".." we must unlock the
658	 * parent directory before getting the requested directory.
659	 * There is a potential race condition here if both the current
660	 * and parent directories are removed before the VFS_VGET for the
661	 * inode associated with ".." returns.  We hope that this occurs
662	 * infrequently since we cannot avoid this race condition without
663	 * implementing a sophisticated deadlock detection algorithm.
664	 * Note also that this simple deadlock detection scheme will not
665	 * work if the file system has any hard links other than ".."
666	 * that point backwards in the directory structure.
667	 */
668	pdp = vdp;
669	if (flags & ISDOTDOT) {
670		VOP_UNLOCK(pdp, 0, td);	/* race to get the inode */
671		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
672		    &tdp)) != 0) {
673			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td);
674			return (error);
675		}
676		if (lockparent && (flags & ISLASTCN) &&
677		    (error = vn_lock(pdp, LK_EXCLUSIVE, td))) {
678			vput(tdp);
679			return (error);
680		}
681		*vpp = tdp;
682	} else if (dp->i_number == dp->i_ino) {
683		VREF(vdp);	/* we want ourself, ie "." */
684		*vpp = vdp;
685	} else {
686		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
687		    &tdp)) != 0)
688			return (error);
689		if (!lockparent || !(flags & ISLASTCN))
690			VOP_UNLOCK(pdp, 0, td);
691		*vpp = tdp;
692	}
693
694	/*
695	 * Insert name into cache if appropriate.
696	 */
697	if (cnp->cn_flags & MAKEENTRY)
698		cache_enter(vdp, *vpp, cnp);
699	return (0);
700}
701
702void
703ext2_dirbad(ip, offset, how)
704	struct inode *ip;
705	doff_t offset;
706	char *how;
707{
708	struct mount *mp;
709
710	mp = ITOV(ip)->v_mount;
711	(void)printf("%s: bad dir ino %lu at offset %ld: %s\n",
712	    mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
713	if ((mp->mnt_flag & MNT_RDONLY) == 0)
714		panic("ext2_dirbad: bad dir");
715}
716
717/*
718 * Do consistency checking on a directory entry:
719 *	record length must be multiple of 4
720 *	entry must fit in rest of its DIRBLKSIZ block
721 *	record must be large enough to contain entry
722 *	name is not longer than MAXNAMLEN
723 *	name must be as long as advertised, and null terminated
724 */
725/*
726 *	changed so that it confirms to ext2_check_dir_entry
727 */
728static int
729ext2_dirbadentry(dp, de, entryoffsetinblock)
730	struct vnode *dp;
731	struct ext2_dir_entry_2 *de;
732	int entryoffsetinblock;
733{
734	int	DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize;
735
736        char * error_msg = NULL;
737
738        if (de->rec_len < EXT2_DIR_REC_LEN(1))
739                error_msg = "rec_len is smaller than minimal";
740        else if (de->rec_len % 4 != 0)
741                error_msg = "rec_len % 4 != 0";
742        else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len))
743                error_msg = "reclen is too small for name_len";
744        else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ)
745                error_msg = "directory entry across blocks";
746        /* else LATER
747	     if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count)
748                error_msg = "inode out of bounds";
749	*/
750
751        if (error_msg != NULL) {
752                printf("bad directory entry: %s\n", error_msg);
753                printf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n",
754			entryoffsetinblock, (unsigned long)de->inode,
755			de->rec_len, de->name_len);
756        }
757        return error_msg == NULL ? 0 : 1;
758}
759
760/*
761 * Write a directory entry after a call to namei, using the parameters
762 * that it left in nameidata.  The argument ip is the inode which the new
763 * directory entry will refer to.  Dvp is a pointer to the directory to
764 * be written, which was left locked by namei. Remaining parameters
765 * (dp->i_offset, dp->i_count) indicate how the space for the new
766 * entry is to be obtained.
767 */
768int
769ext2_direnter(ip, dvp, cnp)
770	struct inode *ip;
771	struct vnode *dvp;
772	struct componentname *cnp;
773{
774	struct ext2_dir_entry_2 *ep, *nep;
775	struct inode *dp;
776	struct buf *bp;
777	struct ext2_dir_entry_2 newdir;
778	struct iovec aiov;
779	struct uio auio;
780	u_int dsize;
781	int error, loc, newentrysize, spacefree;
782	char *dirbuf;
783	int     DIRBLKSIZ = ip->i_e2fs->s_blocksize;
784
785
786#if DIAGNOSTIC
787	if ((cnp->cn_flags & SAVENAME) == 0)
788		panic("direnter: missing name");
789#endif
790	dp = VTOI(dvp);
791	newdir.inode = ip->i_number;
792	newdir.name_len = cnp->cn_namelen;
793	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
794	    EXT2_FEATURE_INCOMPAT_FILETYPE))
795		newdir.file_type = DTTOFT(IFTODT(ip->i_mode));
796	else
797		newdir.file_type = EXT2_FT_UNKNOWN;
798	bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1);
799	newentrysize = EXT2_DIR_REC_LEN(newdir.name_len);
800	if (dp->i_count == 0) {
801		/*
802		 * If dp->i_count is 0, then namei could find no
803		 * space in the directory. Here, dp->i_offset will
804		 * be on a directory block boundary and we will write the
805		 * new entry into a fresh block.
806		 */
807		if (dp->i_offset & (DIRBLKSIZ - 1))
808			panic("ext2_direnter: newblk");
809		auio.uio_offset = dp->i_offset;
810		newdir.rec_len = DIRBLKSIZ;
811		auio.uio_resid = newentrysize;
812		aiov.iov_len = newentrysize;
813		aiov.iov_base = (caddr_t)&newdir;
814		auio.uio_iov = &aiov;
815		auio.uio_iovcnt = 1;
816		auio.uio_rw = UIO_WRITE;
817		auio.uio_segflg = UIO_SYSSPACE;
818		auio.uio_td = (struct thread *)0;
819		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
820		if (DIRBLKSIZ >
821		    VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
822			/* XXX should grow with balloc() */
823			panic("ext2_direnter: frag size");
824		else if (!error) {
825			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
826			dp->i_flag |= IN_CHANGE;
827		}
828		return (error);
829	}
830
831	/*
832	 * If dp->i_count is non-zero, then namei found space
833	 * for the new entry in the range dp->i_offset to
834	 * dp->i_offset + dp->i_count in the directory.
835	 * To use this space, we may have to compact the entries located
836	 * there, by copying them together towards the beginning of the
837	 * block, leaving the free space in one usable chunk at the end.
838	 */
839
840	/*
841	 * Increase size of directory if entry eats into new space.
842	 * This should never push the size past a new multiple of
843	 * DIRBLKSIZE.
844	 *
845	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
846	 */
847	if (dp->i_offset + dp->i_count > dp->i_size)
848		dp->i_size = dp->i_offset + dp->i_count;
849	/*
850	 * Get the block containing the space for the new directory entry.
851	 */
852	if ((error = ext2_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf,
853	    &bp)) != 0)
854		return (error);
855	/*
856	 * Find space for the new entry. In the simple case, the entry at
857	 * offset base will have the space. If it does not, then namei
858	 * arranged that compacting the region dp->i_offset to
859	 * dp->i_offset + dp->i_count would yield the
860	 * space.
861	 */
862	ep = (struct ext2_dir_entry_2 *)dirbuf;
863	dsize = EXT2_DIR_REC_LEN(ep->name_len);
864	spacefree = ep->rec_len - dsize;
865	for (loc = ep->rec_len; loc < dp->i_count; ) {
866		nep = (struct ext2_dir_entry_2 *)(dirbuf + loc);
867		if (ep->inode) {
868			/* trim the existing slot */
869			ep->rec_len = dsize;
870			ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
871		} else {
872			/* overwrite; nothing there; header is ours */
873			spacefree += dsize;
874		}
875		dsize = EXT2_DIR_REC_LEN(nep->name_len);
876		spacefree += nep->rec_len - dsize;
877		loc += nep->rec_len;
878		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
879	}
880	/*
881	 * Update the pointer fields in the previous entry (if any),
882	 * copy in the new entry, and write out the block.
883	 */
884	if (ep->inode == 0) {
885		if (spacefree + dsize < newentrysize)
886			panic("ext2_direnter: compact1");
887		newdir.rec_len = spacefree + dsize;
888	} else {
889		if (spacefree < newentrysize)
890			panic("ext2_direnter: compact2");
891		newdir.rec_len = spacefree;
892		ep->rec_len = dsize;
893		ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
894	}
895	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
896	error = BUF_WRITE(bp);
897	dp->i_flag |= IN_CHANGE | IN_UPDATE;
898	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
899		error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
900		    cnp->cn_cred, cnp->cn_thread);
901	return (error);
902}
903
904/*
905 * Remove a directory entry after a call to namei, using
906 * the parameters which it left in nameidata. The entry
907 * dp->i_offset contains the offset into the directory of the
908 * entry to be eliminated.  The dp->i_count field contains the
909 * size of the previous record in the directory.  If this
910 * is 0, the first entry is being deleted, so we need only
911 * zero the inode number to mark the entry as free.  If the
912 * entry is not the first in the directory, we must reclaim
913 * the space of the now empty record by adding the record size
914 * to the size of the previous entry.
915 */
916int
917ext2_dirremove(dvp, cnp)
918	struct vnode *dvp;
919	struct componentname *cnp;
920{
921	struct inode *dp;
922	struct ext2_dir_entry_2 *ep;
923	struct buf *bp;
924	int error;
925
926	dp = VTOI(dvp);
927	if (dp->i_count == 0) {
928		/*
929		 * First entry in block: set d_ino to zero.
930		 */
931		if ((error =
932		    ext2_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep,
933		    &bp)) != 0)
934			return (error);
935		ep->inode = 0;
936		error = BUF_WRITE(bp);
937		dp->i_flag |= IN_CHANGE | IN_UPDATE;
938		return (error);
939	}
940	/*
941	 * Collapse new free space into previous entry.
942	 */
943	if ((error = ext2_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count),
944	    (char **)&ep, &bp)) != 0)
945		return (error);
946	ep->rec_len += dp->i_reclen;
947	error = BUF_WRITE(bp);
948	dp->i_flag |= IN_CHANGE | IN_UPDATE;
949	return (error);
950}
951
952/*
953 * Rewrite an existing directory entry to point at the inode
954 * supplied.  The parameters describing the directory entry are
955 * set up by a call to namei.
956 */
957int
958ext2_dirrewrite(dp, ip, cnp)
959	struct inode *dp, *ip;
960	struct componentname *cnp;
961{
962	struct buf *bp;
963	struct ext2_dir_entry_2 *ep;
964	struct vnode *vdp = ITOV(dp);
965	int error;
966
967	if ((error = ext2_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep,
968	    &bp)) != 0)
969		return (error);
970	ep->inode = ip->i_number;
971	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
972	    EXT2_FEATURE_INCOMPAT_FILETYPE))
973		ep->file_type = DTTOFT(IFTODT(ip->i_mode));
974	else
975		ep->file_type = EXT2_FT_UNKNOWN;
976	error = BUF_WRITE(bp);
977	dp->i_flag |= IN_CHANGE | IN_UPDATE;
978	return (error);
979}
980
981/*
982 * Check if a directory is empty or not.
983 * Inode supplied must be locked.
984 *
985 * Using a struct dirtemplate here is not precisely
986 * what we want, but better than using a struct direct.
987 *
988 * NB: does not handle corrupted directories.
989 */
990int
991ext2_dirempty(ip, parentino, cred)
992	struct inode *ip;
993	ino_t parentino;
994	struct ucred *cred;
995{
996	off_t off;
997	struct dirtemplate dbuf;
998	struct ext2_dir_entry_2 *dp = (struct ext2_dir_entry_2 *)&dbuf;
999	int error, count, namlen;
1000
1001#define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1002
1003	for (off = 0; off < ip->i_size; off += dp->rec_len) {
1004		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
1005		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct thread *)0);
1006		/*
1007		 * Since we read MINDIRSIZ, residual must
1008		 * be 0 unless we're at end of file.
1009		 */
1010		if (error || count != 0)
1011			return (0);
1012		/* avoid infinite loops */
1013		if (dp->rec_len == 0)
1014			return (0);
1015		/* skip empty entries */
1016		if (dp->inode == 0)
1017			continue;
1018		/* accept only "." and ".." */
1019		namlen = dp->name_len;
1020		if (namlen > 2)
1021			return (0);
1022		if (dp->name[0] != '.')
1023			return (0);
1024		/*
1025		 * At this point namlen must be 1 or 2.
1026		 * 1 implies ".", 2 implies ".." if second
1027		 * char is also "."
1028		 */
1029		if (namlen == 1)
1030			continue;
1031		if (dp->name[1] == '.' && dp->inode == parentino)
1032			continue;
1033		return (0);
1034	}
1035	return (1);
1036}
1037
1038/*
1039 * Check if source directory is in the path of the target directory.
1040 * Target is supplied locked, source is unlocked.
1041 * The target is always vput before returning.
1042 */
1043int
1044ext2_checkpath(source, target, cred)
1045	struct inode *source, *target;
1046	struct ucred *cred;
1047{
1048	struct vnode *vp;
1049	int error, rootino, namlen;
1050	struct dirtemplate dirbuf;
1051
1052	vp = ITOV(target);
1053	if (target->i_number == source->i_number) {
1054		error = EEXIST;
1055		goto out;
1056	}
1057	rootino = ROOTINO;
1058	error = 0;
1059	if (target->i_number == rootino)
1060		goto out;
1061
1062	for (;;) {
1063		if (vp->v_type != VDIR) {
1064			error = ENOTDIR;
1065			break;
1066		}
1067		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1068			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
1069			IO_NODELOCKED, cred, (int *)0, (struct thread *)0);
1070		if (error != 0)
1071			break;
1072		namlen = dirbuf.dotdot_type;	/* like ufs little-endian */
1073		if (namlen != 2 ||
1074		    dirbuf.dotdot_name[0] != '.' ||
1075		    dirbuf.dotdot_name[1] != '.') {
1076			error = ENOTDIR;
1077			break;
1078		}
1079		if (dirbuf.dotdot_ino == source->i_number) {
1080			error = EINVAL;
1081			break;
1082		}
1083		if (dirbuf.dotdot_ino == rootino)
1084			break;
1085		vput(vp);
1086		if ((error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino,
1087		    LK_EXCLUSIVE, &vp)) != 0) {
1088			vp = NULL;
1089			break;
1090		}
1091	}
1092
1093out:
1094	if (error == ENOTDIR)
1095		printf("checkpath: .. not a directory\n");
1096	if (vp != NULL)
1097		vput(vp);
1098	return (error);
1099}
1100
1101