ext2_lookup.c revision 111742
1/*
2 *  modified for Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1993
9 *	The Regents of the University of California.  All rights reserved.
10 * (c) UNIX System Laboratories, Inc.
11 * All or some portions of this file are derived from material licensed
12 * to the University of California by American Telephone and Telegraph
13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
14 * the permission of UNIX System Laboratories, Inc.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 *    notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 *    notice, this list of conditions and the following disclaimer in the
23 *    documentation and/or other materials provided with the distribution.
24 * 3. All advertising materials mentioning features or use of this software
25 *    must display the following acknowledgement:
26 *	This product includes software developed by the University of
27 *	California, Berkeley and its contributors.
28 * 4. Neither the name of the University nor the names of its contributors
29 *    may be used to endorse or promote products derived from this software
30 *    without specific prior written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * SUCH DAMAGE.
43 *
44 *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
45 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_lookup.c 111742 2003-03-02 15:56:49Z des $
46 */
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/namei.h>
51#include <sys/bio.h>
52#include <sys/buf.h>
53#include <sys/mount.h>
54#include <sys/vnode.h>
55#include <sys/malloc.h>
56#include <sys/dirent.h>
57#include <sys/sysctl.h>
58
59#include <ufs/ufs/dir.h>
60
61#include <gnu/ext2fs/inode.h>
62#include <gnu/ext2fs/ext2_mount.h>
63#include <gnu/ext2fs/ext2_extern.h>
64#include <gnu/ext2fs/ext2_fs.h>
65#include <gnu/ext2fs/ext2_fs_sb.h>
66
67#ifdef DIAGNOSTIC
68static int dirchk = 1;
69#else
70static int dirchk = 0;
71#endif
72
73SYSCTL_NODE(_vfs, OID_AUTO, e2fs, CTLFLAG_RD, 0, "EXT2FS filesystem");
74SYSCTL_INT(_vfs_e2fs, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");
75
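/*
 * DIRBLKSIZ in ffs is DEV_BSIZE (in most cases 512), whereas in ext2fs
 * it is the native block size of the filesystem.  A compile-time #define
 * is therefore no longer appropriate; the functions below set up a local
 * variable named DIRBLKSIZ from the superblock instead.
 */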
81#undef  DIRBLKSIZ
82
83static u_char ext2_ft_to_dt[] = {
84	DT_UNKNOWN,		/* EXT2_FT_UNKNOWN */
85	DT_REG,			/* EXT2_FT_REG_FILE */
86	DT_DIR,			/* EXT2_FT_DIR */
87	DT_CHR,			/* EXT2_FT_CHRDEV */
88	DT_BLK,			/* EXT2_FT_BLKDEV */
89	DT_FIFO,		/* EXT2_FT_FIFO */
90	DT_SOCK,		/* EXT2_FT_SOCK */
91	DT_LNK,			/* EXT2_FT_SYMLINK */
92};
93#define	FTTODT(ft)						\
94    ((ft) > sizeof(ext2_ft_to_dt) / sizeof(ext2_ft_to_dt[0]) ?	\
95    DT_UNKNOWN : ext2_ft_to_dt[(ft)])
96
97static u_char dt_to_ext2_ft[] = {
98	EXT2_FT_UNKNOWN,	/* DT_UNKNOWN */
99	EXT2_FT_FIFO,		/* DT_FIFO */
100	EXT2_FT_CHRDEV,		/* DT_CHR */
101	EXT2_FT_UNKNOWN,	/* unused */
102	EXT2_FT_DIR,		/* DT_DIR */
103	EXT2_FT_UNKNOWN,	/* unused */
104	EXT2_FT_BLKDEV,		/* DT_BLK */
105	EXT2_FT_UNKNOWN,	/* unused */
106	EXT2_FT_REG_FILE,	/* DT_REG */
107	EXT2_FT_UNKNOWN,	/* unused */
108	EXT2_FT_SYMLINK,	/* DT_LNK */
109	EXT2_FT_UNKNOWN,	/* unused */
110	EXT2_FT_SOCK,		/* DT_SOCK */
111	EXT2_FT_UNKNOWN,	/* unused */
112	EXT2_FT_UNKNOWN,	/* DT_WHT */
113};
114#define	DTTOFT(dt)						\
115    ((dt) > sizeof(dt_to_ext2_ft) / sizeof(dt_to_ext2_ft[0]) ?	\
116    EXT2_FT_UNKNOWN : dt_to_ext2_ft[(dt)])
117
118static int	ext2_dirbadentry(struct vnode *dp, struct ext2_dir_entry_2 *de,
119		    int entryoffsetinblock);
120
121/*
122 * Vnode op for reading directories.
123 *
124 * The routine below assumes that the on-disk format of a directory
125 * is the same as that defined by <sys/dirent.h>. If the on-disk
126 * format changes, then it will be necessary to do a conversion
127 * from the on-disk format that read returns to the format defined
128 * by <sys/dirent.h>.
129 */
130/*
131 * this is exactly what we do here - the problem is that the conversion
132 * will blow up some entries by four bytes, so it can't be done in place.
133 * This is too bad. Right now the conversion is done entry by entry, the
134 * converted entry is sent via uiomove.
135 *
136 * XXX allocate a buffer, convert as many entries as possible, then send
137 * the whole buffer to uiomove
138 */
139int
140ext2_readdir(ap)
141	struct vop_readdir_args /* {
142		struct vnode *a_vp;
143		struct uio *a_uio;
144		struct ucred *a_cred;
145	} */ *ap;
146{
147	struct uio *uio = ap->a_uio;
148	int count, error;
149
150	struct ext2_dir_entry_2 *edp, *dp;
151	int ncookies;
152	struct dirent dstdp;
153	struct uio auio;
154	struct iovec aiov;
155	caddr_t dirbuf;
156	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize;
157	int readcnt;
158	off_t startoffset = uio->uio_offset;
159
160	count = uio->uio_resid;
161	/*
162	 * Avoid complications for partial directory entries by adjusting
163	 * the i/o to end at a block boundary.  Don't give up (like ufs
164	 * does) if the initial adjustment gives a negative count, since
165	 * many callers don't supply a large enough buffer.  The correct
166	 * size is a little larger than DIRBLKSIZ to allow for expansion
167	 * of directory entries, but some callers just use 512.
168	 */
169	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
170	if (count <= 0)
171		count += DIRBLKSIZ;
172
173#ifdef EXT2FS_DEBUG
174	printf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n",
175	    uio->uio_offset, uio->uio_resid, count);
176#endif
177
178	auio = *uio;
179	auio.uio_iov = &aiov;
180	auio.uio_iovcnt = 1;
181	auio.uio_resid = count;
182	auio.uio_segflg = UIO_SYSSPACE;
183	aiov.iov_len = count;
184	MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
185	aiov.iov_base = dirbuf;
186	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
187	if (error == 0) {
188		readcnt = count - auio.uio_resid;
189		edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt];
190		ncookies = 0;
191		bzero(&dstdp, offsetof(struct dirent, d_name));
192		for (dp = (struct ext2_dir_entry_2 *)dirbuf;
193		    !error && uio->uio_resid > 0 && dp < edp; ) {
194			/*-
195			 * "New" ext2fs directory entries differ in 3 ways
196			 * from ufs on-disk ones:
197			 * - the name is not necessarily NUL-terminated.
198			 * - the file type field always exists and always
199			 * follows the name length field.
200			 * - the file type is encoded in a different way.
201			 *
202			 * "Old" ext2fs directory entries need no special
203			 * conversions, since they binary compatible with
204			 * "new" entries having a file type of 0 (i.e.,
205			 * EXT2_FT_UNKNOWN).  Splitting the old name length
206			 * field didn't make a mess like it did in ufs,
207			 * because ext2fs uses a machine-dependent disk
208			 * layout.
209			 */
210			dstdp.d_fileno = dp->inode;
211			dstdp.d_type = FTTODT(dp->file_type);
212			dstdp.d_namlen = dp->name_len;
213			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
214			bcopy(dp->name, dstdp.d_name, dstdp.d_namlen);
215			bzero(dstdp.d_name + dstdp.d_namlen,
216			    dstdp.d_reclen - offsetof(struct dirent, d_name) -
217			    dstdp.d_namlen);
218
219			if (dp->rec_len > 0) {
220				if(dstdp.d_reclen <= uio->uio_resid) {
221					/* advance dp */
222					dp = (struct ext2_dir_entry_2 *)
223					    ((char *)dp + dp->rec_len);
224					error =
225					  uiomove(&dstdp, dstdp.d_reclen, uio);
226					if (!error)
227						ncookies++;
228				} else
229					break;
230			} else {
231				error = EIO;
232				break;
233			}
234		}
235		/* we need to correct uio_offset */
236		uio->uio_offset = startoffset + (caddr_t)dp - dirbuf;
237
238		if (!error && ap->a_ncookies != NULL) {
239			u_long *cookiep, *cookies, *ecookies;
240			off_t off;
241
242			if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
243				panic("ext2fs_readdir: unexpected uio from NFS server");
244			MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
245			       M_WAITOK);
246			off = startoffset;
247			for (dp = (struct ext2_dir_entry_2 *)dirbuf,
248			     cookiep = cookies, ecookies = cookies + ncookies;
249			     cookiep < ecookies;
250			     dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) {
251				off += dp->rec_len;
252				*cookiep++ = (u_long) off;
253			}
254			*ap->a_ncookies = ncookies;
255			*ap->a_cookies = cookies;
256		}
257	}
258	FREE(dirbuf, M_TEMP);
259	if (ap->a_eofflag)
260		*ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
261	return (error);
262}
263
264/*
265 * Convert a component of a pathname into a pointer to a locked inode.
266 * This is a very central and rather complicated routine.
267 * If the file system is not maintained in a strict tree hierarchy,
268 * this can result in a deadlock situation (see comments in code below).
269 *
270 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
271 * on whether the name is to be looked up, created, renamed, or deleted.
272 * When CREATE, RENAME, or DELETE is specified, information usable in
273 * creating, renaming, or deleting a directory entry may be calculated.
274 * If flag has LOCKPARENT or'ed into it and the target of the pathname
275 * exists, lookup returns both the target and its parent directory locked.
276 * When creating or renaming and LOCKPARENT is specified, the target may
277 * not be ".".  When deleting and LOCKPARENT is specified, the target may
278 * be "."., but the caller must check to ensure it does an vrele and vput
279 * instead of two vputs.
280 *
281 * Overall outline of ufs_lookup:
282 *
283 *	search for name in directory, to found or notfound
284 * notfound:
285 *	if creating, return locked directory, leaving info on available slots
286 *	else return error
287 * found:
288 *	if at end of path and deleting, return information to allow delete
289 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
290 *	  inode and return info to allow rewrite
291 *	if not at end, add name to cache; if at end and neither creating
292 *	  nor deleting, add name to cache
293 */
294int
295ext2_lookup(ap)
296	struct vop_cachedlookup_args /* {
297		struct vnode *a_dvp;
298		struct vnode **a_vpp;
299		struct componentname *a_cnp;
300	} */ *ap;
301{
302	struct vnode *vdp;		/* vnode for directory being searched */
303	struct inode *dp;		/* inode for directory being searched */
304	struct buf *bp;			/* a buffer of directory entries */
305	struct ext2_dir_entry_2 *ep;	/* the current directory entry */
306	int entryoffsetinblock;		/* offset of ep in bp's buffer */
307	enum {NONE, COMPACT, FOUND} slotstatus;
308	doff_t slotoffset;		/* offset of area with free space */
309	int slotsize;			/* size of area at slotoffset */
310	int slotfreespace;		/* amount of space free in slot */
311	int slotneeded;			/* size of the entry we're seeking */
312	int numdirpasses;		/* strategy for directory search */
313	doff_t endsearch;		/* offset to end directory search */
314	doff_t prevoff;			/* prev entry dp->i_offset */
315	struct vnode *pdp;		/* saved dp during symlink work */
316	struct vnode *tdp;		/* returned by VFS_VGET */
317	doff_t enduseful;		/* pointer past last used dir slot */
318	u_long bmask;			/* block offset mask */
319	int lockparent;			/* 1 => lockparent flag is set */
320	int wantparent;			/* 1 => wantparent or lockparent flag */
321	int namlen, error;
322	struct vnode **vpp = ap->a_vpp;
323	struct componentname *cnp = ap->a_cnp;
324	struct ucred *cred = cnp->cn_cred;
325	int flags = cnp->cn_flags;
326	int nameiop = cnp->cn_nameiop;
327	struct thread *td = cnp->cn_thread;
328
329	int	DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize;
330
331	bp = NULL;
332	slotoffset = -1;
333	*vpp = NULL;
334	vdp = ap->a_dvp;
335	dp = VTOI(vdp);
336	lockparent = flags & LOCKPARENT;
337	wantparent = flags & (LOCKPARENT|WANTPARENT);
338
339	/*
340	 * We now have a segment name to search for, and a directory to search.
341	 */
342
343	/*
344	 * Suppress search for slots unless creating
345	 * file and at end of pathname, in which case
346	 * we watch for a place to put the new file in
347	 * case it doesn't already exist.
348	 */
349	slotstatus = FOUND;
350	slotfreespace = slotsize = slotneeded = 0;
351	if ((nameiop == CREATE || nameiop == RENAME) &&
352	    (flags & ISLASTCN)) {
353		slotstatus = NONE;
354		slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
355		/* was
356		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
357			cnp->cn_namelen + 3) &~ 3; */
358	}
359
360	/*
361	 * If there is cached information on a previous search of
362	 * this directory, pick up where we last left off.
363	 * We cache only lookups as these are the most common
364	 * and have the greatest payoff. Caching CREATE has little
365	 * benefit as it usually must search the entire directory
366	 * to determine that the entry does not exist. Caching the
367	 * location of the last DELETE or RENAME has not reduced
368	 * profiling time and hence has been removed in the interest
369	 * of simplicity.
370	 */
371	bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
372	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
373	    dp->i_diroff > dp->i_size) {
374		entryoffsetinblock = 0;
375		dp->i_offset = 0;
376		numdirpasses = 1;
377	} else {
378		dp->i_offset = dp->i_diroff;
379		if ((entryoffsetinblock = dp->i_offset & bmask) &&
380		    (error = ext2_blkatoff(vdp, (off_t)dp->i_offset, NULL,
381		    &bp)))
382			return (error);
383		numdirpasses = 2;
384		nchstats.ncs_2passes++;
385	}
386	prevoff = dp->i_offset;
387	endsearch = roundup(dp->i_size, DIRBLKSIZ);
388	enduseful = 0;
389
390searchloop:
391	while (dp->i_offset < endsearch) {
392		/*
393		 * If necessary, get the next directory block.
394		 */
395		if ((dp->i_offset & bmask) == 0) {
396			if (bp != NULL)
397				brelse(bp);
398			if ((error =
399			    ext2_blkatoff(vdp, (off_t)dp->i_offset, NULL,
400			    &bp)) != 0)
401				return (error);
402			entryoffsetinblock = 0;
403		}
404		/*
405		 * If still looking for a slot, and at a DIRBLKSIZE
406		 * boundary, have to start looking for free space again.
407		 */
408		if (slotstatus == NONE &&
409		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
410			slotoffset = -1;
411			slotfreespace = 0;
412		}
413		/*
414		 * Get pointer to next entry.
415		 * Full validation checks are slow, so we only check
416		 * enough to insure forward progress through the
417		 * directory. Complete checks can be run by setting
418		 * "vfs.e2fs.dirchk" to be true.
419		 */
420		ep = (struct ext2_dir_entry_2 *)
421			((char *)bp->b_data + entryoffsetinblock);
422		if (ep->rec_len == 0 ||
423		    (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
424			int i;
425			ext2_dirbad(dp, dp->i_offset, "mangled entry");
426			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
427			dp->i_offset += i;
428			entryoffsetinblock += i;
429			continue;
430		}
431
432		/*
433		 * If an appropriate sized slot has not yet been found,
434		 * check to see if one is available. Also accumulate space
435		 * in the current block so that we can determine if
436		 * compaction is viable.
437		 */
438		if (slotstatus != FOUND) {
439			int size = ep->rec_len;
440
441			if (ep->inode != 0)
442				size -= EXT2_DIR_REC_LEN(ep->name_len);
443			if (size > 0) {
444				if (size >= slotneeded) {
445					slotstatus = FOUND;
446					slotoffset = dp->i_offset;
447					slotsize = ep->rec_len;
448				} else if (slotstatus == NONE) {
449					slotfreespace += size;
450					if (slotoffset == -1)
451						slotoffset = dp->i_offset;
452					if (slotfreespace >= slotneeded) {
453						slotstatus = COMPACT;
454						slotsize = dp->i_offset +
455						      ep->rec_len - slotoffset;
456					}
457				}
458			}
459		}
460
461		/*
462		 * Check for a name match.
463		 */
464		if (ep->inode) {
465			namlen = ep->name_len;
466			if (namlen == cnp->cn_namelen &&
467			    !bcmp(cnp->cn_nameptr, ep->name,
468				(unsigned)namlen)) {
469				/*
470				 * Save directory entry's inode number and
471				 * reclen in ndp->ni_ufs area, and release
472				 * directory buffer.
473				 */
474				dp->i_ino = ep->inode;
475				dp->i_reclen = ep->rec_len;
476				goto found;
477			}
478		}
479		prevoff = dp->i_offset;
480		dp->i_offset += ep->rec_len;
481		entryoffsetinblock += ep->rec_len;
482		if (ep->inode)
483			enduseful = dp->i_offset;
484	}
485/* notfound: */
486	/*
487	 * If we started in the middle of the directory and failed
488	 * to find our target, we must check the beginning as well.
489	 */
490	if (numdirpasses == 2) {
491		numdirpasses--;
492		dp->i_offset = 0;
493		endsearch = dp->i_diroff;
494		goto searchloop;
495	}
496	if (bp != NULL)
497		brelse(bp);
498	/*
499	 * If creating, and at end of pathname and current
500	 * directory has not been removed, then can consider
501	 * allowing file to be created.
502	 */
503	if ((nameiop == CREATE || nameiop == RENAME) &&
504	    (flags & ISLASTCN) && dp->i_nlink != 0) {
505		/*
506		 * Access for write is interpreted as allowing
507		 * creation of files in the directory.
508		 */
509		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
510			return (error);
511		/*
512		 * Return an indication of where the new directory
513		 * entry should be put.  If we didn't find a slot,
514		 * then set dp->i_count to 0 indicating
515		 * that the new slot belongs at the end of the
516		 * directory. If we found a slot, then the new entry
517		 * can be put in the range from dp->i_offset to
518		 * dp->i_offset + dp->i_count.
519		 */
520		if (slotstatus == NONE) {
521			dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
522			dp->i_count = 0;
523			enduseful = dp->i_offset;
524		} else {
525			dp->i_offset = slotoffset;
526			dp->i_count = slotsize;
527			if (enduseful < slotoffset + slotsize)
528				enduseful = slotoffset + slotsize;
529		}
530		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
531		dp->i_flag |= IN_CHANGE | IN_UPDATE;
532		/*
533		 * We return with the directory locked, so that
534		 * the parameters we set up above will still be
535		 * valid if we actually decide to do a direnter().
536		 * We return ni_vp == NULL to indicate that the entry
537		 * does not currently exist; we leave a pointer to
538		 * the (locked) directory inode in ndp->ni_dvp.
539		 * The pathname buffer is saved so that the name
540		 * can be obtained later.
541		 *
542		 * NB - if the directory is unlocked, then this
543		 * information cannot be used.
544		 */
545		cnp->cn_flags |= SAVENAME;
546		if (!lockparent)
547			VOP_UNLOCK(vdp, 0, td);
548		return (EJUSTRETURN);
549	}
550	/*
551	 * Insert name into cache (as non-existent) if appropriate.
552	 */
553	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
554		cache_enter(vdp, *vpp, cnp);
555	return (ENOENT);
556
557found:
558	if (numdirpasses == 2)
559		nchstats.ncs_pass2++;
560	/*
561	 * Check that directory length properly reflects presence
562	 * of this entry.
563	 */
564	if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len)
565		> dp->i_size) {
566		ext2_dirbad(dp, dp->i_offset, "i_size too small");
567		dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len);
568		dp->i_flag |= IN_CHANGE | IN_UPDATE;
569	}
570	brelse(bp);
571
572	/*
573	 * Found component in pathname.
574	 * If the final component of path name, save information
575	 * in the cache as to where the entry was found.
576	 */
577	if ((flags & ISLASTCN) && nameiop == LOOKUP)
578		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
579
580	/*
581	 * If deleting, and at end of pathname, return
582	 * parameters which can be used to remove file.
583	 * If the wantparent flag isn't set, we return only
584	 * the directory (in ndp->ni_dvp), otherwise we go
585	 * on and lock the inode, being careful with ".".
586	 */
587	if (nameiop == DELETE && (flags & ISLASTCN)) {
588		/*
589		 * Write access to directory required to delete files.
590		 */
591		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
592			return (error);
593		/*
594		 * Return pointer to current entry in dp->i_offset,
595		 * and distance past previous entry (if there
596		 * is a previous entry in this block) in dp->i_count.
597		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
598		 */
599		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
600			dp->i_count = 0;
601		else
602			dp->i_count = dp->i_offset - prevoff;
603		if (dp->i_number == dp->i_ino) {
604			VREF(vdp);
605			*vpp = vdp;
606			return (0);
607		}
608		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
609		    &tdp)) != 0)
610			return (error);
611		/*
612		 * If directory is "sticky", then user must own
613		 * the directory, or the file in it, else she
614		 * may not delete it (unless she's root). This
615		 * implements append-only directories.
616		 */
617		if ((dp->i_mode & ISVTX) &&
618		    cred->cr_uid != 0 &&
619		    cred->cr_uid != dp->i_uid &&
620		    VTOI(tdp)->i_uid != cred->cr_uid) {
621			vput(tdp);
622			return (EPERM);
623		}
624		*vpp = tdp;
625		if (!lockparent)
626			VOP_UNLOCK(vdp, 0, td);
627		return (0);
628	}
629
630	/*
631	 * If rewriting (RENAME), return the inode and the
632	 * information required to rewrite the present directory
633	 * Must get inode of directory entry to verify it's a
634	 * regular file, or empty directory.
635	 */
636	if (nameiop == RENAME && wantparent &&
637	    (flags & ISLASTCN)) {
638		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
639			return (error);
640		/*
641		 * Careful about locking second inode.
642		 * This can only occur if the target is ".".
643		 */
644		if (dp->i_number == dp->i_ino)
645			return (EISDIR);
646		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
647		    &tdp)) != 0)
648			return (error);
649		*vpp = tdp;
650		cnp->cn_flags |= SAVENAME;
651		if (!lockparent)
652			VOP_UNLOCK(vdp, 0, td);
653		return (0);
654	}
655
656	/*
657	 * Step through the translation in the name.  We do not `vput' the
658	 * directory because we may need it again if a symbolic link
659	 * is relative to the current directory.  Instead we save it
660	 * unlocked as "pdp".  We must get the target inode before unlocking
661	 * the directory to insure that the inode will not be removed
662	 * before we get it.  We prevent deadlock by always fetching
663	 * inodes from the root, moving down the directory tree. Thus
664	 * when following backward pointers ".." we must unlock the
665	 * parent directory before getting the requested directory.
666	 * There is a potential race condition here if both the current
667	 * and parent directories are removed before the VFS_VGET for the
668	 * inode associated with ".." returns.  We hope that this occurs
669	 * infrequently since we cannot avoid this race condition without
670	 * implementing a sophisticated deadlock detection algorithm.
671	 * Note also that this simple deadlock detection scheme will not
672	 * work if the file system has any hard links other than ".."
673	 * that point backwards in the directory structure.
674	 */
675	pdp = vdp;
676	if (flags & ISDOTDOT) {
677		VOP_UNLOCK(pdp, 0, td);	/* race to get the inode */
678		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
679		    &tdp)) != 0) {
680			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td);
681			return (error);
682		}
683		if (lockparent && (flags & ISLASTCN) &&
684		    (error = vn_lock(pdp, LK_EXCLUSIVE, td))) {
685			vput(tdp);
686			return (error);
687		}
688		*vpp = tdp;
689	} else if (dp->i_number == dp->i_ino) {
690		VREF(vdp);	/* we want ourself, ie "." */
691		*vpp = vdp;
692	} else {
693		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE,
694		    &tdp)) != 0)
695			return (error);
696		if (!lockparent || !(flags & ISLASTCN))
697			VOP_UNLOCK(pdp, 0, td);
698		*vpp = tdp;
699	}
700
701	/*
702	 * Insert name into cache if appropriate.
703	 */
704	if (cnp->cn_flags & MAKEENTRY)
705		cache_enter(vdp, *vpp, cnp);
706	return (0);
707}
708
709void
710ext2_dirbad(ip, offset, how)
711	struct inode *ip;
712	doff_t offset;
713	char *how;
714{
715	struct mount *mp;
716
717	mp = ITOV(ip)->v_mount;
718	(void)printf("%s: bad dir ino %lu at offset %ld: %s\n",
719	    mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
720	if ((mp->mnt_flag & MNT_RDONLY) == 0)
721		panic("ext2_dirbad: bad dir");
722}
723
724/*
725 * Do consistency checking on a directory entry:
726 *	record length must be multiple of 4
727 *	entry must fit in rest of its DIRBLKSIZ block
728 *	record must be large enough to contain entry
729 *	name is not longer than MAXNAMLEN
730 *	name must be as long as advertised, and null terminated
731 */
732/*
733 *	changed so that it confirms to ext2_check_dir_entry
734 */
735static int
736ext2_dirbadentry(dp, de, entryoffsetinblock)
737	struct vnode *dp;
738	struct ext2_dir_entry_2 *de;
739	int entryoffsetinblock;
740{
741	int	DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize;
742
743	char * error_msg = NULL;
744
745	if (de->rec_len < EXT2_DIR_REC_LEN(1))
746		error_msg = "rec_len is smaller than minimal";
747	else if (de->rec_len % 4 != 0)
748		error_msg = "rec_len % 4 != 0";
749	else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len))
750		error_msg = "reclen is too small for name_len";
751	else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ)
752		error_msg = "directory entry across blocks";
753	/* else LATER
754	     if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count)
755		error_msg = "inode out of bounds";
756	*/
757
758	if (error_msg != NULL) {
759		printf("bad directory entry: %s\n", error_msg);
760		printf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n",
761			entryoffsetinblock, (unsigned long)de->inode,
762			de->rec_len, de->name_len);
763	}
764	return error_msg == NULL ? 0 : 1;
765}
766
767/*
768 * Write a directory entry after a call to namei, using the parameters
769 * that it left in nameidata.  The argument ip is the inode which the new
770 * directory entry will refer to.  Dvp is a pointer to the directory to
771 * be written, which was left locked by namei. Remaining parameters
772 * (dp->i_offset, dp->i_count) indicate how the space for the new
773 * entry is to be obtained.
774 */
775int
776ext2_direnter(ip, dvp, cnp)
777	struct inode *ip;
778	struct vnode *dvp;
779	struct componentname *cnp;
780{
781	struct ext2_dir_entry_2 *ep, *nep;
782	struct inode *dp;
783	struct buf *bp;
784	struct ext2_dir_entry_2 newdir;
785	struct iovec aiov;
786	struct uio auio;
787	u_int dsize;
788	int error, loc, newentrysize, spacefree;
789	char *dirbuf;
790	int     DIRBLKSIZ = ip->i_e2fs->s_blocksize;
791
792
793#if DIAGNOSTIC
794	if ((cnp->cn_flags & SAVENAME) == 0)
795		panic("direnter: missing name");
796#endif
797	dp = VTOI(dvp);
798	newdir.inode = ip->i_number;
799	newdir.name_len = cnp->cn_namelen;
800	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
801	    EXT2_FEATURE_INCOMPAT_FILETYPE))
802		newdir.file_type = DTTOFT(IFTODT(ip->i_mode));
803	else
804		newdir.file_type = EXT2_FT_UNKNOWN;
805	bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1);
806	newentrysize = EXT2_DIR_REC_LEN(newdir.name_len);
807	if (dp->i_count == 0) {
808		/*
809		 * If dp->i_count is 0, then namei could find no
810		 * space in the directory. Here, dp->i_offset will
811		 * be on a directory block boundary and we will write the
812		 * new entry into a fresh block.
813		 */
814		if (dp->i_offset & (DIRBLKSIZ - 1))
815			panic("ext2_direnter: newblk");
816		auio.uio_offset = dp->i_offset;
817		newdir.rec_len = DIRBLKSIZ;
818		auio.uio_resid = newentrysize;
819		aiov.iov_len = newentrysize;
820		aiov.iov_base = (caddr_t)&newdir;
821		auio.uio_iov = &aiov;
822		auio.uio_iovcnt = 1;
823		auio.uio_rw = UIO_WRITE;
824		auio.uio_segflg = UIO_SYSSPACE;
825		auio.uio_td = (struct thread *)0;
826		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
827		if (DIRBLKSIZ >
828		    VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
829			/* XXX should grow with balloc() */
830			panic("ext2_direnter: frag size");
831		else if (!error) {
832			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
833			dp->i_flag |= IN_CHANGE;
834		}
835		return (error);
836	}
837
838	/*
839	 * If dp->i_count is non-zero, then namei found space
840	 * for the new entry in the range dp->i_offset to
841	 * dp->i_offset + dp->i_count in the directory.
842	 * To use this space, we may have to compact the entries located
843	 * there, by copying them together towards the beginning of the
844	 * block, leaving the free space in one usable chunk at the end.
845	 */
846
847	/*
848	 * Increase size of directory if entry eats into new space.
849	 * This should never push the size past a new multiple of
850	 * DIRBLKSIZE.
851	 *
852	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
853	 */
854	if (dp->i_offset + dp->i_count > dp->i_size)
855		dp->i_size = dp->i_offset + dp->i_count;
856	/*
857	 * Get the block containing the space for the new directory entry.
858	 */
859	if ((error = ext2_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf,
860	    &bp)) != 0)
861		return (error);
862	/*
863	 * Find space for the new entry. In the simple case, the entry at
864	 * offset base will have the space. If it does not, then namei
865	 * arranged that compacting the region dp->i_offset to
866	 * dp->i_offset + dp->i_count would yield the
867	 * space.
868	 */
869	ep = (struct ext2_dir_entry_2 *)dirbuf;
870	dsize = EXT2_DIR_REC_LEN(ep->name_len);
871	spacefree = ep->rec_len - dsize;
872	for (loc = ep->rec_len; loc < dp->i_count; ) {
873		nep = (struct ext2_dir_entry_2 *)(dirbuf + loc);
874		if (ep->inode) {
875			/* trim the existing slot */
876			ep->rec_len = dsize;
877			ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
878		} else {
879			/* overwrite; nothing there; header is ours */
880			spacefree += dsize;
881		}
882		dsize = EXT2_DIR_REC_LEN(nep->name_len);
883		spacefree += nep->rec_len - dsize;
884		loc += nep->rec_len;
885		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
886	}
887	/*
888	 * Update the pointer fields in the previous entry (if any),
889	 * copy in the new entry, and write out the block.
890	 */
891	if (ep->inode == 0) {
892		if (spacefree + dsize < newentrysize)
893			panic("ext2_direnter: compact1");
894		newdir.rec_len = spacefree + dsize;
895	} else {
896		if (spacefree < newentrysize)
897			panic("ext2_direnter: compact2");
898		newdir.rec_len = spacefree;
899		ep->rec_len = dsize;
900		ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
901	}
902	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
903	error = BUF_WRITE(bp);
904	dp->i_flag |= IN_CHANGE | IN_UPDATE;
905	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
906		error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
907		    cnp->cn_cred, cnp->cn_thread);
908	return (error);
909}
910
911/*
912 * Remove a directory entry after a call to namei, using
913 * the parameters which it left in nameidata. The entry
914 * dp->i_offset contains the offset into the directory of the
915 * entry to be eliminated.  The dp->i_count field contains the
916 * size of the previous record in the directory.  If this
917 * is 0, the first entry is being deleted, so we need only
918 * zero the inode number to mark the entry as free.  If the
919 * entry is not the first in the directory, we must reclaim
920 * the space of the now empty record by adding the record size
921 * to the size of the previous entry.
922 */
923int
924ext2_dirremove(dvp, cnp)
925	struct vnode *dvp;
926	struct componentname *cnp;
927{
928	struct inode *dp;
929	struct ext2_dir_entry_2 *ep;
930	struct buf *bp;
931	int error;
932
933	dp = VTOI(dvp);
934	if (dp->i_count == 0) {
935		/*
936		 * First entry in block: set d_ino to zero.
937		 */
938		if ((error =
939		    ext2_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep,
940		    &bp)) != 0)
941			return (error);
942		ep->inode = 0;
943		error = BUF_WRITE(bp);
944		dp->i_flag |= IN_CHANGE | IN_UPDATE;
945		return (error);
946	}
947	/*
948	 * Collapse new free space into previous entry.
949	 */
950	if ((error = ext2_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count),
951	    (char **)&ep, &bp)) != 0)
952		return (error);
953	ep->rec_len += dp->i_reclen;
954	error = BUF_WRITE(bp);
955	dp->i_flag |= IN_CHANGE | IN_UPDATE;
956	return (error);
957}
958
959/*
960 * Rewrite an existing directory entry to point at the inode
961 * supplied.  The parameters describing the directory entry are
962 * set up by a call to namei.
963 */
964int
965ext2_dirrewrite(dp, ip, cnp)
966	struct inode *dp, *ip;
967	struct componentname *cnp;
968{
969	struct buf *bp;
970	struct ext2_dir_entry_2 *ep;
971	struct vnode *vdp = ITOV(dp);
972	int error;
973
974	if ((error = ext2_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep,
975	    &bp)) != 0)
976		return (error);
977	ep->inode = ip->i_number;
978	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
979	    EXT2_FEATURE_INCOMPAT_FILETYPE))
980		ep->file_type = DTTOFT(IFTODT(ip->i_mode));
981	else
982		ep->file_type = EXT2_FT_UNKNOWN;
983	error = BUF_WRITE(bp);
984	dp->i_flag |= IN_CHANGE | IN_UPDATE;
985	return (error);
986}
987
988/*
989 * Check if a directory is empty or not.
990 * Inode supplied must be locked.
991 *
992 * Using a struct dirtemplate here is not precisely
993 * what we want, but better than using a struct direct.
994 *
995 * NB: does not handle corrupted directories.
996 */
997int
998ext2_dirempty(ip, parentino, cred)
999	struct inode *ip;
1000	ino_t parentino;
1001	struct ucred *cred;
1002{
1003	off_t off;
1004	struct dirtemplate dbuf;
1005	struct ext2_dir_entry_2 *dp = (struct ext2_dir_entry_2 *)&dbuf;
1006	int error, count, namlen;
1007
1008#define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1009
1010	for (off = 0; off < ip->i_size; off += dp->rec_len) {
1011		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
1012		    off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred,
1013		    NOCRED, &count, (struct thread *)0);
1014		/*
1015		 * Since we read MINDIRSIZ, residual must
1016		 * be 0 unless we're at end of file.
1017		 */
1018		if (error || count != 0)
1019			return (0);
1020		/* avoid infinite loops */
1021		if (dp->rec_len == 0)
1022			return (0);
1023		/* skip empty entries */
1024		if (dp->inode == 0)
1025			continue;
1026		/* accept only "." and ".." */
1027		namlen = dp->name_len;
1028		if (namlen > 2)
1029			return (0);
1030		if (dp->name[0] != '.')
1031			return (0);
1032		/*
1033		 * At this point namlen must be 1 or 2.
1034		 * 1 implies ".", 2 implies ".." if second
1035		 * char is also "."
1036		 */
1037		if (namlen == 1)
1038			continue;
1039		if (dp->name[1] == '.' && dp->inode == parentino)
1040			continue;
1041		return (0);
1042	}
1043	return (1);
1044}
1045
1046/*
1047 * Check if source directory is in the path of the target directory.
1048 * Target is supplied locked, source is unlocked.
1049 * The target is always vput before returning.
1050 */
1051int
1052ext2_checkpath(source, target, cred)
1053	struct inode *source, *target;
1054	struct ucred *cred;
1055{
1056	struct vnode *vp;
1057	int error, rootino, namlen;
1058	struct dirtemplate dirbuf;
1059
1060	vp = ITOV(target);
1061	if (target->i_number == source->i_number) {
1062		error = EEXIST;
1063		goto out;
1064	}
1065	rootino = ROOTINO;
1066	error = 0;
1067	if (target->i_number == rootino)
1068		goto out;
1069
1070	for (;;) {
1071		if (vp->v_type != VDIR) {
1072			error = ENOTDIR;
1073			break;
1074		}
1075		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1076			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
1077			IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, (int *)0,
1078			(struct thread *)0);
1079		if (error != 0)
1080			break;
1081		namlen = dirbuf.dotdot_type;	/* like ufs little-endian */
1082		if (namlen != 2 ||
1083		    dirbuf.dotdot_name[0] != '.' ||
1084		    dirbuf.dotdot_name[1] != '.') {
1085			error = ENOTDIR;
1086			break;
1087		}
1088		if (dirbuf.dotdot_ino == source->i_number) {
1089			error = EINVAL;
1090			break;
1091		}
1092		if (dirbuf.dotdot_ino == rootino)
1093			break;
1094		vput(vp);
1095		if ((error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino,
1096		    LK_EXCLUSIVE, &vp)) != 0) {
1097			vp = NULL;
1098			break;
1099		}
1100	}
1101
1102out:
1103	if (error == ENOTDIR)
1104		printf("checkpath: .. not a directory\n");
1105	if (vp != NULL)
1106		vput(vp);
1107	return (error);
1108}
1109