ext2fs_lookup.c revision 1.42
1/*	$OpenBSD: ext2fs_lookup.c,v 1.42 2016/03/19 12:04:16 natano Exp $	*/
2/*	$NetBSD: ext2fs_lookup.c,v 1.16 2000/08/03 20:29:26 thorpej Exp $	*/
3
4/*
5 * Modified for NetBSD 1.2E
6 * May 1997, Manuel Bouyer
7 * Laboratoire d'informatique de Paris VI
8 */
9/*
10 *  modified for Lites 1.1
11 *
12 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
13 *  University of Utah, Department of Computer Science
14 */
15/*
16 * Copyright (c) 1989, 1993
17 *	The Regents of the University of California.  All rights reserved.
18 * (c) UNIX System Laboratories, Inc.
19 * All or some portions of this file are derived from material licensed
20 * to the University of California by American Telephone and Telegraph
21 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
22 * the permission of UNIX System Laboratories, Inc.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 *    notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 *    notice, this list of conditions and the following disclaimer in the
31 *    documentation and/or other materials provided with the distribution.
32 * 3. Neither the name of the University nor the names of its contributors
33 *    may be used to endorse or promote products derived from this software
34 *    without specific prior written permission.
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
37 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 *
48 *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
49 */
50
51#include <sys/param.h>
52#include <sys/systm.h>
53#include <sys/namei.h>
54#include <sys/buf.h>
55#include <sys/mount.h>
56#include <sys/vnode.h>
57#include <sys/malloc.h>
58#include <sys/dirent.h>
59
60#include <ufs/ufs/quota.h>
61#include <ufs/ufs/inode.h>
62#include <ufs/ufs/ufsmount.h>
63#include <ufs/ufs/ufs_extern.h>
64
65#include <ufs/ext2fs/ext2fs_extern.h>
66#include <ufs/ext2fs/ext2fs_dir.h>
67#include <ufs/ext2fs/ext2fs.h>
68
/*
 * Patchable flag: when non-zero, ext2fs_search_dirblock() runs the full
 * ext2fs_dirbadentry() consistency check on every entry it visits.
 */
extern	int dirchk;

/* Helpers private to this file. */
static void	ext2fs_dirconv2ffs(struct ext2fs_direct *e2dir,
    struct dirent *ffsdir);
static int	ext2fs_dirbadentry(struct vnode *dp, struct ext2fs_direct *de,
    int entryoffsetinblock);
static int	ext2fs_search_dirblock(struct inode *, void *, int *,
    struct componentname *, int *, doff_t *, doff_t *,
    struct ext2fs_searchslot *);
78
79/*
 * The problem tackled below is that FFS includes the terminating zero
 * in its on-disk names while EXT2FS doesn't.
 * This implies that we need to introduce some padding.
 * For instance, the filename "sbin" normally has a reclen of 12
 * in EXT2, but 16 in FFS.
85 * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...'
86 * If it wasn't for that, the complete ufs code for directories would
87 * have worked w/o changes (except for the difference in DIRBLKSIZ)
88 */
89static void
90ext2fs_dirconv2ffs(struct ext2fs_direct	*e2dir, struct dirent *ffsdir)
91{
92	memset(ffsdir, 0, sizeof(struct dirent));
93	ffsdir->d_fileno = letoh32(e2dir->e2d_ino);
94	ffsdir->d_namlen = e2dir->e2d_namlen;
95
96	ffsdir->d_type = DT_UNKNOWN;		/* don't know more here */
97#ifdef DIAGNOSTIC
98	/*
99	 * XXX Rigth now this can't happen, but if one day
100	 * MAXNAMLEN != E2FS_MAXNAMLEN we should handle this more gracefully !
101	 */
102	/* XXX: e2d_namlen is to small for such comparison
103	if (e2dir->e2d_namlen > MAXNAMLEN)
104		panic("ext2fs: e2dir->e2d_namlen");
105	*/
106#endif
107	strncpy(ffsdir->d_name, e2dir->e2d_name, ffsdir->d_namlen);
108
109	/* Godmar thinks: since e2dir->e2d_reclen can be big and means
110	   nothing anyway, we compute our own reclen according to what
111	   we think is right
112	 */
113	ffsdir->d_reclen = DIRENT_SIZE(ffsdir);
114}
115
116/*
117 * Vnode op for reading directories.
118 *
119 * Convert the on-disk entries to <sys/dirent.h> entries.
120 * the problem is that the conversion will blow up some entries by four bytes,
121 * so it can't be done in place. This is too bad. Right now the conversion is
122 * done entry by entry, the converted entry is sent via uiomove.
123 *
124 * XXX allocate a buffer, convert as many entries as possible, then send
125 * the whole buffer to uiomove
126 */
127int
128ext2fs_readdir(void *v)
129{
130	struct vop_readdir_args *ap = v;
131	struct uio *uio = ap->a_uio;
132	int error;
133	size_t e2fs_count, readcnt, entries;
134	struct vnode *vp = ap->a_vp;
135	struct m_ext2fs *fs = VTOI(vp)->i_e2fs;
136
137	struct ext2fs_direct *dp;
138	struct dirent dstd;
139	struct uio auio;
140	struct iovec aiov;
141	caddr_t dirbuf;
142	off_t off = uio->uio_offset;
143	int e2d_reclen;
144
145	if (vp->v_type != VDIR)
146		return (ENOTDIR);
147
148	e2fs_count = uio->uio_resid;
149	entries = (uio->uio_offset + e2fs_count) & (fs->e2fs_bsize - 1);
150
151	/* Make sure we don't return partial entries. */
152	if (e2fs_count <= entries)
153		return (EINVAL);
154
155	e2fs_count -= entries;
156	auio = *uio;
157	auio.uio_iov = &aiov;
158	auio.uio_iovcnt = 1;
159	auio.uio_segflg = UIO_SYSSPACE;
160	aiov.iov_len = e2fs_count;
161	auio.uio_resid = e2fs_count;
162	dirbuf = malloc(e2fs_count, M_TEMP, M_WAITOK | M_ZERO);
163	aiov.iov_base = dirbuf;
164
165	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
166	if (error == 0) {
167		readcnt = e2fs_count - auio.uio_resid;
168		dp = (struct ext2fs_direct *) dirbuf;
169		while ((char *) dp < (char *) dirbuf + readcnt) {
170			e2d_reclen = letoh16(dp->e2d_reclen);
171			if (e2d_reclen == 0) {
172				error = EIO;
173				break;
174			}
175			ext2fs_dirconv2ffs(dp, &dstd);
176			if(dstd.d_reclen > uio->uio_resid) {
177				break;
178			}
179			dstd.d_off = off + e2d_reclen;
180			if ((error = uiomove((caddr_t)&dstd, dstd.d_reclen, uio)) != 0) {
181				break;
182			}
183			off = off + e2d_reclen;
184			/* advance dp */
185			dp = (struct ext2fs_direct *) ((char *)dp + e2d_reclen);
186		}
187		/* we need to correct uio_offset */
188		uio->uio_offset = off;
189	}
190	free(dirbuf, M_TEMP, e2fs_count);
191	*ap->a_eofflag = ext2fs_size(VTOI(ap->a_vp)) <= uio->uio_offset;
192	return (error);
193}
194
195/*
196 * Convert a component of a pathname into a pointer to a locked inode.
197 * This is a very central and rather complicated routine.
198 * If the file system is not maintained in a strict tree hierarchy,
199 * this can result in a deadlock situation (see comments in code below).
200 *
201 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
202 * on whether the name is to be looked up, created, renamed, or deleted.
203 * When CREATE, RENAME, or DELETE is specified, information usable in
204 * creating, renaming, or deleting a directory entry may be calculated.
205 * If flag has LOCKPARENT or'ed into it and the target of the pathname
206 * exists, lookup returns both the target and its parent directory locked.
207 * When creating or renaming and LOCKPARENT is specified, the target may
208 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and a vput
210 * instead of two vputs.
211 *
212 * Overall outline of ext2fs_lookup:
213 *
214 *	check accessibility of directory
215 *	look for name in cache, if found, then if at end of path
216 *	  and deleting or creating, drop it, else return name
217 *	search for name in directory, to found or notfound
218 * notfound:
219 *	if creating, return locked directory, leaving info on available slots
220 *	else return error
221 * found:
222 *	if at end of path and deleting, return information to allow delete
223 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
224 *	  inode and return info to allow rewrite
225 *	if not at end, add name to cache; if at end and neither creating
226 *	  nor deleting, add name to cache
227 */
228int
229ext2fs_lookup(void *v)
230{
231	struct vop_lookup_args *ap = v;
232	struct vnode *vdp;	/* vnode for directory being searched */
233	struct inode *dp;	/* inode for directory being searched */
234	struct buf *bp;			/* a buffer of directory entries */
235	struct ext2fs_direct *ep; /* the current directory entry */
236	int entryoffsetinblock;		/* offset of ep in bp's buffer */
237	struct ext2fs_searchslot ss;
238	int numdirpasses;		/* strategy for directory search */
239	doff_t endsearch;		/* offset to end directory search */
240	doff_t prevoff;			/* prev entry dp->i_offset */
241	struct vnode *pdp;		/* saved dp during symlink work */
242	struct vnode *tdp;		/* returned by VFS_VGET */
243	doff_t enduseful;		/* pointer past last used dir slot */
244	u_long bmask;			/* block offset mask */
245	int lockparent;			/* 1 => lockparent flag is set */
246	int wantparent;			/* 1 => wantparent or lockparent flag */
247	struct vnode **vpp = ap->a_vpp;
248	struct componentname *cnp = ap->a_cnp;
249	struct ucred *cred = cnp->cn_cred;
250	int flags = cnp->cn_flags;
251	int nameiop = cnp->cn_nameiop;
252	struct proc *p = cnp->cn_proc;
253	int dirblksize, entry_found = 0, error;
254
255	ss.slotstatus = FOUND;
256	ss.slotoffset = -1;
257	ss.slotfreespace = ss.slotsize = ss.slotneeded = 0;
258
259	bp = NULL;
260	*vpp = NULL;
261	vdp = ap->a_dvp;
262	dp = VTOI(vdp);
263	dirblksize = dp->i_e2fs->e2fs_bsize;
264	lockparent = flags & LOCKPARENT;
265	wantparent = flags & (LOCKPARENT|WANTPARENT);
266
267	/*
268	 * Check accessiblity of directory.
269	 */
270	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
271		return (error);
272
273	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
274	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
275		return (EROFS);
276
277	/*
278	 * We now have a segment name to search for, and a directory to search.
279	 *
280	 * Before tediously performing a linear scan of the directory,
281	 * check the name cache to see if the directory/name pair
282	 * we are looking for is known already.
283	 */
284	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
285		return (error);
286
287	/*
288	 * Suppress search for slots unless creating
289	 * file and at end of pathname, in which case
290	 * we watch for a place to put the new file in
291	 * case it doesn't already exist.
292	 */
293	if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) {
294		ss.slotstatus = NONE;
295		ss.slotneeded = EXT2FS_DIRSIZ(cnp->cn_namelen);
296	}
297
298	/*
299	 * If there is cached information on a previous search of
300	 * this directory, pick up where we last left off.
301	 * We cache only lookups as these are the most common
302	 * and have the greatest payoff. Caching CREATE has little
303	 * benefit as it usually must search the entire directory
304	 * to determine that the entry does not exist. Caching the
305	 * location of the last DELETE or RENAME has not reduced
306	 * profiling time and hence has been removed in the interest
307	 * of simplicity.
308	 */
309	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
310	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
311	    dp->i_diroff > ext2fs_size(dp)) {
312		entryoffsetinblock = 0;
313		dp->i_offset = 0;
314		numdirpasses = 1;
315	} else {
316		dp->i_offset = dp->i_diroff;
317		if ((entryoffsetinblock = dp->i_offset & bmask) &&
318		    (error = ext2fs_bufatoff(dp, (off_t)dp->i_offset,
319		    NULL, &bp)))
320			return (error);
321		numdirpasses = 2;
322	}
323	prevoff = dp->i_offset;
324	endsearch = roundup(ext2fs_size(dp), dirblksize);
325	enduseful = 0;
326
327searchloop:
328	while (dp->i_offset < endsearch) {
329		/*
330		 * If necessary, get the next directory block.
331		 */
332		if (bp != NULL)
333			brelse(bp);
334
335		error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, NULL, &bp);
336		if (error != 0)
337			return (error);
338		entryoffsetinblock = 0;
339
340		/*
341		 * If still looking for a slot, and at a dirblksize
342		 * boundary, have to start looking for free space again.
343		 */
344		if (ss.slotstatus == NONE) {
345			ss.slotoffset = -1;
346			ss.slotfreespace = 0;
347		}
348
349		error = ext2fs_search_dirblock(dp, bp->b_data, &entry_found,
350		    cnp, &entryoffsetinblock, &prevoff, &enduseful, &ss);
351		if (error) {
352			brelse(bp);
353			return (error);
354		}
355		if (entry_found) {
356			ep = (struct ext2fs_direct *)
357			    ((char *)bp->b_data + (entryoffsetinblock & bmask));
358/* foundentry: */
359			dp->i_ino = letoh32(ep->e2d_ino);
360			dp->i_reclen = letoh16(ep->e2d_reclen);
361			goto found;
362		}
363	}
364/* notfound: */
365	/*
366	 * If we started in the middle of the directory and failed
367	 * to find our target, we must check the beginning as well.
368	 */
369	if (numdirpasses == 2) {
370		numdirpasses--;
371		dp->i_offset = 0;
372		endsearch = dp->i_diroff;
373		goto searchloop;
374	}
375	if (bp != NULL)
376		brelse(bp);
377	/*
378	 * If creating, and at end of pathname and current
379	 * directory has not been removed, then can consider
380	 * allowing file to be created.
381	 */
382	if ((nameiop == CREATE || nameiop == RENAME) &&
383		(flags & ISLASTCN) && dp->i_e2fs_nlink != 0) {
384		/*
385		 * Creation of files on a read-only mounted file system
386		 * is pointless, so don't proceed any further.
387		 */
388		if (vdp->v_mount->mnt_flag & MNT_RDONLY)
389			return (EROFS);
390		/*
391		 * Access for write is interpreted as allowing
392		 * creation of files in the directory.
393		 */
394		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
395			return (error);
396		/*
397		 * Return an indication of where the new directory
398		 * entry should be put.  If we didn't find a slot,
399		 * then set dp->i_count to 0 indicating
400		 * that the new slot belongs at the end of the
401		 * directory. If we found a slot, then the new entry
402		 * can be put in the range from dp->i_offset to
403		 * dp->i_offset + dp->i_count.
404		 */
405		if (ss.slotstatus == NONE) {
406			dp->i_offset = roundup(ext2fs_size(dp), dirblksize);
407			dp->i_count = 0;
408			enduseful = dp->i_offset;
409		} else {
410			dp->i_offset = ss.slotoffset;
411			dp->i_count = ss.slotsize;
412			if (enduseful < ss.slotoffset + ss.slotsize)
413				enduseful = ss.slotoffset + ss.slotsize;
414		}
415		dp->i_endoff = roundup(enduseful, dirblksize);
416		dp->i_flag |= IN_CHANGE | IN_UPDATE;
417		/*
418		 * We return with the directory locked, so that
419		 * the parameters we set up above will still be
420		 * valid if we actually decide to do a direnter().
421		 * We return ni_vp == NULL to indicate that the entry
422		 * does not currently exist; we leave a pointer to
423		 * the (locked) directory inode in ndp->ni_dvp.
424		 * The pathname buffer is saved so that the name
425		 * can be obtained later.
426		 *
427		 * NB - if the directory is unlocked, then this
428		 * information cannot be used.
429		 */
430		cnp->cn_flags |= SAVENAME;
431		if (!lockparent) {
432			VOP_UNLOCK(vdp, p);
433			cnp->cn_flags |= PDIRUNLOCK;
434		}
435		return (EJUSTRETURN);
436	}
437	/*
438	 * Insert name into cache (as non-existent) if appropriate.
439	 */
440	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
441		cache_enter(vdp, *vpp, cnp);
442	return (ENOENT);
443
444found:
445	/*
446	 * Check that directory length properly reflects presence
447	 * of this entry.
448	 */
449	if (entryoffsetinblock + EXT2FS_DIRSIZ(ep->e2d_namlen)
450	    > ext2fs_size(dp)) {
451		ufs_dirbad(dp, dp->i_offset, "i_size too small");
452		error = ext2fs_setsize(dp,
453			entryoffsetinblock + EXT2FS_DIRSIZ(ep->e2d_namlen));
454		if (error) {
455			brelse(bp);
456			return(error);
457		}
458		dp->i_flag |= IN_CHANGE | IN_UPDATE;
459	}
460	brelse(bp);
461
462	/*
463	 * Found component in pathname.
464	 * If the final component of path name, save information
465	 * in the cache as to where the entry was found.
466	 */
467	if ((flags & ISLASTCN) && nameiop == LOOKUP)
468		dp->i_diroff = dp->i_offset &~ (dirblksize - 1);
469
470	/*
471	 * If deleting, and at end of pathname, return
472	 * parameters which can be used to remove file.
473	 * If the wantparent flag isn't set, we return only
474	 * the directory (in ndp->ni_dvp), otherwise we go
475	 * on and lock the inode, being careful with ".".
476	 */
477	if (nameiop == DELETE && (flags & ISLASTCN)) {
478		/*
479		 * Write access to directory required to delete files.
480		 */
481		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
482			return (error);
483		/*
484		 * Return pointer to current entry in dp->i_offset,
485		 * and distance past previous entry (if there
486		 * is a previous entry in this block) in dp->i_count.
487		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
488		 */
489		if ((dp->i_offset & (dirblksize - 1)) == 0)
490			dp->i_count = 0;
491		else
492			dp->i_count = dp->i_offset - prevoff;
493		if (dp->i_number == dp->i_ino) {
494			vref(vdp);
495			*vpp = vdp;
496			return (0);
497		}
498		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
499			return (error);
500		/*
501		 * If directory is "sticky", then user must own
502		 * the directory, or the file in it, else she
503		 * may not delete it (unless she's root). This
504		 * implements append-only directories.
505		 */
506		if ((dp->i_e2fs_mode & ISVTX) &&
507			cred->cr_uid != 0 &&
508			cred->cr_uid != dp->i_e2fs_uid &&
509			VTOI(tdp)->i_e2fs_uid != cred->cr_uid) {
510			vput(tdp);
511			return (EPERM);
512		}
513		*vpp = tdp;
514		if (!lockparent) {
515			VOP_UNLOCK(vdp, p);
516			cnp->cn_flags |= PDIRUNLOCK;
517		}
518		return (0);
519	}
520
521	/*
522	 * If rewriting (RENAME), return the inode and the
523	 * information required to rewrite the present directory
524	 * Must get inode of directory entry to verify it's a
525	 * regular file, or empty directory.
526	 */
527	if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
528		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
529			return (error);
530		/*
531		 * Careful about locking second inode.
532		 * This can only occur if the target is ".".
533		 */
534		if (dp->i_number == dp->i_ino)
535			return (EISDIR);
536		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
537			return (error);
538		*vpp = tdp;
539		cnp->cn_flags |= SAVENAME;
540		if (!lockparent) {
541			VOP_UNLOCK(vdp, p);
542			cnp->cn_flags |= PDIRUNLOCK;
543		}
544		return (0);
545	}
546
547	/*
548	 * Step through the translation in the name.  We do not `vput' the
549	 * directory because we may need it again if a symbolic link
550	 * is relative to the current directory.  Instead we save it
551	 * unlocked as "pdp".  We must get the target inode before unlocking
552	 * the directory to insure that the inode will not be removed
553	 * before we get it.  We prevent deadlock by always fetching
554	 * inodes from the root, moving down the directory tree. Thus
555	 * when following backward pointers ".." we must unlock the
556	 * parent directory before getting the requested directory.
557	 * There is a potential race condition here if both the current
558	 * and parent directories are removed before the VFS_VGET for the
559	 * inode associated with ".." returns.  We hope that this occurs
560	 * infrequently since we cannot avoid this race condition without
561	 * implementing a sophisticated deadlock detection algorithm.
562	 * Note also that this simple deadlock detection scheme will not
563	 * work if the file system has any hard links other than ".."
564	 * that point backwards in the directory structure.
565	 */
566	pdp = vdp;
567	if (flags & ISDOTDOT) {
568		VOP_UNLOCK(pdp, p);	/* race to get the inode */
569		cnp->cn_flags |= PDIRUNLOCK;
570		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) {
571			if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p) == 0)
572				cnp->cn_flags &= ~PDIRUNLOCK;
573			return (error);
574		}
575		if (lockparent && (flags & ISLASTCN)) {
576			if ((error = vn_lock(pdp, LK_EXCLUSIVE, p)) != 0) {
577				vput(tdp);
578				return (error);
579			}
580			cnp->cn_flags &= ~PDIRUNLOCK;
581		}
582		*vpp = tdp;
583	} else if (dp->i_number == dp->i_ino) {
584		vref(vdp);	/* we want ourself, ie "." */
585		*vpp = vdp;
586	} else {
587		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
588			return (error);
589		if (!lockparent || !(flags & ISLASTCN)) {
590			VOP_UNLOCK(pdp, p);
591			cnp->cn_flags |= PDIRUNLOCK;
592		}
593		*vpp = tdp;
594	}
595
596	/*
597	 * Insert name into cache if appropriate.
598	 */
599	if (cnp->cn_flags & MAKEENTRY)
600		cache_enter(vdp, *vpp, cnp);
601	return (0);
602}
603
604int
605ext2fs_search_dirblock(struct inode *ip, void *data, int *foundp,
606    struct componentname *cnp, int *entryoffsetinblockp,
607    doff_t *prevoffp, doff_t *endusefulp, struct ext2fs_searchslot *ssp)
608{
609	struct ext2fs_direct *ep, *lim;
610	struct vnode *vdp;
611	int offset = *entryoffsetinblockp;
612	int dirblksize = ip->i_e2fs->e2fs_bsize;
613	size_t namlen;
614
615	vdp = ITOV(ip);
616
617	lim = (struct ext2fs_direct *)
618	    ((char *)data + dirblksize - EXT2FS_DIRSIZ(0));
619	ep = (struct ext2fs_direct *) ((char *)data + offset);
620
621	while (ep < lim) {
622		/*
623		 * Full validation checks are slow, so we only check
624		 * enough to insure forward progress through the
625		 * directory. Complete checks can be run by patching
626		 * "dirchk" to be true.
627		 */
628		if (ep->e2d_reclen == 0 ||
629		    (dirchk && ext2fs_dirbadentry(vdp, ep, offset))) {
630			int i;
631			ufs_dirbad(ip, ip->i_offset, "mangled entry");
632			i = dirblksize - (offset & (dirblksize - 1));
633			ip->i_offset += i;
634			offset += i;
635			continue;
636		}
637
638		/*
639		 * If an appropriate sized slot has not yet been found,
640		 * check to see if one is available. Also accumulate space
641		 * in the current block so that we can determine if
642		 * compaction is viable.
643		 */
644		if (ssp->slotstatus != FOUND) {
645			int size = letoh16(ep->e2d_reclen);
646
647			if (ep->e2d_ino != 0)
648				size -= EXT2FS_DIRSIZ(ep->e2d_namlen);
649			if (size > 0) {
650				if (size >= ssp->slotneeded) {
651					ssp->slotstatus = FOUND;
652					ssp->slotoffset = ip->i_offset;
653					ssp->slotsize = letoh16(ep->e2d_reclen);
654				} else if (ssp->slotstatus == NONE) {
655					ssp->slotfreespace += size;
656					if (ssp->slotoffset == -1)
657						ssp->slotoffset = ip->i_offset;
658					if (ssp->slotfreespace >= ssp->slotneeded) {
659						ssp->slotstatus = COMPACT;
660						ssp->slotsize = ip->i_offset +
661							  letoh16(ep->e2d_reclen) - ssp->slotoffset;
662					}
663				}
664			}
665		}
666
667		/*
668		 * Check for a name match.
669		 */
670		if (ep->e2d_ino) {
671			namlen = ep->e2d_namlen;
672			if (namlen == cnp->cn_namelen &&
673			    !memcmp(cnp->cn_nameptr, ep->e2d_name, namlen)) {
674				/*
675				 * Save directory entry's inode number and
676				 * reclen in ndp->ni_ufs area, and release
677				 * directory buffer.
678				 */
679				*foundp = 1;
680				return (0);
681			}
682		}
683		*prevoffp = ip->i_offset;
684		ip->i_offset += letoh16(ep->e2d_reclen);
685		offset += letoh16(ep->e2d_reclen);
686		*entryoffsetinblockp = offset;
687		if (ep->e2d_ino)
688			*endusefulp = ip->i_offset;
689
690		/*
691		 * Get pointer to the next entry.
692		 */
693		ep = (struct ext2fs_direct *) ((char *)data + offset);
694	}
695
696	return (0);
697}
698
699/*
700 * Do consistency checking on a directory entry:
701 *	record length must be multiple of 4
702 *	entry must fit in rest of its dirblksize block
703 *	record must be large enough to contain entry
704 *	name is not longer than MAXNAMLEN
705 *	name must be as long as advertised, and null terminated
706 */
707/*
 *	changed so that it conforms to ext2fs_check_dir_entry
709 */
710static int
711ext2fs_dirbadentry(struct vnode *dp, struct ext2fs_direct *de,
712    int entryoffsetinblock)
713{
714	int dirblksize = VTOI(dp)->i_e2fs->e2fs_bsize;
715	char *error_msg = NULL;
716	int reclen = letoh16(de->e2d_reclen);
717	int namlen = de->e2d_namlen;
718
719	if (reclen < EXT2FS_DIRSIZ(1)) /* e2d_namlen = 1 */
720		error_msg = "rec_len is smaller than minimal";
721	else if (reclen % 4 != 0)
722		error_msg = "rec_len % 4 != 0";
723	else if (reclen < EXT2FS_DIRSIZ(namlen))
724		error_msg = "reclen is too small for name_len";
725	else if (entryoffsetinblock + reclen > dirblksize)
726		error_msg = "directory entry across blocks";
727	else if (letoh32(de->e2d_ino) > VTOI(dp)->i_e2fs->e2fs.e2fs_icount)
728		error_msg = "inode out of bounds";
729
730	if (error_msg != NULL) {
731		printf("bad directory entry: %s\n"
732		    "offset=%d, inode=%u, rec_len=%d, name_len=%d \n",
733		    error_msg, entryoffsetinblock, letoh32(de->e2d_ino),
734		    reclen, namlen);
735		panic(__func__);
736	}
737	return (0);
738}
739
740/*
741 * Write a directory entry after a call to namei, using the parameters
742 * that it left in nameidata.  The argument ip is the inode which the new
743 * directory entry will refer to.  Dvp is a pointer to the directory to
744 * be written, which was left locked by namei. Remaining parameters
745 * (dp->i_offset, dp->i_count) indicate how the space for the new
746 * entry is to be obtained.
747 */
748int
749ext2fs_direnter(struct inode *ip, struct vnode *dvp,
750    struct componentname *cnp)
751{
752	struct ext2fs_direct *ep, *nep;
753	struct inode *dp;
754	struct buf *bp;
755	struct ext2fs_direct newdir;
756	struct iovec aiov;
757	struct uio auio;
758	u_int dsize;
759	int error, loc, newentrysize, spacefree;
760	char *dirbuf;
761	int dirblksize = ip->i_e2fs->e2fs_bsize;
762
763
764#ifdef DIAGNOSTIC
765	if ((cnp->cn_flags & SAVENAME) == 0)
766		panic("direnter: missing name");
767#endif
768	dp = VTOI(dvp);
769	newdir.e2d_ino = htole32(ip->i_number);
770	newdir.e2d_namlen = cnp->cn_namelen;
771	if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 &&
772	    (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) {
773		newdir.e2d_type = inot2ext2dt(IFTODT(ip->i_e2fs_mode));
774	} else {
775		newdir.e2d_type = 0;
776	};
777	memcpy(newdir.e2d_name, cnp->cn_nameptr, (unsigned)cnp->cn_namelen + 1);
778	newentrysize = EXT2FS_DIRSIZ(cnp->cn_namelen);
779	if (dp->i_count == 0) {
780		/*
781		 * If dp->i_count is 0, then namei could find no
782		 * space in the directory. Here, dp->i_offset will
783		 * be on a directory block boundary and we will write the
784		 * new entry into a fresh block.
785		 */
786		if (dp->i_offset & (dirblksize - 1))
787			panic("ext2fs_direnter: newblk");
788		auio.uio_offset = dp->i_offset;
789		newdir.e2d_reclen = htole16(dirblksize);
790		auio.uio_resid = newentrysize;
791		aiov.iov_len = newentrysize;
792		aiov.iov_base = (caddr_t)&newdir;
793		auio.uio_iov = &aiov;
794		auio.uio_iovcnt = 1;
795		auio.uio_rw = UIO_WRITE;
796		auio.uio_segflg = UIO_SYSSPACE;
797		auio.uio_procp = NULL;
798		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
799		if (dirblksize >
800			VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
801			/* XXX should grow with balloc() */
802			panic("ext2fs_direnter: frag size");
803		else if (!error) {
804			error = ext2fs_setsize(dp,
805				roundup(ext2fs_size(dp), dirblksize));
806			if (error)
807				return (error);
808			dp->i_flag |= IN_CHANGE;
809		}
810		return (error);
811	}
812
813	/*
814	 * If dp->i_count is non-zero, then namei found space
815	 * for the new entry in the range dp->i_offset to
816	 * dp->i_offset + dp->i_count in the directory.
817	 * To use this space, we may have to compact the entries located
818	 * there, by copying them together towards the beginning of the
819	 * block, leaving the free space in one usable chunk at the end.
820	 */
821
822	/*
823	 * Get the block containing the space for the new directory entry.
824	 */
825	if ((error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, &dirbuf, &bp))
826	    != 0)
827		return (error);
828	/*
829	 * Find space for the new entry. In the simple case, the entry at
830	 * offset base will have the space. If it does not, then namei
831	 * arranged that compacting the region dp->i_offset to
832	 * dp->i_offset + dp->i_count would yield the
833	 * space.
834	 */
835	ep = (struct ext2fs_direct *)dirbuf;
836	dsize = EXT2FS_DIRSIZ(ep->e2d_namlen);
837	spacefree = letoh16(ep->e2d_reclen) - dsize;
838	for (loc = letoh16(ep->e2d_reclen); loc < dp->i_count; ) {
839		nep = (struct ext2fs_direct *)(dirbuf + loc);
840		if (ep->e2d_ino) {
841			/* trim the existing slot */
842			ep->e2d_reclen = htole16(dsize);
843			ep = (struct ext2fs_direct *)((char *)ep + dsize);
844		} else {
845			/* overwrite; nothing there; header is ours */
846			spacefree += dsize;
847		}
848		dsize = EXT2FS_DIRSIZ(nep->e2d_namlen);
849		spacefree += letoh16(nep->e2d_reclen) - dsize;
850		loc += letoh16(nep->e2d_reclen);
851		memcpy(ep, nep, dsize);
852	}
853	/*
854	 * Update the pointer fields in the previous entry (if any),
855	 * copy in the new entry, and write out the block.
856	 */
857	if (ep->e2d_ino == 0) {
858#ifdef DIAGNOSTIC
859		if (spacefree + dsize < newentrysize)
860			panic("ext2fs_direnter: compact1");
861#endif
862		newdir.e2d_reclen = htole16(spacefree + dsize);
863	} else {
864#ifdef DIAGNOSTIC
865		if (spacefree < newentrysize) {
866			printf("ext2fs_direnter: compact2 %u %u",
867			    (u_int)spacefree, (u_int)newentrysize);
868			panic("ext2fs_direnter: compact2");
869		}
870#endif
871		newdir.e2d_reclen = htole16(spacefree);
872		ep->e2d_reclen = htole16(dsize);
873		ep = (struct ext2fs_direct *)((char *)ep + dsize);
874	}
875	memcpy(ep, &newdir, newentrysize);
876	error = VOP_BWRITE(bp);
877	dp->i_flag |= IN_CHANGE | IN_UPDATE;
878	if (!error && dp->i_endoff && dp->i_endoff < ext2fs_size(dp))
879		error = ext2fs_truncate(dp, (off_t)dp->i_endoff, IO_SYNC,
880		    cnp->cn_cred);
881	return (error);
882}
883
884/*
885 * Remove a directory entry after a call to namei, using
886 * the parameters which it left in nameidata. The entry
887 * dp->i_offset contains the offset into the directory of the
888 * entry to be eliminated.  The dp->i_count field contains the
889 * size of the previous record in the directory.  If this
890 * is 0, the first entry is being deleted, so we need only
891 * zero the inode number to mark the entry as free.  If the
892 * entry is not the first in the directory, we must reclaim
893 * the space of the now empty record by adding the record size
894 * to the size of the previous entry.
895 */
896int
897ext2fs_dirremove(struct vnode *dvp, struct componentname *cnp)
898{
899	struct inode *dp;
900	struct ext2fs_direct *ep;
901	struct buf *bp;
902	int error;
903
904	dp = VTOI(dvp);
905	if (dp->i_count == 0) {
906		/*
907		 * First entry in block: set d_ino to zero.
908		 */
909		error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, (char **)&ep,
910		    &bp);
911		if (error != 0)
912			return (error);
913		ep->e2d_ino = 0;
914		error = VOP_BWRITE(bp);
915		dp->i_flag |= IN_CHANGE | IN_UPDATE;
916		return (error);
917	}
918	/*
919	 * Collapse new free space into previous entry.
920	 */
921	error = ext2fs_bufatoff(dp, (off_t)(dp->i_offset - dp->i_count),
922	    (char **)&ep, &bp);
923	if (error != 0)
924		return (error);
925	ep->e2d_reclen = htole16(letoh16(ep->e2d_reclen) + dp->i_reclen);
926	error = VOP_BWRITE(bp);
927	dp->i_flag |= IN_CHANGE | IN_UPDATE;
928	return (error);
929}
930
931/*
932 * Rewrite an existing directory entry to point at the inode
933 * supplied.  The parameters describing the directory entry are
934 * set up by a call to namei.
935 */
936int
937ext2fs_dirrewrite(struct inode *dp, struct inode *ip,
938    struct componentname *cnp)
939{
940	struct buf *bp;
941	struct ext2fs_direct *ep;
942	int error;
943
944	error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, (char **)&ep, &bp);
945	if (error != 0)
946		return (error);
947	ep->e2d_ino = htole32(ip->i_number);
948	if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 &&
949	    (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) {
950		ep->e2d_type = inot2ext2dt(IFTODT(ip->i_e2fs_mode));
951	} else {
952		ep->e2d_type = 0;
953	}
954	error = VOP_BWRITE(bp);
955	dp->i_flag |= IN_CHANGE | IN_UPDATE;
956	return (error);
957}
958
959/*
960 * Check if a directory is empty or not.
961 * Inode supplied must be locked.
962 *
963 * Using a struct dirtemplate here is not precisely
964 * what we want, but better than using a struct ext2fs_direct.
965 *
966 * NB: does not handle corrupted directories.
967 */
968int
969ext2fs_dirempty(struct inode *ip, ufsino_t parentino, struct ucred *cred)
970{
971	off_t off;
972	struct ext2fs_dirtemplate dbuf;
973	struct ext2fs_direct *dp = (struct ext2fs_direct *)&dbuf;
974	int error, namlen;
975	size_t count;
976
977#define	MINDIRSIZ (sizeof (struct ext2fs_dirtemplate) / 2)
978
979	for (off = 0; off < ext2fs_size(ip); off += letoh16(dp->e2d_reclen)) {
980		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
981		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, curproc);
982		/*
983		 * Since we read MINDIRSIZ, residual must
984		 * be 0 unless we're at end of file.
985		 */
986		if (error || count != 0)
987			return (0);
988		/* avoid infinite loops */
989		if (dp->e2d_reclen == 0)
990			return (0);
991		/* skip empty entries */
992		if (dp->e2d_ino == 0)
993			continue;
994		/* accept only "." and ".." */
995		namlen = dp->e2d_namlen;
996		if (namlen > 2)
997			return (0);
998		if (dp->e2d_name[0] != '.')
999			return (0);
1000		/*
1001		 * At this point namlen must be 1 or 2.
1002		 * 1 implies ".", 2 implies ".." if second
1003		 * char is also "."
1004		 */
1005		if (namlen == 1)
1006			continue;
1007		if (dp->e2d_name[1] == '.' && letoh32(dp->e2d_ino) == parentino)
1008			continue;
1009		return (0);
1010	}
1011	return (1);
1012}
1013
1014/*
1015 * Check if source directory is in the path of the target directory.
1016 * Target is supplied locked, source is unlocked.
1017 * The target is always vput before returning.
1018 */
1019int
1020ext2fs_checkpath(struct inode *source, struct inode *target,
1021   struct ucred *cred)
1022{
1023	struct vnode *vp;
1024	int error, rootino, namlen;
1025	struct ext2fs_dirtemplate dirbuf;
1026	u_int32_t ino;
1027
1028	vp = ITOV(target);
1029	if (target->i_number == source->i_number) {
1030		error = EEXIST;
1031		goto out;
1032	}
1033	rootino = ROOTINO;
1034	error = 0;
1035	if (target->i_number == rootino)
1036		goto out;
1037
1038	for (;;) {
1039		if (vp->v_type != VDIR) {
1040			error = ENOTDIR;
1041			break;
1042		}
1043		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1044			sizeof (struct ext2fs_dirtemplate), (off_t)0,
1045			UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
1046			curproc);
1047		if (error != 0)
1048			break;
1049		namlen = dirbuf.dotdot_namlen;
1050		if (namlen != 2 ||
1051			dirbuf.dotdot_name[0] != '.' ||
1052			dirbuf.dotdot_name[1] != '.') {
1053			error = ENOTDIR;
1054			break;
1055		}
1056		ino = letoh32(dirbuf.dotdot_ino);
1057		if (ino == source->i_number) {
1058			error = EINVAL;
1059			break;
1060		}
1061		if (ino == rootino)
1062			break;
1063		vput(vp);
1064		error = VFS_VGET(vp->v_mount, ino, &vp);
1065		if (error != 0) {
1066			vp = NULL;
1067			break;
1068		}
1069	}
1070
1071out:
1072	if (error == ENOTDIR) {
1073		printf("checkpath: .. not a directory\n");
1074		panic("checkpath");
1075	}
1076	if (vp != NULL)
1077		vput(vp);
1078	return (error);
1079}
1080