1/*	$NetBSD$	*/
2
3/*
4 * Modified for NetBSD 1.2E
5 * May 1997, Manuel Bouyer
6 * Laboratoire d'informatique de Paris VI
7 */
8/*
9 *  modified for Lites 1.1
10 *
11 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
12 *  University of Utah, Department of Computer Science
13 */
14/*
15 * Copyright (c) 1989, 1993
16 *	The Regents of the University of California.  All rights reserved.
17 * (c) UNIX System Laboratories, Inc.
18 * All or some portions of this file are derived from material licensed
19 * to the University of California by American Telephone and Telegraph
20 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
21 * the permission of UNIX System Laboratories, Inc.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the above copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
48 */
49
50#include <sys/cdefs.h>
51__KERNEL_RCSID(0, "$NetBSD$");
52
53#include <sys/param.h>
54#include <sys/systm.h>
55#include <sys/namei.h>
56#include <sys/buf.h>
57#include <sys/file.h>
58#include <sys/mount.h>
59#include <sys/vnode.h>
60#include <sys/kmem.h>
61#include <sys/malloc.h>
62#include <sys/dirent.h>
63#include <sys/kauth.h>
64#include <sys/proc.h>
65
66#include <ufs/ufs/inode.h>
67#include <ufs/ufs/ufsmount.h>
68#include <ufs/ufs/ufs_extern.h>
69
70#include <ufs/ext2fs/ext2fs_extern.h>
71#include <ufs/ext2fs/ext2fs_dir.h>
72#include <ufs/ext2fs/ext2fs.h>
73
/*
 * Defined outside this file.  When non-zero, ext2fs_lookup() runs the full
 * ext2fs_dirbadentry() consistency check on each directory entry it scans.
 */
extern	int dirchk;

/* Build a struct dirent from one on-disk ext2fs directory entry. */
static void	ext2fs_dirconv2ffs(struct ext2fs_direct *e2dir,
					  struct dirent *ffsdir);
/* Consistency-check one on-disk ext2fs directory entry. */
static int	ext2fs_dirbadentry(struct vnode *dp,
					  struct ext2fs_direct *de,
					  int entryoffsetinblock);
81
82/*
83 * the problem that is tackled below is the fact that FFS
84 * includes the terminating zero on disk while EXT2FS doesn't
85 * this implies that we need to introduce some padding.
86 * For instance, a filename "sbin" has normally a reclen 12
87 * in EXT2, but 16 in FFS.
88 * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...'
89 * If it wasn't for that, the complete ufs code for directories would
90 * have worked w/o changes (except for the difference in DIRBLKSIZ)
91 */
92static void
93ext2fs_dirconv2ffs(struct ext2fs_direct *e2dir, struct dirent *ffsdir)
94{
95	memset(ffsdir, 0, sizeof(struct dirent));
96	ffsdir->d_fileno = fs2h32(e2dir->e2d_ino);
97	ffsdir->d_namlen = e2dir->e2d_namlen;
98
99	ffsdir->d_type = DT_UNKNOWN;		/* don't know more here */
100#ifdef DIAGNOSTIC
101#if MAXNAMLEN < E2FS_MAXNAMLEN
102	/*
103	 * we should handle this more gracefully !
104	 */
105	if (e2dir->e2d_namlen > MAXNAMLEN)
106		panic("ext2fs: e2dir->e2d_namlen");
107#endif
108#endif
109	strncpy(ffsdir->d_name, e2dir->e2d_name, ffsdir->d_namlen);
110
111	/* Godmar thinks: since e2dir->e2d_reclen can be big and means
112	   nothing anyway, we compute our own reclen according to what
113	   we think is right
114	 */
115	ffsdir->d_reclen = _DIRENT_SIZE(ffsdir);
116}
117
118/*
119 * Vnode op for reading directories.
120 *
121 * Convert the on-disk entries to <sys/dirent.h> entries.
122 * the problem is that the conversion will blow up some entries by four bytes,
123 * so it can't be done in place. This is too bad. Right now the conversion is
124 * done entry by entry, the converted entry is sent via uiomove.
125 *
126 * XXX allocate a buffer, convert as many entries as possible, then send
127 * the whole buffer to uiomove
128 */
129int
130ext2fs_readdir(void *v)
131{
132	struct vop_readdir_args /* {
133		struct vnode *a_vp;
134		struct uio *a_uio;
135		kauth_cred_t a_cred;
136		int **a_eofflag;
137		off_t **a_cookies;
138		int ncookies;
139	} */ *ap = v;
140	struct uio *uio = ap->a_uio;
141	int error;
142	size_t e2fs_count, readcnt;
143	struct vnode *vp = ap->a_vp;
144	struct m_ext2fs *fs = VTOI(vp)->i_e2fs;
145
146	struct ext2fs_direct *dp;
147	struct dirent *dstd;
148	struct uio auio;
149	struct iovec aiov;
150	void *dirbuf;
151	off_t off = uio->uio_offset;
152	off_t *cookies = NULL;
153	int nc = 0, ncookies = 0;
154	int e2d_reclen;
155
156	if (vp->v_type != VDIR)
157		return (ENOTDIR);
158
159	e2fs_count = uio->uio_resid;
160	/* Make sure we don't return partial entries. */
161	e2fs_count -= (uio->uio_offset + e2fs_count) & (fs->e2fs_bsize -1);
162	if (e2fs_count <= 0)
163		return (EINVAL);
164
165	auio = *uio;
166	auio.uio_iov = &aiov;
167	auio.uio_iovcnt = 1;
168	aiov.iov_len = e2fs_count;
169	auio.uio_resid = e2fs_count;
170	UIO_SETUP_SYSSPACE(&auio);
171	dirbuf = kmem_alloc(e2fs_count, KM_SLEEP);
172	dstd = kmem_zalloc(sizeof(struct dirent), KM_SLEEP);
173	if (ap->a_ncookies) {
174		nc = e2fs_count / _DIRENT_MINSIZE((struct dirent *)0);
175		ncookies = nc;
176		cookies = malloc(sizeof (off_t) * ncookies, M_TEMP, M_WAITOK);
177		*ap->a_cookies = cookies;
178	}
179	aiov.iov_base = dirbuf;
180
181	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
182	if (error == 0) {
183		readcnt = e2fs_count - auio.uio_resid;
184		for (dp = (struct ext2fs_direct *)dirbuf;
185			(char *)dp < (char *)dirbuf + readcnt; ) {
186			e2d_reclen = fs2h16(dp->e2d_reclen);
187			if (e2d_reclen == 0) {
188				error = EIO;
189				break;
190			}
191			ext2fs_dirconv2ffs(dp, dstd);
192			if(dstd->d_reclen > uio->uio_resid) {
193				break;
194			}
195			error = uiomove(dstd, dstd->d_reclen, uio);
196			if (error != 0) {
197				break;
198			}
199			off = off + e2d_reclen;
200			if (cookies != NULL) {
201				*cookies++ = off;
202				if (--ncookies <= 0){
203					break;  /* out of cookies */
204				}
205			}
206			/* advance dp */
207			dp = (struct ext2fs_direct *) ((char *)dp + e2d_reclen);
208		}
209		/* we need to correct uio_offset */
210		uio->uio_offset = off;
211	}
212	kmem_free(dirbuf, e2fs_count);
213	kmem_free(dstd, sizeof(*dstd));
214	*ap->a_eofflag = ext2fs_size(VTOI(ap->a_vp)) <= uio->uio_offset;
215	if (ap->a_ncookies) {
216		if (error) {
217			free(*ap->a_cookies, M_TEMP);
218			*ap->a_ncookies = 0;
219			*ap->a_cookies = NULL;
220		} else
221			*ap->a_ncookies = nc - ncookies;
222	}
223	return (error);
224}
225
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and vput
 * instead of two vputs.
 *
 * Overall outline of ext2fs_lookup:
 *
 *	check accessibility of directory
 *	look for name in cache, if found, then if at end of path
 *	  and deleting or creating, drop it, else return name
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * Return values, as visible in the code below:
 *	0		name found; *a_vpp holds the vnode
 *	EJUSTRETURN	name absent, nameiop is CREATE or RENAME on the last
 *			component and the directory still has links; the
 *			ulr_offset/ulr_count/ulr_endoff results describe where
 *			a new entry can go
 *	ENOENT		name absent otherwise
 *	EROFS		DELETE or RENAME of the last component on a read-only
 *			mount
 *	EISDIR		RENAME whose target is the directory itself
 *	EPERM		DELETE refused by the sticky-directory ownership check
 *	other		any non-negative value from cache_lookup(), or errors
 *			from VOP_ACCESS(), ext2fs_blkatoff(), ext2fs_setsize()
 *			and VFS_VGET()
 */
int
ext2fs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vdp = ap->a_dvp;	/* vnode for directory being searched */
	struct inode *dp = VTOI(vdp);	/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct ext2fs_direct *ep;	/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	kauth_cred_t cred = cnp->cn_cred;
	int flags;
	int nameiop = cnp->cn_nameiop;
	struct ufsmount *ump = dp->i_ump;
	int dirblksiz = ump->um_dirblksiz;
	ino_t foundino;
	struct ufs_lookup_results *results;

	flags = cnp->cn_flags;

	bp = NULL;
	slotoffset = -1;
	*vpp = NULL;

	/*
	 * Produce the auxiliary lookup results into i_crap. Increment
	 * its serial number so elsewhere we can tell if we're using
	 * stale results. This should not be done this way. XXX.
	 */
	results = &dp->i_crap;
	dp->i_crapcounter++;

	/*
	 * Check accessibility of directory.
	 */
	if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0)
		return (error);

	/* Entries cannot be removed or renamed on a read-only mount. */
	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 * A non-negative result is returned to the caller as-is; a negative
	 * one falls through to the directory scan.
	 */
	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
		return (error);

	/*
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN)) {
		slotstatus = NONE;
		slotneeded = EXT2FS_DIRSIZ(cnp->cn_namelen);
	}

	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = vdp->v_mount->mnt_stat.f_iosize - 1;
	if (nameiop != LOOKUP || results->ulr_diroff == 0 ||
	    results->ulr_diroff >= ext2fs_size(dp)) {
		entryoffsetinblock = 0;
		results->ulr_offset = 0;
		numdirpasses = 1;
	} else {
		results->ulr_offset = results->ulr_diroff;
		/*
		 * Starting in mid-block: the buffer has to be fetched here,
		 * since the loop below only fetches on block boundaries.
		 */
		if ((entryoffsetinblock = results->ulr_offset & bmask) &&
		    (error = ext2fs_blkatoff(vdp, (off_t)results->ulr_offset, NULL, &bp)))
			return (error);
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
	prevoff = results->ulr_offset;
	endsearch = roundup(ext2fs_size(dp), dirblksiz);
	enduseful = 0;

searchloop:
	while (results->ulr_offset < endsearch) {
		/* Give up the CPU if the scheduler has asked for it. */
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt();
		/*
		 * If necessary, get the next directory block.
		 */
		if ((results->ulr_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp, 0);
			error = ext2fs_blkatoff(vdp, (off_t)results->ulr_offset, NULL,
			    &bp);
			if (error != 0)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a dirblksize
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (dirblksiz - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to ensure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		KASSERT(bp != NULL);
		ep = (struct ext2fs_direct *)
			((char *)bp->b_data + entryoffsetinblock);
		if (ep->e2d_reclen == 0 ||
		    (dirchk &&
		     ext2fs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

			/*
			 * Report the entry and skip ahead to the next
			 * dirblksiz boundary.
			 */
			ufs_dirbad(dp, results->ulr_offset, "mangled entry");
			i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1));
			results->ulr_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = fs2h16(ep->e2d_reclen);

			/* A live entry only offers the slack behind its name. */
			if (ep->e2d_ino != 0)
				size -= EXT2FS_DIRSIZ(ep->e2d_namlen);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = results->ulr_offset;
					slotsize = fs2h16(ep->e2d_reclen);
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = results->ulr_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = results->ulr_offset +
						    fs2h16(ep->e2d_reclen) -
						    slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->e2d_ino) {
			namlen = ep->e2d_namlen;
			if (namlen == cnp->cn_namelen &&
			    !memcmp(cnp->cn_nameptr, ep->e2d_name,
			    (unsigned)namlen)) {
				/*
				 * Save the directory entry's inode number
				 * in foundino and its reclen in the lookup
				 * results; the directory buffer is released
				 * at "found".
				 */
				foundino = fs2h32(ep->e2d_ino);
				results->ulr_reclen = fs2h16(ep->e2d_reclen);
				goto found;
			}
		}
		prevoff = results->ulr_offset;
		results->ulr_offset += fs2h16(ep->e2d_reclen);
		entryoffsetinblock += fs2h16(ep->e2d_reclen);
		if (ep->e2d_ino)
			enduseful = results->ulr_offset;
	}
/* notfound: */
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		results->ulr_offset = 0;
		endsearch = results->ulr_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp, 0);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN) && dp->i_e2fs_nlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set results->ulr_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from results->ulr_offset to
		 * results->ulr_offset + results->ulr_count.
		 */
		if (slotstatus == NONE) {
			results->ulr_offset = roundup(ext2fs_size(dp), dirblksiz);
			results->ulr_count = 0;
			enduseful = results->ulr_offset;
		} else {
			results->ulr_offset = slotoffset;
			results->ulr_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		results->ulr_endoff = roundup(enduseful, dirblksiz);
#if 0
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
#endif
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		return (EJUSTRETURN);
	}
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 * *vpp is still NULL here.
	 */
	if (nameiop != CREATE) {
		cache_enter(vdp, *vpp, cnp);
	}
	return ENOENT;

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (results->ulr_offset + EXT2FS_DIRSIZ(ep->e2d_namlen) > ext2fs_size(dp)) {
		ufs_dirbad(dp, results->ulr_offset, "i_size too small");
		error = ext2fs_setsize(dp,
				results->ulr_offset + EXT2FS_DIRSIZ(ep->e2d_namlen));
		if (error) {
			brelse(bp, 0);
			return (error);
		}
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		uvm_vnp_setsize(vdp, ext2fs_size(dp));
	}
	/* ep points into bp's data and must not be used past this point. */
	brelse(bp, 0);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * Lock the inode, being careful with ".".
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Write access to directory required to delete files.
		 */
		if ((error = VOP_ACCESS(vdp, VWRITE, cred)) != 0)
			return (error);
		/*
		 * Return pointer to current entry in results->ulr_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in results->ulr_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((results->ulr_offset & (dirblksiz - 1)) == 0)
			results->ulr_count = 0;
		else
			results->ulr_count = results->ulr_offset - prevoff;
		/* Target is the directory itself: just take another reference. */
		if (dp->i_number == foundino) {
			vref(vdp);
			*vpp = vdp;
			return (0);
		}
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp); /* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (flags & ISDOTDOT)
			vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
		if (error)
			return (error);
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((dp->i_e2fs_mode & ISVTX) &&
		    kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) &&
		    kauth_cred_geteuid(cred) != dp->i_uid &&
		    VTOI(tdp)->i_uid != kauth_cred_geteuid(cred)) {
			vput(tdp);
			return (EPERM);
		}
		*vpp = tdp;
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && (flags & ISLASTCN)) {
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			return (error);
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == foundino)
			return (EISDIR);
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp); /* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (flags & ISDOTDOT)
			vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
		if (error)
			return (error);
		*vpp = tdp;
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to ensure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		VOP_UNLOCK(pdp);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			return (error);
		}
		*vpp = tdp;
	} else if (dp->i_number == foundino) {
		vref(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (error)
			return (error);
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	cache_enter(vdp, *vpp, cnp);
	return 0;
}
693
694/*
695 * Do consistency checking on a directory entry:
696 *	record length must be multiple of 4
697 *	entry must fit in rest of its dirblksize block
698 *	record must be large enough to contain entry
699 *	name is not longer than EXT2FS_MAXNAMLEN
700 *	name must be as long as advertised, and null terminated
701 */
702/*
703 *	changed so that it confirms to ext2fs_check_dir_entry
704 */
705static int
706ext2fs_dirbadentry(struct vnode *dp, struct ext2fs_direct *de,
707		int entryoffsetinblock)
708{
709	struct ufsmount *ump = VFSTOUFS(dp->v_mount);
710	int dirblksiz = ump->um_dirblksiz;
711
712		const char *error_msg = NULL;
713		int reclen = fs2h16(de->e2d_reclen);
714		int namlen = de->e2d_namlen;
715
716		if (reclen < EXT2FS_DIRSIZ(1)) /* e2d_namlen = 1 */
717			error_msg = "rec_len is smaller than minimal";
718		else if (reclen % 4 != 0)
719			error_msg = "rec_len % 4 != 0";
720		else if (namlen > EXT2FS_MAXNAMLEN)
721			error_msg = "namlen > EXT2FS_MAXNAMLEN";
722		else if (reclen < EXT2FS_DIRSIZ(namlen))
723			error_msg = "reclen is too small for name_len";
724		else if (entryoffsetinblock + reclen > dirblksiz)
725			error_msg = "directory entry across blocks";
726		else if (fs2h32(de->e2d_ino) >
727		    VTOI(dp)->i_e2fs->e2fs.e2fs_icount)
728			error_msg = "inode out of bounds";
729
730		if (error_msg != NULL) {
731			printf( "bad directory entry: %s\n"
732			    "offset=%d, inode=%lu, rec_len=%d, name_len=%d \n",
733			    error_msg, entryoffsetinblock,
734			    (unsigned long) fs2h32(de->e2d_ino),
735			    reclen, namlen);
736			panic("ext2fs_dirbadentry");
737		}
738		return error_msg == NULL ? 0 : 1;
739}
740
741/*
742 * Write a directory entry after a call to namei, using the parameters
743 * that it left in nameidata.  The argument ip is the inode which the new
744 * directory entry will refer to.  Dvp is a pointer to the directory to
745 * be written, which was left locked by namei. Remaining parameters
746 * (ulr_offset, ulr_count) indicate how the space for the new
747 * entry is to be obtained.
748 */
749int
750ext2fs_direnter(struct inode *ip, struct vnode *dvp,
751		const struct ufs_lookup_results *ulr,
752		struct componentname *cnp)
753{
754	struct ext2fs_direct *ep, *nep;
755	struct inode *dp;
756	struct buf *bp;
757	struct ext2fs_direct newdir;
758	struct iovec aiov;
759	struct uio auio;
760	u_int dsize;
761	int error, loc, newentrysize, spacefree;
762	char *dirbuf;
763	struct ufsmount *ump = VFSTOUFS(dvp->v_mount);
764	int dirblksiz = ump->um_dirblksiz;
765
766	dp = VTOI(dvp);
767
768	newdir.e2d_ino = h2fs32(ip->i_number);
769	newdir.e2d_namlen = cnp->cn_namelen;
770	if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 &&
771	    (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) {
772		newdir.e2d_type = inot2ext2dt(IFTODT(ip->i_e2fs_mode));
773	} else {
774		newdir.e2d_type = 0;
775	}
776	memcpy(newdir.e2d_name, cnp->cn_nameptr, (unsigned)cnp->cn_namelen + 1);
777	newentrysize = EXT2FS_DIRSIZ(cnp->cn_namelen);
778	if (ulr->ulr_count == 0) {
779		/*
780		 * If ulr_count is 0, then namei could find no
781		 * space in the directory. Here, ulr_offset will
782		 * be on a directory block boundary and we will write the
783		 * new entry into a fresh block.
784		 */
785		if (ulr->ulr_offset & (dirblksiz - 1))
786			panic("ext2fs_direnter: newblk");
787		auio.uio_offset = ulr->ulr_offset;
788		newdir.e2d_reclen = h2fs16(dirblksiz);
789		auio.uio_resid = newentrysize;
790		aiov.iov_len = newentrysize;
791		aiov.iov_base = (void *)&newdir;
792		auio.uio_iov = &aiov;
793		auio.uio_iovcnt = 1;
794		auio.uio_rw = UIO_WRITE;
795		UIO_SETUP_SYSSPACE(&auio);
796		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
797		if (dirblksiz > dvp->v_mount->mnt_stat.f_bsize)
798			/* XXX should grow with balloc() */
799			panic("ext2fs_direnter: frag size");
800		else if (!error) {
801			error = ext2fs_setsize(dp,
802				roundup(ext2fs_size(dp), dirblksiz));
803			if (error)
804				return (error);
805			dp->i_flag |= IN_CHANGE;
806			uvm_vnp_setsize(dvp, ext2fs_size(dp));
807		}
808		return (error);
809	}
810
811	/*
812	 * If ulr_count is non-zero, then namei found space
813	 * for the new entry in the range ulr_offset to
814	 * ulr_offset + ulr_count in the directory.
815	 * To use this space, we may have to compact the entries located
816	 * there, by copying them together towards the beginning of the
817	 * block, leaving the free space in one usable chunk at the end.
818	 */
819
820	/*
821	 * Get the block containing the space for the new directory entry.
822	 */
823	if ((error = ext2fs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp)) != 0)
824		return (error);
825	/*
826	 * Find space for the new entry. In the simple case, the entry at
827	 * offset base will have the space. If it does not, then namei
828	 * arranged that compacting the region ulr_offset to
829	 * ulr_offset + ulr_count would yield the
830	 * space.
831	 */
832	ep = (struct ext2fs_direct *)dirbuf;
833	dsize = EXT2FS_DIRSIZ(ep->e2d_namlen);
834	spacefree = fs2h16(ep->e2d_reclen) - dsize;
835	for (loc = fs2h16(ep->e2d_reclen); loc < ulr->ulr_count; ) {
836		nep = (struct ext2fs_direct *)(dirbuf + loc);
837		if (ep->e2d_ino) {
838			/* trim the existing slot */
839			ep->e2d_reclen = h2fs16(dsize);
840			ep = (struct ext2fs_direct *)((char *)ep + dsize);
841		} else {
842			/* overwrite; nothing there; header is ours */
843			spacefree += dsize;
844		}
845		dsize = EXT2FS_DIRSIZ(nep->e2d_namlen);
846		spacefree += fs2h16(nep->e2d_reclen) - dsize;
847		loc += fs2h16(nep->e2d_reclen);
848		memcpy((void *)ep, (void *)nep, dsize);
849	}
850	/*
851	 * Update the pointer fields in the previous entry (if any),
852	 * copy in the new entry, and write out the block.
853	 */
854	if (ep->e2d_ino == 0) {
855#ifdef DIAGNOSTIC
856		if (spacefree + dsize < newentrysize)
857			panic("ext2fs_direnter: compact1");
858#endif
859		newdir.e2d_reclen = h2fs16(spacefree + dsize);
860	} else {
861#ifdef DIAGNOSTIC
862		if (spacefree < newentrysize) {
863			printf("ext2fs_direnter: compact2 %u %u",
864			    (u_int)spacefree, (u_int)newentrysize);
865			panic("ext2fs_direnter: compact2");
866		}
867#endif
868		newdir.e2d_reclen = h2fs16(spacefree);
869		ep->e2d_reclen = h2fs16(dsize);
870		ep = (struct ext2fs_direct *)((char *)ep + dsize);
871	}
872	memcpy((void *)ep, (void *)&newdir, (u_int)newentrysize);
873	error = VOP_BWRITE(bp->b_vp, bp);
874	dp->i_flag |= IN_CHANGE | IN_UPDATE;
875	if (!error && ulr->ulr_endoff && ulr->ulr_endoff < ext2fs_size(dp))
876		error = ext2fs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC,
877		    cnp->cn_cred);
878	return (error);
879}
880
881/*
882 * Remove a directory entry after a call to namei, using
883 * the auxiliary results it provided. The entry
884 * ulr_offset contains the offset into the directory of the
885 * entry to be eliminated.  The ulr_count field contains the
886 * size of the previous record in the directory.  If this
887 * is 0, the first entry is being deleted, so we need only
888 * zero the inode number to mark the entry as free.  If the
889 * entry is not the first in the directory, we must reclaim
890 * the space of the now empty record by adding the record size
891 * to the size of the previous entry.
892 */
893int
894ext2fs_dirremove(struct vnode *dvp, const struct ufs_lookup_results *ulr,
895		 struct componentname *cnp)
896{
897	struct inode *dp;
898	struct ext2fs_direct *ep;
899	struct buf *bp;
900	int error;
901
902	dp = VTOI(dvp);
903
904	if (ulr->ulr_count == 0) {
905		/*
906		 * First entry in block: set d_ino to zero.
907		 */
908		error = ext2fs_blkatoff(dvp, (off_t)ulr->ulr_offset,
909		    (void *)&ep, &bp);
910		if (error != 0)
911			return (error);
912		ep->e2d_ino = 0;
913		error = VOP_BWRITE(bp->b_vp, bp);
914		dp->i_flag |= IN_CHANGE | IN_UPDATE;
915		return (error);
916	}
917	/*
918	 * Collapse new free space into previous entry.
919	 */
920	error = ext2fs_blkatoff(dvp, (off_t)(ulr->ulr_offset - ulr->ulr_count),
921	    (void *)&ep, &bp);
922	if (error != 0)
923		return (error);
924	ep->e2d_reclen = h2fs16(fs2h16(ep->e2d_reclen) + ulr->ulr_reclen);
925	error = VOP_BWRITE(bp->b_vp, bp);
926	dp->i_flag |= IN_CHANGE | IN_UPDATE;
927	return (error);
928}
929
930/*
931 * Rewrite an existing directory entry to point at the inode
932 * supplied.  The parameters describing the directory entry are
933 * set up by a call to namei.
934 */
935int
936ext2fs_dirrewrite(struct inode *dp, const struct ufs_lookup_results *ulr,
937    struct inode *ip, struct componentname *cnp)
938{
939	struct buf *bp;
940	struct ext2fs_direct *ep;
941	struct vnode *vdp = ITOV(dp);
942	int error;
943
944	error = ext2fs_blkatoff(vdp, (off_t)ulr->ulr_offset, (void *)&ep, &bp);
945	if (error != 0)
946		return (error);
947	ep->e2d_ino = h2fs32(ip->i_number);
948	if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 &&
949	    (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) {
950		ep->e2d_type = inot2ext2dt(IFTODT(ip->i_e2fs_mode));
951	} else {
952		ep->e2d_type = 0;
953	}
954	error = VOP_BWRITE(bp->b_vp, bp);
955	dp->i_flag |= IN_CHANGE | IN_UPDATE;
956	return (error);
957}
958
959/*
960 * Check if a directory is empty or not.
961 * Inode supplied must be locked.
962 *
963 * Using a struct dirtemplate here is not precisely
964 * what we want, but better than using a struct ext2fs_direct.
965 *
966 * NB: does not handle corrupted directories.
967 */
968int
969ext2fs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred)
970{
971	off_t off;
972	struct ext2fs_dirtemplate dbuf;
973	struct ext2fs_direct *dp = (struct ext2fs_direct *)&dbuf;
974	int error, namlen;
975	size_t count;
976
977#define	MINDIRSIZ (sizeof (struct ext2fs_dirtemplate) / 2)
978
979	for (off = 0; off < ext2fs_size(ip); off += fs2h16(dp->e2d_reclen)) {
980		error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off,
981		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL);
982		/*
983		 * Since we read MINDIRSIZ, residual must
984		 * be 0 unless we're at end of file.
985		 */
986		if (error || count != 0)
987			return (0);
988		/* avoid infinite loops */
989		if (dp->e2d_reclen == 0)
990			return (0);
991		/* skip empty entries */
992		if (dp->e2d_ino == 0)
993			continue;
994		/* accept only "." and ".." */
995		namlen = dp->e2d_namlen;
996		if (namlen > 2)
997			return (0);
998		if (dp->e2d_name[0] != '.')
999			return (0);
1000		/*
1001		 * At this point namlen must be 1 or 2.
1002		 * 1 implies ".", 2 implies ".." if second
1003		 * char is also "."
1004		 */
1005		if (namlen == 1)
1006			continue;
1007		if (dp->e2d_name[1] == '.' && fs2h32(dp->e2d_ino) == parentino)
1008			continue;
1009		return (0);
1010	}
1011	return (1);
1012}
1013
1014/*
1015 * Check if source directory is in the path of the target directory.
1016 * Target is supplied locked, source is unlocked.
1017 * The target is always vput before returning.
1018 */
1019int
1020ext2fs_checkpath(struct inode *source, struct inode *target,
1021	kauth_cred_t cred)
1022{
1023	struct vnode *vp;
1024	int error, rootino, namlen;
1025	struct ext2fs_dirtemplate dirbuf;
1026	uint32_t ino;
1027
1028	vp = ITOV(target);
1029	if (target->i_number == source->i_number) {
1030		error = EEXIST;
1031		goto out;
1032	}
1033	rootino = ROOTINO;
1034	error = 0;
1035	if (target->i_number == rootino)
1036		goto out;
1037
1038	for (;;) {
1039		if (vp->v_type != VDIR) {
1040			error = ENOTDIR;
1041			break;
1042		}
1043		error = vn_rdwr(UIO_READ, vp, (void *)&dirbuf,
1044			sizeof (struct ext2fs_dirtemplate), (off_t)0,
1045			UIO_SYSSPACE, IO_NODELOCKED, cred, (size_t *)0,
1046			NULL);
1047		if (error != 0)
1048			break;
1049		namlen = dirbuf.dotdot_namlen;
1050		if (namlen != 2 ||
1051			dirbuf.dotdot_name[0] != '.' ||
1052			dirbuf.dotdot_name[1] != '.') {
1053			error = ENOTDIR;
1054			break;
1055		}
1056		ino = fs2h32(dirbuf.dotdot_ino);
1057		if (ino == source->i_number) {
1058			error = EINVAL;
1059			break;
1060		}
1061		if (ino == rootino)
1062			break;
1063		vput(vp);
1064		error = VFS_VGET(vp->v_mount, ino, &vp);
1065		if (error != 0) {
1066			vp = NULL;
1067			break;
1068		}
1069	}
1070
1071out:
1072	if (error == ENOTDIR) {
1073		printf("checkpath: .. not a directory\n");
1074		panic("checkpath");
1075	}
1076	if (vp != NULL)
1077		vput(vp);
1078	return (error);
1079}
1080