1/*	$OpenBSD: ufs_lookup.c,v 1.61 2024/02/03 18:51:58 beck Exp $	*/
2/*	$NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $	*/
3
4/*
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	@(#)ufs_lookup.c	8.9 (Berkeley) 8/11/94
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/namei.h>
44#include <sys/buf.h>
45#include <sys/stat.h>
46#include <sys/mount.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49
50#include <ufs/ufs/quota.h>
51#include <ufs/ufs/inode.h>
52#include <ufs/ufs/dir.h>
53#ifdef UFS_DIRHASH
54#include <ufs/ufs/dirhash.h>
55#endif
56#include <ufs/ufs/ufsmount.h>
57#include <ufs/ufs/ufs_extern.h>
58
/* Counters updated by ufs_lookup() (ncs_2passes, ncs_pass2); defined elsewhere. */
extern	struct nchstats nchstats;

/*
 * When non-zero, ufs_lookup() hands every directory entry it scans to
 * ufs_dirbadentry() for a full consistency check.  Kernels built with
 * DIAGNOSTIC start with the check switched on.
 */
#if defined(DIAGNOSTIC)
int	dirchk = 1;
#else
int	dirchk = 0;
#endif
66
67/*
68 * Convert a component of a pathname into a pointer to a locked inode.
69 * This is a very central and rather complicated routine.
70 * If the file system is not maintained in a strict tree hierarchy,
71 * this can result in a deadlock situation (see comments in code below).
72 *
73 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
74 * on whether the name is to be looked up, created, renamed, or deleted.
75 * When CREATE, RENAME, or DELETE is specified, information usable in
76 * creating, renaming, or deleting a directory entry may be calculated.
77 * If flag has LOCKPARENT or'ed into it and the target of the pathname
78 * exists, lookup returns both the target and its parent directory locked.
79 * When creating or renaming and LOCKPARENT is specified, the target may
80 * not be ".".  When deleting and LOCKPARENT is specified, the target may
81 * be "."., but the caller must check to ensure it does an vrele and vput
82 * instead of two vputs.
83 *
84 * Overall outline of ufs_lookup:
85 *
86 *	check accessibility of directory
87 *	look for name in cache, if found, then if at end of path
88 *	  and deleting or creating, drop it, else return name
89 *	search for name in directory, to found or notfound
90 * notfound:
91 *	if creating, return locked directory, leaving info on available slots
92 *	else return error
93 * found:
94 *	if at end of path and deleting, return information to allow delete
95 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
96 *	  inode and return info to allow rewrite
97 *	if not at end, add name to cache; if at end and neither creating
98 *	  nor deleting, add name to cache
99 */
100int
101ufs_lookup(void *v)
102{
103	struct vop_lookup_args *ap = v;
104	struct vnode *vdp;		/* vnode for directory being searched */
105	struct inode *dp;		/* inode for directory being searched */
106	struct buf *bp;			/* a buffer of directory entries */
107	struct direct *ep;		/* the current directory entry */
108	int entryoffsetinblock;		/* offset of ep in bp's buffer */
109	enum {NONE, COMPACT, FOUND} slotstatus;
110	doff_t slotoffset;		/* offset of area with free space */
111	int slotsize;			/* size of area at slotoffset */
112	int slotfreespace;		/* amount of space free in slot */
113	int slotneeded;			/* size of the entry we're seeking */
114	int numdirpasses;		/* strategy for directory search */
115	doff_t endsearch;		/* offset to end directory search */
116	doff_t prevoff;			/* prev entry dp->i_offset */
117	struct vnode *pdp;		/* saved dp during symlink work */
118	struct vnode *tdp;		/* returned by VFS_VGET */
119	doff_t enduseful;		/* pointer past last used dir slot */
120	u_long bmask;			/* block offset mask */
121	int lockparent;			/* 1 => lockparent flag is set */
122	int wantparent;			/* 1 => wantparent or lockparent flag */
123	int namlen, error;
124	struct vnode **vpp = ap->a_vpp;
125	struct componentname *cnp = ap->a_cnp;
126	struct ucred *cred = cnp->cn_cred;
127	int flags;
128	int nameiop = cnp->cn_nameiop;
129
130	cnp->cn_flags &= ~PDIRUNLOCK;
131	flags = cnp->cn_flags;
132
133	bp = NULL;
134	slotoffset = -1;
135	*vpp = NULL;
136	vdp = ap->a_dvp;
137	dp = VTOI(vdp);
138	lockparent = flags & LOCKPARENT;
139	wantparent = flags & (LOCKPARENT|WANTPARENT);
140
141	/*
142	 * Check accessibility of directory.
143	 */
144	if ((DIP(dp, mode) & IFMT) != IFDIR)
145		return (ENOTDIR);
146	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
147		return (error);
148
149	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
150	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
151		return (EROFS);
152
153	/*
154	 * We now have a segment name to search for, and a directory to search.
155	 *
156	 * Before tediously performing a linear scan of the directory,
157	 * check the name cache to see if the directory/name pair
158	 * we are looking for is known already.
159	 */
160	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
161		return (error);
162
163	/*
164	 * Suppress search for slots unless creating
165	 * file and at end of pathname, in which case
166	 * we watch for a place to put the new file in
167	 * case it doesn't already exist.
168	 */
169	slotstatus = FOUND;
170	slotfreespace = slotsize = slotneeded = 0;
171	if ((nameiop == CREATE || nameiop == RENAME) &&
172	    (flags & ISLASTCN)) {
173		slotstatus = NONE;
174		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
175			cnp->cn_namelen + 3) &~ 3;
176	}
177
178	/*
179	 * If there is cached information on a previous search of
180	 * this directory, pick up where we last left off.
181	 * We cache only lookups as these are the most common
182	 * and have the greatest payoff. Caching CREATE has little
183	 * benefit as it usually must search the entire directory
184	 * to determine that the entry does not exist. Caching the
185	 * location of the last DELETE or RENAME has not reduced
186	 * profiling time and hence has been removed in the interest
187	 * of simplicity.
188	 */
189	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
190
191#ifdef UFS_DIRHASH
192	/*
193	 * Use dirhash for fast operations on large directories. The logic
194	 * to determine whether to hash the directory is contained within
195	 * ufsdirhash_build(); a zero return means that it decided to hash
196	 * this directory and it successfully built up the hash table.
197	 */
198	if (ufsdirhash_build(dp) == 0) {
199		/* Look for a free slot if needed. */
200		enduseful = DIP(dp, size);
201		if (slotstatus != FOUND) {
202			slotoffset = ufsdirhash_findfree(dp, slotneeded,
203			    &slotsize);
204			if (slotoffset >= 0) {
205				slotstatus = COMPACT;
206				enduseful = ufsdirhash_enduseful(dp);
207				if (enduseful < 0)
208					enduseful = DIP(dp, size);
209			}
210		}
211		/* Look up the component. */
212		numdirpasses = 1;
213		entryoffsetinblock = 0; /* silence compiler warning */
214		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
215		    &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
216		case 0:
217			ep = (struct direct *)((char *)bp->b_data +
218			    (dp->i_offset & bmask));
219			goto foundentry;
220		case ENOENT:
221#define roundup2(x, y)	(((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
222			dp->i_offset = roundup2(DIP(dp, size), DIRBLKSIZ);
223			goto notfound;
224		default:
225			/* Something failed; just do a linear search. */
226			break;
227		}
228	}
229#endif /* UFS_DIRHASH */
230
231	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
232	    dp->i_diroff >= DIP(dp, size)) {
233		entryoffsetinblock = 0;
234		dp->i_offset = 0;
235		numdirpasses = 1;
236	} else {
237		dp->i_offset = dp->i_diroff;
238		if ((entryoffsetinblock = dp->i_offset & bmask) &&
239		    (error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp)))
240			return (error);
241		numdirpasses = 2;
242		nchstats.ncs_2passes++;
243	}
244	prevoff = dp->i_offset;
245	endsearch = roundup(DIP(dp, size), DIRBLKSIZ);
246	enduseful = 0;
247
248searchloop:
249	while (dp->i_offset < endsearch) {
250		/*
251		 * If necessary, get the next directory block.
252		 */
253		if ((dp->i_offset & bmask) == 0) {
254			if (bp != NULL)
255				brelse(bp);
256			error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL,
257					     &bp);
258			if (error)
259				return (error);
260			entryoffsetinblock = 0;
261		}
262		/*
263		 * If still looking for a slot, and at a DIRBLKSIZE
264		 * boundary, have to start looking for free space again.
265		 */
266		if (slotstatus == NONE &&
267		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
268			slotoffset = -1;
269			slotfreespace = 0;
270		}
271		/*
272		 * Get pointer to next entry.
273		 * Full validation checks are slow, so we only check
274		 * enough to insure forward progress through the
275		 * directory. Complete checks can be run by patching
276		 * "dirchk" to be true.
277		 */
278		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
279		if (ep->d_reclen == 0 ||
280		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
281			int i;
282
283			ufs_dirbad(dp, dp->i_offset, "mangled entry");
284			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
285			dp->i_offset += i;
286			entryoffsetinblock += i;
287			continue;
288		}
289
290		/*
291		 * If an appropriate sized slot has not yet been found,
292		 * check to see if one is available. Also accumulate space
293		 * in the current block so that we can determine if
294		 * compaction is viable.
295		 */
296		if (slotstatus != FOUND) {
297			int size = ep->d_reclen;
298
299			if (ep->d_ino != 0)
300				size -= DIRSIZ(ep);
301			if (size > 0) {
302				if (size >= slotneeded) {
303					slotstatus = FOUND;
304					slotoffset = dp->i_offset;
305					slotsize = ep->d_reclen;
306				} else if (slotstatus == NONE) {
307					slotfreespace += size;
308					if (slotoffset == -1)
309						slotoffset = dp->i_offset;
310					if (slotfreespace >= slotneeded) {
311						slotstatus = COMPACT;
312						slotsize = dp->i_offset +
313						      ep->d_reclen - slotoffset;
314					}
315				}
316			}
317		}
318
319		/*
320		 * Check for a name match.
321		 */
322		if (ep->d_ino) {
323			namlen = ep->d_namlen;
324			if (namlen == cnp->cn_namelen &&
325			    !memcmp(cnp->cn_nameptr, ep->d_name, namlen)) {
326#ifdef UFS_DIRHASH
327foundentry:
328#endif
329				/*
330				 * Save directory entry's inode number and
331				 * reclen in ndp->ni_ufs area, and release
332				 * directory buffer.
333				 */
334				dp->i_ino = ep->d_ino;
335				dp->i_reclen = ep->d_reclen;
336				goto found;
337			}
338		}
339		prevoff = dp->i_offset;
340		dp->i_offset += ep->d_reclen;
341		entryoffsetinblock += ep->d_reclen;
342		if (ep->d_ino)
343			enduseful = dp->i_offset;
344	}
345#ifdef UFS_DIRHASH
346notfound:
347#endif
348	/*
349	 * If we started in the middle of the directory and failed
350	 * to find our target, we must check the beginning as well.
351	 */
352	if (numdirpasses == 2) {
353		numdirpasses--;
354		dp->i_offset = 0;
355		endsearch = dp->i_diroff;
356		goto searchloop;
357	}
358	if (bp != NULL)
359		brelse(bp);
360	/*
361	 * If creating, and at end of pathname and current
362	 * directory has not been removed, then can consider
363	 * allowing file to be created.
364	 */
365	if ((nameiop == CREATE || nameiop == RENAME) &&
366	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
367		/*
368		 * Access for write is interpreted as allowing
369		 * creation of files in the directory.
370		 */
371		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
372		if (error)
373			return (error);
374		/*
375		 * Return an indication of where the new directory
376		 * entry should be put.  If we didn't find a slot,
377		 * then set dp->i_count to 0 indicating
378		 * that the new slot belongs at the end of the
379		 * directory. If we found a slot, then the new entry
380		 * can be put in the range from dp->i_offset to
381		 * dp->i_offset + dp->i_count.
382		 */
383		if (slotstatus == NONE) {
384			dp->i_offset = roundup(DIP(dp, size), DIRBLKSIZ);
385			dp->i_count = 0;
386			enduseful = dp->i_offset;
387		} else if (nameiop == DELETE) {
388			dp->i_offset = slotoffset;
389			if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
390				dp->i_count = 0;
391			else
392				dp->i_count = dp->i_offset - prevoff;
393		} else {
394			dp->i_offset = slotoffset;
395			dp->i_count = slotsize;
396			if (enduseful < slotoffset + slotsize)
397				enduseful = slotoffset + slotsize;
398		}
399		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
400		/*
401		 * We return with the directory locked, so that
402		 * the parameters we set up above will still be
403		 * valid if we actually decide to do a direnter().
404		 * We return ni_vp == NULL to indicate that the entry
405		 * does not currently exist; we leave a pointer to
406		 * the (locked) directory inode in ndp->ni_dvp.
407		 * The pathname buffer is saved so that the name
408		 * can be obtained later.
409		 *
410		 * NB - if the directory is unlocked, then this
411		 * information cannot be used.
412		 */
413		cnp->cn_flags |= SAVENAME;
414		if (!lockparent) {
415			VOP_UNLOCK(vdp);
416			cnp->cn_flags |= PDIRUNLOCK;
417		}
418		return (EJUSTRETURN);
419	}
420	/*
421	 * Insert name into cache (as non-existent) if appropriate.
422	 */
423	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
424		cache_enter(vdp, *vpp, cnp);
425	return (ENOENT);
426
427found:
428	if (numdirpasses == 2)
429		nchstats.ncs_pass2++;
430	/*
431	 * Check that directory length properly reflects presence
432	 * of this entry.
433	 */
434	if (dp->i_offset + DIRSIZ(ep) > DIP(dp, size)) {
435		ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small");
436		DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(ep));
437		dp->i_flag |= IN_CHANGE | IN_UPDATE;
438	}
439	brelse(bp);
440
441	/*
442	 * Found component in pathname.
443	 * If the final component of path name, save information
444	 * in the cache as to where the entry was found.
445	 */
446	if ((flags & ISLASTCN) && nameiop == LOOKUP)
447		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
448
449	/*
450	 * If deleting, and at end of pathname, return
451	 * parameters which can be used to remove file.
452	 * If the wantparent flag isn't set, we return only
453	 * the directory (in ndp->ni_dvp), otherwise we go
454	 * on and lock the inode, being careful with ".".
455	 */
456	if (nameiop == DELETE && (flags & ISLASTCN)) {
457		/*
458		 * Write access to directory required to delete files.
459		 */
460		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
461		if (error)
462			return (error);
463		/*
464		 * Return pointer to current entry in dp->i_offset,
465		 * and distance past previous entry (if there
466		 * is a previous entry in this block) in dp->i_count.
467		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
468		 */
469		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
470			dp->i_count = 0;
471		else
472			dp->i_count = dp->i_offset - prevoff;
473		if (dp->i_number == dp->i_ino) {
474			vref(vdp);
475			*vpp = vdp;
476			return (0);
477		}
478		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
479		if (error)
480			return (error);
481		/*
482		 * If directory is "sticky", then user must own
483		 * the directory, or the file in it, else she
484		 * may not delete it (unless she's root). This
485		 * implements append-only directories.
486		 */
487		if ((DIP(dp, mode) & ISVTX) &&
488		    cred->cr_uid != 0 &&
489		    cred->cr_uid != DIP(dp, uid) &&
490		    !vnoperm(vdp) &&
491		    DIP(VTOI(tdp), uid) != cred->cr_uid) {
492			vput(tdp);
493			return (EPERM);
494		}
495		*vpp = tdp;
496		if (!lockparent) {
497			VOP_UNLOCK(vdp);
498			cnp->cn_flags |= PDIRUNLOCK;
499		}
500		return (0);
501	}
502
503	/*
504	 * If rewriting (RENAME), return the inode and the
505	 * information required to rewrite the present directory
506	 * Must get inode of directory entry to verify it's a
507	 * regular file, or empty directory.
508	 */
509	if (nameiop == RENAME && wantparent &&
510	    (flags & ISLASTCN)) {
511		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
512		if (error)
513			return (error);
514		/*
515		 * Careful about locking second inode.
516		 * This can only occur if the target is ".".
517		 */
518		if (dp->i_number == dp->i_ino)
519			return (EISDIR);
520		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
521		if (error)
522			return (error);
523		*vpp = tdp;
524		cnp->cn_flags |= SAVENAME;
525		if (!lockparent) {
526			VOP_UNLOCK(vdp);
527			cnp->cn_flags |= PDIRUNLOCK;
528		}
529		return (0);
530	}
531
532	/*
533	 * Step through the translation in the name.  We do not `vput' the
534	 * directory because we may need it again if a symbolic link
535	 * is relative to the current directory.  Instead we save it
536	 * unlocked as "pdp".  We must get the target inode before unlocking
537	 * the directory to insure that the inode will not be removed
538	 * before we get it.  We prevent deadlock by always fetching
539	 * inodes from the root, moving down the directory tree. Thus
540	 * when following backward pointers ".." we must unlock the
541	 * parent directory before getting the requested directory.
542	 * There is a potential race condition here if both the current
543	 * and parent directories are removed before the VFS_VGET for the
544	 * inode associated with ".." returns.  We hope that this occurs
545	 * infrequently since we cannot avoid this race condition without
546	 * implementing a sophisticated deadlock detection algorithm.
547	 * Note also that this simple deadlock detection scheme will not
548	 * work if the file system has any hard links other than ".."
549	 * that point backwards in the directory structure.
550	 */
551	pdp = vdp;
552	if (flags & ISDOTDOT) {
553		VOP_UNLOCK(pdp);	/* race to get the inode */
554		cnp->cn_flags |= PDIRUNLOCK;
555		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
556		if (error) {
557			if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY) == 0)
558				cnp->cn_flags &= ~PDIRUNLOCK;
559			return (error);
560		}
561		if (lockparent && (flags & ISLASTCN)) {
562			if ((error = vn_lock(pdp, LK_EXCLUSIVE))) {
563				vput(tdp);
564				return (error);
565			}
566			cnp->cn_flags &= ~PDIRUNLOCK;
567		}
568		*vpp = tdp;
569	} else if (dp->i_number == dp->i_ino) {
570		vref(vdp);	/* we want ourself, ie "." */
571		*vpp = vdp;
572	} else {
573		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
574		if (error)
575			return (error);
576		if (!lockparent || !(flags & ISLASTCN)) {
577			VOP_UNLOCK(pdp);
578			cnp->cn_flags |= PDIRUNLOCK;
579		}
580		*vpp = tdp;
581	}
582
583	/*
584	 * Insert name into cache if appropriate.
585	 */
586	if (cnp->cn_flags & MAKEENTRY)
587		cache_enter(vdp, *vpp, cnp);
588	return (0);
589}
590
591void
592ufs_dirbad(struct inode *ip, doff_t offset, char *how)
593{
594	struct mount *mp;
595
596	mp = ITOV(ip)->v_mount;
597	(void)printf("%s: bad dir ino %u at offset %d: %s\n",
598	    mp->mnt_stat.f_mntonname, ip->i_number, offset, how);
599	if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0)
600		panic("bad dir");
601}
602
603/*
604 * Do consistency checking on a directory entry:
605 *	record length must be multiple of 4
606 *	entry must fit in rest of its DIRBLKSIZ block
607 *	record must be large enough to contain entry
608 *	name is not longer than MAXNAMLEN
609 *	name must be as long as advertised, and null terminated
610 */
611int
612ufs_dirbadentry(struct vnode *vdp, struct direct *ep, int entryoffsetinblock)
613{
614	struct inode *dp;
615	int i;
616	int namlen;
617
618	dp = VTOI(vdp);
619
620	namlen = ep->d_namlen;
621	if ((ep->d_reclen & 0x3) != 0 ||
622	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
623	    ep->d_reclen < DIRSIZ(ep) || namlen > MAXNAMLEN) {
624		/*return (1); */
625		printf("First bad\n");
626		goto bad;
627	}
628	if (ep->d_ino == 0)
629		return (0);
630	for (i = 0; i < namlen; i++)
631		if (ep->d_name[i] == '\0') {
632			/*return (1); */
633			printf("Second bad\n");
634			goto bad;
635	}
636	if (ep->d_name[i])
637		goto bad;
638	return (0);
639bad:
640	return (1);
641}
642
643/*
644 * Construct a new directory entry after a call to namei, using the
645 * parameters that it left in the componentname argument cnp. The
646 * argument ip is the inode to which the new directory entry will refer.
647 */
648void
649ufs_makedirentry(struct inode *ip, struct componentname *cnp,
650    struct direct *newdirp)
651{
652#ifdef DIAGNOSTIC
653  	if ((cnp->cn_flags & SAVENAME) == 0)
654		panic("ufs_makedirentry: missing name");
655#endif
656	newdirp->d_ino = ip->i_number;
657	newdirp->d_namlen = cnp->cn_namelen;
658	memset(newdirp->d_name + (cnp->cn_namelen & ~(DIR_ROUNDUP-1)),
659	    0, DIR_ROUNDUP);
660	memcpy(newdirp->d_name, cnp->cn_nameptr, cnp->cn_namelen);
661	newdirp->d_type = IFTODT(DIP(ip, mode));
662}
663
664/*
665 * Write a directory entry after a call to namei, using the parameters
666 * that it left in nameidata. The argument dirp is the new directory
667 * entry contents. Dvp is a pointer to the directory to be written,
668 * which was left locked by namei. Remaining parameters (dp->i_offset,
669 * dp->i_count) indicate how the space for the new entry is to be obtained.
670 * Non-null bp indicates that a directory is being created (for the
671 * soft dependency code).
672 */
673int
674ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
675    struct componentname *cnp, struct buf *newdirbp)
676{
677	struct ucred *cr;
678	struct proc *p;
679	int newentrysize;
680	struct inode *dp;
681	struct buf *bp;
682	u_int dsize;
683	struct direct *ep, *nep;
684	int error, ret, blkoff, loc, spacefree, flags;
685	char *dirbuf;
686
687	error = 0;
688	cr = cnp->cn_cred;
689	p = cnp->cn_proc;
690	dp = VTOI(dvp);
691	newentrysize = DIRSIZ(dirp);
692
693	if (dp->i_count == 0) {
694		/*
695		 * If dp->i_count is 0, then namei could find no
696		 * space in the directory. Here, dp->i_offset will
697		 * be on a directory block boundary and we will write the
698		 * new entry into a fresh block.
699		 */
700		if (dp->i_offset & (DIRBLKSIZ - 1))
701			panic("ufs_direnter: newblk");
702		flags = B_CLRBUF;
703		flags |= B_SYNC;
704		if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ,
705		    cr, flags, &bp)) != 0) {
706			return (error);
707		}
708		DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ);
709		dp->i_flag |= IN_CHANGE | IN_UPDATE;
710		uvm_vnp_setsize(dvp, DIP(dp, size));
711		dirp->d_reclen = DIRBLKSIZ;
712		blkoff = dp->i_offset &
713		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
714		memcpy(bp->b_data + blkoff, dirp, newentrysize);
715
716#ifdef UFS_DIRHASH
717		if (dp->i_dirhash != NULL) {
718			ufsdirhash_newblk(dp, dp->i_offset);
719			ufsdirhash_add(dp, dirp, dp->i_offset);
720			ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
721			dp->i_offset);
722		}
723#endif
724
725		error = VOP_BWRITE(bp);
726		ret = UFS_UPDATE(dp, 1);
727		if (error == 0)
728			return (ret);
729		return (error);
730	}
731
732	/*
733	 * If dp->i_count is non-zero, then namei found space for the new
734	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
735	 * in the directory. To use this space, we may have to compact
736	 * the entries located there, by copying them together towards the
737	 * beginning of the block, leaving the free space in one usable
738	 * chunk at the end.
739	 */
740
741	/*
742	 * Increase size of directory if entry eats into new space.
743	 * This should never push the size past a new multiple of
744	 * DIRBLKSIZE.
745	 *
746	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
747	 */
748	if (dp->i_offset + dp->i_count > DIP(dp, size))
749		DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count);
750	/*
751	 * Get the block containing the space for the new directory entry.
752	 */
753	if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp))
754	    != 0) {
755		return (error);
756	}
757	/*
758	 * Find space for the new entry. In the simple case, the entry at
759	 * offset base will have the space. If it does not, then namei
760	 * arranged that compacting the region dp->i_offset to
761	 * dp->i_offset + dp->i_count would yield the space.
762	 */
763	ep = (struct direct *)dirbuf;
764	dsize = ep->d_ino ? DIRSIZ(ep) : 0;
765	spacefree = ep->d_reclen - dsize;
766	for (loc = ep->d_reclen; loc < dp->i_count; ) {
767		nep = (struct direct *)(dirbuf + loc);
768
769		/* Trim the existing slot (NB: dsize may be zero). */
770		ep->d_reclen = dsize;
771		ep = (struct direct *)((char *)ep + dsize);
772
773		/* Read nep->d_reclen now as the memmove() may clobber it. */
774		loc += nep->d_reclen;
775		if (nep->d_ino == 0) {
776			/*
777			 * A mid-block unused entry. Such entries are
778			 * never created by the kernel, but fsck_ffs
779			 * can create them (and it doesn't fix them).
780			 *
781			 * Add up the free space, and initialise the
782			 * relocated entry since we don't memmove it.
783			 */
784			spacefree += nep->d_reclen;
785			ep->d_ino = 0;
786			dsize = 0;
787			continue;
788		}
789		dsize = DIRSIZ(nep);
790		spacefree += nep->d_reclen - dsize;
791#ifdef UFS_DIRHASH
792		if (dp->i_dirhash != NULL)
793			ufsdirhash_move(dp, nep,
794			    dp->i_offset + ((char *)nep - dirbuf),
795			    dp->i_offset + ((char *)ep - dirbuf));
796#endif
797		memmove(ep, nep, dsize);
798	}
799	/*
800	 * Here, `ep' points to a directory entry containing `dsize' in-use
801	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
802	 * then the entry is completely unused (dsize == 0). The value
803	 * of ep->d_reclen is always indeterminate.
804	 *
805	 * Update the pointer fields in the previous entry (if any),
806	 * copy in the new entry, and write out the block.
807	 */
808	if (ep->d_ino == 0) {
809		if (spacefree + dsize < newentrysize)
810			panic("ufs_direnter: compact1");
811		dirp->d_reclen = spacefree + dsize;
812	} else {
813		if (spacefree < newentrysize)
814			panic("ufs_direnter: compact2");
815		dirp->d_reclen = spacefree;
816		ep->d_reclen = dsize;
817		ep = (struct direct *)((char *)ep + dsize);
818	}
819
820#ifdef UFS_DIRHASH
821	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
822	    dirp->d_reclen == spacefree))
823		ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
824#endif
825	memcpy(ep, dirp, newentrysize);
826#ifdef UFS_DIRHASH
827	if (dp->i_dirhash != NULL)
828		ufsdirhash_checkblock(dp, dirbuf -
829		    (dp->i_offset & (DIRBLKSIZ - 1)),
830		    dp->i_offset & ~(DIRBLKSIZ - 1));
831#endif
832
833	error = VOP_BWRITE(bp);
834	dp->i_flag |= IN_CHANGE | IN_UPDATE;
835
836	/*
837	 * If all went well, and the directory can be shortened, proceed
838	 * with the truncation. Note that we have to unlock the inode for
839	 * the entry that we just entered, as the truncation may need to
840	 * lock other inodes which can lead to deadlock if we also hold a
841	 * lock on the newly entered node.
842	 */
843
844	if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) {
845		if (tvp != NULL)
846			VOP_UNLOCK(tvp);
847		error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_SYNC, cr);
848#ifdef UFS_DIRHASH
849		if (error == 0 && dp->i_dirhash != NULL)
850			ufsdirhash_dirtrunc(dp, dp->i_endoff);
851#endif
852		if (tvp != NULL)
853			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
854	}
855	return (error);
856}
857
858/*
859 * Remove a directory entry after a call to namei, using
860 * the parameters which it left in nameidata. The entry
861 * dp->i_offset contains the offset into the directory of the
862 * entry to be eliminated.  The dp->i_count field contains the
863 * size of the previous record in the directory.  If this
864 * is 0, the first entry is being deleted, so we need only
865 * zero the inode number to mark the entry as free.  If the
866 * entry is not the first in the directory, we must reclaim
867 * the space of the now empty record by adding the record size
868 * to the size of the previous entry.
869 */
870int
871ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
872{
873	struct inode *dp;
874	struct direct *ep;
875	struct buf *bp;
876	int error;
877
878	dp = VTOI(dvp);
879
880	if ((error = UFS_BUFATOFF(dp,
881	    (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
882		return (error);
883#ifdef UFS_DIRHASH
884	/*
885	 * Remove the dirhash entry. This is complicated by the fact
886	 * that `ep' is the previous entry when dp->i_count != 0.
887	 */
888	if (dp->i_dirhash != NULL)
889		ufsdirhash_remove(dp, (dp->i_count == 0) ? ep :
890		(struct direct *)((char *)ep + ep->d_reclen), dp->i_offset);
891#endif
892
893	if (dp->i_count == 0) {
894		/*
895		 * First entry in block: set d_ino to zero.
896		 */
897		ep->d_ino = 0;
898	} else {
899		/*
900		 * Collapse new free space into previous entry.
901		 */
902		ep->d_reclen += dp->i_reclen;
903	}
904#ifdef UFS_DIRHASH
905	if (dp->i_dirhash != NULL)
906		ufsdirhash_checkblock(dp, (char *)ep -
907		    ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
908		    dp->i_offset & ~(DIRBLKSIZ - 1));
909#endif
910	if (ip) {
911		ip->i_effnlink--;
912		DIP_ADD(ip, nlink, -1);
913		ip->i_flag |= IN_CHANGE;
914	}
915	if (DOINGASYNC(dvp) && dp->i_count != 0) {
916		bdwrite(bp);
917		error = 0;
918	} else
919		error = bwrite(bp);
920
921	dp->i_flag |= IN_CHANGE | IN_UPDATE;
922	return (error);
923}
924
925/*
926 * Rewrite an existing directory entry to point at the inode
927 * supplied.  The parameters describing the directory entry are
928 * set up by a call to namei.
929 */
930int
931ufs_dirrewrite(struct inode *dp, struct inode *oip, ufsino_t newinum,
932    int newtype, int isrmdir)
933{
934	struct buf *bp;
935	struct direct *ep;
936	struct vnode *vdp = ITOV(dp);
937	int error;
938
939	error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, (char **)&ep, &bp);
940	if (error)
941		return (error);
942	ep->d_ino = newinum;
943	ep->d_type = newtype;
944	oip->i_effnlink--;
945	DIP_ADD(oip, nlink, -1);
946	oip->i_flag |= IN_CHANGE;
947	if (DOINGASYNC(vdp)) {
948		bdwrite(bp);
949		error = 0;
950	} else {
951		error = VOP_BWRITE(bp);
952	}
953	dp->i_flag |= IN_CHANGE | IN_UPDATE;
954	return (error);
955}
956
957/*
958 * Check if a directory is empty or not.
959 * Inode supplied must be locked.
960 *
961 * Using a struct dirtemplate here is not precisely
962 * what we want, but better than using a struct direct.
963 *
964 * NB: does not handle corrupted directories.
965 */
966int
967ufs_dirempty(struct inode *ip, ufsino_t parentino, struct ucred *cred)
968{
969	off_t off, m;
970	struct dirtemplate dbuf;
971	struct direct *dp = (struct direct *)&dbuf;
972	int error, namlen;
973	size_t count;
974#define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
975
976	m = DIP(ip, size);
977	for (off = 0; off < m; off += dp->d_reclen) {
978		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
979		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, curproc);
980		/*
981		 * Since we read MINDIRSIZ, residual must
982		 * be 0 unless we're at end of file.
983		 */
984		if (error || count != 0)
985			return (0);
986		/* avoid infinite loops */
987		if (dp->d_reclen == 0)
988			return (0);
989		/* skip empty entries */
990		if (dp->d_ino == 0)
991			continue;
992		/* accept only "." and ".." */
993		namlen = dp->d_namlen;
994		if (namlen > 2)
995			return (0);
996		if (dp->d_name[0] != '.')
997			return (0);
998		/*
999		 * At this point namlen must be 1 or 2.
1000		 * 1 implies ".", 2 implies ".." if second
1001		 * char is also "."
1002		 */
1003		if (namlen == 1 && dp->d_ino == ip->i_number)
1004			continue;
1005		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1006			continue;
1007		return (0);
1008	}
1009	return (1);
1010}
1011
1012/*
1013 * Check if source directory is in the path of the target directory.
1014 * Target is supplied locked, source is unlocked.
1015 * The target is always vput before returning.
1016 */
1017int
1018ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred)
1019{
1020	struct vnode *nextvp, *vp;
1021	int error, rootino, namlen;
1022	struct dirtemplate dirbuf;
1023
1024	vp = ITOV(target);
1025	if (target->i_number == source->i_number) {
1026		error = EEXIST;
1027		goto out;
1028	}
1029	rootino = ROOTINO;
1030	error = 0;
1031	if (target->i_number == rootino)
1032		goto out;
1033
1034	for (;;) {
1035		if (vp->v_type != VDIR) {
1036			error = ENOTDIR;
1037			break;
1038		}
1039		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1040			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
1041			IO_NODELOCKED, cred, NULL, curproc);
1042		if (error != 0)
1043			break;
1044		namlen = dirbuf.dotdot_namlen;
1045		if (namlen != 2 ||
1046		    dirbuf.dotdot_name[0] != '.' ||
1047		    dirbuf.dotdot_name[1] != '.') {
1048			error = ENOTDIR;
1049			break;
1050		}
1051		if (dirbuf.dotdot_ino == source->i_number) {
1052			error = EINVAL;
1053			break;
1054		}
1055		if (dirbuf.dotdot_ino == rootino)
1056			break;
1057		VOP_UNLOCK(vp);
1058		error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &nextvp);
1059		vrele(vp);
1060		if (error) {
1061			vp = NULL;
1062			break;
1063		}
1064		vp = nextvp;
1065	}
1066
1067out:
1068	if (error == ENOTDIR)
1069		printf("checkpath: .. not a directory\n");
1070	if (vp != NULL)
1071		vput(vp);
1072	return (error);
1073}
1074