1/*	$NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31/*
32 * Copyright (c) 1991, 1993
33 *	The Regents of the University of California.  All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 *    notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 *    notice, this list of conditions and the following disclaimer in the
42 *    documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 *    may be used to endorse or promote products derived from this software
45 *    without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 *	@(#)lfs_alloc.c	8.4 (Berkeley) 1/4/94
60 */
61
62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $");
64
65#if defined(_KERNEL_OPT)
66#include "opt_quota.h"
67#endif
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/buf.h>
73#include <sys/lock.h>
74#include <sys/vnode.h>
75#include <sys/syslog.h>
76#include <sys/mount.h>
77#include <sys/malloc.h>
78#include <sys/pool.h>
79#include <sys/proc.h>
80#include <sys/kauth.h>
81
82#include <ufs/lfs/ulfs_quotacommon.h>
83#include <ufs/lfs/ulfs_inode.h>
84#include <ufs/lfs/ulfsmount.h>
85#include <ufs/lfs/ulfs_extern.h>
86
87#include <ufs/lfs/lfs.h>
88#include <ufs/lfs/lfs_accessors.h>
89#include <ufs/lfs/lfs_extern.h>
90#include <ufs/lfs/lfs_kernel.h>
91
92/* Constants for inode free bitmap */
93#define BMSHIFT 5	/* 2 ** 5 = 32 */
94#define BMMASK  ((1 << BMSHIFT) - 1)
95#define SET_BITMAP_FREE(F, I) do { \
96	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), 	\
97	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
98	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK));	\
99} while (0)
100#define CLR_BITMAP_FREE(F, I) do { \
101	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), 	\
102	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
103	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK));	\
104} while(0)
105
106#define ISSET_BITMAP_FREE(F, I) \
107	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108
109/*
110 * Add a new block to the Ifile, to accommodate future file creations.
111 * Called with the segment lock held.
112 */
113int
114lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115{
116	struct vnode *vp;
117	struct inode *ip;
118	IFILE64 *ifp64;
119	IFILE32 *ifp32;
120	IFILE_V1 *ifp_v1;
121	struct buf *bp, *cbp;
122	int error;
123	daddr_t i, blkno, xmax;
124	ino_t oldlast, maxino;
125	CLEANERINFO *cip;
126
127	ASSERT_SEGLOCK(fs);
128
129	/* XXX should check or assert that we aren't readonly. */
130
131	/*
132	 * Get a block and extend the ifile inode. Leave the buffer for
133	 * the block in bp.
134	 */
135
136	vp = fs->lfs_ivnode;
137	ip = VTOI(vp);
138	blkno = lfs_lblkno(fs, ip->i_size);
139	if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140				&bp)) != 0) {
141		return (error);
142	}
143	ip->i_size += lfs_sb_getbsize(fs);
144	lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145	uvm_vnp_setsize(vp, ip->i_size);
146
147	/*
148	 * Compute the new number of inodes, and reallocate the in-memory
149	 * inode freemap.
150	 */
151
152	maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153		  lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154	fs->lfs_ino_bitmap = (lfs_bm_t *)
155		realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156			sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157	KASSERT(fs->lfs_ino_bitmap != NULL);
158
159	/* first new inode number */
160	i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161		lfs_sb_getifpb(fs);
162
163	/*
164	 * We insert the new inodes at the head of the free list.
165	 * Under normal circumstances, the free list is empty here,
166	 * so we are also incidentally placing them at the end (which
167	 * we must do if we are to keep them in order).
168	 */
169	LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
170	LFS_PUT_HEADFREE(fs, cip, cbp, i);
171	KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
172	    "inode 0 allocated [2]");
173
174	/* inode number to stop at (XXX: why *x*max?) */
175	xmax = i + lfs_sb_getifpb(fs);
176
177	/*
178	 * Initialize the ifile block.
179	 *
180	 * XXX: these loops should be restructured to use the accessor
181	 * functions instead of using cutpaste polymorphism.
182	 */
183
184	if (fs->lfs_is64) {
185		for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
186			SET_BITMAP_FREE(fs, i);
187			ifp64->if_version = 1;
188			ifp64->if_daddr = LFS_UNUSED_DADDR;
189			ifp64->if_nextfree = ++i;
190		}
191		ifp64--;
192		ifp64->if_nextfree = oldlast;
193	} else if (lfs_sb_getversion(fs) > 1) {
194		for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
195			SET_BITMAP_FREE(fs, i);
196			ifp32->if_version = 1;
197			ifp32->if_daddr = LFS_UNUSED_DADDR;
198			ifp32->if_nextfree = ++i;
199		}
200		ifp32--;
201		ifp32->if_nextfree = oldlast;
202	} else {
203		for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
204			SET_BITMAP_FREE(fs, i);
205			ifp_v1->if_version = 1;
206			ifp_v1->if_daddr = LFS_UNUSED_DADDR;
207			ifp_v1->if_nextfree = ++i;
208		}
209		ifp_v1--;
210		ifp_v1->if_nextfree = oldlast;
211	}
212	LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
213
214	/*
215	 * Write out the new block.
216	 */
217
218	(void) LFS_BWRITE_LOG(bp); /* Ifile */
219
220	return 0;
221}
222
223/*
224 * Allocate an inode for a new file.
225 *
226 * Takes the segment lock. Also (while holding it) takes lfs_lock
227 * to frob fs->lfs_fmod.
228 *
229 * XXX: the mode argument is unused; should just get rid of it.
230 */
231/* ARGSUSED */
232/* VOP_BWRITE 2i times */
233int
234lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
235    ino_t *ino, int *gen)
236{
237	struct lfs *fs;
238	struct buf *bp, *cbp;
239	IFILE *ifp;
240	int error;
241	CLEANERINFO *cip;
242
243	fs = VTOI(pvp)->i_lfs;
244	if (fs->lfs_ronly)
245		return EROFS;
246
247	ASSERT_NO_SEGLOCK(fs);
248
249	lfs_seglock(fs, SEGM_PROT);
250
251	/* Get the head of the freelist. */
252	LFS_GET_HEADFREE(fs, cip, cbp, ino);
253
254	/* paranoia */
255	KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
256	DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
257	     *ino));
258
259	/* Update the in-memory inode freemap */
260	CLR_BITMAP_FREE(fs, *ino);
261
262	/*
263	 * Fetch the ifile entry and make sure the inode is really
264	 * free.
265	 */
266	LFS_IENTRY(ifp, fs, *ino, bp);
267	if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
268		panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
269		    *ino);
270
271	/* Update the inode freelist head in the superblock. */
272	LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
273	DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
274	     *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
275
276	/*
277	 * Retrieve the version number from the ifile entry. It was
278	 * bumped by vfree, so don't bump it again.
279	 */
280	*gen = lfs_if_getversion(fs, ifp);
281
282	/* Done with ifile entry */
283	brelse(bp, 0);
284
285	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
286		/*
287		 * No more inodes; extend the ifile so that the next
288		 * lfs_valloc will succeed.
289		 */
290		if ((error = lfs_extend_ifile(fs, cred)) != 0) {
291			/* restore the freelist */
292			LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
293
294			/* unlock and return */
295			lfs_segunlock(fs);
296			return error;
297		}
298	}
299	KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
300	    "inode 0 allocated [3]");
301
302	/* Set superblock modified bit */
303	mutex_enter(&lfs_lock);
304	fs->lfs_fmod = 1;
305	mutex_exit(&lfs_lock);
306
307	/* increment file count */
308	lfs_sb_addnfiles(fs, 1);
309
310	/* done */
311	lfs_segunlock(fs);
312	return 0;
313}
314
315/*
316 * Allocate an inode for a new file, with given inode number and
317 * version.
318 *
319 * Called in the same context as lfs_valloc and therefore shares the
320 * same locking assumptions.
321 */
322int
323lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
324{
325	IFILE *ifp;
326	struct buf *bp, *cbp;
327	ino_t headino, thisino, oldnext;
328	CLEANERINFO *cip;
329
330	if (fs->lfs_ronly)
331		return EROFS;
332
333	ASSERT_NO_SEGLOCK(fs);
334
335	lfs_seglock(fs, SEGM_PROT);
336
337	/*
338	 * If the ifile is too short to contain this inum, extend it.
339	 *
340	 * XXX: lfs_extend_ifile should take a size instead of always
341	 * doing just one block at time.
342	 */
343	while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
344		lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
345		<< lfs_sb_getbshift(fs)) {
346		lfs_extend_ifile(fs, NOCRED);
347	}
348
349	/*
350	 * fetch the ifile entry; get the inode freelist next pointer,
351	 * and set the version as directed.
352	 */
353	LFS_IENTRY(ifp, fs, ino, bp);
354	oldnext = lfs_if_getnextfree(fs, ifp);
355	lfs_if_setversion(fs, ifp, vers);
356	brelse(bp, 0);
357
358	/* Get head of inode freelist */
359	LFS_GET_HEADFREE(fs, cip, cbp, &headino);
360	if (headino == ino) {
361		/* Easy case: the inode we wanted was at the head */
362		LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
363	} else {
364		ino_t nextfree;
365
366		/* Have to find the desired inode in the freelist... */
367
368		thisino = headino;
369		while (1) {
370			/* read this ifile entry */
371			LFS_IENTRY(ifp, fs, thisino, bp);
372			nextfree = lfs_if_getnextfree(fs, ifp);
373			/* stop if we find it or we hit the end */
374			if (nextfree == ino ||
375			    nextfree == LFS_UNUSED_INUM)
376				break;
377			/* nope, keep going... */
378			thisino = nextfree;
379			brelse(bp, 0);
380		}
381		if (nextfree == LFS_UNUSED_INUM) {
382			/* hit the end -- this inode is not available */
383			brelse(bp, 0);
384			lfs_segunlock(fs);
385			return ENOENT;
386		}
387		/* found it; update the next pointer */
388		lfs_if_setnextfree(fs, ifp, oldnext);
389		/* write the ifile block */
390		LFS_BWRITE_LOG(bp);
391	}
392
393	/* done */
394	lfs_segunlock(fs);
395	return 0;
396}
397
398#if 0
399/*
400 * Find the highest-numbered allocated inode.
401 * This will be used to shrink the Ifile.
402 */
403static inline ino_t
404lfs_last_alloc_ino(struct lfs *fs)
405{
406	ino_t ino, maxino;
407
408	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
409		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
410		lfs_sb_getifpb(fs);
411	for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
412		if (ISSET_BITMAP_FREE(fs, ino) == 0)
413			break;
414	}
415	return ino;
416}
417#endif
418
419/*
420 * Find the previous (next lowest numbered) free inode, if any.
421 * If there is none, return LFS_UNUSED_INUM.
422 *
423 * XXX: locking?
424 */
425static inline ino_t
426lfs_freelist_prev(struct lfs *fs, ino_t ino)
427{
428	ino_t tino, bound, bb, freehdbb;
429
430	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
431		/* No free inodes at all */
432		return LFS_UNUSED_INUM;
433	}
434
435	/* Search our own word first */
436	bound = ino & ~BMMASK;
437	for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
438		if (ISSET_BITMAP_FREE(fs, tino))
439			return tino;
440	/* If there are no lower words to search, just return */
441	if (ino >> BMSHIFT == 0)
442		return LFS_UNUSED_INUM;
443
444	/*
445	 * Find a word with a free inode in it.  We have to be a bit
446	 * careful here since ino_t is unsigned.
447	 */
448	freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
449	for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
450		if (fs->lfs_ino_bitmap[bb])
451			break;
452	if (fs->lfs_ino_bitmap[bb] == 0)
453		return LFS_UNUSED_INUM;
454
455	/* Search the word we found */
456	for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
457	     tino > LFS_UNUSED_INUM; tino--)
458		if (ISSET_BITMAP_FREE(fs, tino))
459			break;
460
461	/* Avoid returning reserved inode numbers */
462	if (tino <= LFS_IFILE_INUM)
463		tino = LFS_UNUSED_INUM;
464
465	return tino;
466}
467
468/*
469 * Free an inode.
470 *
471 * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
472 */
473/* ARGUSED */
474/* VOP_BWRITE 2i times */
475int
476lfs_vfree(struct vnode *vp, ino_t ino, int mode)
477{
478	SEGUSE *sup;
479	CLEANERINFO *cip;
480	struct buf *cbp, *bp;
481	IFILE *ifp;
482	struct inode *ip;
483	struct lfs *fs;
484	daddr_t old_iaddr;
485	ino_t otail;
486
487	/* Get the inode number and file system. */
488	ip = VTOI(vp);
489	fs = ip->i_lfs;
490	ino = ip->i_number;
491
492	/* XXX: assert not readonly */
493
494	ASSERT_NO_SEGLOCK(fs);
495	DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
496
497	/* Drain of pending writes */
498	mutex_enter(vp->v_interlock);
499	while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
500		cv_wait(&vp->v_cv, vp->v_interlock);
501	}
502	mutex_exit(vp->v_interlock);
503
504	lfs_seglock(fs, SEGM_PROT);
505
506	/*
507	 * If the inode was in a dirop, it isn't now.
508	 *
509	 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
510	 * not updated together in one function? (and why do both exist,
511	 * anyway?)
512	 */
513	UNMARK_VNODE(vp);
514
515	mutex_enter(&lfs_lock);
516	if (vp->v_uflag & VU_DIROP) {
517		vp->v_uflag &= ~VU_DIROP;
518		--lfs_dirvcount;
519		--fs->lfs_dirvcount;
520		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
521		wakeup(&fs->lfs_dirvcount);
522		wakeup(&lfs_dirvcount);
523		mutex_exit(&lfs_lock);
524		vrele(vp);
525
526		/*
527		 * If this inode is not going to be written any more, any
528		 * segment accounting left over from its truncation needs
529		 * to occur at the end of the next dirops flush.  Attach
530		 * them to the fs-wide list for that purpose.
531		 */
532		if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
533			struct segdelta *sd;
534
535			while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
536				LIST_REMOVE(sd, list);
537				LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
538			}
539		}
540	} else {
541		/*
542		 * If it's not a dirop, we can finalize right away.
543		 */
544		mutex_exit(&lfs_lock);
545		lfs_finalize_ino_seguse(fs, ip);
546	}
547
548	/* it is no longer an unwritten inode, so update the counts */
549	mutex_enter(&lfs_lock);
550	LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
551	mutex_exit(&lfs_lock);
552
553	/* Turn off all inode modification flags */
554	ip->i_state &= ~IN_ALLMOD;
555
556	/* Mark it deleted */
557	ip->i_lfs_iflags |= LFSI_DELETED;
558
559	/* Mark it free in the in-memory inode freemap */
560	SET_BITMAP_FREE(fs, ino);
561
562	/*
563	 * Set the ifile's inode entry to unused, increment its version number
564	 * and link it onto the free chain.
565	 */
566
567	/* fetch the ifile entry */
568	LFS_IENTRY(ifp, fs, ino, bp);
569
570	/* update the on-disk address (to "nowhere") */
571	old_iaddr = lfs_if_getdaddr(fs, ifp);
572	lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
573
574	/* bump the version */
575	lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
576
577	if (lfs_sb_getversion(fs) == 1) {
578		ino_t nextfree;
579
580		/* insert on freelist */
581		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
582		lfs_if_setnextfree(fs, ifp, nextfree);
583		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
584
585		/* write the ifile block */
586		(void) LFS_BWRITE_LOG(bp); /* Ifile */
587	} else {
588		ino_t tino, onf;
589
590		/*
591		 * Clear the freelist next pointer and write the ifile
592		 * block. XXX: why? I'm sure there must be a reason but
593		 * it seems both silly and dangerous.
594		 */
595		lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
596		(void) LFS_BWRITE_LOG(bp); /* Ifile */
597
598		/*
599		 * Insert on freelist in order.
600		 */
601
602		/* Find the next lower (by number) free inode */
603		tino = lfs_freelist_prev(fs, ino);
604
605		if (tino == LFS_UNUSED_INUM) {
606			ino_t nextfree;
607
608			/*
609			 * There isn't one; put us on the freelist head.
610			 */
611
612			/* reload the ifile block */
613			LFS_IENTRY(ifp, fs, ino, bp);
614			/* update the list */
615			LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
616			lfs_if_setnextfree(fs, ifp, nextfree);
617			LFS_PUT_HEADFREE(fs, cip, cbp, ino);
618			DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
619			     (long long)nextfree, (long long)ino));
620			/* write the ifile block */
621			LFS_BWRITE_LOG(bp); /* Ifile */
622
623			/* If the list was empty, set tail too */
624			LFS_GET_TAILFREE(fs, cip, cbp, &otail);
625			if (otail == LFS_UNUSED_INUM) {
626				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
627				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
628				      "-> %lld\n", (long long)otail,
629				      (long long)ino));
630			}
631		} else {
632			/*
633			 * Insert this inode into the list after tino.
634			 * We hold the segment lock so we don't have to
635			 * worry about blocks being written out of order.
636			 */
637
638			DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
639			      " after %lld\n", ino, tino));
640
641			/* load the previous inode's ifile block */
642			LFS_IENTRY(ifp, fs, tino, bp);
643			/* update the list pointer */
644			onf = lfs_if_getnextfree(fs, ifp);
645			lfs_if_setnextfree(fs, ifp, ino);
646			/* write the block */
647			LFS_BWRITE_LOG(bp);	/* Ifile */
648
649			/* load this inode's ifile block */
650			LFS_IENTRY(ifp, fs, ino, bp);
651			/* update the list pointer */
652			lfs_if_setnextfree(fs, ifp, onf);
653			/* write the block */
654			LFS_BWRITE_LOG(bp);	/* Ifile */
655
656			/* If we're last, put us on the tail */
657			if (onf == LFS_UNUSED_INUM) {
658				LFS_GET_TAILFREE(fs, cip, cbp, &otail);
659				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
660				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
661				      "-> %lld\n", (long long)otail,
662				      (long long)ino));
663			}
664		}
665	}
666	/* XXX: shouldn't this check be further up *before* we trash the fs? */
667	KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
668
669	/*
670	 * Update the segment summary for the segment where the on-disk
671	 * copy used to be.
672	 */
673	if (old_iaddr != LFS_UNUSED_DADDR) {
674		/* load it */
675		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
676		/* the number of bytes in the segment should not become < 0 */
677		KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
678		    "lfs_vfree: negative byte count"
679		    " (segment %" PRIu32 " short by %d)\n",
680		    lfs_dtosn(fs, old_iaddr),
681		    (int)DINOSIZE(fs) - sup->su_nbytes);
682		/* update the number of bytes in the segment */
683		sup->su_nbytes -= DINOSIZE(fs);
684		/* write the segment entry */
685		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
686	}
687
688	/* Set superblock modified bit. */
689	mutex_enter(&lfs_lock);
690	fs->lfs_fmod = 1;
691	mutex_exit(&lfs_lock);
692
693	/* Decrement file count. */
694	lfs_sb_subnfiles(fs, 1);
695
696	lfs_segunlock(fs);
697
698	return (0);
699}
700
701/*
702 * Sort the freelist and set up the free-inode bitmap.
703 * To be called by lfs_mountfs().
704 *
705 * Takes the segmenet lock.
706 */
707void
708lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp)
709{
710	CLEANERINFO *cip;
711	IFILE *ifp = NULL;
712	struct buf *bp;
713	ino_t ino, firstino, lastino, maxino;
714	ino_t *orphan = NULL;
715	size_t norphan = 0;
716	size_t norphan_alloc = 0;
717
718	ASSERT_NO_SEGLOCK(fs);
719	lfs_seglock(fs, SEGM_PROT);
720
721	/* largest inode on fs */
722	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
723		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
724
725	/* allocate the in-memory inode freemap */
726	/* XXX: assert that fs->lfs_ino_bitmap is null here */
727	fs->lfs_ino_bitmap =
728		malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
729		       M_SEGMENT, M_WAITOK | M_ZERO);
730	KASSERT(fs->lfs_ino_bitmap != NULL);
731
732	/*
733	 * Scan the ifile.
734	 */
735
736	firstino = lastino = LFS_UNUSED_INUM;
737	for (ino = 0; ino < maxino; ino++) {
738		/* Load this inode's ifile entry. */
739		if (ino % lfs_sb_getifpb(fs) == 0)
740			LFS_IENTRY(ifp, fs, ino, bp);
741		else
742			LFS_IENTRY_NEXT(ifp, fs);
743
744		/* Don't put zero or ifile on the free list */
745		if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
746			continue;
747
748		/*
749		 * Address orphaned files.
750		 *
751		 * The idea of this is to free inodes belonging to
752		 * files that were unlinked but not reclaimed, I guess
753		 * because if we're going to scan the whole ifile
754		 * anyway it costs very little to do this. I don't
755		 * immediately see any reason this should be disabled,
756		 * but presumably it doesn't work... not sure what
757		 * happens to such files currently. -- dholland 20160806
758		 */
759		if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) {
760			if (orphan == NULL) {
761				norphan_alloc = 32; /* XXX pulled from arse */
762				orphan = kmem_zalloc(sizeof(orphan[0]) *
763				    norphan_alloc, KM_SLEEP);
764			} else if (norphan == norphan_alloc) {
765				ino_t *orphan_new;
766				if (norphan_alloc >= 4096)
767					norphan_alloc += 4096;
768				else
769					norphan_alloc *= 2;
770				orphan_new = kmem_zalloc(sizeof(orphan[0]) *
771				    norphan_alloc, KM_SLEEP);
772				memcpy(orphan_new, orphan, sizeof(orphan[0]) *
773				    norphan);
774				kmem_free(orphan, sizeof(orphan[0]) * norphan);
775				orphan = orphan_new;
776			}
777			orphan[norphan++] = ino;
778		}
779
780		if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
781
782			/*
783			 * This inode is free. Put it on the free list.
784			 */
785
786			if (firstino == LFS_UNUSED_INUM) {
787				/* XXX: assert lastino == LFS_UNUSED_INUM? */
788				/* remember the first free inode */
789				firstino = ino;
790			} else {
791				/* release this inode's ifile entry */
792				brelse(bp, 0);
793
794				/* XXX: assert lastino != LFS_UNUSED_INUM? */
795
796				/* load lastino's ifile entry */
797				LFS_IENTRY(ifp, fs, lastino, bp);
798				/* set the list pointer */
799				lfs_if_setnextfree(fs, ifp, ino);
800				/* write the block */
801				LFS_BWRITE_LOG(bp);
802
803				/* reload this inode's ifile entry */
804				LFS_IENTRY(ifp, fs, ino, bp);
805			}
806			/* remember the last free inode seen so far */
807			lastino = ino;
808
809			/* Mark this inode free in the in-memory freemap */
810			SET_BITMAP_FREE(fs, ino);
811		}
812
813		/* If moving to the next ifile block, release the buffer. */
814		if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
815			brelse(bp, 0);
816	}
817
818	/* Write the freelist head and tail pointers */
819	/* XXX: do we need to mark the superblock dirty? */
820	LFS_PUT_HEADFREE(fs, cip, bp, firstino);
821	LFS_PUT_TAILFREE(fs, cip, bp, lastino);
822
823	/* done */
824	lfs_segunlock(fs);
825
826	/*
827	 * Shrink the array of orphans so we don't have to carry around
828	 * the allocation size.
829	 */
830	if (norphan < norphan_alloc) {
831		ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan,
832		    KM_SLEEP);
833		memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan);
834		kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc);
835		orphan = orphan_new;
836		norphan_alloc = norphan;
837	}
838
839	*orphanp = orphan;
840	*norphanp = norphan;
841}
842
843/*
844 * Mark a file orphaned (unlinked but not yet reclaimed) by inode
845 * number. Do this with a magic freelist next pointer.
846 *
847 * XXX: howzabout some locking?
848 */
849void
850lfs_orphan(struct lfs *fs, ino_t ino)
851{
852	IFILE *ifp;
853	struct buf *bp;
854
855	LFS_IENTRY(ifp, fs, ino, bp);
856	lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs));
857	LFS_BWRITE_LOG(bp);
858}
859
860/*
861 * Free orphans discovered during mount.  This is a separate stage
862 * because it requires fs->lfs_suflags to be set up, which is not done
863 * by the time we run lfs_order_freelist.  It's possible that we could
864 * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner)
865 * but that requires more thought than I can put into this at the
866 * moment.
867 */
868void
869lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan)
870{
871	size_t i;
872
873	for (i = 0; i < norphan; i++) {
874		ino_t ino = orphan[i];
875		unsigned segno;
876		struct vnode *vp;
877		struct inode *ip;
878		struct buf *bp;
879		IFILE *ifp;
880		SEGUSE *sup;
881		int error;
882
883		/* Get the segment the inode is in on disk.  */
884		LFS_IENTRY(ifp, fs, ino, bp);
885		segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
886		brelse(bp, 0);
887
888		/*
889		 * Try to get the vnode.  If we can't, tough -- hope
890		 * you have backups!
891		 */
892		error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE,
893		    &vp);
894		if (error) {
895			printf("orphan %jd vget error %d\n", (intmax_t)ino,
896			    error);
897			continue;
898		}
899
900		/*
901		 * Sanity-check the inode.
902		 *
903		 * XXX What to do if it is still referenced?
904		 */
905		ip = VTOI(vp);
906		if (ip->i_nlink != 0)
907			printf("orphan %jd nlink %d\n", (intmax_t)ino,
908			    ip->i_nlink);
909
910		/*
911		 * Truncate the inode, to free any blocks allocated for
912		 * it, and release it, to free the inode number.
913		 *
914		 * XXX Isn't it redundant to truncate?  Won't vput do
915		 * that for us?
916		 */
917		error = lfs_truncate(vp, 0, 0, NOCRED);
918		if (error)
919			printf("orphan %jd truncate error %d", (intmax_t)ino,
920			    error);
921		vput(vp);
922
923		/* Update the number of bytes in the segment summary.  */
924		LFS_SEGENTRY(sup, fs, segno, bp);
925		KASSERT(sup->su_nbytes >= DINOSIZE(fs));
926		sup->su_nbytes -= DINOSIZE(fs);
927		LFS_WRITESEGENTRY(sup, fs, segno, bp);
928
929		/* Drop the on-disk address.  */
930		LFS_IENTRY(ifp, fs, ino, bp);
931		lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
932		LFS_BWRITE_LOG(bp);
933	}
934
935	if (orphan)
936		kmem_free(orphan, sizeof(orphan[0]) * norphan);
937}
938