1/*	$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $	*/
2
3/*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
32 *  Modified for ext2fs by Manuel Bouyer.
33 */
34
35/*
36 * Copyright (c) 1997 Manuel Bouyer.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 *
58 *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
59 *  Modified for ext2fs by Manuel Bouyer.
60 */
61
62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $");
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/buf.h>
68#include <sys/proc.h>
69#include <sys/vnode.h>
70#include <sys/mount.h>
71#include <sys/kernel.h>
72#include <sys/syslog.h>
73#include <sys/kauth.h>
74
75#include <lib/libkern/crc16.h>
76
77#include <ufs/ufs/inode.h>
78#include <ufs/ufs/ufs_extern.h>
79#include <ufs/ufs/ufsmount.h>
80
81#include <ufs/ext2fs/ext2fs.h>
82#include <ufs/ext2fs/ext2fs_extern.h>
83
84u_long ext2gennumber;
85
86static daddr_t	ext2fs_alloccg(struct inode *, int, daddr_t, int);
87static u_long	ext2fs_dirpref(struct m_ext2fs *);
88static void	ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
89static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
90		    daddr_t (*)(struct inode *, int, daddr_t, int));
91static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
92static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
93static __inline void	ext2fs_cg_update(struct m_ext2fs *, int,
94    struct ext2_gd *, int, int, int, daddr_t);
95static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
96static void	ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *,
97    char *);
98
99/*
100 * Allocate a block in the file system.
101 *
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 *   1) allocate the requested block.
105 *   2) allocate a rotationally optimal block in the same cylinder.
106 *   3) allocate a block in the same cylinder group.
107 *   4) quadradically rehash into other cylinder groups, until an
108 *	  available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 *   1) allocate a block in the cylinder group that contains the
112 *	  inode for the file.
113 *   2) quadradically rehash into other cylinder groups, until an
114 *	  available block is located.
115 */
116int
117ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
118    kauth_cred_t cred, daddr_t *bnp)
119{
120	struct m_ext2fs *fs;
121	daddr_t bno;
122	int cg;
123
124	*bnp = 0;
125	fs = ip->i_e2fs;
126#ifdef DIAGNOSTIC
127	if (cred == NOCRED)
128		panic("ext2fs_alloc: missing credential");
129#endif /* DIAGNOSTIC */
130	if (fs->e2fs.e2fs_fbcount == 0)
131		goto nospace;
132	if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
133	    NULL, NULL) != 0 &&
134	    freespace(fs) <= 0)
135		goto nospace;
136	if (bpref >= fs->e2fs.e2fs_bcount)
137		bpref = 0;
138	if (bpref == 0)
139		cg = ino_to_cg(fs, ip->i_number);
140	else
141		cg = dtog(fs, bpref);
142	bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
143	    ext2fs_alloccg);
144	if (bno > 0) {
145		ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
146		ip->i_flag |= IN_CHANGE | IN_UPDATE;
147		*bnp = bno;
148		return 0;
149	}
150nospace:
151	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
152	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
153	return ENOSPC;
154}
155
156/*
157 * Allocate an inode in the file system.
158 *
159 * If allocating a directory, use ext2fs_dirpref to select the inode.
160 * If allocating in a directory, the following hierarchy is followed:
161 *   1) allocate the preferred inode.
162 *   2) allocate an inode in the same cylinder group.
163 *   3) quadradically rehash into other cylinder groups, until an
164 *	  available inode is located.
165 * If no inode preference is given the following hierarchy is used
166 * to allocate an inode:
167 *   1) allocate an inode in cylinder group 0.
168 *   2) quadradically rehash into other cylinder groups, until an
169 *	  available inode is located.
170 */
171int
172ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
173{
174	struct inode *pip;
175	struct m_ext2fs *fs;
176	ino_t ino, ipref;
177	int cg;
178
179	pip = VTOI(pvp);
180	fs = pip->i_e2fs;
181	if (fs->e2fs.e2fs_ficount == 0)
182		goto noinodes;
183
184	if ((mode & IFMT) == IFDIR)
185		cg = ext2fs_dirpref(fs);
186	else
187		cg = ino_to_cg(fs, pip->i_number);
188	ipref = cg * fs->e2fs.e2fs_ipg + 1;
189	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
190	if (ino == 0)
191		goto noinodes;
192
193	*inop = ino;
194	return 0;
195
196noinodes:
197	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
198	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
199	return ENOSPC;
200}
201
202/*
203 * Find a cylinder to place a directory.
204 *
205 * The policy implemented by this algorithm is to select from
206 * among those cylinder groups with above the average number of
207 * free inodes, the one with the smallest number of directories.
208 */
209static u_long
210ext2fs_dirpref(struct m_ext2fs *fs)
211{
212	int cg, maxspace, mincg, avgifree;
213
214	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
215	maxspace = 0;
216	mincg = -1;
217	for (cg = 0; cg < fs->e2fs_ncg; cg++) {
218		uint32_t nifree =
219		    (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
220		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
221		if (nifree < avgifree) {
222			continue;
223		}
224		uint32_t nbfree =
225		    (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
226		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
227		if (mincg == -1 || nbfree > maxspace) {
228			mincg = cg;
229			maxspace = nbfree;
230		}
231	}
232	return mincg;
233}
234
235/*
236 * Select the desired position for the next block in a file.  The file is
237 * logically divided into sections. The first section is composed of the
238 * direct blocks. Each additional section contains fs_maxbpg blocks.
239 *
240 * If no blocks have been allocated in the first section, the policy is to
241 * request a block in the same cylinder group as the inode that describes
242 * the file. Otherwise, the policy is to try to allocate the blocks
243 * contiguously. The two fields of the ext2 inode extension (see
244 * ufs/ufs/inode.h) help this.
245 */
246daddr_t
247ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
248		int32_t *bap /* XXX ondisk32 */)
249{
250	struct m_ext2fs *fs;
251	int cg, i;
252
253	fs = ip->i_e2fs;
254	/*
255	 * if we are doing contiguous lbn allocation, try to alloc blocks
256	 * contiguously on disk
257	 */
258
259	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
260		return ip->i_e2fs_last_blk + 1;
261	}
262
263	/*
264	 * bap, if provided, gives us a list of blocks to which we want to
265	 * stay close
266	 */
267
268	if (bap) {
269		for (i = indx; i >= 0 ; i--) {
270			if (bap[i]) {
271				return fs2h32(bap[i]) + 1;
272			}
273		}
274	}
275
276	/* fall back to the first block of the cylinder containing the inode */
277
278	cg = ino_to_cg(fs, ip->i_number);
279	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
280}
281
282/*
283 * Implement the cylinder overflow algorithm.
284 *
285 * The policy implemented by this algorithm is:
286 *   1) allocate the block in its requested cylinder group.
287 *   2) quadradically rehash on the cylinder group number.
288 *   3) brute force search for a free block.
289 */
290static u_long
291ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
292		daddr_t (*allocator)(struct inode *, int, daddr_t, int))
293{
294	struct m_ext2fs *fs;
295	long result;
296	int i, icg = cg;
297
298	fs = ip->i_e2fs;
299	/*
300	 * 1: preferred cylinder group
301	 */
302	result = (*allocator)(ip, cg, pref, size);
303	if (result)
304		return result;
305	/*
306	 * 2: quadratic rehash
307	 */
308	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
309		cg += i;
310		if (cg >= fs->e2fs_ncg)
311			cg -= fs->e2fs_ncg;
312		result = (*allocator)(ip, cg, 0, size);
313		if (result)
314			return result;
315	}
316	/*
317	 * 3: brute force search
318	 * Note that we start at i == 2, since 0 was checked initially,
319	 * and 1 is always checked in the quadratic rehash.
320	 */
321	cg = (icg + 2) % fs->e2fs_ncg;
322	for (i = 2; i < fs->e2fs_ncg; i++) {
323		result = (*allocator)(ip, cg, 0, size);
324		if (result)
325			return result;
326		cg++;
327		if (cg == fs->e2fs_ncg)
328			cg = 0;
329	}
330	return 0;
331}
332
333/*
334 * Determine whether a block can be allocated.
335 *
336 * Check to see if a block of the appropriate size is available,
337 * and if it is, allocate it.
338 */
339
340static daddr_t
341ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
342{
343	struct m_ext2fs *fs;
344	char *bbp;
345	struct buf *bp;
346	int error, bno, start, end, loc;
347
348	fs = ip->i_e2fs;
349	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 &&
350	    fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
351		return 0;
352	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
353		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
354		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
355		(int)fs->e2fs_bsize, B_MODIFY, &bp);
356	if (error) {
357		return 0;
358	}
359	bbp = (char *)bp->b_data;
360
361	if (dtog(fs, bpref) != cg)
362		bpref = 0;
363
364	/* initialize block bitmap now if uninit */
365	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
366	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
367		ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
368		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
369	}
370
371	if (bpref != 0) {
372		bpref = dtogd(fs, bpref);
373		/*
374		 * if the requested block is available, use it
375		 */
376		if (isclr(bbp, bpref)) {
377			bno = bpref;
378			goto gotit;
379		}
380	}
381	/*
382	 * no blocks in the requested cylinder, so take next
383	 * available one in this cylinder group.
384	 * first try to get 8 contiguous blocks, then fall back to a single
385	 * block.
386	 */
387	if (bpref)
388		start = dtogd(fs, bpref) / NBBY;
389	else
390		start = 0;
391	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
392	for (loc = start; loc < end; loc++) {
393		if (bbp[loc] == 0) {
394			bno = loc * NBBY;
395			goto gotit;
396		}
397	}
398	for (loc = 0; loc < start; loc++) {
399		if (bbp[loc] == 0) {
400			bno = loc * NBBY;
401			goto gotit;
402		}
403	}
404
405	bno = ext2fs_mapsearch(fs, bbp, bpref);
406#if 0
407	/*
408	 * XXX jdolecek mapsearch actually never fails, it panics instead.
409	 * If re-enabling, make sure to brele() before returning.
410	 */
411	if (bno < 0)
412		return 0;
413#endif
414gotit:
415#ifdef DIAGNOSTIC
416	if (isset(bbp, (daddr_t)bno)) {
417		printf("%s: cg=%d bno=%d fs=%s\n", __func__,
418		    cg, bno, fs->e2fs_fsmnt);
419		panic("ext2fs_alloccg: dup alloc");
420	}
421#endif
422	setbit(bbp, (daddr_t)bno);
423	fs->e2fs.e2fs_fbcount--;
424	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
425	fs->e2fs_fmod = 1;
426	bdwrite(bp);
427	return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
428}
429
430/*
431 * Determine whether an inode can be allocated.
432 *
433 * Check to see if an inode is available, and if it is,
434 * allocate it using the following policy:
435 *   1) allocate the requested inode.
436 *   2) allocate the next available inode after the requested
437 *	  inode in the specified cylinder group.
438 */
439static daddr_t
440ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
441{
442	struct m_ext2fs *fs;
443	char *ibp;
444	struct buf *bp;
445	int error, start, len, loc, map, i;
446
447	ipref--; /* to avoid a lot of (ipref -1) */
448	if (ipref == -1)
449		ipref = 0;
450	fs = ip->i_e2fs;
451	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 &&
452	    fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
453		return 0;
454	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
455		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
456		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
457		(int)fs->e2fs_bsize, B_MODIFY, &bp);
458	if (error) {
459		return 0;
460	}
461	ibp = (char *)bp->b_data;
462
463	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
464
465	/* initialize inode bitmap now if uninit */
466	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
467	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
468		KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
469		memset(ibp, 0, fs->e2fs_bsize);
470		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
471	}
472
473	if (ipref) {
474		ipref %= fs->e2fs.e2fs_ipg;
475		if (isclr(ibp, ipref))
476			goto gotit;
477	}
478	start = ipref / NBBY;
479	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
480	loc = skpc(0xff, len, &ibp[start]);
481	if (loc == 0) {
482		len = start + 1;
483		start = 0;
484		loc = skpc(0xff, len, &ibp[0]);
485		if (loc == 0) {
486			printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__,
487			    cg, (long long)ipref, fs->e2fs_fsmnt);
488			panic("%s: map corrupted", __func__);
489			/* NOTREACHED */
490		}
491	}
492	i = start + len - loc;
493	map = ibp[i] ^ 0xff;
494	if (map == 0) {
495		printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
496		panic("%s: inode not in map", __func__);
497	}
498	ipref = i * NBBY + ffs(map) - 1;
499gotit:
500	setbit(ibp, ipref);
501	fs->e2fs.e2fs_ficount--;
502	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
503		0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
504	fs->e2fs_fmod = 1;
505	bdwrite(bp);
506	return cg * fs->e2fs.e2fs_ipg + ipref + 1;
507}
508
509/*
510 * Free a block.
511 *
512 * The specified block is placed back in the
513 * free map.
514 */
515void
516ext2fs_blkfree(struct inode *ip, daddr_t bno)
517{
518	struct m_ext2fs *fs;
519	char *bbp;
520	struct buf *bp;
521	int error, cg;
522
523	fs = ip->i_e2fs;
524	cg = dtog(fs, bno);
525
526	KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
527	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
528
529	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
530		printf("%s: bad block %jd, ino %ju\n",
531		    __func__, (intmax_t)bno, (uintmax_t)ip->i_number);
532		ext2fs_fserr(fs, ip->i_uid, "bad block");
533		return;
534	}
535	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
536	    fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
537	    fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
538	    (int)fs->e2fs_bsize, B_MODIFY, &bp);
539	if (error) {
540		return;
541	}
542	bbp = (char *)bp->b_data;
543	bno = dtogd(fs, bno);
544	if (isclr(bbp, bno)) {
545		printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__,
546		    (uintmax_t)ip->i_dev, (intmax_t)bno,
547		    fs->e2fs_fsmnt);
548		panic("%s: freeing free block", __func__);
549	}
550	clrbit(bbp, bno);
551	fs->e2fs.e2fs_fbcount++;
552	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
553	fs->e2fs_fmod = 1;
554	bdwrite(bp);
555}
556
557/*
558 * Free an inode.
559 *
560 * The specified inode is placed back in the free map.
561 */
562int
563ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
564{
565	struct m_ext2fs *fs;
566	char *ibp;
567	struct inode *pip;
568	struct buf *bp;
569	int error, cg;
570
571	pip = VTOI(pvp);
572	fs = pip->i_e2fs;
573
574	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
575		panic("%s: range: dev = %#jx, ino = %ju, fs = %s",
576		    __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino,
577		    fs->e2fs_fsmnt);
578
579	cg = ino_to_cg(fs, ino);
580
581	KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
582	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
583
584	error = bread(pip->i_devvp, EXT2_FSBTODB64(fs,
585	    fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
586	    fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
587	    (int)fs->e2fs_bsize, B_MODIFY, &bp);
588	if (error) {
589		return 0;
590	}
591	ibp = (char *)bp->b_data;
592	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
593	if (isclr(ibp, ino)) {
594		printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__,
595		    (uintmax_t)pip->i_dev,
596		    (uintmax_t)ino, fs->e2fs_fsmnt);
597		if (fs->e2fs_ronly == 0)
598			panic("%s: freeing free inode", __func__);
599	}
600	clrbit(ibp, ino);
601	fs->e2fs.e2fs_ficount++;
602	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
603		0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
604	fs->e2fs_fmod = 1;
605	bdwrite(bp);
606	return 0;
607}
608
609/*
610 * Find a block in the specified cylinder group.
611 *
612 * It is a panic if a request is made to find a block if none are
613 * available.
614 */
615
616static daddr_t
617ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
618{
619	int start, len, loc, i, map;
620
621	/*
622	 * find the fragment by searching through the free block
623	 * map for an appropriate bit pattern
624	 */
625	if (bpref)
626		start = dtogd(fs, bpref) / NBBY;
627	else
628		start = 0;
629	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
630	loc = skpc(0xff, len, &bbp[start]);
631	if (loc == 0) {
632		len = start + 1;
633		start = 0;
634		loc = skpc(0xff, len, &bbp[start]);
635		if (loc == 0) {
636			printf("%s: start = %d, len = %d, fs = %s\n",
637			    __func__, start, len, fs->e2fs_fsmnt);
638			panic("%s: map corrupted", __func__);
639			/* NOTREACHED */
640		}
641	}
642	i = start + len - loc;
643	map = bbp[i] ^ 0xff;
644	if (map == 0) {
645		printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
646		panic("%s: block not in map", __func__);
647	}
648	return i * NBBY + ffs(map) - 1;
649}
650
651/*
652 * Fserr prints the name of a file system with an error diagnostic.
653 *
654 * The form of the error message is:
655 *	fs: error message
656 */
657static void
658ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
659{
660
661	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
662}
663
664static __inline void
665ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
666{
667	if (nifree) {
668		uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) |
669		    (fs2h16(gd->ext2bgd_nifree_hi) << 16);
670		ext2bgd_nifree += nifree;
671		gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
672		gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
673		/*
674		 * If we allocated inode on bigger offset than what was
675		 * ever used before, bump the itable_unused count. This
676		 * member only ever grows, and is used only for initialization
677		 * !INODE_ZEROED groups with used inodes. Of course, by the
678		 * time we get here the itables are already zeroed, but
679		 * e2fstools fsck.ext4 still checks this.
680		 */
681		if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 &&
682		    (ioff + 1) >= (fs->e2fs.e2fs_ipg -
683		    fs2h16(gd->ext2bgd_itable_unused_lo))) {
684			gd->ext2bgd_itable_unused_lo =
685			    h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
686		}
687
688		KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
689		    gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
690	}
691
692
693	if (nbfree) {
694		uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree)
695		    | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
696		ext2bgd_nbfree += nbfree;
697		gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
698		gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
699	}
700
701	if (ndirs) {
702		uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs)
703		    | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
704		ext2bgd_ndirs += ndirs;
705		gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
706		gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
707	}
708
709	if (E2FS_HAS_GD_CSUM(fs))
710		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
711}
712
713static const uint32_t ext2fs_crc32c_table[256] = {
714	0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
715	0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
716	0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
717	0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
718	0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
719	0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
720	0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
721	0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
722	0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
723	0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
724	0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
725	0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
726	0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
727	0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
728	0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
729	0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
730	0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
731	0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
732	0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
733	0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
734	0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
735	0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
736	0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
737	0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
738	0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
739	0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
740	0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
741	0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
742	0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
743	0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
744	0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
745	0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
746	0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
747	0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
748	0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
749	0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
750	0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
751	0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
752	0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
753	0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
754	0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
755	0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
756	0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
757	0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
758	0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
759	0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
760	0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
761	0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
762	0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
763	0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
764	0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
765	0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
766	0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
767	0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
768	0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
769	0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
770	0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
771	0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
772	0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
773	0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
774	0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
775	0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
776	0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
777	0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
778};
779
780static uint32_t
781ext2fs_crc32c(uint32_t last, const void *vbuf, size_t len)
782{
783	uint32_t crc = last;
784	const uint8_t *buf = vbuf;
785
786	while (len--)
787		crc = ext2fs_crc32c_table[(crc ^ *buf++) & 0xff] ^ (crc >> 8);
788
789	return crc;
790}
791
792/*
793 * Compute group description csum. Structure data must be LE (not host).
794 * Returned as LE (disk encoding).
795 */
796static uint16_t
797ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
798{
799	uint16_t crc;
800	size_t cgsize = 1 << fs->e2fs_group_desc_shift;
801	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
802	size_t off;
803
804	if (EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
805		uint32_t crc32;
806		uint8_t dummy[2] = {0, 0};
807
808		off = offsetof(struct ext2_gd, ext2bgd_checksum);
809
810		crc32 = ext2fs_crc32c(~0, fs->e2fs.e2fs_uuid,
811		    sizeof(fs->e2fs.e2fs_uuid));
812		crc32 = ext2fs_crc32c(crc32, &cg_bswapped, sizeof(cg_bswapped));
813		crc32 = ext2fs_crc32c(crc32, gd, off);
814		crc32 = ext2fs_crc32c(crc32, dummy, 2);
815		crc32 = ext2fs_crc32c(crc32, gd + off + 2, cgsize - (off + 2));
816
817		return h2fs16(crc32 & 0xffff);
818	}
819
820	if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
821		return 0;
822
823	off = offsetof(struct ext2_gd, ext2bgd_checksum);
824
825	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid,
826	    sizeof(fs->e2fs.e2fs_uuid));
827	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
828	crc = crc16(crc, (uint8_t *)gd, off);
829	crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
830
831	return h2fs16(crc);
832}
833
834static void
835ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
836{
837	int i;
838
839	memset(bbp, 0, fs->e2fs_bsize);
840
841	/*
842	 * No block was ever allocated on this cg before, so the only used
843	 * blocks are metadata blocks on start of the group. We could optimize
844	 * this to set by bytes, but since this is done once per the group
845	 * in lifetime of filesystem, it really is not worth it.
846	 */
847	for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
848		setbit(bbp, i);
849}
850
851/*
852 * Verify csum and initialize itable if not done already
853 */
854int
855ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
856{
857	struct ext2_gd *gd;
858	ino_t ioff;
859	size_t boff;
860	struct buf *bp;
861	int cg, i, error;
862
863	if (!E2FS_HAS_GD_CSUM(fs))
864		return 0;
865
866	for(cg = 0; cg < fs->e2fs_ncg; cg++) {
867		gd = &fs->e2fs_gd[cg];
868
869		/* Verify checksum */
870		uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd);
871		if (gd->ext2bgd_checksum != csum) {
872			printf("%s: group %d invalid csum (%#x != %#x)\n",
873			    __func__, cg, gd->ext2bgd_checksum, csum);
874			return EINVAL;
875		}
876
877		/* if mounting read-write, zero itable if not already done */
878		if (ronly ||
879		    (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
880			continue;
881
882		/*
883		 * We are skipping already used inodes, zero rest of itable
884		 * blocks. First block to zero could be only partial wipe, all
885		 * others are wiped completely. This might take a while,
886		 * there could be many inode table blocks. We use
887		 * delayed writes, so this shouldn't block for very
888		 * long.
889		 */
890		ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
891		boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
892
893		for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
894			if (boff) {
895				/* partial wipe, must read old data */
896				error = bread(devvp, EXT2_FSBTODB64OFF(fs,
897				    fs2h32(gd->ext2bgd_i_tables),
898				    fs2h32(gd->ext2bgd_i_tables_hi), i),
899				    (int)fs->e2fs_bsize, B_MODIFY, &bp);
900				if (error) {
901					printf("%s: can't read itable block",
902					    __func__);
903					return error;
904				}
905				memset((char *)bp->b_data + boff, 0,
906				    fs->e2fs_bsize - boff);
907				boff = 0;
908			} else {
909				/*
910				 * Complete wipe, don't need to read data. This
911				 * assumes nothing else is changing the data.
912				 */
913				bp = getblk(devvp, EXT2_FSBTODB64OFF(fs,
914				    fs2h32(gd->ext2bgd_i_tables),
915				    fs2h32(gd->ext2bgd_i_tables_hi), i),
916				    (int)fs->e2fs_bsize, 0, 0);
917				clrbuf(bp);
918			}
919
920			bdwrite(bp);
921		}
922
923		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
924		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
925		fs->e2fs_fmod = 1;
926	}
927
928	return 0;
929}
930