1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2022 The FreeBSD Foundation
5 *
6 * This software was developed by Mark Johnston under sponsorship from
7 * the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are
11 * met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <assert.h>
32#include <stdlib.h>
33#include <string.h>
34
35#include <util.h>
36
37#include "zfs.h"
38
/*
 * Number of dnode_phys_t entries that fit in a MAXBLOCKSIZE-byte buffer.
 * This is the capacity of the buf[] array in each objset_dnode_chunk.
 */
#define	DNODES_PER_CHUNK	(MAXBLOCKSIZE / sizeof(dnode_phys_t))
40
/*
 * A fixed-capacity batch of in-memory dnodes.  An object set keeps its
 * dnodes in a tail queue of these chunks; slots are handed out in order and
 * a new chunk is appended once the last one is full.
 */
struct objset_dnode_chunk {
	dnode_phys_t	buf[DNODES_PER_CHUNK];
	unsigned int	nextfree;	/* index of the next unused slot in buf */
	STAILQ_ENTRY(objset_dnode_chunk) next;	/* linkage in dnodechunks */
};
46
/*
 * In-memory state for one object set under construction: the physical
 * objset buffer and where it will be written, the space charged to the
 * objset so far, and the list of dnodes allocated from it.
 */
typedef struct zfs_objset {
	/* Physical object set. */
	objset_phys_t	*phys;		/* buffer of osblksz bytes */
	off_t		osloc;		/* location allocated for phys */
	off_t		osblksz;	/* size used to allocate and write phys */
	blkptr_t	osbp;		/* set in objset_write() */

	/* Accounting. */
	off_t		space;		/* bytes allocated to this objset */

	/* dnode allocator. */
	uint64_t	dnodecount;	/* next object ID; ID 0 is the meta dnode */
	STAILQ_HEAD(, objset_dnode_chunk) dnodechunks;
} zfs_objset_t;
61
62static void
63dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype,
64    uint16_t bonuslen)
65{
66	dnode->dn_indblkshift = MAXBLOCKSHIFT;
67	dnode->dn_type = type;
68	dnode->dn_bonustype = bonustype;
69	dnode->dn_bonuslen = bonuslen;
70	dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4;
71	dnode->dn_nlevels = 1;
72	dnode->dn_nblkptr = 1;
73	dnode->dn_flags = DNODE_FLAG_USED_BYTES;
74}
75
76zfs_objset_t *
77objset_alloc(zfs_opt_t *zfs, uint64_t type)
78{
79	struct objset_dnode_chunk *chunk;
80	zfs_objset_t *os;
81
82	os = ecalloc(1, sizeof(*os));
83	os->osblksz = sizeof(objset_phys_t);
84	os->osloc = objset_space_alloc(zfs, os, &os->osblksz);
85
86	/*
87	 * Object ID zero is always reserved for the meta dnode, which is
88	 * embedded in the objset itself.
89	 */
90	STAILQ_INIT(&os->dnodechunks);
91	chunk = ecalloc(1, sizeof(*chunk));
92	chunk->nextfree = 1;
93	STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next);
94	os->dnodecount = 1;
95
96	os->phys = ecalloc(1, os->osblksz);
97	os->phys->os_type = type;
98
99	dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0);
100	os->phys->os_meta_dnode.dn_datablkszsec =
101	    DNODE_BLOCK_SIZE >> MINBLOCKSHIFT;
102
103	return (os);
104}
105
106/*
107 * Write the dnode array and physical object set to disk.
108 */
109static void
110_objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c,
111    off_t loc)
112{
113	struct objset_dnode_chunk *chunk, *tmp;
114	unsigned int total;
115
116	/*
117	 * Write out the dnode array, i.e., the meta-dnode.  For some reason its
118	 * data blocks must be 16KB in size no matter how large the array is.
119	 */
120	total = 0;
121	STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) {
122		unsigned int i;
123
124		assert(chunk->nextfree > 0);
125		assert(chunk->nextfree <= os->dnodecount);
126		assert(chunk->nextfree <= DNODES_PER_CHUNK);
127
128		for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) {
129			blkptr_t *bp;
130			uint64_t fill;
131
132			if (chunk->nextfree - i < DNODES_PER_BLOCK)
133				fill = DNODES_PER_BLOCK - (chunk->nextfree - i);
134			else
135				fill = 0;
136			bp = dnode_cursor_next(zfs, c,
137			    (total + i) * sizeof(dnode_phys_t));
138			vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode,
139			    0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp);
140			loc += DNODE_BLOCK_SIZE;
141		}
142		total += i;
143
144		free(chunk);
145	}
146	dnode_cursor_finish(zfs, c);
147	STAILQ_INIT(&os->dnodechunks);
148
149	/*
150	 * Write the object set itself.  The saved block pointer will be copied
151	 * into the referencing DSL dataset or the uberblocks.
152	 */
153	vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0,
154	    os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp);
155}
156
157void
158objset_write(zfs_opt_t *zfs, zfs_objset_t *os)
159{
160	struct dnode_cursor *c;
161	off_t dnodeloc, dnodesz;
162	uint64_t dnodecount;
163
164	/*
165	 * There is a chicken-and-egg problem here when writing the MOS: we
166	 * cannot write space maps before we're finished allocating space from
167	 * the vdev, and we can't write the MOS without having allocated space
168	 * for indirect dnode blocks.  Thus, rather than lazily allocating
169	 * indirect blocks for the meta-dnode (which would be simpler), they are
170	 * allocated up-front and before writing space maps.
171	 */
172	dnodecount = os->dnodecount;
173	if (os == zfs->mos)
174		dnodecount += zfs->mscount;
175	dnodesz = dnodecount * sizeof(dnode_phys_t);
176	c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz,
177	    DNODE_BLOCK_SIZE);
178	dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE);
179	dnodeloc = objset_space_alloc(zfs, os, &dnodesz);
180
181	if (os == zfs->mos) {
182		vdev_spacemap_write(zfs);
183
184		/*
185		 * We've finished allocating space, account for it in $MOS and
186		 * in the parent directory.
187		 */
188		dsl_dir_root_finalize(zfs, os->space);
189	}
190	_objset_write(zfs, os, c, dnodeloc);
191}
192
193dnode_phys_t *
194objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype,
195    uint16_t bonuslen, uint64_t *idp)
196{
197	struct objset_dnode_chunk *chunk;
198	dnode_phys_t *dnode;
199
200	assert(bonuslen <= DN_OLD_MAX_BONUSLEN);
201	assert(!STAILQ_EMPTY(&os->dnodechunks));
202
203	chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next);
204	if (chunk->nextfree == DNODES_PER_CHUNK) {
205		chunk = ecalloc(1, sizeof(*chunk));
206		STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next);
207	}
208	*idp = os->dnodecount++;
209	dnode = &chunk->buf[chunk->nextfree++];
210	dnode_init(dnode, type, bonustype, bonuslen);
211	dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT;
212	return (dnode);
213}
214
215dnode_phys_t *
216objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp)
217{
218	return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp));
219}
220
221/*
222 * Look up a physical dnode by ID.  This is not used often so a linear search is
223 * fine.
224 */
225dnode_phys_t *
226objset_dnode_lookup(zfs_objset_t *os, uint64_t id)
227{
228	struct objset_dnode_chunk *chunk;
229
230	assert(id > 0);
231	assert(id < os->dnodecount);
232
233	STAILQ_FOREACH(chunk, &os->dnodechunks, next) {
234		if (id < DNODES_PER_CHUNK)
235			return (&chunk->buf[id]);
236		id -= DNODES_PER_CHUNK;
237	}
238	assert(0);
239	return (NULL);
240}
241
242off_t
243objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp)
244{
245	off_t loc;
246
247	loc = vdev_space_alloc(zfs, lenp);
248	os->space += *lenp;
249	return (loc);
250}
251
252uint64_t
253objset_space(const zfs_objset_t *os)
254{
255	return (os->space);
256}
257
258void
259objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp)
260{
261	memcpy(bp, &os->osbp, sizeof(blkptr_t));
262}
263