1/*-
2 * Copyright (c) 2010-2012 Semihalf.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/conf.h>
33#include <sys/kernel.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mount.h>
37#include <sys/mutex.h>
38#include <sys/namei.h>
39#include <sys/sysctl.h>
40#include <sys/vnode.h>
41#include <sys/buf.h>
42#include <sys/bio.h>
43
44#include <vm/vm.h>
45#include <vm/vm_param.h>
46#include <vm/vm_kern.h>
47#include <vm/vm_page.h>
48
49#include <fs/nandfs/nandfs_mount.h>
50#include <fs/nandfs/nandfs.h>
51#include <fs/nandfs/nandfs_subr.h>
52
53static void
54nandfs_get_desc_block_nr(struct nandfs_mdt *mdt, uint64_t desc,
55    uint64_t *desc_block)
56{
57
58	*desc_block = desc * mdt->blocks_per_desc_block;
59}
60
61static void
62nandfs_get_group_block_nr(struct nandfs_mdt *mdt, uint64_t group,
63    uint64_t *group_block)
64{
65	uint64_t desc, group_off;
66
67	desc = group / mdt->groups_per_desc_block;
68	group_off = group % mdt->groups_per_desc_block;
69	*group_block = desc * mdt->blocks_per_desc_block +
70	    1 + group_off * mdt->blocks_per_group;
71}
72
73static void
74init_desc_block(struct nandfs_mdt *mdt, uint8_t *block_data)
75{
76	struct nandfs_block_group_desc *desc;
77	uint32_t i;
78
79	desc = (struct nandfs_block_group_desc *) block_data;
80	for (i = 0; i < mdt->groups_per_desc_block; i++)
81		desc[i].bg_nfrees = mdt->entries_per_group;
82}
83
84int
85nandfs_find_free_entry(struct nandfs_mdt *mdt, struct nandfs_node *node,
86    struct nandfs_alloc_request *req)
87{
88	nandfs_daddr_t desc, group, maxgroup, maxdesc, pos = 0;
89	nandfs_daddr_t start_group, start_desc;
90	nandfs_daddr_t desc_block, group_block;
91	nandfs_daddr_t file_blocks;
92	struct nandfs_block_group_desc *descriptors;
93	struct buf *bp, *bp2;
94	uint32_t *mask, i, mcount, msize;
95	int error;
96
97	file_blocks = node->nn_inode.i_blocks;
98	maxgroup = 0x100000000ull / mdt->entries_per_group;
99	maxdesc = maxgroup / mdt->groups_per_desc_block;
100	start_group = req->entrynum / mdt->entries_per_group;
101	start_desc = start_group / mdt->groups_per_desc_block;
102
103	bp = bp2 = NULL;
104restart:
105	for (desc = start_desc; desc < maxdesc; desc++) {
106		nandfs_get_desc_block_nr(mdt, desc, &desc_block);
107
108		if (bp)
109			brelse(bp);
110		if (desc_block < file_blocks) {
111			error = nandfs_bread(node, desc_block, NOCRED, 0, &bp);
112			if (error) {
113				brelse(bp);
114				return (error);
115			}
116		} else {
117			error = nandfs_bcreate(node, desc_block, NOCRED, 0,
118			    &bp);
119			if (error)
120				return (error);
121			file_blocks++;
122			init_desc_block(mdt, bp->b_data);
123		}
124
125		descriptors = (struct nandfs_block_group_desc *) bp->b_data;
126		for (group = start_group; group < mdt->groups_per_desc_block;
127		    group++) {
128			if (descriptors[group].bg_nfrees > 0) {
129				nandfs_get_group_block_nr(mdt, group,
130				    &group_block);
131
132				if (bp2)
133					brelse(bp2);
134				if (group_block < file_blocks) {
135					error = nandfs_bread(node, group_block,
136					    NOCRED, 0, &bp2);
137					if (error) {
138						brelse(bp);
139						return (error);
140					}
141				} else {
142					error = nandfs_bcreate(node,
143					    group_block, NOCRED, 0, &bp2);
144					if (error)
145						return (error);
146					file_blocks++;
147				}
148				mask = (uint32_t *)bp2->b_data;
149				msize = (sizeof(uint32_t) * __CHAR_BIT);
150				mcount = mdt->entries_per_group / msize;
151				for (i = 0; i < mcount; i++) {
152					if (mask[i] == UINT32_MAX)
153						continue;
154
155					pos = ffs(~mask[i]) - 1;
156					pos += (msize * i);
157					pos += (group * mdt->entries_per_group);
158					pos += desc * group *
159					    mdt->groups_per_desc_block *
160					    mdt->entries_per_group;
161					goto found;
162				}
163			}
164		}
165		start_group = 0;
166	}
167
168	if (start_desc != 0) {
169		maxdesc = start_desc;
170		start_desc = 0;
171		req->entrynum = 0;
172		goto restart;
173	}
174
175	return (ENOENT);
176
177found:
178	req->entrynum = pos;
179	req->bp_desc = bp;
180	req->bp_bitmap = bp2;
181	DPRINTF(ALLOC, ("%s: desc: %p bitmap: %p entry: %#jx\n",
182	    __func__, req->bp_desc, req->bp_bitmap, (uintmax_t)pos));
183
184	return (0);
185}
186
187int
188nandfs_find_entry(struct nandfs_mdt* mdt, struct nandfs_node *nnode,
189    struct nandfs_alloc_request *req)
190{
191	uint64_t dblock, bblock, eblock;
192	uint32_t offset;
193	int error;
194
195	nandfs_mdt_trans_blk(mdt, req->entrynum, &dblock, &bblock, &eblock,
196	    &offset);
197
198	error = nandfs_bread(nnode, dblock, NOCRED, 0, &req->bp_desc);
199	if (error) {
200		brelse(req->bp_desc);
201		return (error);
202	}
203
204	error = nandfs_bread(nnode, bblock, NOCRED, 0, &req->bp_bitmap);
205	if (error) {
206		brelse(req->bp_desc);
207		brelse(req->bp_bitmap);
208		return (error);
209	}
210
211	error = nandfs_bread(nnode, eblock, NOCRED, 0, &req->bp_entry);
212	if (error) {
213		brelse(req->bp_desc);
214		brelse(req->bp_bitmap);
215		brelse(req->bp_entry);
216		return (error);
217	}
218
219	DPRINTF(ALLOC,
220	    ("%s: desc_buf: %p bitmap_buf %p entry_buf %p offset %x\n",
221	    __func__, req->bp_desc, req->bp_bitmap, req->bp_entry, offset));
222
223	return (0);
224}
225
226static __inline void
227nandfs_calc_idx_entry(struct nandfs_mdt* mdt, uint32_t entrynum,
228    uint64_t *group, uint64_t *bitmap_idx, uint64_t *bitmap_off)
229{
230
231	/* Find group_desc index */
232	entrynum = entrynum %
233	    (mdt->entries_per_group * mdt->groups_per_desc_block);
234	*group = entrynum / mdt->entries_per_group;
235	/* Find bitmap index and bit offset */
236	entrynum = entrynum % mdt->entries_per_group;
237	*bitmap_idx = entrynum / (sizeof(uint32_t) * __CHAR_BIT);
238	*bitmap_off = entrynum % (sizeof(uint32_t) * __CHAR_BIT);
239}
240
241int
242nandfs_free_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req)
243{
244	struct nandfs_block_group_desc *descriptors;
245	uint64_t bitmap_idx, bitmap_off;
246	uint64_t group;
247	uint32_t *mask, maskrw;
248
249	nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx,
250	    &bitmap_off);
251
252	DPRINTF(ALLOC, ("nandfs_free_entry: req->entrynum=%jx bitmap_idx=%jx"
253	   " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum,
254	   (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group));
255
256	/* Update counter of free entries for group */
257	descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data;
258	descriptors[group].bg_nfrees++;
259
260	/* Set bit to indicate that entry is taken */
261	mask = (uint32_t *)req->bp_bitmap->b_data;
262	maskrw = mask[bitmap_idx];
263	KASSERT(maskrw & (1 << bitmap_off), ("freeing unallocated vblock"));
264	maskrw &= ~(1 << bitmap_off);
265	mask[bitmap_idx] = maskrw;
266
267	/* Make descriptor, bitmap and entry buffer dirty */
268	if (nandfs_dirty_buf(req->bp_desc, 0) == 0) {
269		nandfs_dirty_buf(req->bp_bitmap, 1);
270		nandfs_dirty_buf(req->bp_entry, 1);
271	} else {
272		brelse(req->bp_bitmap);
273		brelse(req->bp_entry);
274		return (-1);
275	}
276
277	return (0);
278}
279
280int
281nandfs_alloc_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req)
282{
283	struct nandfs_block_group_desc *descriptors;
284	uint64_t bitmap_idx, bitmap_off;
285	uint64_t group;
286	uint32_t *mask, maskrw;
287
288	nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx,
289	    &bitmap_off);
290
291	DPRINTF(ALLOC, ("nandfs_alloc_entry: req->entrynum=%jx bitmap_idx=%jx"
292	    " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum,
293	    (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group));
294
295	/* Update counter of free entries for group */
296	descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data;
297	descriptors[group].bg_nfrees--;
298
299	/* Clear bit to indicate that entry is free */
300	mask = (uint32_t *)req->bp_bitmap->b_data;
301	maskrw = mask[bitmap_idx];
302	maskrw |= 1 << bitmap_off;
303	mask[bitmap_idx] = maskrw;
304
305	/* Make descriptor, bitmap and entry buffer dirty */
306	if (nandfs_dirty_buf(req->bp_desc, 0) == 0) {
307		nandfs_dirty_buf(req->bp_bitmap, 1);
308		nandfs_dirty_buf(req->bp_entry, 1);
309	} else {
310		brelse(req->bp_bitmap);
311		brelse(req->bp_entry);
312		return (-1);
313	}
314
315	return (0);
316}
317
318void
319nandfs_abort_entry(struct nandfs_alloc_request *req)
320{
321
322	brelse(req->bp_desc);
323	brelse(req->bp_bitmap);
324	brelse(req->bp_entry);
325}
326
327int
328nandfs_get_entry_block(struct nandfs_mdt *mdt, struct nandfs_node *node,
329    struct nandfs_alloc_request *req, uint32_t *entry, int create)
330{
331	struct buf *bp;
332	nandfs_lbn_t blocknr;
333	int	error;
334
335	/* Find buffer number for given entry */
336	nandfs_mdt_trans(mdt, req->entrynum, &blocknr, entry);
337	DPRINTF(ALLOC, ("%s: ino %#jx entrynum:%#jx block:%#jx entry:%x\n",
338	    __func__, (uintmax_t)node->nn_ino, (uintmax_t)req->entrynum,
339	    (uintmax_t)blocknr, *entry));
340
341	/* Read entry block or create if 'create' parameter is not zero */
342	bp = NULL;
343
344	if (blocknr < node->nn_inode.i_blocks)
345		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
346	else if (create)
347		error = nandfs_bcreate(node, blocknr, NOCRED, 0, &bp);
348	else
349		error = E2BIG;
350
351	if (error) {
352		DPRINTF(ALLOC, ("%s: ino %#jx block %#jx entry %x error %d\n",
353		    __func__, (uintmax_t)node->nn_ino, (uintmax_t)blocknr,
354		    *entry, error));
355		if (bp)
356			brelse(bp);
357		return (error);
358	}
359
360	MPASS(nandfs_vblk_get(bp) != 0 || node->nn_ino == NANDFS_DAT_INO);
361
362	req->bp_entry = bp;
363	return (0);
364}
365