nandfs_subr.c revision 241844
1251881Speter/*-
2251881Speter * Copyright (c) 2010-2012 Semihalf
3251881Speter * Copyright (c) 2008, 2009 Reinoud Zandijk
4251881Speter * All rights reserved.
5251881Speter *
6251881Speter * Redistribution and use in source and binary forms, with or without
7251881Speter * modification, are permitted provided that the following conditions
8251881Speter * are met:
9251881Speter * 1. Redistributions of source code must retain the above copyright
10251881Speter *    notice, this list of conditions and the following disclaimer.
11251881Speter * 2. Redistributions in binary form must reproduce the above copyright
12251881Speter *    notice, this list of conditions and the following disclaimer in the
13251881Speter *    documentation and/or other materials provided with the distribution.
14251881Speter *
15251881Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16251881Speter * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17251881Speter * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18251881Speter * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19251881Speter * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20251881Speter * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21251881Speter * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22251881Speter * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23251881Speter * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24251881Speter * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25251881Speter *
26251881Speter * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
27251881Speter */
28251881Speter
29251881Speter#include <sys/cdefs.h>
30251881Speter__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_subr.c 241844 2012-10-22 03:00:37Z eadler $");
31251881Speter
32251881Speter#include <sys/param.h>
33251881Speter#include <sys/systm.h>
34251881Speter#include <sys/namei.h>
35251881Speter#include <sys/resourcevar.h>
36251881Speter#include <sys/kernel.h>
37251881Speter#include <sys/file.h>
38251881Speter#include <sys/stat.h>
39251881Speter#include <sys/buf.h>
40251881Speter#include <sys/bio.h>
41251881Speter#include <sys/proc.h>
42251881Speter#include <sys/mount.h>
43251881Speter#include <sys/vnode.h>
44251881Speter#include <sys/signalvar.h>
45251881Speter#include <sys/malloc.h>
46251881Speter#include <sys/dirent.h>
47251881Speter#include <sys/lockf.h>
48251881Speter#include <sys/libkern.h>
49251881Speter
50251881Speter#include <geom/geom.h>
51251881Speter#include <geom/geom_vfs.h>
52251881Speter
53251881Speter#include <vm/vm.h>
54251881Speter#include <vm/vm_extern.h>
55251881Speter
56251881Speter#include <machine/_inttypes.h>
57251881Speter#include "nandfs_mount.h"
58251881Speter#include "nandfs.h"
59251881Speter#include "nandfs_subr.h"
60251881Speter
61251881SpeterMALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount");
62251881SpeterMALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp");
63251881Speter
64251881Speteruma_zone_t nandfs_node_zone;
65251881Speter
66251881Spetervoid nandfs_bdflush(struct bufobj *bo, struct buf *bp);
67251881Speterint nandfs_bufsync(struct bufobj *bo, int waitfor);
68251881Speter
69251881Speterstruct buf_ops buf_ops_nandfs = {
70251881Speter	.bop_name	=	"buf_ops_nandfs",
71251881Speter	.bop_write	=	bufwrite,
72251881Speter	.bop_strategy	=	bufstrategy,
73251881Speter	.bop_sync	=	nandfs_bufsync,
74251881Speter	.bop_bdflush	=	nandfs_bdflush,
75251881Speter};
76251881Speter
77251881Speterint
78251881Speternandfs_bufsync(struct bufobj *bo, int waitfor)
79251881Speter{
80251881Speter	struct vnode *vp;
81251881Speter	int error = 0;
82251881Speter
83251881Speter	vp = bo->__bo_vnode;
84251881Speter
85251881Speter	ASSERT_VOP_LOCKED(vp, __func__);
86251881Speter	error = nandfs_sync_file(vp);
87251881Speter	if (error)
88251881Speter		nandfs_warning("%s: cannot flush buffers err:%d\n",
89251881Speter		    __func__, error);
90251881Speter
91251881Speter	return (error);
92251881Speter}
93251881Speter
94251881Spetervoid
95251881Speternandfs_bdflush(bo, bp)
96251881Speter	struct bufobj *bo;
97251881Speter	struct buf *bp;
98251881Speter{
99251881Speter	struct vnode *vp;
100251881Speter	int error;
101251881Speter
102251881Speter	if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10))
103251881Speter		return;
104251881Speter
105251881Speter	vp = bp->b_vp;
106251881Speter	if (NANDFS_SYS_NODE(VTON(vp)->nn_ino))
107251881Speter		return;
108251881Speter
109251881Speter	if (NANDFS_IS_INDIRECT(bp))
110251881Speter		return;
111251881Speter
112251881Speter	error = nandfs_sync_file(vp);
113251881Speter	if (error)
114251881Speter		nandfs_warning("%s: cannot flush buffers err:%d\n",
115251881Speter		    __func__, error);
116251881Speter}
117251881Speter
118251881Speterint
119251881Speternandfs_init(struct vfsconf *vfsp)
120251881Speter{
121251881Speter
122251881Speter	nandfs_node_zone = uma_zcreate("nandfs node zone",
123251881Speter	    sizeof(struct nandfs_node), NULL, NULL, NULL, NULL, 0, 0);
124251881Speter
125251881Speter	return (0);
126251881Speter}
127251881Speter
128251881Speterint
129251881Speternandfs_uninit(struct vfsconf *vfsp)
130251881Speter{
131251881Speter
132251881Speter	uma_zdestroy(nandfs_node_zone);
133251881Speter	return (0);
134251881Speter}
135251881Speter
136251881Speter/* Basic calculators */
137251881Speteruint64_t
138251881Speternandfs_get_segnum_of_block(struct nandfs_device *nandfsdev,
139251881Speter    nandfs_daddr_t blocknr)
140251881Speter{
141251881Speter	uint64_t segnum, blks_per_seg;
142251881Speter
143251881Speter	MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block);
144251881Speter
145251881Speter	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
146251881Speter
147251881Speter	segnum = blocknr / blks_per_seg;
148251881Speter	segnum -= nandfsdev->nd_fsdata.f_first_data_block / blks_per_seg;
149251881Speter
150251881Speter	DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__,
151251881Speter	    blocknr, segnum));
152251881Speter
153251881Speter	return (segnum);
154251881Speter}
155251881Speter
156251881Spetervoid
157251881Speternandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum,
158251881Speter    uint64_t *seg_start, uint64_t *seg_end)
159251881Speter{
160251881Speter	uint64_t blks_per_seg;
161251881Speter
162251881Speter	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
163251881Speter	*seg_start = nandfsdev->nd_fsdata.f_first_data_block +
164251881Speter	    blks_per_seg * segnum;
165251881Speter	if (seg_end != NULL)
166251881Speter		*seg_end = *seg_start + blks_per_seg -1;
167251881Speter}
168251881Speter
169251881Spetervoid nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev,
170251881Speter    struct nandfs_mdt *mdt, int entry_size)
171251881Speter{
172251881Speter	uint32_t blocksize = nandfsdev->nd_blocksize;
173251881Speter
174251881Speter	mdt->entries_per_group = blocksize * 8;
175251881Speter	mdt->entries_per_block = blocksize / entry_size;
176251881Speter
177251881Speter	mdt->blocks_per_group =
178251881Speter	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
179251881Speter	mdt->groups_per_desc_block =
180251881Speter	    blocksize / sizeof(struct nandfs_block_group_desc);
181251881Speter	mdt->blocks_per_desc_block =
182251881Speter	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
183251881Speter}
184251881Speter
185251881Speterint
186251881Speternandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr,
187251881Speter    struct ucred *cred, int flags, struct buf **bpp)
188251881Speter{
189251881Speter	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
190251881Speter	int error;
191251881Speter
192251881Speter	DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__,
193251881Speter	    blocknr * blk2dev, nandfsdev->nd_devvp));
194251881Speter	error = bread(nandfsdev->nd_devvp, blocknr * blk2dev,
195251881Speter	    nandfsdev->nd_blocksize, NOCRED, bpp);
196251881Speter	if (error)
197251881Speter		nandfs_error("%s: cannot read from device - blk:%jx\n",
198251881Speter		    __func__, blocknr);
199251881Speter	return (error);
200251881Speter}
201251881Speter
202251881Speter/* Read on a node */
203251881Speterint
204251881Speternandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr,
205251881Speter    struct ucred *cred, int flags, struct buf **bpp)
206251881Speter{
207251881Speter	nandfs_daddr_t vblk;
208251881Speter	int error;
209251881Speter
210251881Speter	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
211251881Speter	    blocknr));
212251881Speter
213251881Speter	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
214251881Speter	    cred, bpp);
215251881Speter
216251881Speter	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
217251881Speter	    NTOV(node), blocknr, error));
218251881Speter
219251881Speter	if (!nandfs_vblk_get(*bpp) &&
220251881Speter	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
221251881Speter		nandfs_bmap_lookup(node, blocknr, &vblk);
222251881Speter		nandfs_vblk_set(*bpp, vblk);
223251881Speter	}
224251881Speter	return (error);
225251881Speter}
226251881Speter
227251881Speterint
228251881Speternandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
229251881Speter    struct ucred *cred, int flags, struct buf **bpp)
230251881Speter{
231251881Speter	nandfs_daddr_t vblk;
232251881Speter	int error;
233251881Speter
234251881Speter	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
235251881Speter	    blocknr));
236251881Speter
237251881Speter	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
238251881Speter	    cred, bpp);
239251881Speter
240251881Speter	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
241251881Speter	    NTOV(node), blocknr, error));
242251881Speter
243251881Speter	if (!nandfs_vblk_get(*bpp) &&
244251881Speter	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
245251881Speter		nandfs_bmap_lookup(node, blocknr, &vblk);
246251881Speter		nandfs_vblk_set(*bpp, vblk);
247251881Speter	}
248251881Speter
249251881Speter	return (error);
250251881Speter}
251251881Speter
252251881Speterint
253251881Speternandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk)
254251881Speter{
255251881Speter	int error;
256251881Speter
257251881Speter	if (!NANDFS_SYS_NODE(node->nn_ino))
258251881Speter		NANDFS_WRITEASSERT(node->nn_nandfsdev);
259251881Speter
260251881Speter	error = nandfs_vblock_end(node->nn_nandfsdev, vblk);
261251881Speter	if (error) {
262251881Speter		nandfs_error("%s: ending vblk: %jx failed\n",
263251881Speter		    __func__, (uintmax_t)vblk);
264251881Speter		return (error);
265251881Speter	}
266251881Speter	node->nn_inode.i_blocks--;
267251881Speter
268251881Speter	return (0);
269251881Speter}
270251881Speter
271289180Speterint
272289180Speternandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr,
273289180Speter    struct ucred *cred, int flags, struct buf **bpp)
274289180Speter{
275289180Speter	int error;
276289180Speter
277289180Speter	ASSERT_VOP_LOCKED(NTOV(node), __func__);
278289180Speter	if (!NANDFS_SYS_NODE(node->nn_ino))
279289180Speter		NANDFS_WRITEASSERT(node->nn_nandfsdev);
280289180Speter
281289180Speter	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
282289180Speter	    blocknr));
283289180Speter
284289180Speter	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
285289180Speter	    0, 0, 0);
286289180Speter
287289180Speter	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
288289180Speter	    NTOV(node), blocknr));
289289180Speter
290289180Speter	if (*bpp) {
291289180Speter		vfs_bio_clrbuf(*bpp);
292289180Speter		(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
293289180Speter		error = nandfs_bmap_insert_block(node, blocknr, *bpp);
294289180Speter		if (error) {
295289180Speter			nandfs_warning("%s: failed bmap insert node:%p"
296289180Speter			    " blk:%jx\n", __func__, node, blocknr);
297289180Speter			brelse(*bpp);
298289180Speter			return (error);
299289180Speter		}
300289180Speter		node->nn_inode.i_blocks++;
301289180Speter
302289180Speter		return (0);
303289180Speter	}
304289180Speter
305289180Speter	return (-1);
306289180Speter}
307289180Speter
308289180Speterint
309289180Speternandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
310289180Speter    struct ucred *cred, int flags, struct buf **bpp)
311289180Speter{
312289180Speter	struct nandfs_device *fsdev;
313289180Speter	nandfs_daddr_t vblk;
314289180Speter	int error;
315289180Speter
316289180Speter	ASSERT_VOP_LOCKED(NTOV(node), __func__);
317289180Speter	NANDFS_WRITEASSERT(node->nn_nandfsdev);
318289180Speter
319289180Speter	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
320289180Speter	    blocknr));
321289180Speter
322289180Speter	fsdev = node->nn_nandfsdev;
323289180Speter
324289180Speter	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
325289180Speter	    0, 0, 0);
326289180Speter
327289180Speter	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
328289180Speter	    NTOV(node), blocknr));
329289180Speter
330289180Speter	memset((*bpp)->b_data, 0, fsdev->nd_blocksize);
331289180Speter
332289180Speter	vfs_bio_clrbuf(*bpp);
333289180Speter	(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
334289180Speter
335289180Speter	nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED);
336289180Speter
337289180Speter	if (node->nn_ino != NANDFS_DAT_INO) {
338289180Speter		error = nandfs_vblock_alloc(fsdev, &vblk);
339289180Speter		if (error) {
340289180Speter			nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED);
341289180Speter			brelse(*bpp);
342289180Speter			return (error);
343289180Speter		}
344289180Speter	} else
345289180Speter		vblk = fsdev->nd_fakevblk++;
346289180Speter
347289180Speter	nandfs_vblk_set(*bpp, vblk);
348289180Speter
349289180Speter	nandfs_bmap_insert_block(node, blocknr, *bpp);
350289180Speter	return (0);
351289180Speter}
352289180Speter
353289180Speter/* Translate index to a file block number and an entry */
354289180Spetervoid
355289180Speternandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
356289180Speter    nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
357289180Speter{
358289180Speter	uint64_t blknr;
359289180Speter	uint64_t group, group_offset, blocknr_in_group;
360289180Speter	uint64_t desc_block, desc_offset;
361289180Speter
362289180Speter	/* Calculate our offset in the file */
363289180Speter	group = index / mdt->entries_per_group;
364289180Speter	group_offset = index % mdt->entries_per_group;
365289180Speter	desc_block = group / mdt->groups_per_desc_block;
366289180Speter	desc_offset = group % mdt->groups_per_desc_block;
367289180Speter	blocknr_in_group = group_offset / mdt->entries_per_block;
368289180Speter
369362181Sdim	/* To descgroup offset */
370362181Sdim	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
371362181Sdim
372362181Sdim	/* To group offset */
373362181Sdim	blknr += desc_offset * mdt->blocks_per_group;
374362181Sdim
375362181Sdim	/* To actual file block */
376362181Sdim	blknr += 1 + blocknr_in_group;
377362181Sdim
378362181Sdim	*blocknr = blknr;
379362181Sdim	*entry_in_block = group_offset % mdt->entries_per_block;
380362181Sdim}
381362181Sdim
382362181Sdimvoid
383362181Sdimnandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index,
384362181Sdim    uint64_t *desc, uint64_t *bitmap, nandfs_lbn_t *blocknr,
385362181Sdim    uint32_t *entry_in_block)
386362181Sdim{
387362181Sdim	uint64_t blknr;
388362181Sdim	uint64_t group, group_offset, blocknr_in_group;
389362181Sdim	uint64_t desc_block, desc_offset;
390362181Sdim
391362181Sdim	/* Calculate our offset in the file */
392362181Sdim	group = index / mdt->entries_per_group;
393362181Sdim	group_offset = index % mdt->entries_per_group;
394362181Sdim	desc_block = group / mdt->groups_per_desc_block;
395362181Sdim	desc_offset = group % mdt->groups_per_desc_block;
396362181Sdim	blocknr_in_group = group_offset / mdt->entries_per_block;
397362181Sdim
398362181Sdim	/* To descgroup offset */
399362181Sdim	*desc = desc_block * mdt->blocks_per_desc_block;
400362181Sdim	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
401362181Sdim
402362181Sdim	/* To group offset */
403362181Sdim	blknr += desc_offset * mdt->blocks_per_group;
404362181Sdim	*bitmap = blknr;
405362181Sdim
406362181Sdim	/* To actual file block */
407362181Sdim	blknr += 1 + blocknr_in_group;
408362181Sdim
409362181Sdim	*blocknr = blknr;
410362181Sdim	*entry_in_block = group_offset % mdt->entries_per_block;
411362181Sdim
412362181Sdim	DPRINTF(ALLOC,
413362181Sdim	    ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n",
414362181Sdim	    __func__, (uintmax_t)*desc, (uintmax_t)*bitmap,
415362181Sdim	    (uintmax_t)*blocknr, *entry_in_block));
416362181Sdim}
417362181Sdim
418362181Sdimint
419362181Sdimnandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr,
420362181Sdim    nandfs_daddr_t *pblocknr)
421362181Sdim{
422362181Sdim	struct nandfs_node *dat_node;
423362181Sdim	struct nandfs_dat_entry *entry;
424362181Sdim	struct buf *bp;
425362181Sdim	nandfs_lbn_t ldatblknr;
426362181Sdim	uint32_t entry_in_block;
427362181Sdim	int locked, error;
428362181Sdim
429362181Sdim	if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) {
430362181Sdim		*pblocknr = vblocknr;
431362181Sdim		return (0);
432362181Sdim	}
433362181Sdim
434362181Sdim	/* only translate valid vblocknrs */
435362181Sdim	if (vblocknr == 0)
436362181Sdim		return (0);
437362181Sdim
438362181Sdim	dat_node = node->nn_nandfsdev->nd_dat_node;
439362181Sdim	nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr,
440362181Sdim	    &entry_in_block);
441362181Sdim
442362181Sdim	locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node));
443362181Sdim	if (!locked)
444362181Sdim		VOP_LOCK(NTOV(dat_node), LK_SHARED);
445362181Sdim	error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp);
446362181Sdim	if (error) {
447		DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n",
448		    (uintmax_t)ldatblknr));
449		brelse(bp);
450		VOP_UNLOCK(NTOV(dat_node), 0);
451		return (error);
452	}
453
454	/* Get our translation */
455	entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block;
456	DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n",
457	    entry, bp->b_data, entry_in_block))
458	DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n",
459	    (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr,
460	    (uintmax_t)entry->de_start, (uintmax_t)entry->de_end));
461
462	*pblocknr = entry->de_blocknr;
463	brelse(bp);
464	if (!locked)
465		VOP_UNLOCK(NTOV(dat_node), 0);
466
467	MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block ||
468	    *pblocknr == 0);
469
470	return (0);
471}
472
473int
474nandfs_segsum_valid(struct nandfs_segment_summary *segsum)
475{
476
477	return (segsum->ss_magic == NANDFS_SEGSUM_MAGIC);
478}
479
480int
481nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr,
482    struct nandfs_segment_summary *segsum)
483{
484	struct buf *bp;
485	int error;
486
487	DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n",
488	    (uintmax_t)blocknr));
489
490	error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp);
491	if (error)
492		return (error);
493
494	memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary));
495	brelse(bp);
496
497	if (!nandfs_segsum_valid(segsum)) {
498		DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__,
499		    blocknr));
500		return (EINVAL);
501	}
502
503	return (error);
504}
505
506static int
507nandfs_load_super_root(struct nandfs_device *nandfsdev,
508    struct nandfs_segment_summary *segsum, uint64_t pseg)
509{
510	struct nandfs_super_root super_root;
511	struct buf *bp;
512	uint64_t blocknr;
513	uint32_t super_root_crc, comp_crc;
514	int off, error;
515
516	/* Check if there is a superroot */
517	if ((segsum->ss_flags & NANDFS_SS_SR) == 0) {
518		DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__,
519		    pseg));
520		return (ENOENT);
521	}
522
523	/* Get our super root, located at the end of the pseg */
524	blocknr = pseg + segsum->ss_nblocks - 1;
525	DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr));
526
527	error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp);
528	if (error)
529		return (error);
530
531	memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root));
532	brelse(bp);
533
534	/* Check super root CRC */
535	super_root_crc = super_root.sr_sum;
536	off = sizeof(super_root.sr_sum);
537	comp_crc = crc32((uint8_t *)&super_root + off,
538	    NANDFS_SR_BYTES - off);
539
540	if (super_root_crc != comp_crc) {
541		DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n",
542		    __func__, super_root_crc, comp_crc));
543		return (EINVAL);
544	}
545
546	nandfsdev->nd_super_root = super_root;
547	DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__));
548
549	return (0);
550}
551
552/*
553 * Search for the last super root recorded.
554 */
555int
556nandfs_search_super_root(struct nandfs_device *nandfsdev)
557{
558	struct nandfs_super_block *super;
559	struct nandfs_segment_summary segsum;
560	uint64_t seg_start, seg_end, cno, seq, create, pseg;
561	uint64_t segnum;
562	int error, found;
563
564	error = found = 0;
565
566	/* Search for last super root */
567	pseg = nandfsdev->nd_super.s_last_pseg;
568	segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
569
570	cno = nandfsdev->nd_super.s_last_cno;
571	create = seq = 0;
572	DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__,
573	    (uintmax_t)pseg));
574
575	for (;;) {
576		error = nandfs_load_segsum(nandfsdev, pseg, &segsum);
577		if (error)
578			break;
579
580		if (segsum.ss_seq < seq || segsum.ss_create < create)
581			break;
582
583		/* Try to load super root */
584		if (segsum.ss_flags & NANDFS_SS_SR) {
585			error = nandfs_load_super_root(nandfsdev, &segsum, pseg);
586			if (error)
587				break;	/* confused */
588			found = 1;
589
590			super = &nandfsdev->nd_super;
591			nandfsdev->nd_last_segsum = segsum;
592			super->s_last_pseg = pseg;
593			super->s_last_cno = cno++;
594			super->s_last_seq = segsum.ss_seq;
595			super->s_state = NANDFS_VALID_FS;
596			seq = segsum.ss_seq;
597			create = segsum.ss_create;
598		} else {
599			seq = segsum.ss_seq;
600			create = segsum.ss_create;
601		}
602
603		/* Calculate next partial segment location */
604		pseg += segsum.ss_nblocks;
605		DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__,
606		    (uintmax_t)pseg));
607
608		/* Did we reach the end of the segment? if so, go to the next */
609		nandfs_get_segment_range(nandfsdev, segnum, &seg_start,
610		    &seg_end);
611		if (pseg >= seg_end) {
612			pseg = segsum.ss_next;
613			DPRINTF(VOLUMES,
614			    (" partial seg oor next is %jx[%jx - %jx]\n",
615			    (uintmax_t)pseg, (uintmax_t)seg_start,
616			    (uintmax_t)seg_end));
617		}
618		segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
619	}
620
621	if (error && !found)
622		return (error);
623
624	return (0);
625}
626
627int
628nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp,
629    uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep)
630{
631	struct nandfs_node *node;
632	struct vnode *nvp;
633	struct mount *mp;
634	int error;
635
636	*nodep = NULL;
637
638	/* Associate with mountpoint if present */
639	if (nmp) {
640		mp = nmp->nm_vfs_mountp;
641		error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp);
642		if (error) {
643			return (error);
644		}
645	} else {
646		mp = NULL;
647		error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops,
648		    &nvp);
649		if (error) {
650			return (error);
651		}
652	}
653
654	if (mp)
655		NANDFS_WRITELOCK(nandfsdev);
656
657	DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n",
658	    __func__, (uintmax_t)ino, nvp));
659	/* Lock node */
660	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);
661
662	if (mp) {
663		error = insmntque(nvp, mp);
664		if (error != 0) {
665			*nodep = NULL;
666			return (error);
667		}
668	}
669
670	node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO);
671
672	/* Crosslink */
673	node->nn_vnode = nvp;
674	nvp->v_bufobj.bo_ops = &buf_ops_nandfs;
675	node->nn_nmp = nmp;
676	node->nn_nandfsdev = nandfsdev;
677	nvp->v_data = node;
678
679	/* Initiase NANDFS node */
680	node->nn_ino = ino;
681	if (inode != NULL)
682		node->nn_inode = *inode;
683
684	nandfs_vinit(nvp, ino);
685
686	/* Return node */
687	*nodep = node;
688	DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n",
689	    __func__, (uintmax_t)ino, nvp, *nodep));
690
691	return (0);
692}
693
694int
695nandfs_get_node(struct nandfsmount *nmp, uint64_t ino,
696    struct nandfs_node **nodep)
697{
698	struct nandfs_device *nandfsdev;
699	struct nandfs_inode inode, *entry;
700	struct vnode *nvp, *vpp;
701	struct thread *td;
702	struct buf *bp;
703	uint64_t ivblocknr;
704	uint32_t entry_in_block;
705	int error;
706
707	/* Look up node in hash table */
708	td = curthread;
709	*nodep = NULL;
710
711	if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) {
712		printf("nandfs_get_node: system ino %"PRIu64" not in mount "
713		    "point!\n", ino);
714		return (ENOENT);
715	}
716
717	error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp,
718	    NULL, NULL);
719	if (error)
720		return (error);
721
722	if (nvp != NULL) {
723		*nodep = (struct nandfs_node *)nvp->v_data;
724		return (0);
725	}
726
727	/* Look up inode structure in mountpoints ifile */
728	nandfsdev = nmp->nm_nandfsdev;
729	nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr,
730	    &entry_in_block);
731
732	VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED);
733	error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp);
734	if (error) {
735		brelse(bp);
736		VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
737		return (ENOENT);
738	}
739
740	/* Get inode entry */
741	entry = (struct nandfs_inode *) bp->b_data + entry_in_block;
742	memcpy(&inode, entry, sizeof(struct nandfs_inode));
743	brelse(bp);
744	VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
745
746	/* Get node */
747	error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode, nodep);
748	if (error) {
749		*nodep = NULL;
750		return (error);
751	}
752
753	nvp = (*nodep)->nn_vnode;
754	error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL);
755	if (error) {
756		*nodep = NULL;
757		return (error);
758	}
759
760	return (error);
761}
762
763void
764nandfs_dispose_node(struct nandfs_node **nodep)
765{
766	struct nandfs_node *node;
767	struct vnode *vp;
768
769	/* Protect against rogue values */
770	node = *nodep;
771	if (!node) {
772		return;
773	}
774	DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep));
775
776	vp = NTOV(node);
777	vp->v_data = NULL;
778
779	/* Free our associated memory */
780	uma_zfree(nandfs_node_zone, node);
781
782	*nodep = NULL;
783}
784
785int
786nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
787    uint64_t *ino, int *found, uint64_t *off)
788{
789	struct nandfs_node *dir_node = VTON(dvp);
790	struct nandfs_dir_entry	*ndirent;
791	struct buf *bp;
792	uint64_t file_size, diroffset, blkoff;
793	uint64_t blocknr;
794	uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
795	uint8_t *pos, name_len;
796	int error;
797
798	*found = 0;
799
800	DPRINTF(VNCALL, ("%s: %s file\n", __func__, name));
801	if (dvp->v_type != VDIR) {
802		return (ENOTDIR);
803	}
804
805	/* Get directory filesize */
806	file_size = dir_node->nn_inode.i_size;
807
808	/* Walk the directory */
809	diroffset = 0;
810	blocknr = 0;
811	blkoff = 0;
812	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
813	if (error) {
814		brelse(bp);
815		return (EIO);
816	}
817
818	while (diroffset < file_size) {
819		if (blkoff >= blocksize) {
820			blkoff = 0; blocknr++;
821			brelse(bp);
822			error = nandfs_bread(dir_node, blocknr, NOCRED, 0,
823			    &bp);
824			if (error) {
825				brelse(bp);
826				return (EIO);
827			}
828		}
829
830		/* Read in one dirent */
831		pos = (uint8_t *) bp->b_data + blkoff;
832		ndirent = (struct nandfs_dir_entry *) pos;
833		name_len = ndirent->name_len;
834
835		if ((name_len == namelen) &&
836		    (strncmp(name, ndirent->name, name_len) == 0) &&
837		    (ndirent->inode != 0)) {
838			*ino = ndirent->inode;
839			*off = diroffset;
840			DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n",
841			    name_len, ndirent->name, *ino));
842			*found = 1;
843			break;
844		}
845
846		/* Advance */
847		diroffset += ndirent->rec_len;
848		blkoff += ndirent->rec_len;
849	}
850	brelse(bp);
851
852	return (error);
853}
854
855int
856nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo)
857{
858	struct nandfs_device *fsdev;
859
860	fsdev = nmp->nm_nandfsdev;
861
862	memcpy(&fsinfo->fs_fsdata, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
863	memcpy(&fsinfo->fs_super, &fsdev->nd_super, sizeof(fsdev->nd_super));
864	snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev),
865	    "%s", nmp->nm_vfs_mountp->mnt_stat.f_mntfromname);
866
867	return (0);
868}
869
870void
871nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode)
872{
873	struct timespec ts;
874
875	vfs_timestamp(&ts);
876
877	inode->i_blocks = 0;
878	inode->i_size = 0;
879	inode->i_ctime = ts.tv_sec;
880	inode->i_ctime_nsec = ts.tv_nsec;
881	inode->i_mtime = ts.tv_sec;
882	inode->i_mtime_nsec = ts.tv_nsec;
883	inode->i_mode = mode;
884	inode->i_links_count = 1;
885	if (S_ISDIR(mode))
886		inode->i_links_count = 2;
887	inode->i_flags = 0;
888
889	inode->i_special = 0;
890	memset(inode->i_db, 0, sizeof(inode->i_db));
891	memset(inode->i_ib, 0, sizeof(inode->i_ib));
892}
893
894void
895nandfs_inode_destroy(struct nandfs_inode *inode)
896{
897
898	MPASS(inode->i_blocks == 0);
899	bzero(inode, sizeof(*inode));
900}
901
902int
903nandfs_fs_full(struct nandfs_device *nffsdev)
904{
905	uint64_t space, bps;
906
907	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
908	space = (nffsdev->nd_clean_segs - 1) * bps;
909
910	DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__,
911	    (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space));
912
913	if (nffsdev->nd_dirty_bufs + (10 * bps) >= space)
914		return (1);
915
916	return (0);
917}
918
919static int
920_nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force)
921{
922	struct nandfs_device *nffsdev;
923	struct nandfs_node *node;
924	uint64_t ino, bps;
925
926	if (NANDFS_ISGATHERED(bp)) {
927		bqrelse(bp);
928		return (0);
929	}
930	if ((bp->b_flags & (B_MANAGED | B_DELWRI)) == (B_MANAGED | B_DELWRI)) {
931		bqrelse(bp);
932		return (0);
933	}
934
935	node = VTON(bp->b_vp);
936	nffsdev = node->nn_nandfsdev;
937	DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp));
938	ino = node->nn_ino;
939
940	if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) {
941		brelse(bp);
942		return (ENOSPC);
943	}
944
945	bp->b_flags |= B_MANAGED;
946	bdwrite(bp);
947
948	nandfs_dirty_bufs_increment(nffsdev);
949
950	KASSERT((bp->b_vp), ("vp missing for bp"));
951	KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO),
952	    ("bp vblk is 0"));
953
954	/*
955	 * To maintain consistency of FS we need to force making
956	 * meta buffers dirty, even if free space is low.
957	 */
958	if (dirty_meta && ino != NANDFS_GC_INO)
959		nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
960
961	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
962
963	if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) {
964		mtx_lock(&nffsdev->nd_sync_mtx);
965		if (nffsdev->nd_syncing == 0) {
966			DPRINTF(SYNC, ("%s: wakeup gc\n", __func__));
967			nffsdev->nd_syncing = 1;
968			wakeup(&nffsdev->nd_syncing);
969		}
970		mtx_unlock(&nffsdev->nd_sync_mtx);
971	}
972
973	return (0);
974}
975
/*
 * Mark 'bp' dirty and also dirty the block map blocks that cover it.
 * Returns the result of _nandfs_dirty_buf().
 */
int
nandfs_dirty_buf(struct buf *bp, int force)
{
	int rv;

	rv = _nandfs_dirty_buf(bp, 1, force);
	return (rv);
}
982
/*
 * Mark 'bp' dirty without touching the block map blocks that cover it.
 * Returns the result of _nandfs_dirty_buf().
 */
int
nandfs_dirty_buf_meta(struct buf *bp, int force)
{
	int rv;

	rv = _nandfs_dirty_buf(bp, 0, force);
	return (rv);
}
989
990void
991nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp)
992{
993
994	BUF_ASSERT_HELD(bp);
995
996	if (bp->b_flags & B_DELWRI) {
997		bp->b_flags &= ~(B_DELWRI|B_MANAGED);
998		nandfs_dirty_bufs_decrement(nffsdev);
999	}
1000	/*
1001	 * Since it is now being written, we can clear its deferred write flag.
1002	 */
1003	bp->b_flags &= ~B_DEFERRED;
1004
1005	brelse(bp);
1006}
1007
1008void
1009nandfs_undirty_buf(struct buf *bp)
1010{
1011	struct nandfs_node *node;
1012
1013	node = VTON(bp->b_vp);
1014
1015	nandfs_undirty_buf_fsdev(node->nn_nandfsdev, bp);
1016}
1017
1018void
1019nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr)
1020{
1021
1022	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
1023	*vblk = blocknr;
1024}
1025
1026nandfs_daddr_t
1027nandfs_vblk_get(struct buf *bp)
1028{
1029
1030	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
1031	return (*vblk);
1032}
1033
1034void
1035nandfs_buf_set(struct buf *bp, uint32_t bits)
1036{
1037	uintptr_t flags;
1038
1039	flags = (uintptr_t)bp->b_fsprivate3;
1040	flags |= (uintptr_t)bits;
1041	bp->b_fsprivate3 = (void *)flags;
1042}
1043
1044void
1045nandfs_buf_clear(struct buf *bp, uint32_t bits)
1046{
1047	uintptr_t flags;
1048
1049	flags = (uintptr_t)bp->b_fsprivate3;
1050	flags &= ~(uintptr_t)bits;
1051	bp->b_fsprivate3 = (void *)flags;
1052}
1053
1054int
1055nandfs_buf_check(struct buf *bp, uint32_t bits)
1056{
1057	uintptr_t flags;
1058
1059	flags = (uintptr_t)bp->b_fsprivate3;
1060	if (flags & bits)
1061		return (1);
1062	return (0);
1063}
1064
1065int
1066nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size)
1067{
1068	struct buf *bp;
1069	int read_size, error, i;
1070
1071	DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n",
1072	    __func__, offset, size));
1073
1074	MPASS(size % fsdev->nd_erasesize == 0);
1075
1076	if (fsdev->nd_is_nand) {
1077		error = g_delete_data(fsdev->nd_gconsumer, offset, size);
1078		return (error);
1079	}
1080
1081	if (size > MAXBSIZE)
1082		read_size = MAXBSIZE;
1083	else
1084		read_size = size;
1085
1086	error = 0;
1087	for (i = 0; i < size / MAXBSIZE; i++) {
1088		error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
1089		    read_size, NOCRED, &bp);
1090		if (error) {
1091			brelse(bp);
1092			return (error);
1093		}
1094		memset(bp->b_data, 0xff, read_size);
1095		error = bwrite(bp);
1096		if (error) {
1097			nandfs_error("%s: err:%d from bwrite\n",
1098			    __func__, error);
1099			return (error);
1100		}
1101	}
1102
1103	return (error);
1104}
1105
1106int
1107nandfs_vop_islocked(struct vnode *vp)
1108{
1109	int islocked;
1110
1111	islocked = VOP_ISLOCKED(vp);
1112	return (islocked == LK_EXCLUSIVE || islocked == LK_SHARED);
1113}
1114
1115nandfs_daddr_t
1116nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block)
1117{
1118
1119	return (btodb(block * fsdev->nd_blocksize));
1120}
1121