1/*-
2 * Copyright (c) 2010-2012 Semihalf
3 * Copyright (c) 2008, 2009 Reinoud Zandijk
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/namei.h>
35#include <sys/resourcevar.h>
36#include <sys/kernel.h>
37#include <sys/file.h>
38#include <sys/stat.h>
39#include <sys/buf.h>
40#include <sys/bio.h>
41#include <sys/proc.h>
42#include <sys/mount.h>
43#include <sys/vnode.h>
44#include <sys/signalvar.h>
45#include <sys/malloc.h>
46#include <sys/dirent.h>
47#include <sys/lockf.h>
48#include <sys/libkern.h>
49
50#include <geom/geom.h>
51#include <geom/geom_vfs.h>
52
53#include <vm/vm.h>
54#include <vm/vm_extern.h>
55
56#include <machine/_inttypes.h>
57#include "nandfs_mount.h"
58#include "nandfs.h"
59#include "nandfs_subr.h"
60
/* malloc(9) types declared by this file ("NANDFS mount" / "NANDFS tmp"). */
MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount");
MALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp");

/* UMA zone backing struct nandfs_node; created in nandfs_init(). */
uma_zone_t nandfs_node_zone;

/* Buffer-object callbacks referenced by buf_ops_nandfs; defined below. */
void nandfs_bdflush(struct bufobj *bo, struct buf *bp);
int nandfs_bufsync(struct bufobj *bo, int waitfor);

/*
 * Buffer operations vector.  nandfs_get_node_raw() installs it on the
 * bufobj of every vnode it creates; write and strategy use the generic
 * bufwrite/bufstrategy, sync and bdflush are NANDFS specific.
 */
struct buf_ops buf_ops_nandfs = {
	.bop_name	=	"buf_ops_nandfs",
	.bop_write	=	bufwrite,
	.bop_strategy	=	bufstrategy,
	.bop_sync	=	nandfs_bufsync,
	.bop_bdflush	=	nandfs_bdflush,
};
76
77int
78nandfs_bufsync(struct bufobj *bo, int waitfor)
79{
80	struct vnode *vp;
81	int error = 0;
82
83	vp = bo->__bo_vnode;
84
85	ASSERT_VOP_LOCKED(vp, __func__);
86	error = nandfs_sync_file(vp);
87	if (error)
88		nandfs_warning("%s: cannot flush buffers err:%d\n",
89		    __func__, error);
90
91	return (error);
92}
93
94void
95nandfs_bdflush(bo, bp)
96	struct bufobj *bo;
97	struct buf *bp;
98{
99	struct vnode *vp;
100	int error;
101
102	if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10))
103		return;
104
105	vp = bp->b_vp;
106	if (NANDFS_SYS_NODE(VTON(vp)->nn_ino))
107		return;
108
109	if (NANDFS_IS_INDIRECT(bp))
110		return;
111
112	error = nandfs_sync_file(vp);
113	if (error)
114		nandfs_warning("%s: cannot flush buffers err:%d\n",
115		    __func__, error);
116}
117
118int
119nandfs_init(struct vfsconf *vfsp)
120{
121
122	nandfs_node_zone = uma_zcreate("nandfs node zone",
123	    sizeof(struct nandfs_node), NULL, NULL, NULL, NULL, 0, 0);
124
125	return (0);
126}
127
128int
129nandfs_uninit(struct vfsconf *vfsp)
130{
131
132	uma_zdestroy(nandfs_node_zone);
133	return (0);
134}
135
136/* Basic calculators */
137uint64_t
138nandfs_get_segnum_of_block(struct nandfs_device *nandfsdev,
139    nandfs_daddr_t blocknr)
140{
141	uint64_t segnum, blks_per_seg;
142
143	MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block);
144
145	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
146
147	segnum = blocknr / blks_per_seg;
148	segnum -= nandfsdev->nd_fsdata.f_first_data_block / blks_per_seg;
149
150	DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__,
151	    blocknr, segnum));
152
153	return (segnum);
154}
155
156void
157nandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum,
158    uint64_t *seg_start, uint64_t *seg_end)
159{
160	uint64_t blks_per_seg;
161
162	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
163	*seg_start = nandfsdev->nd_fsdata.f_first_data_block +
164	    blks_per_seg * segnum;
165	if (seg_end != NULL)
166		*seg_end = *seg_start + blks_per_seg -1;
167}
168
169void nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev,
170    struct nandfs_mdt *mdt, int entry_size)
171{
172	uint32_t blocksize = nandfsdev->nd_blocksize;
173
174	mdt->entries_per_group = blocksize * 8;
175	mdt->entries_per_block = blocksize / entry_size;
176
177	mdt->blocks_per_group =
178	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
179	mdt->groups_per_desc_block =
180	    blocksize / sizeof(struct nandfs_block_group_desc);
181	mdt->blocks_per_desc_block =
182	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
183}
184
185int
186nandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr,
187    struct ucred *cred, int flags, struct buf **bpp)
188{
189	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
190	int error;
191
192	DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__,
193	    blocknr * blk2dev, nandfsdev->nd_devvp));
194	error = bread(nandfsdev->nd_devvp, blocknr * blk2dev,
195	    nandfsdev->nd_blocksize, NOCRED, bpp);
196	if (error)
197		nandfs_error("%s: cannot read from device - blk:%jx\n",
198		    __func__, blocknr);
199	return (error);
200}
201
202/* Read on a node */
203int
204nandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr,
205    struct ucred *cred, int flags, struct buf **bpp)
206{
207	nandfs_daddr_t vblk;
208	int error;
209
210	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
211	    blocknr));
212
213	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
214	    cred, bpp);
215
216	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
217	    NTOV(node), blocknr, error));
218
219	if (!nandfs_vblk_get(*bpp) &&
220	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
221		nandfs_bmap_lookup(node, blocknr, &vblk);
222		nandfs_vblk_set(*bpp, vblk);
223	}
224	return (error);
225}
226
227int
228nandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
229    struct ucred *cred, int flags, struct buf **bpp)
230{
231	nandfs_daddr_t vblk;
232	int error;
233
234	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
235	    blocknr));
236
237	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
238	    cred, bpp);
239
240	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
241	    NTOV(node), blocknr, error));
242
243	if (!nandfs_vblk_get(*bpp) &&
244	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
245		nandfs_bmap_lookup(node, blocknr, &vblk);
246		nandfs_vblk_set(*bpp, vblk);
247	}
248
249	return (error);
250}
251
252int
253nandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk)
254{
255	int error;
256
257	if (!NANDFS_SYS_NODE(node->nn_ino))
258		NANDFS_WRITEASSERT(node->nn_nandfsdev);
259
260	error = nandfs_vblock_end(node->nn_nandfsdev, vblk);
261	if (error) {
262		nandfs_error("%s: ending vblk: %jx failed\n",
263		    __func__, (uintmax_t)vblk);
264		return (error);
265	}
266	node->nn_inode.i_blocks--;
267
268	return (0);
269}
270
271int
272nandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr,
273    struct ucred *cred, int flags, struct buf **bpp)
274{
275	int error;
276
277	ASSERT_VOP_LOCKED(NTOV(node), __func__);
278	if (!NANDFS_SYS_NODE(node->nn_ino))
279		NANDFS_WRITEASSERT(node->nn_nandfsdev);
280
281	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
282	    blocknr));
283
284	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
285	    0, 0, 0);
286
287	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
288	    NTOV(node), blocknr));
289
290	if (*bpp) {
291		vfs_bio_clrbuf(*bpp);
292		(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
293		error = nandfs_bmap_insert_block(node, blocknr, *bpp);
294		if (error) {
295			nandfs_warning("%s: failed bmap insert node:%p"
296			    " blk:%jx\n", __func__, node, blocknr);
297			brelse(*bpp);
298			return (error);
299		}
300		node->nn_inode.i_blocks++;
301
302		return (0);
303	}
304
305	return (-1);
306}
307
308int
309nandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
310    struct ucred *cred, int flags, struct buf **bpp)
311{
312	struct nandfs_device *fsdev;
313	nandfs_daddr_t vblk;
314	int error;
315
316	ASSERT_VOP_LOCKED(NTOV(node), __func__);
317	NANDFS_WRITEASSERT(node->nn_nandfsdev);
318
319	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
320	    blocknr));
321
322	fsdev = node->nn_nandfsdev;
323
324	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
325	    0, 0, 0);
326
327	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
328	    NTOV(node), blocknr));
329
330	memset((*bpp)->b_data, 0, fsdev->nd_blocksize);
331
332	vfs_bio_clrbuf(*bpp);
333	(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
334
335	nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED);
336
337	if (node->nn_ino != NANDFS_DAT_INO) {
338		error = nandfs_vblock_alloc(fsdev, &vblk);
339		if (error) {
340			nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED);
341			brelse(*bpp);
342			return (error);
343		}
344	} else
345		vblk = fsdev->nd_fakevblk++;
346
347	nandfs_vblk_set(*bpp, vblk);
348
349	nandfs_bmap_insert_block(node, blocknr, *bpp);
350	return (0);
351}
352
353/* Translate index to a file block number and an entry */
354void
355nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
356    nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
357{
358	uint64_t blknr;
359	uint64_t group, group_offset, blocknr_in_group;
360	uint64_t desc_block, desc_offset;
361
362	/* Calculate our offset in the file */
363	group = index / mdt->entries_per_group;
364	group_offset = index % mdt->entries_per_group;
365	desc_block = group / mdt->groups_per_desc_block;
366	desc_offset = group % mdt->groups_per_desc_block;
367	blocknr_in_group = group_offset / mdt->entries_per_block;
368
369	/* To descgroup offset */
370	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
371
372	/* To group offset */
373	blknr += desc_offset * mdt->blocks_per_group;
374
375	/* To actual file block */
376	blknr += 1 + blocknr_in_group;
377
378	*blocknr = blknr;
379	*entry_in_block = group_offset % mdt->entries_per_block;
380}
381
382void
383nandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index,
384    uint64_t *desc, uint64_t *bitmap, nandfs_lbn_t *blocknr,
385    uint32_t *entry_in_block)
386{
387	uint64_t blknr;
388	uint64_t group, group_offset, blocknr_in_group;
389	uint64_t desc_block, desc_offset;
390
391	/* Calculate our offset in the file */
392	group = index / mdt->entries_per_group;
393	group_offset = index % mdt->entries_per_group;
394	desc_block = group / mdt->groups_per_desc_block;
395	desc_offset = group % mdt->groups_per_desc_block;
396	blocknr_in_group = group_offset / mdt->entries_per_block;
397
398	/* To descgroup offset */
399	*desc = desc_block * mdt->blocks_per_desc_block;
400	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
401
402	/* To group offset */
403	blknr += desc_offset * mdt->blocks_per_group;
404	*bitmap = blknr;
405
406	/* To actual file block */
407	blknr += 1 + blocknr_in_group;
408
409	*blocknr = blknr;
410	*entry_in_block = group_offset % mdt->entries_per_block;
411
412	DPRINTF(ALLOC,
413	    ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n",
414	    __func__, (uintmax_t)*desc, (uintmax_t)*bitmap,
415	    (uintmax_t)*blocknr, *entry_in_block));
416}
417
418int
419nandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr,
420    nandfs_daddr_t *pblocknr)
421{
422	struct nandfs_node *dat_node;
423	struct nandfs_dat_entry *entry;
424	struct buf *bp;
425	nandfs_lbn_t ldatblknr;
426	uint32_t entry_in_block;
427	int locked, error;
428
429	if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) {
430		*pblocknr = vblocknr;
431		return (0);
432	}
433
434	/* only translate valid vblocknrs */
435	if (vblocknr == 0)
436		return (0);
437
438	dat_node = node->nn_nandfsdev->nd_dat_node;
439	nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr,
440	    &entry_in_block);
441
442	locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node));
443	if (!locked)
444		VOP_LOCK(NTOV(dat_node), LK_SHARED);
445	error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp);
446	if (error) {
447		DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n",
448		    (uintmax_t)ldatblknr));
449		brelse(bp);
450		VOP_UNLOCK(NTOV(dat_node), 0);
451		return (error);
452	}
453
454	/* Get our translation */
455	entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block;
456	DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n",
457	    entry, bp->b_data, entry_in_block))
458	DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n",
459	    (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr,
460	    (uintmax_t)entry->de_start, (uintmax_t)entry->de_end));
461
462	*pblocknr = entry->de_blocknr;
463	brelse(bp);
464	if (!locked)
465		VOP_UNLOCK(NTOV(dat_node), 0);
466
467	MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block ||
468	    *pblocknr == 0);
469
470	return (0);
471}
472
473int
474nandfs_segsum_valid(struct nandfs_segment_summary *segsum)
475{
476
477	return (segsum->ss_magic == NANDFS_SEGSUM_MAGIC);
478}
479
480int
481nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr,
482    struct nandfs_segment_summary *segsum)
483{
484	struct buf *bp;
485	int error;
486
487	DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n",
488	    (uintmax_t)blocknr));
489
490	error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp);
491	if (error)
492		return (error);
493
494	memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary));
495	brelse(bp);
496
497	if (!nandfs_segsum_valid(segsum)) {
498		DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__,
499		    blocknr));
500		return (EINVAL);
501	}
502
503	return (error);
504}
505
506static int
507nandfs_load_super_root(struct nandfs_device *nandfsdev,
508    struct nandfs_segment_summary *segsum, uint64_t pseg)
509{
510	struct nandfs_super_root super_root;
511	struct buf *bp;
512	uint64_t blocknr;
513	uint32_t super_root_crc, comp_crc;
514	int off, error;
515
516	/* Check if there is a superroot */
517	if ((segsum->ss_flags & NANDFS_SS_SR) == 0) {
518		DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__,
519		    pseg));
520		return (ENOENT);
521	}
522
523	/* Get our super root, located at the end of the pseg */
524	blocknr = pseg + segsum->ss_nblocks - 1;
525	DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr));
526
527	error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp);
528	if (error)
529		return (error);
530
531	memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root));
532	brelse(bp);
533
534	/* Check super root CRC */
535	super_root_crc = super_root.sr_sum;
536	off = sizeof(super_root.sr_sum);
537	comp_crc = crc32((uint8_t *)&super_root + off,
538	    NANDFS_SR_BYTES - off);
539
540	if (super_root_crc != comp_crc) {
541		DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n",
542		    __func__, super_root_crc, comp_crc));
543		return (EINVAL);
544	}
545
546	nandfsdev->nd_super_root = super_root;
547	DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__));
548
549	return (0);
550}
551
552/*
553 * Search for the last super root recorded.
554 */
555int
556nandfs_search_super_root(struct nandfs_device *nandfsdev)
557{
558	struct nandfs_super_block *super;
559	struct nandfs_segment_summary segsum;
560	uint64_t seg_start, seg_end, cno, seq, create, pseg;
561	uint64_t segnum;
562	int error, found;
563
564	error = found = 0;
565
566	/* Search for last super root */
567	pseg = nandfsdev->nd_super.s_last_pseg;
568	segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
569
570	cno = nandfsdev->nd_super.s_last_cno;
571	create = seq = 0;
572	DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__,
573	    (uintmax_t)pseg));
574
575	for (;;) {
576		error = nandfs_load_segsum(nandfsdev, pseg, &segsum);
577		if (error)
578			break;
579
580		if (segsum.ss_seq < seq || segsum.ss_create < create)
581			break;
582
583		/* Try to load super root */
584		if (segsum.ss_flags & NANDFS_SS_SR) {
585			error = nandfs_load_super_root(nandfsdev, &segsum, pseg);
586			if (error)
587				break;	/* confused */
588			found = 1;
589
590			super = &nandfsdev->nd_super;
591			nandfsdev->nd_last_segsum = segsum;
592			super->s_last_pseg = pseg;
593			super->s_last_cno = cno++;
594			super->s_last_seq = segsum.ss_seq;
595			super->s_state = NANDFS_VALID_FS;
596			seq = segsum.ss_seq;
597			create = segsum.ss_create;
598		} else {
599			seq = segsum.ss_seq;
600			create = segsum.ss_create;
601		}
602
603		/* Calculate next partial segment location */
604		pseg += segsum.ss_nblocks;
605		DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__,
606		    (uintmax_t)pseg));
607
608		/* Did we reach the end of the segment? if so, go to the next */
609		nandfs_get_segment_range(nandfsdev, segnum, &seg_start,
610		    &seg_end);
611		if (pseg >= seg_end) {
612			pseg = segsum.ss_next;
613			DPRINTF(VOLUMES,
614			    (" partial seg oor next is %jx[%jx - %jx]\n",
615			    (uintmax_t)pseg, (uintmax_t)seg_start,
616			    (uintmax_t)seg_end));
617		}
618		segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
619	}
620
621	if (error && !found)
622		return (error);
623
624	return (0);
625}
626
627int
628nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp,
629    uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep)
630{
631	struct nandfs_node *node;
632	struct vnode *nvp;
633	struct mount *mp;
634	int error;
635
636	*nodep = NULL;
637
638	/* Associate with mountpoint if present */
639	if (nmp) {
640		mp = nmp->nm_vfs_mountp;
641		error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp);
642		if (error)
643			return (error);
644	} else {
645		mp = NULL;
646		error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops,
647		    &nvp);
648		if (error)
649			return (error);
650	}
651
652	if (mp)
653		NANDFS_WRITELOCK(nandfsdev);
654
655	DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n",
656	    __func__, (uintmax_t)ino, nvp));
657	/* Lock node */
658	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);
659
660	if (mp) {
661		error = insmntque(nvp, mp);
662		if (error != 0) {
663			*nodep = NULL;
664			return (error);
665		}
666	}
667
668	node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO);
669
670	/* Crosslink */
671	node->nn_vnode = nvp;
672	nvp->v_bufobj.bo_ops = &buf_ops_nandfs;
673	node->nn_nmp = nmp;
674	node->nn_nandfsdev = nandfsdev;
675	nvp->v_data = node;
676
677	/* Initiase NANDFS node */
678	node->nn_ino = ino;
679	if (inode != NULL)
680		node->nn_inode = *inode;
681
682	nandfs_vinit(nvp, ino);
683
684	/* Return node */
685	*nodep = node;
686	DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n",
687	    __func__, (uintmax_t)ino, nvp, *nodep));
688
689	return (0);
690}
691
692int
693nandfs_get_node(struct nandfsmount *nmp, uint64_t ino,
694    struct nandfs_node **nodep)
695{
696	struct nandfs_device *nandfsdev;
697	struct nandfs_inode inode, *entry;
698	struct vnode *nvp, *vpp;
699	struct thread *td;
700	struct buf *bp;
701	uint64_t ivblocknr;
702	uint32_t entry_in_block;
703	int error;
704
705	/* Look up node in hash table */
706	td = curthread;
707	*nodep = NULL;
708
709	if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) {
710		printf("nandfs_get_node: system ino %"PRIu64" not in mount "
711		    "point!\n", ino);
712		return (ENOENT);
713	}
714
715	error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp,
716	    NULL, NULL);
717	if (error)
718		return (error);
719
720	if (nvp != NULL) {
721		*nodep = (struct nandfs_node *)nvp->v_data;
722		return (0);
723	}
724
725	/* Look up inode structure in mountpoints ifile */
726	nandfsdev = nmp->nm_nandfsdev;
727	nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr,
728	    &entry_in_block);
729
730	VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED);
731	error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp);
732	if (error) {
733		brelse(bp);
734		VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
735		return (ENOENT);
736	}
737
738	/* Get inode entry */
739	entry = (struct nandfs_inode *) bp->b_data + entry_in_block;
740	memcpy(&inode, entry, sizeof(struct nandfs_inode));
741	brelse(bp);
742	VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
743
744	/* Get node */
745	error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode, nodep);
746	if (error) {
747		*nodep = NULL;
748		return (error);
749	}
750
751	nvp = (*nodep)->nn_vnode;
752	error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL);
753	if (error) {
754		*nodep = NULL;
755		return (error);
756	}
757
758	return (error);
759}
760
761void
762nandfs_dispose_node(struct nandfs_node **nodep)
763{
764	struct nandfs_node *node;
765	struct vnode *vp;
766
767	/* Protect against rogue values */
768	node = *nodep;
769	if (!node) {
770		return;
771	}
772	DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep));
773
774	vp = NTOV(node);
775	vp->v_data = NULL;
776
777	/* Free our associated memory */
778	uma_zfree(nandfs_node_zone, node);
779
780	*nodep = NULL;
781}
782
783int
784nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
785    uint64_t *ino, int *found, uint64_t *off)
786{
787	struct nandfs_node *dir_node = VTON(dvp);
788	struct nandfs_dir_entry	*ndirent;
789	struct buf *bp;
790	uint64_t file_size, diroffset, blkoff;
791	uint64_t blocknr;
792	uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
793	uint8_t *pos, name_len;
794	int error;
795
796	*found = 0;
797
798	DPRINTF(VNCALL, ("%s: %s file\n", __func__, name));
799	if (dvp->v_type != VDIR) {
800		return (ENOTDIR);
801	}
802
803	/* Get directory filesize */
804	file_size = dir_node->nn_inode.i_size;
805
806	/* Walk the directory */
807	diroffset = 0;
808	blocknr = 0;
809	blkoff = 0;
810	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
811	if (error) {
812		brelse(bp);
813		return (EIO);
814	}
815
816	while (diroffset < file_size) {
817		if (blkoff >= blocksize) {
818			blkoff = 0; blocknr++;
819			brelse(bp);
820			error = nandfs_bread(dir_node, blocknr, NOCRED, 0,
821			    &bp);
822			if (error) {
823				brelse(bp);
824				return (EIO);
825			}
826		}
827
828		/* Read in one dirent */
829		pos = (uint8_t *) bp->b_data + blkoff;
830		ndirent = (struct nandfs_dir_entry *) pos;
831		name_len = ndirent->name_len;
832
833		if ((name_len == namelen) &&
834		    (strncmp(name, ndirent->name, name_len) == 0) &&
835		    (ndirent->inode != 0)) {
836			*ino = ndirent->inode;
837			*off = diroffset;
838			DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n",
839			    name_len, ndirent->name, *ino));
840			*found = 1;
841			break;
842		}
843
844		/* Advance */
845		diroffset += ndirent->rec_len;
846		blkoff += ndirent->rec_len;
847	}
848	brelse(bp);
849
850	return (error);
851}
852
853int
854nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo)
855{
856	struct nandfs_device *fsdev;
857
858	fsdev = nmp->nm_nandfsdev;
859
860	memcpy(&fsinfo->fs_fsdata, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
861	memcpy(&fsinfo->fs_super, &fsdev->nd_super, sizeof(fsdev->nd_super));
862	snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev),
863	    "%s", nmp->nm_vfs_mountp->mnt_stat.f_mntfromname);
864
865	return (0);
866}
867
868void
869nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode)
870{
871	struct timespec ts;
872
873	vfs_timestamp(&ts);
874
875	inode->i_blocks = 0;
876	inode->i_size = 0;
877	inode->i_ctime = ts.tv_sec;
878	inode->i_ctime_nsec = ts.tv_nsec;
879	inode->i_mtime = ts.tv_sec;
880	inode->i_mtime_nsec = ts.tv_nsec;
881	inode->i_mode = mode;
882	inode->i_links_count = 1;
883	if (S_ISDIR(mode))
884		inode->i_links_count = 2;
885	inode->i_flags = 0;
886
887	inode->i_special = 0;
888	memset(inode->i_db, 0, sizeof(inode->i_db));
889	memset(inode->i_ib, 0, sizeof(inode->i_ib));
890}
891
892void
893nandfs_inode_destroy(struct nandfs_inode *inode)
894{
895
896	MPASS(inode->i_blocks == 0);
897	bzero(inode, sizeof(*inode));
898}
899
900int
901nandfs_fs_full(struct nandfs_device *nffsdev)
902{
903	uint64_t space, bps;
904
905	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
906	space = (nffsdev->nd_clean_segs - 1) * bps;
907
908	DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__,
909	    (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space));
910
911	if (nffsdev->nd_dirty_bufs + (nffsdev->nd_segs_reserved * bps) >= space)
912		return (1);
913
914	return (0);
915}
916
917static int
918_nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force)
919{
920	struct nandfs_device *nffsdev;
921	struct nandfs_node *node;
922	uint64_t ino, bps;
923
924	if (NANDFS_ISGATHERED(bp)) {
925		bqrelse(bp);
926		return (0);
927	}
928	if ((bp->b_flags & (B_MANAGED | B_DELWRI)) == (B_MANAGED | B_DELWRI)) {
929		bqrelse(bp);
930		return (0);
931	}
932
933	node = VTON(bp->b_vp);
934	nffsdev = node->nn_nandfsdev;
935	DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp));
936	ino = node->nn_ino;
937
938	if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) {
939		brelse(bp);
940		return (ENOSPC);
941	}
942
943	bp->b_flags |= B_MANAGED;
944	bdwrite(bp);
945
946	nandfs_dirty_bufs_increment(nffsdev);
947
948	KASSERT((bp->b_vp), ("vp missing for bp"));
949	KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO),
950	    ("bp vblk is 0"));
951
952	/*
953	 * To maintain consistency of FS we need to force making
954	 * meta buffers dirty, even if free space is low.
955	 */
956	if (dirty_meta && ino != NANDFS_GC_INO)
957		nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
958
959	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
960
961	if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) {
962		mtx_lock(&nffsdev->nd_sync_mtx);
963		if (nffsdev->nd_syncing == 0) {
964			DPRINTF(SYNC, ("%s: wakeup gc\n", __func__));
965			nffsdev->nd_syncing = 1;
966			wakeup(&nffsdev->nd_syncing);
967		}
968		mtx_unlock(&nffsdev->nd_sync_mtx);
969	}
970
971	return (0);
972}
973
/*
 * Dirty buffer 'bp' and also dirty the owning node's bmap blocks
 * (dirty_meta = 1).  See _nandfs_dirty_buf() for 'force' and return values.
 */
int
nandfs_dirty_buf(struct buf *bp, int force)
{
	int error;

	error = _nandfs_dirty_buf(bp, 1, force);
	return (error);
}
980
/*
 * Dirty buffer 'bp' without dirtying the owning node's bmap blocks
 * (dirty_meta = 0).  See _nandfs_dirty_buf() for 'force' and return values.
 */
int
nandfs_dirty_buf_meta(struct buf *bp, int force)
{
	int error;

	error = _nandfs_dirty_buf(bp, 0, force);
	return (error);
}
987
988void
989nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp)
990{
991
992	BUF_ASSERT_HELD(bp);
993
994	if (bp->b_flags & B_DELWRI) {
995		bp->b_flags &= ~(B_DELWRI|B_MANAGED);
996		nandfs_dirty_bufs_decrement(nffsdev);
997	}
998	/*
999	 * Since it is now being written, we can clear its deferred write flag.
1000	 */
1001	bp->b_flags &= ~B_DEFERRED;
1002
1003	brelse(bp);
1004}
1005
1006void
1007nandfs_undirty_buf(struct buf *bp)
1008{
1009	struct nandfs_node *node;
1010
1011	node = VTON(bp->b_vp);
1012
1013	nandfs_undirty_buf_fsdev(node->nn_nandfsdev, bp);
1014}
1015
1016void
1017nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr)
1018{
1019
1020	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
1021	*vblk = blocknr;
1022}
1023
1024nandfs_daddr_t
1025nandfs_vblk_get(struct buf *bp)
1026{
1027
1028	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
1029	return (*vblk);
1030}
1031
1032void
1033nandfs_buf_set(struct buf *bp, uint32_t bits)
1034{
1035	uintptr_t flags;
1036
1037	flags = (uintptr_t)bp->b_fsprivate3;
1038	flags |= (uintptr_t)bits;
1039	bp->b_fsprivate3 = (void *)flags;
1040}
1041
1042void
1043nandfs_buf_clear(struct buf *bp, uint32_t bits)
1044{
1045	uintptr_t flags;
1046
1047	flags = (uintptr_t)bp->b_fsprivate3;
1048	flags &= ~(uintptr_t)bits;
1049	bp->b_fsprivate3 = (void *)flags;
1050}
1051
1052int
1053nandfs_buf_check(struct buf *bp, uint32_t bits)
1054{
1055	uintptr_t flags;
1056
1057	flags = (uintptr_t)bp->b_fsprivate3;
1058	if (flags & bits)
1059		return (1);
1060	return (0);
1061}
1062
1063int
1064nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size)
1065{
1066	DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n",
1067	    __func__, offset, size));
1068
1069	MPASS(size % fsdev->nd_erasesize == 0);
1070
1071	return (g_delete_data(fsdev->nd_gconsumer, offset, size));
1072}
1073
1074int
1075nandfs_vop_islocked(struct vnode *vp)
1076{
1077	int islocked;
1078
1079	islocked = VOP_ISLOCKED(vp);
1080	return (islocked == LK_EXCLUSIVE || islocked == LK_SHARED);
1081}
1082
1083nandfs_daddr_t
1084nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block)
1085{
1086
1087	return (btodb(block * fsdev->nd_blocksize));
1088}
1089