1/*-
2 * Copyright (c) 2010-2012 Semihalf
3 * Copyright (c) 2008, 2009 Reinoud Zandijk
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * From: NetBSD: nilfs_vfsops.c,v 1.1 2009/07/18 16:31:42 reinoud Exp
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: releng/11.0/sys/fs/nandfs/nandfs_vfsops.c 298848 2016-04-30 14:41:18Z pfg $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/fcntl.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/mount.h>
39#include <sys/namei.h>
40#include <sys/proc.h>
41#include <sys/priv.h>
42#include <sys/vnode.h>
43#include <sys/buf.h>
44#include <sys/sysctl.h>
45#include <sys/libkern.h>
46
47#include <geom/geom.h>
48#include <geom/geom_vfs.h>
49
50#include <machine/_inttypes.h>
51
52#include <fs/nandfs/nandfs_mount.h>
53#include <fs/nandfs/nandfs.h>
54#include <fs/nandfs/nandfs_subr.h>
55
56static MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount structure");
57
/*
 * Flag a vnode as a filesystem-internal (system) file: set VV_SYSTEM in
 * v_vflag, then vref() and vput() it.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays safe inside an unbraced if/else; invoke it with a
 * trailing semicolon.  The argument is expanded several times, so it must
 * not have side effects.
 */
#define	NANDFS_SET_SYSTEMFILE(vp) do {		\
	(vp)->v_vflag |= VV_SYSTEM;		\
	vref(vp);				\
	vput(vp);				\
} while (0)
62
/*
 * Tear down a system vnode: lock it exclusively, assert that it has no
 * dirty buffers left, clear VV_SYSTEM, then vgone() and vput() it.
 *
 * The body is wrapped in do { } while (0) so the macro is a single statement
 * (safe in an unbraced if/else), and every use of the argument is
 * parenthesized.  The argument is expanded several times, so it must not
 * have side effects.
 */
#define	NANDFS_UNSET_SYSTEMFILE(vp) do {			\
	VOP_LOCK((vp), LK_EXCLUSIVE);				\
	MPASS((vp)->v_bufobj.bo_dirty.bv_cnt == 0);		\
	(vp)->v_vflag &= ~VV_SYSTEM;				\
	vgone(vp);						\
	vput(vp);						\
} while (0)
69
70/* Globals */
71struct _nandfs_devices nandfs_devices;
72
73/* Parameters */
74int nandfs_verbose = 0;
75
76static void
77nandfs_tunable_init(void *arg)
78{
79
80	TUNABLE_INT_FETCH("vfs.nandfs.verbose", &nandfs_verbose);
81}
82SYSINIT(nandfs_tunables, SI_SUB_VFS, SI_ORDER_ANY, nandfs_tunable_init, NULL);
83
84static SYSCTL_NODE(_vfs, OID_AUTO, nandfs, CTLFLAG_RD, 0, "NAND filesystem");
85static SYSCTL_NODE(_vfs_nandfs, OID_AUTO, mount, CTLFLAG_RD, 0,
86    "NANDFS mountpoints");
87SYSCTL_INT(_vfs_nandfs, OID_AUTO, verbose, CTLFLAG_RW, &nandfs_verbose, 0, "");
88
89#define NANDFS_CONSTR_INTERVAL	5
90int nandfs_sync_interval = NANDFS_CONSTR_INTERVAL; /* sync every 5 seconds */
91SYSCTL_UINT(_vfs_nandfs, OID_AUTO, sync_interval, CTLFLAG_RW,
92    &nandfs_sync_interval, 0, "");
93
94#define NANDFS_MAX_DIRTY_SEGS	5
95int nandfs_max_dirty_segs = NANDFS_MAX_DIRTY_SEGS; /* sync when 5 dirty seg */
96SYSCTL_UINT(_vfs_nandfs, OID_AUTO, max_dirty_segs, CTLFLAG_RW,
97    &nandfs_max_dirty_segs, 0, "");
98
99#define NANDFS_CPS_BETWEEN_SBLOCKS 5
100int nandfs_cps_between_sblocks = NANDFS_CPS_BETWEEN_SBLOCKS; /* write superblock every 5 checkpoints */
101SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cps_between_sblocks, CTLFLAG_RW,
102    &nandfs_cps_between_sblocks, 0, "");
103
104#define NANDFS_CLEANER_ENABLE 1
105int nandfs_cleaner_enable = NANDFS_CLEANER_ENABLE;
106SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_enable, CTLFLAG_RW,
107    &nandfs_cleaner_enable, 0, "");
108
109#define NANDFS_CLEANER_INTERVAL 5
110int nandfs_cleaner_interval = NANDFS_CLEANER_INTERVAL;
111SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_interval, CTLFLAG_RW,
112    &nandfs_cleaner_interval, 0, "");
113
114#define NANDFS_CLEANER_SEGMENTS 5
115int nandfs_cleaner_segments = NANDFS_CLEANER_SEGMENTS;
116SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_segments, CTLFLAG_RW,
117    &nandfs_cleaner_segments, 0, "");
118
119static int nandfs_mountfs(struct vnode *devvp, struct mount *mp);
120static vfs_mount_t	nandfs_mount;
121static vfs_root_t	nandfs_root;
122static vfs_statfs_t	nandfs_statfs;
123static vfs_unmount_t	nandfs_unmount;
124static vfs_vget_t	nandfs_vget;
125static vfs_sync_t	nandfs_sync;
126static const char *nandfs_opts[] = {
127	"snap", "from", "noatime", NULL
128};
129
130/* System nodes */
131static int
132nandfs_create_system_nodes(struct nandfs_device *nandfsdev)
133{
134	int error;
135
136	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_DAT_INO,
137	    &nandfsdev->nd_super_root.sr_dat, &nandfsdev->nd_dat_node);
138	if (error)
139		goto errorout;
140
141	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_CPFILE_INO,
142	    &nandfsdev->nd_super_root.sr_cpfile, &nandfsdev->nd_cp_node);
143	if (error)
144		goto errorout;
145
146	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_SUFILE_INO,
147	    &nandfsdev->nd_super_root.sr_sufile, &nandfsdev->nd_su_node);
148	if (error)
149		goto errorout;
150
151	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_GC_INO,
152	    NULL, &nandfsdev->nd_gc_node);
153	if (error)
154		goto errorout;
155
156	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
157	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
158	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
159	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
160
161	DPRINTF(VOLUMES, ("System vnodes: dat: %p cp: %p su: %p\n",
162	    NTOV(nandfsdev->nd_dat_node), NTOV(nandfsdev->nd_cp_node),
163	    NTOV(nandfsdev->nd_su_node)));
164	return (0);
165
166errorout:
167	nandfs_dispose_node(&nandfsdev->nd_gc_node);
168	nandfs_dispose_node(&nandfsdev->nd_dat_node);
169	nandfs_dispose_node(&nandfsdev->nd_cp_node);
170	nandfs_dispose_node(&nandfsdev->nd_su_node);
171
172	return (error);
173}
174
175static void
176nandfs_release_system_nodes(struct nandfs_device *nandfsdev)
177{
178
179	if (!nandfsdev)
180		return;
181	if (nandfsdev->nd_refcnt > 0)
182		return;
183
184	if (nandfsdev->nd_gc_node)
185		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
186	if (nandfsdev->nd_dat_node)
187		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
188	if (nandfsdev->nd_cp_node)
189		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
190	if (nandfsdev->nd_su_node)
191		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
192}
193
194static int
195nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata)
196{
197	uint32_t fsdata_crc, comp_crc;
198
199	if (fsdata->f_magic != NANDFS_FSDATA_MAGIC)
200		return (0);
201
202	/* Preserve CRC */
203	fsdata_crc = fsdata->f_sum;
204
205	/* Calculate */
206	fsdata->f_sum = (0);
207	comp_crc = crc32((uint8_t *)fsdata, fsdata->f_bytes);
208
209	/* Restore */
210	fsdata->f_sum = fsdata_crc;
211
212	/* Check CRC */
213	return (fsdata_crc == comp_crc);
214}
215
216static int
217nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata,
218    struct nandfs_super_block *super)
219{
220	uint32_t super_crc, comp_crc;
221
222	/* Check super block magic */
223	if (super->s_magic != NANDFS_SUPER_MAGIC)
224		return (0);
225
226	/* Preserve CRC */
227	super_crc = super->s_sum;
228
229	/* Calculate */
230	super->s_sum = (0);
231	comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes);
232
233	/* Restore */
234	super->s_sum = super_crc;
235
236	/* Check CRC */
237	return (super_crc == comp_crc);
238}
239
240static void
241nandfs_calc_superblock_crc(struct nandfs_fsdata *fsdata,
242    struct nandfs_super_block *super)
243{
244	uint32_t comp_crc;
245
246	/* Calculate */
247	super->s_sum = 0;
248	comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes);
249
250	/* Restore */
251	super->s_sum = comp_crc;
252}
253
/*
 * Report whether a memory area consists solely of 0xff bytes.
 *
 * Returns 1 when all `size' bytes at `area' are 0xff (also when size is not
 * positive), 0 as soon as any other byte value is found.
 */
static int
nandfs_is_empty(u_char *area, int size)
{
	const u_char *p = area;
	int left = size;

	while (left-- > 0) {
		if (*p++ != 0xff)
			return (0);
	}

	return (1);
}
265
266static __inline int
267nandfs_sblocks_in_esize(struct nandfs_device *fsdev)
268{
269
270	return ((fsdev->nd_erasesize - NANDFS_SBLOCK_OFFSET_BYTES) /
271	    sizeof(struct nandfs_super_block));
272}
273
274static __inline int
275nandfs_max_sblocks(struct nandfs_device *fsdev)
276{
277
278	return (NANDFS_NFSAREAS * nandfs_sblocks_in_esize(fsdev));
279}
280
281static __inline int
282nandfs_sblocks_in_block(struct nandfs_device *fsdev)
283{
284
285	return (fsdev->nd_devblocksize / sizeof(struct nandfs_super_block));
286}
287
#if 0
/*
 * Disabled helper: super block slots left in the first device block after
 * subtracting the slots taken by NANDFS_SBLOCK_OFFSET_BYTES, clamped at 0.
 */
static __inline int
nandfs_sblocks_in_first_block(struct nandfs_device *fsdev)
{
	int n;

	n = nandfs_sblocks_in_block(fsdev) -
	    NANDFS_SBLOCK_OFFSET_BYTES / sizeof(struct nandfs_super_block);

	return (n < 0 ? 0 : n);
}
#endif
302
303static int
304nandfs_write_superblock_at(struct nandfs_device *fsdev,
305    struct nandfs_fsarea *fstp)
306{
307	struct nandfs_super_block *super, *supert;
308	struct buf *bp;
309	int sb_per_sector, sbs_in_fsd, read_block;
310	int index, pos, error;
311	off_t offset;
312
313	DPRINTF(SYNC, ("%s: last_used %d nandfs_sblocks_in_esize %d\n",
314	    __func__, fstp->last_used, nandfs_sblocks_in_esize(fsdev)));
315	if (fstp->last_used == nandfs_sblocks_in_esize(fsdev) - 1)
316		index = 0;
317	else
318		index = fstp->last_used + 1;
319
320	super = &fsdev->nd_super;
321	supert = NULL;
322
323	sb_per_sector = nandfs_sblocks_in_block(fsdev);
324	sbs_in_fsd = sizeof(struct nandfs_fsdata) /
325	    sizeof(struct nandfs_super_block);
326	index += sbs_in_fsd;
327	offset = fstp->offset;
328
329	DPRINTF(SYNC, ("%s: offset %#jx s_last_pseg %#jx s_last_cno %#jx "
330	    "s_last_seq %#jx wtime %jd index %d\n", __func__, offset,
331	    super->s_last_pseg, super->s_last_cno, super->s_last_seq,
332	    super->s_wtime, index));
333
334	read_block = btodb(offset + rounddown(index, sb_per_sector) *
335	    sizeof(struct nandfs_super_block));
336
337	DPRINTF(SYNC, ("%s: read_block %#x\n", __func__, read_block));
338
339	if (index == sbs_in_fsd) {
340		error = nandfs_erase(fsdev, offset, fsdev->nd_erasesize);
341		if (error)
342			return (error);
343
344		error = bread(fsdev->nd_devvp, btodb(offset),
345		    fsdev->nd_devblocksize, NOCRED, &bp);
346		if (error) {
347			printf("NANDFS: couldn't read initial data: %d\n",
348			    error);
349			brelse(bp);
350			return (error);
351		}
352		memcpy(bp->b_data, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
353		/*
354		 * 0xff-out the rest. This bp could be cached, so potentially
355		 * b_data contains stale super blocks.
356		 *
357		 * We don't mind cached bp since most of the time we just add
358		 * super blocks to already 0xff-out b_data and don't need to
359		 * perform actual read.
360		 */
361		if (fsdev->nd_devblocksize > sizeof(fsdev->nd_fsdata))
362			memset(bp->b_data + sizeof(fsdev->nd_fsdata), 0xff,
363			    fsdev->nd_devblocksize - sizeof(fsdev->nd_fsdata));
364		error = bwrite(bp);
365		if (error) {
366			printf("NANDFS: cannot rewrite initial data at %jx\n",
367			    offset);
368			return (error);
369		}
370	}
371
372	error = bread(fsdev->nd_devvp, read_block, fsdev->nd_devblocksize,
373	    NOCRED, &bp);
374	if (error) {
375		brelse(bp);
376		return (error);
377	}
378
379	supert = (struct nandfs_super_block *)(bp->b_data);
380	pos = index % sb_per_sector;
381
382	DPRINTF(SYNC, ("%s: storing at %d\n", __func__, pos));
383	memcpy(&supert[pos], super, sizeof(struct nandfs_super_block));
384
385	/*
386	 * See comment above in code that performs erase.
387	 */
388	if (pos == 0)
389		memset(&supert[1], 0xff,
390		    (sb_per_sector - 1) * sizeof(struct nandfs_super_block));
391
392	error = bwrite(bp);
393	if (error) {
394		printf("NANDFS: cannot update superblock at %jx\n", offset);
395		return (error);
396	}
397
398	DPRINTF(SYNC, ("%s: fstp->last_used %d -> %d\n", __func__,
399	    fstp->last_used, index - sbs_in_fsd));
400	fstp->last_used = index - sbs_in_fsd;
401
402	return (0);
403}
404
405int
406nandfs_write_superblock(struct nandfs_device *fsdev)
407{
408	struct nandfs_super_block *super;
409	struct timespec ts;
410	int error;
411	int i, j;
412
413	vfs_timestamp(&ts);
414
415	super = &fsdev->nd_super;
416
417	super->s_last_pseg = fsdev->nd_last_pseg;
418	super->s_last_cno = fsdev->nd_last_cno;
419	super->s_last_seq = fsdev->nd_seg_sequence;
420	super->s_wtime = ts.tv_sec;
421
422	nandfs_calc_superblock_crc(&fsdev->nd_fsdata, super);
423
424	error = 0;
425	for (i = 0, j = fsdev->nd_last_fsarea; i < NANDFS_NFSAREAS;
426	    i++, j = (j + 1 % NANDFS_NFSAREAS)) {
427		if (fsdev->nd_fsarea[j].flags & NANDFS_FSSTOR_FAILED) {
428			DPRINTF(SYNC, ("%s: skipping %d\n", __func__, j));
429			continue;
430		}
431		error = nandfs_write_superblock_at(fsdev, &fsdev->nd_fsarea[j]);
432		if (error) {
433			printf("NANDFS: writing superblock at offset %d failed:"
434			    "%d\n", j * fsdev->nd_erasesize, error);
435			fsdev->nd_fsarea[j].flags |= NANDFS_FSSTOR_FAILED;
436		} else
437			break;
438	}
439
440	if (i == NANDFS_NFSAREAS) {
441		printf("NANDFS: superblock was not written\n");
442		/*
443		 * TODO: switch to read-only?
444		 */
445		return (error);
446	} else
447		fsdev->nd_last_fsarea = (j + 1) % NANDFS_NFSAREAS;
448
449	return (0);
450}
451
452static int
453nandfs_select_fsdata(struct nandfs_device *fsdev,
454    struct nandfs_fsdata *fsdatat, struct nandfs_fsdata **fsdata, int nfsds)
455{
456	int i;
457
458	*fsdata = NULL;
459	for (i = 0; i < nfsds; i++) {
460		DPRINTF(VOLUMES, ("%s: i %d f_magic %x f_crc %x\n", __func__,
461		    i, fsdatat[i].f_magic, fsdatat[i].f_sum));
462		if (!nandfs_check_fsdata_crc(&fsdatat[i]))
463			continue;
464		*fsdata = &fsdatat[i];
465		break;
466	}
467
468	return (*fsdata != NULL ? 0 : EINVAL);
469}
470
471static int
472nandfs_select_sb(struct nandfs_device *fsdev,
473    struct nandfs_super_block *supert, struct nandfs_super_block **super,
474    int nsbs)
475{
476	int i;
477
478	*super = NULL;
479	for (i = 0; i < nsbs; i++) {
480		if (!nandfs_check_superblock_crc(&fsdev->nd_fsdata, &supert[i]))
481			continue;
482		DPRINTF(SYNC, ("%s: i %d s_last_cno %jx s_magic %x "
483		    "s_wtime %jd\n", __func__, i, supert[i].s_last_cno,
484		    supert[i].s_magic, supert[i].s_wtime));
485		if (*super == NULL || supert[i].s_last_cno >
486		    (*super)->s_last_cno)
487			*super = &supert[i];
488	}
489
490	return (*super != NULL ? 0 : EINVAL);
491}
492
493static int
494nandfs_read_structures_at(struct nandfs_device *fsdev,
495    struct nandfs_fsarea *fstp, struct nandfs_fsdata *fsdata,
496    struct nandfs_super_block *super)
497{
498	struct nandfs_super_block *tsuper, *tsuperd;
499	struct buf *bp;
500	int error, read_size;
501	int i;
502	int offset;
503
504	offset = fstp->offset;
505
506	if (fsdev->nd_erasesize > MAXBSIZE)
507		read_size = MAXBSIZE;
508	else
509		read_size = fsdev->nd_erasesize;
510
511	error = bread(fsdev->nd_devvp, btodb(offset), read_size, NOCRED, &bp);
512	if (error) {
513		printf("couldn't read: %d\n", error);
514		brelse(bp);
515		fstp->flags |= NANDFS_FSSTOR_FAILED;
516		return (error);
517	}
518
519	tsuper = super;
520
521	memcpy(fsdata, bp->b_data, sizeof(struct nandfs_fsdata));
522	memcpy(tsuper, (bp->b_data + sizeof(struct nandfs_fsdata)),
523	    read_size - sizeof(struct nandfs_fsdata));
524	brelse(bp);
525
526	tsuper += (read_size - sizeof(struct nandfs_fsdata)) /
527	    sizeof(struct nandfs_super_block);
528
529	for (i = 1; i < fsdev->nd_erasesize / read_size; i++) {
530		error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
531		    read_size, NOCRED, &bp);
532		if (error) {
533			printf("couldn't read: %d\n", error);
534			brelse(bp);
535			fstp->flags |= NANDFS_FSSTOR_FAILED;
536			return (error);
537		}
538		memcpy(tsuper, bp->b_data, read_size);
539		tsuper += read_size / sizeof(struct nandfs_super_block);
540		brelse(bp);
541	}
542
543	tsuper -= 1;
544	fstp->last_used = nandfs_sblocks_in_esize(fsdev) - 1;
545	for (tsuperd = super - 1; (tsuper != tsuperd); tsuper -= 1) {
546		if (nandfs_is_empty((u_char *)tsuper, sizeof(*tsuper)))
547			fstp->last_used--;
548		else
549			break;
550	}
551
552	DPRINTF(VOLUMES, ("%s: last_used %d\n", __func__, fstp->last_used));
553
554	return (0);
555}
556
557static int
558nandfs_read_structures(struct nandfs_device *fsdev)
559{
560	struct nandfs_fsdata *fsdata, *fsdatat;
561	struct nandfs_super_block *sblocks, *ssblock;
562	int nsbs, nfsds, i;
563	int error = 0;
564	int nrsbs;
565
566	nfsds = NANDFS_NFSAREAS;
567	nsbs = nandfs_max_sblocks(fsdev);
568
569	fsdatat = malloc(sizeof(struct nandfs_fsdata) * nfsds, M_NANDFSTEMP,
570	    M_WAITOK | M_ZERO);
571	sblocks = malloc(sizeof(struct nandfs_super_block) * nsbs, M_NANDFSTEMP,
572	    M_WAITOK | M_ZERO);
573
574	nrsbs = 0;
575	for (i = 0; i < NANDFS_NFSAREAS; i++) {
576		fsdev->nd_fsarea[i].offset = i * fsdev->nd_erasesize;
577		error = nandfs_read_structures_at(fsdev, &fsdev->nd_fsarea[i],
578		    &fsdatat[i], sblocks + nrsbs);
579		if (error)
580			continue;
581		nrsbs += (fsdev->nd_fsarea[i].last_used + 1);
582		if (fsdev->nd_fsarea[fsdev->nd_last_fsarea].last_used >
583		    fsdev->nd_fsarea[i].last_used)
584			fsdev->nd_last_fsarea = i;
585	}
586
587	if (nrsbs == 0) {
588		printf("nandfs: no valid superblocks found\n");
589		error = EINVAL;
590		goto out;
591	}
592
593	error = nandfs_select_fsdata(fsdev, fsdatat, &fsdata, nfsds);
594	if (error)
595		goto out;
596	memcpy(&fsdev->nd_fsdata, fsdata, sizeof(struct nandfs_fsdata));
597
598	error = nandfs_select_sb(fsdev, sblocks, &ssblock, nsbs);
599	if (error)
600		goto out;
601
602	memcpy(&fsdev->nd_super, ssblock, sizeof(struct nandfs_super_block));
603out:
604	free(fsdatat, M_NANDFSTEMP);
605	free(sblocks, M_NANDFSTEMP);
606
607	if (error == 0)
608		DPRINTF(VOLUMES, ("%s: selected sb with w_time %jd "
609		    "last_pseg %#jx\n", __func__, fsdev->nd_super.s_wtime,
610		    fsdev->nd_super.s_last_pseg));
611
612	return (error);
613}
614
615static void
616nandfs_unmount_base(struct nandfs_device *nandfsdev)
617{
618	int error;
619
620	if (!nandfsdev)
621		return;
622
623	/* Remove all our information */
624	error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0);
625	if (error) {
626		/*
627		 * Flushing buffers failed when fs was umounting, can't do
628		 * much now, just printf error and continue with umount.
629		 */
630		nandfs_error("%s(): error:%d when umounting FS\n",
631		    __func__, error);
632	}
633
634	/* Release the device's system nodes */
635	nandfs_release_system_nodes(nandfsdev);
636}
637
638static void
639nandfs_get_ncleanseg(struct nandfs_device *nandfsdev)
640{
641	struct nandfs_seg_stat nss;
642
643	nandfs_get_seg_stat(nandfsdev, &nss);
644	nandfsdev->nd_clean_segs = nss.nss_ncleansegs;
645	DPRINTF(VOLUMES, ("nandfs_mount: clean segs: %jx\n",
646	    (uintmax_t)nandfsdev->nd_clean_segs));
647}
648
649
650static int
651nandfs_mount_base(struct nandfs_device *nandfsdev, struct mount *mp,
652    struct nandfs_args *args)
653{
654	uint32_t log_blocksize;
655	int error;
656
657	/* Flush out any old buffers remaining from a previous use. */
658	if ((error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0)))
659		return (error);
660
661	error = nandfs_read_structures(nandfsdev);
662	if (error) {
663		printf("nandfs: could not get valid filesystem structures\n");
664		return (error);
665	}
666
667	if (nandfsdev->nd_fsdata.f_rev_level != NANDFS_CURRENT_REV) {
668		printf("nandfs: unsupported file system revision: %d "
669		    "(supported is %d).\n", nandfsdev->nd_fsdata.f_rev_level,
670		    NANDFS_CURRENT_REV);
671		return (EINVAL);
672	}
673
674	if (nandfsdev->nd_fsdata.f_erasesize != nandfsdev->nd_erasesize) {
675		printf("nandfs: erasesize mismatch (device %#x, fs %#x)\n",
676		    nandfsdev->nd_erasesize, nandfsdev->nd_fsdata.f_erasesize);
677		return (EINVAL);
678	}
679
680	/* Get our blocksize */
681	log_blocksize = nandfsdev->nd_fsdata.f_log_block_size;
682	nandfsdev->nd_blocksize = (uint64_t) 1 << (log_blocksize + 10);
683	DPRINTF(VOLUMES, ("%s: blocksize:%x\n", __func__,
684	    nandfsdev->nd_blocksize));
685
686	DPRINTF(VOLUMES, ("%s: accepted super block with cp %#jx\n", __func__,
687	    (uintmax_t)nandfsdev->nd_super.s_last_cno));
688
689	/* Calculate dat structure parameters */
690	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_dat_mdt,
691	    nandfsdev->nd_fsdata.f_dat_entry_size);
692	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_ifile_mdt,
693	    nandfsdev->nd_fsdata.f_inode_size);
694
695	/* Search for the super root and roll forward when needed */
696	if (nandfs_search_super_root(nandfsdev)) {
697		printf("Cannot find valid SuperRoot\n");
698		return (EINVAL);
699	}
700
701	nandfsdev->nd_mount_state = nandfsdev->nd_super.s_state;
702	if (nandfsdev->nd_mount_state != NANDFS_VALID_FS) {
703		printf("FS is seriously damaged, needs repairing\n");
704		printf("aborting mount\n");
705		return (EINVAL);
706	}
707
708	/*
709	 * FS should be ok now. The superblock and the last segsum could be
710	 * updated from the repair so extract running values again.
711	 */
712	nandfsdev->nd_last_pseg = nandfsdev->nd_super.s_last_pseg;
713	nandfsdev->nd_seg_sequence = nandfsdev->nd_super.s_last_seq;
714	nandfsdev->nd_seg_num = nandfs_get_segnum_of_block(nandfsdev,
715	    nandfsdev->nd_last_pseg);
716	nandfsdev->nd_next_seg_num = nandfs_get_segnum_of_block(nandfsdev,
717	    nandfsdev->nd_last_segsum.ss_next);
718	nandfsdev->nd_ts.tv_sec = nandfsdev->nd_last_segsum.ss_create;
719	nandfsdev->nd_last_cno = nandfsdev->nd_super.s_last_cno;
720	nandfsdev->nd_fakevblk = 1;
721	/*
722	 * FIXME: bogus calculation. Should use actual number of usable segments
723	 * instead of total amount.
724	 */
725	nandfsdev->nd_segs_reserved =
726	    nandfsdev->nd_fsdata.f_nsegments *
727	    nandfsdev->nd_fsdata.f_r_segments_percentage / 100;
728	nandfsdev->nd_last_ino  = NANDFS_USER_INO;
729	DPRINTF(VOLUMES, ("%s: last_pseg %#jx last_cno %#jx last_seq %#jx\n"
730	    "fsdev: last_seg: seq %#jx num %#jx, next_seg_num %#jx "
731	    "segs_reserved %#jx\n",
732	    __func__, (uintmax_t)nandfsdev->nd_last_pseg,
733	    (uintmax_t)nandfsdev->nd_last_cno,
734	    (uintmax_t)nandfsdev->nd_seg_sequence,
735	    (uintmax_t)nandfsdev->nd_seg_sequence,
736	    (uintmax_t)nandfsdev->nd_seg_num,
737	    (uintmax_t)nandfsdev->nd_next_seg_num,
738	    (uintmax_t)nandfsdev->nd_segs_reserved));
739
740	DPRINTF(VOLUMES, ("nandfs_mount: accepted super root\n"));
741
742	/* Create system vnodes for DAT, CP and SEGSUM */
743	error = nandfs_create_system_nodes(nandfsdev);
744	if (error)
745		nandfs_unmount_base(nandfsdev);
746
747	nandfs_get_ncleanseg(nandfsdev);
748
749	return (error);
750}
751
752static void
753nandfs_unmount_device(struct nandfs_device *nandfsdev)
754{
755
756	/* Is there anything? */
757	if (nandfsdev == NULL)
758		return;
759
760	/* Remove the device only if we're the last reference */
761	nandfsdev->nd_refcnt--;
762	if (nandfsdev->nd_refcnt >= 1)
763		return;
764
765	MPASS(nandfsdev->nd_syncer == NULL);
766	MPASS(nandfsdev->nd_cleaner == NULL);
767	MPASS(nandfsdev->nd_free_base == NULL);
768
769	/* Unmount our base */
770	nandfs_unmount_base(nandfsdev);
771
772	/* Remove from our device list */
773	SLIST_REMOVE(&nandfs_devices, nandfsdev, nandfs_device, nd_next_device);
774
775	DROP_GIANT();
776	g_topology_lock();
777	g_vfs_close(nandfsdev->nd_gconsumer);
778	g_topology_unlock();
779	PICKUP_GIANT();
780
781	DPRINTF(VOLUMES, ("closing device\n"));
782
783	/* Clear our mount reference and release device node */
784	vrele(nandfsdev->nd_devvp);
785
786	dev_rel(nandfsdev->nd_devvp->v_rdev);
787
788	/* Free our device info */
789	cv_destroy(&nandfsdev->nd_sync_cv);
790	mtx_destroy(&nandfsdev->nd_sync_mtx);
791	cv_destroy(&nandfsdev->nd_clean_cv);
792	mtx_destroy(&nandfsdev->nd_clean_mtx);
793	mtx_destroy(&nandfsdev->nd_mutex);
794	lockdestroy(&nandfsdev->nd_seg_const);
795	free(nandfsdev, M_NANDFSMNT);
796}
797
798static int
799nandfs_check_mounts(struct nandfs_device *nandfsdev, struct mount *mp,
800    struct nandfs_args *args)
801{
802	struct nandfsmount *nmp;
803	uint64_t last_cno;
804
805	/* no double-mounting of the same checkpoint */
806	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
807		if (nmp->nm_mount_args.cpno == args->cpno)
808			return (EBUSY);
809	}
810
811	/* Allow readonly mounts without questioning here */
812	if (mp->mnt_flag & MNT_RDONLY)
813		return (0);
814
815	/* Read/write mount */
816	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
817		/* Only one RW mount on this device! */
818		if ((nmp->nm_vfs_mountp->mnt_flag & MNT_RDONLY)==0)
819			return (EROFS);
820		/* RDONLY on last mountpoint is device busy */
821		last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
822		if (nmp->nm_mount_args.cpno == last_cno)
823			return (EBUSY);
824	}
825
826	/* OK for now */
827	return (0);
828}
829
830static int
831nandfs_mount_device(struct vnode *devvp, struct mount *mp,
832    struct nandfs_args *args, struct nandfs_device **nandfsdev_p)
833{
834	struct nandfs_device *nandfsdev;
835	struct g_provider *pp;
836	struct g_consumer *cp;
837	struct cdev *dev;
838	uint32_t erasesize;
839	int error, size;
840	int ronly;
841
842	DPRINTF(VOLUMES, ("Mounting NANDFS device\n"));
843
844	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
845
846	/* Look up device in our nandfs_mountpoints */
847	*nandfsdev_p = NULL;
848	SLIST_FOREACH(nandfsdev, &nandfs_devices, nd_next_device)
849		if (nandfsdev->nd_devvp == devvp)
850			break;
851
852	if (nandfsdev) {
853		DPRINTF(VOLUMES, ("device already mounted\n"));
854		error = nandfs_check_mounts(nandfsdev, mp, args);
855		if (error)
856			return error;
857		nandfsdev->nd_refcnt++;
858		*nandfsdev_p = nandfsdev;
859
860		if (!ronly) {
861			DROP_GIANT();
862			g_topology_lock();
863			error = g_access(nandfsdev->nd_gconsumer, 0, 1, 0);
864			g_topology_unlock();
865			PICKUP_GIANT();
866		}
867		return (error);
868	}
869
870	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
871	dev = devvp->v_rdev;
872	dev_ref(dev);
873	DROP_GIANT();
874	g_topology_lock();
875	error = g_vfs_open(devvp, &cp, "nandfs", ronly ? 0 : 1);
876	pp = g_dev_getprovider(dev);
877	g_topology_unlock();
878	PICKUP_GIANT();
879	VOP_UNLOCK(devvp, 0);
880	if (error) {
881		dev_rel(dev);
882		return (error);
883	}
884
885	nandfsdev = malloc(sizeof(struct nandfs_device), M_NANDFSMNT, M_WAITOK | M_ZERO);
886
887	/* Initialise */
888	nandfsdev->nd_refcnt = 1;
889	nandfsdev->nd_devvp = devvp;
890	nandfsdev->nd_syncing = 0;
891	nandfsdev->nd_cleaning = 0;
892	nandfsdev->nd_gconsumer = cp;
893	cv_init(&nandfsdev->nd_sync_cv, "nandfssync");
894	mtx_init(&nandfsdev->nd_sync_mtx, "nffssyncmtx", NULL, MTX_DEF);
895	cv_init(&nandfsdev->nd_clean_cv, "nandfsclean");
896	mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF);
897	mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF);
898	lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT,
899	    LK_CANRECURSE);
900	STAILQ_INIT(&nandfsdev->nd_mounts);
901
902	nandfsdev->nd_devsize = pp->mediasize;
903	nandfsdev->nd_devblocksize = pp->sectorsize;
904
905	size = sizeof(erasesize);
906	error = g_io_getattr("NAND::blocksize", nandfsdev->nd_gconsumer, &size,
907	    &erasesize);
908	if (error) {
909		DPRINTF(VOLUMES, ("couldn't get erasesize: %d\n", error));
910
911		if (error == ENOIOCTL || error == EOPNOTSUPP) {
912			/*
913			 * We conclude that this is not NAND storage
914			 */
915			erasesize = NANDFS_DEF_ERASESIZE;
916		} else {
917			DROP_GIANT();
918			g_topology_lock();
919			g_vfs_close(nandfsdev->nd_gconsumer);
920			g_topology_unlock();
921			PICKUP_GIANT();
922			dev_rel(dev);
923			free(nandfsdev, M_NANDFSMNT);
924			return (error);
925		}
926	}
927	nandfsdev->nd_erasesize = erasesize;
928
929	DPRINTF(VOLUMES, ("%s: erasesize %x\n", __func__,
930	    nandfsdev->nd_erasesize));
931
932	/* Register nandfs_device in list */
933	SLIST_INSERT_HEAD(&nandfs_devices, nandfsdev, nd_next_device);
934
935	error = nandfs_mount_base(nandfsdev, mp, args);
936	if (error) {
937		/* Remove all our information */
938		nandfs_unmount_device(nandfsdev);
939		return (EINVAL);
940	}
941
942	nandfsdev->nd_maxfilesize = nandfs_get_maxfilesize(nandfsdev);
943
944	*nandfsdev_p = nandfsdev;
945	DPRINTF(VOLUMES, ("NANDFS device mounted ok\n"));
946
947	return (0);
948}
949
950static int
951nandfs_mount_checkpoint(struct nandfsmount *nmp)
952{
953	struct nandfs_cpfile_header *cphdr;
954	struct nandfs_checkpoint *cp;
955	struct nandfs_inode ifile_inode;
956	struct nandfs_node *cp_node;
957	struct buf *bp;
958	uint64_t ncp, nsn, cpno, fcpno, blocknr, last_cno;
959	uint32_t off, dlen;
960	int cp_per_block, error;
961
962	cpno = nmp->nm_mount_args.cpno;
963	if (cpno == 0)
964		cpno = nmp->nm_nandfsdev->nd_super.s_last_cno;
965
966	DPRINTF(VOLUMES, ("%s: trying to mount checkpoint number %"PRIu64"\n",
967	    __func__, cpno));
968
969	cp_node = nmp->nm_nandfsdev->nd_cp_node;
970
971	VOP_LOCK(NTOV(cp_node), LK_SHARED);
972	/* Get cpfile header from 1st block of cp file */
973	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
974	if (error) {
975		brelse(bp);
976		VOP_UNLOCK(NTOV(cp_node), 0);
977		return (error);
978	}
979
980	cphdr = (struct nandfs_cpfile_header *) bp->b_data;
981	ncp = cphdr->ch_ncheckpoints;
982	nsn = cphdr->ch_nsnapshots;
983
984	brelse(bp);
985
986	DPRINTF(VOLUMES, ("mount_nandfs: checkpoint header read in\n"));
987	DPRINTF(VOLUMES, ("\tNumber of checkpoints %"PRIu64"\n", ncp));
988	DPRINTF(VOLUMES, ("\tNumber of snapshots %"PRIu64"\n", nsn));
989
990	/* Read in our specified checkpoint */
991	dlen = nmp->nm_nandfsdev->nd_fsdata.f_checkpoint_size;
992	cp_per_block = nmp->nm_nandfsdev->nd_blocksize / dlen;
993
994	fcpno = cpno + NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1;
995	blocknr = fcpno / cp_per_block;
996	off = (fcpno % cp_per_block) * dlen;
997	error = nandfs_bread(cp_node, blocknr, NOCRED, 0, &bp);
998	if (error) {
999		brelse(bp);
1000		VOP_UNLOCK(NTOV(cp_node), 0);
1001		printf("mount_nandfs: couldn't read cp block %"PRIu64"\n",
1002		    fcpno);
1003		return (EINVAL);
1004	}
1005
1006	/* Needs to be a valid checkpoint */
1007	cp = (struct nandfs_checkpoint *) ((uint8_t *) bp->b_data + off);
1008	if (cp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
1009		printf("mount_nandfs: checkpoint marked invalid\n");
1010		brelse(bp);
1011		VOP_UNLOCK(NTOV(cp_node), 0);
1012		return (EINVAL);
1013	}
1014
1015	/* Is this really the checkpoint we want? */
1016	if (cp->cp_cno != cpno) {
1017		printf("mount_nandfs: checkpoint file corrupt? "
1018		    "expected cpno %"PRIu64", found cpno %"PRIu64"\n",
1019		    cpno, cp->cp_cno);
1020		brelse(bp);
1021		VOP_UNLOCK(NTOV(cp_node), 0);
1022		return (EINVAL);
1023	}
1024
1025	/* Check if it's a snapshot ! */
1026	last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
1027	if (cpno != last_cno) {
1028		/* Only allow snapshots if not mounting on the last cp */
1029		if ((cp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) == 0) {
1030			printf( "mount_nandfs: checkpoint %"PRIu64" is not a "
1031			    "snapshot\n", cpno);
1032			brelse(bp);
1033			VOP_UNLOCK(NTOV(cp_node), 0);
1034			return (EINVAL);
1035		}
1036	}
1037
1038	ifile_inode = cp->cp_ifile_inode;
1039	brelse(bp);
1040
1041	/* Get ifile inode */
1042	error = nandfs_get_node_raw(nmp->nm_nandfsdev, NULL, NANDFS_IFILE_INO,
1043	    &ifile_inode, &nmp->nm_ifile_node);
1044	if (error) {
1045		printf("mount_nandfs: can't read ifile node\n");
1046		VOP_UNLOCK(NTOV(cp_node), 0);
1047		return (EINVAL);
1048	}
1049
1050	NANDFS_SET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
1051	VOP_UNLOCK(NTOV(cp_node), 0);
1052	/* Get root node? */
1053
1054	return (0);
1055}
1056
1057static void
1058free_nandfs_mountinfo(struct mount *mp)
1059{
1060	struct nandfsmount *nmp = VFSTONANDFS(mp);
1061
1062	if (nmp == NULL)
1063		return;
1064
1065	free(nmp, M_NANDFSMNT);
1066}
1067
/*
 * Request a sync run and sleep until a run is reported finished.
 *
 * nffsdev: device whose nd_sync_mtx / nd_sync_cv / nd_syncing state is used.
 * reason:  SYNCER_* code.  It indexes the debug name table below, and
 *          SYNCER_UMOUNT additionally sets nd_syncer_exit.
 *
 * The caller sleeps on nd_sync_cv; the matching cv_broadcast() is issued by
 * nandfs_gc_finished().  The wakeup() on &nd_syncing targets threads that
 * tsleep() on that channel (nandfs_syncer() and nandfs_gc_finished()).
 */
void
nandfs_wakeup_wait_sync(struct nandfs_device *nffsdev, int reason)
{
	/* Debug names for the reason codes, indexed directly by `reason'. */
	char *reasons[] = {
	    "umount",
	    "vfssync",
	    "bdflush",
	    "fforce",
	    "fsync",
	    "ro_upd"
	};

	/* NOTE(review): `reason' is not range-checked before indexing. */
	DPRINTF(SYNC, ("%s: %s\n", __func__, reasons[reason]));
	mtx_lock(&nffsdev->nd_sync_mtx);
	/*
	 * A run is already flagged as in progress: wait for its completion
	 * broadcast first.
	 * NOTE(review): this is an `if', not a loop, so nd_syncing is not
	 * re-checked after waking -- confirm that is intended when several
	 * threads wait here at once.
	 */
	if (nffsdev->nd_syncing)
		cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
	/* On unmount, ask the syncer loop to terminate. */
	if (reason == SYNCER_UMOUNT)
		nffsdev->nd_syncer_exit = 1;
	/* Flag a run as pending and kick any thread sleeping on nd_syncing. */
	nffsdev->nd_syncing = 1;
	wakeup(&nffsdev->nd_syncing);
	/* Sleep (dropping the mutex) until the next completion broadcast. */
	cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);

	mtx_unlock(&nffsdev->nd_sync_mtx);
}
1092
1093static void
1094nandfs_gc_finished(struct nandfs_device *nffsdev, int exit)
1095{
1096	int error;
1097
1098	mtx_lock(&nffsdev->nd_sync_mtx);
1099	nffsdev->nd_syncing = 0;
1100	DPRINTF(SYNC, ("%s: cleaner finish\n", __func__));
1101	cv_broadcast(&nffsdev->nd_sync_cv);
1102	mtx_unlock(&nffsdev->nd_sync_mtx);
1103	if (!exit) {
1104		error = tsleep(&nffsdev->nd_syncing, PRIBIO, "-",
1105		    hz * nandfs_sync_interval);
1106		DPRINTF(SYNC, ("%s: cleaner waked up: %d\n",
1107		    __func__, error));
1108	}
1109}
1110
1111static void
1112nandfs_syncer(struct nandfsmount *nmp)
1113{
1114	struct nandfs_device *nffsdev;
1115	struct mount *mp;
1116	int flags, error;
1117
1118	mp = nmp->nm_vfs_mountp;
1119	nffsdev = nmp->nm_nandfsdev;
1120	tsleep(&nffsdev->nd_syncing, PRIBIO, "-", hz * nandfs_sync_interval);
1121
1122	while (!nffsdev->nd_syncer_exit) {
1123		DPRINTF(SYNC, ("%s: syncer run\n", __func__));
1124		nffsdev->nd_syncing = 1;
1125
1126		flags = (nmp->nm_flags & (NANDFS_FORCE_SYNCER | NANDFS_UMOUNT));
1127
1128		error = nandfs_segment_constructor(nmp, flags);
1129		if (error)
1130			nandfs_error("%s: error:%d when creating segments\n",
1131			    __func__, error);
1132
1133		nmp->nm_flags &= ~flags;
1134
1135		nandfs_gc_finished(nffsdev, 0);
1136	}
1137
1138	MPASS(nffsdev->nd_cleaner == NULL);
1139	error = nandfs_segment_constructor(nmp,
1140	    NANDFS_FORCE_SYNCER | NANDFS_UMOUNT);
1141	if (error)
1142		nandfs_error("%s: error:%d when creating segments\n",
1143		    __func__, error);
1144	nandfs_gc_finished(nffsdev, 1);
1145	nffsdev->nd_syncer = NULL;
1146	MPASS(nffsdev->nd_free_base == NULL);
1147
1148	DPRINTF(SYNC, ("%s: exiting\n", __func__));
1149	kthread_exit();
1150}
1151
1152static int
1153start_syncer(struct nandfsmount *nmp)
1154{
1155	int error;
1156
1157	MPASS(nmp->nm_nandfsdev->nd_syncer == NULL);
1158
1159	DPRINTF(SYNC, ("%s: start syncer\n", __func__));
1160
1161	nmp->nm_nandfsdev->nd_syncer_exit = 0;
1162
1163	error = kthread_add((void(*)(void *))nandfs_syncer, nmp, NULL,
1164	    &nmp->nm_nandfsdev->nd_syncer, 0, 0, "nandfs_syncer");
1165
1166	if (error)
1167		printf("nandfs: could not start syncer: %d\n", error);
1168
1169	return (error);
1170}
1171
1172static int
1173stop_syncer(struct nandfsmount *nmp)
1174{
1175
1176	MPASS(nmp->nm_nandfsdev->nd_syncer != NULL);
1177
1178	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_UMOUNT);
1179
1180	DPRINTF(SYNC, ("%s: stop syncer\n", __func__));
1181	return (0);
1182}
1183
1184/*
1185 * Mount null layer
1186 */
1187static int
1188nandfs_mount(struct mount *mp)
1189{
1190	struct nandfsmount *nmp;
1191	struct vnode *devvp;
1192	struct nameidata nd;
1193	struct vfsoptlist *opts;
1194	struct thread *td;
1195	char *from;
1196	int error = 0, flags;
1197
1198	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
1199
1200	td = curthread;
1201	opts = mp->mnt_optnew;
1202
1203	if (vfs_filteropt(opts, nandfs_opts))
1204		return (EINVAL);
1205
1206	/*
1207	 * Update is a no-op
1208	 */
1209	if (mp->mnt_flag & MNT_UPDATE) {
1210		nmp = VFSTONANDFS(mp);
1211		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
1212			return (error);
1213		}
1214		if (!(nmp->nm_ronly) && vfs_flagopt(opts, "ro", NULL, 0)) {
1215			vn_start_write(NULL, &mp, V_WAIT);
1216			error = VFS_SYNC(mp, MNT_WAIT);
1217			if (error)
1218				return (error);
1219			vn_finished_write(mp);
1220
1221			flags = WRITECLOSE;
1222			if (mp->mnt_flag & MNT_FORCE)
1223				flags |= FORCECLOSE;
1224
1225			nandfs_wakeup_wait_sync(nmp->nm_nandfsdev,
1226			    SYNCER_ROUPD);
1227			error = vflush(mp, 0, flags, td);
1228			if (error)
1229				return (error);
1230
1231			nandfs_stop_cleaner(nmp->nm_nandfsdev);
1232			stop_syncer(nmp);
1233			DROP_GIANT();
1234			g_topology_lock();
1235			g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0);
1236			g_topology_unlock();
1237			PICKUP_GIANT();
1238			MNT_ILOCK(mp);
1239			mp->mnt_flag |= MNT_RDONLY;
1240			MNT_IUNLOCK(mp);
1241			nmp->nm_ronly = 1;
1242
1243		} else if ((nmp->nm_ronly) &&
1244		    !vfs_flagopt(opts, "ro", NULL, 0)) {
1245			/*
1246			 * Don't allow read-write snapshots.
1247			 */
1248			if (nmp->nm_mount_args.cpno != 0)
1249				return (EROFS);
1250			/*
1251			 * If upgrade to read-write by non-root, then verify
1252			 * that user has necessary permissions on the device.
1253			 */
1254			devvp = nmp->nm_nandfsdev->nd_devvp;
1255			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1256			error = VOP_ACCESS(devvp, VREAD | VWRITE,
1257			    td->td_ucred, td);
1258			if (error) {
1259				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
1260				if (error) {
1261					VOP_UNLOCK(devvp, 0);
1262					return (error);
1263				}
1264			}
1265
1266			VOP_UNLOCK(devvp, 0);
1267			DROP_GIANT();
1268			g_topology_lock();
1269			error = g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, 1,
1270			    0);
1271			g_topology_unlock();
1272			PICKUP_GIANT();
1273			if (error)
1274				return (error);
1275
1276			MNT_ILOCK(mp);
1277			mp->mnt_flag &= ~MNT_RDONLY;
1278			MNT_IUNLOCK(mp);
1279			error = start_syncer(nmp);
1280			if (error == 0)
1281				error = nandfs_start_cleaner(nmp->nm_nandfsdev);
1282			if (error) {
1283				DROP_GIANT();
1284				g_topology_lock();
1285				g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1,
1286				    0);
1287				g_topology_unlock();
1288				PICKUP_GIANT();
1289				return (error);
1290			}
1291
1292			nmp->nm_ronly = 0;
1293		}
1294		return (0);
1295	}
1296
1297	from = vfs_getopts(opts, "from", &error);
1298	if (error)
1299		return (error);
1300
1301	/*
1302	 * Find device node
1303	 */
1304	NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, from, curthread);
1305	error = namei(&nd);
1306	if (error)
1307		return (error);
1308	NDFREE(&nd, NDF_ONLY_PNBUF);
1309
1310	devvp = nd.ni_vp;
1311
1312	if (!vn_isdisk(devvp, &error)) {
1313		vput(devvp);
1314		return (error);
1315	}
1316
1317	/* Check the access rights on the mount device */
1318	error = VOP_ACCESS(devvp, VREAD, curthread->td_ucred, curthread);
1319	if (error)
1320		error = priv_check(curthread, PRIV_VFS_MOUNT_PERM);
1321	if (error) {
1322		vput(devvp);
1323		return (error);
1324	}
1325
1326	vfs_getnewfsid(mp);
1327
1328	error = nandfs_mountfs(devvp, mp);
1329	if (error)
1330		return (error);
1331	vfs_mountedfrom(mp, from);
1332
1333	return (0);
1334}
1335
1336static int
1337nandfs_mountfs(struct vnode *devvp, struct mount *mp)
1338{
1339	struct nandfsmount *nmp = NULL;
1340	struct nandfs_args *args = NULL;
1341	struct nandfs_device *nandfsdev;
1342	char *from;
1343	int error, ronly;
1344	char *cpno;
1345
1346	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
1347
1348	if (devvp->v_rdev->si_iosize_max != 0)
1349		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
1350	VOP_UNLOCK(devvp, 0);
1351
1352	if (mp->mnt_iosize_max > MAXPHYS)
1353		mp->mnt_iosize_max = MAXPHYS;
1354
1355	from = vfs_getopts(mp->mnt_optnew, "from", &error);
1356	if (error)
1357		goto error;
1358
1359	error = vfs_getopt(mp->mnt_optnew, "snap", (void **)&cpno, NULL);
1360	if (error == ENOENT)
1361		cpno = NULL;
1362	else if (error)
1363		goto error;
1364
1365	args = (struct nandfs_args *)malloc(sizeof(struct nandfs_args),
1366	    M_NANDFSMNT, M_WAITOK | M_ZERO);
1367
1368	if (cpno != NULL)
1369		args->cpno = strtoul(cpno, (char **)NULL, 10);
1370	else
1371		args->cpno = 0;
1372	args->fspec = from;
1373
1374	if (args->cpno != 0 && !ronly) {
1375		error = EROFS;
1376		goto error;
1377	}
1378
1379	printf("WARNING: NANDFS is considered to be a highly experimental "
1380	    "feature in FreeBSD.\n");
1381
1382	error = nandfs_mount_device(devvp, mp, args, &nandfsdev);
1383	if (error)
1384		goto error;
1385
1386	nmp = (struct nandfsmount *) malloc(sizeof(struct nandfsmount),
1387	    M_NANDFSMNT, M_WAITOK | M_ZERO);
1388
1389	mp->mnt_data = nmp;
1390	nmp->nm_vfs_mountp = mp;
1391	nmp->nm_ronly = ronly;
1392	MNT_ILOCK(mp);
1393	mp->mnt_flag |= MNT_LOCAL;
1394	mp->mnt_kern_flag |= MNTK_USES_BCACHE;
1395	MNT_IUNLOCK(mp);
1396	nmp->nm_nandfsdev = nandfsdev;
1397	/* Add our mountpoint */
1398	STAILQ_INSERT_TAIL(&nandfsdev->nd_mounts, nmp, nm_next_mount);
1399
1400	if (args->cpno > nandfsdev->nd_last_cno) {
1401		printf("WARNING: supplied checkpoint number (%jd) is greater "
1402		    "than last known checkpoint on filesystem (%jd). Mounting"
1403		    " checkpoint %jd\n", (uintmax_t)args->cpno,
1404		    (uintmax_t)nandfsdev->nd_last_cno,
1405		    (uintmax_t)nandfsdev->nd_last_cno);
1406		args->cpno = nandfsdev->nd_last_cno;
1407	}
1408
1409	/* Setting up other parameters */
1410	nmp->nm_mount_args = *args;
1411	free(args, M_NANDFSMNT);
1412	error = nandfs_mount_checkpoint(nmp);
1413	if (error) {
1414		nandfs_unmount(mp, MNT_FORCE);
1415		goto unmounted;
1416	}
1417
1418	if (!ronly) {
1419		error = start_syncer(nmp);
1420		if (error == 0)
1421			error = nandfs_start_cleaner(nmp->nm_nandfsdev);
1422		if (error)
1423			nandfs_unmount(mp, MNT_FORCE);
1424	}
1425
1426	return (0);
1427
1428error:
1429	if (args != NULL)
1430		free(args, M_NANDFSMNT);
1431
1432	if (nmp != NULL) {
1433		free(nmp, M_NANDFSMNT);
1434		mp->mnt_data = NULL;
1435	}
1436unmounted:
1437	return (error);
1438}
1439
1440static int
1441nandfs_unmount(struct mount *mp, int mntflags)
1442{
1443	struct nandfs_device *nandfsdev;
1444	struct nandfsmount *nmp;
1445	int error;
1446	int flags = 0;
1447
1448	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
1449
1450	if (mntflags & MNT_FORCE)
1451		flags |= FORCECLOSE;
1452
1453	nmp = mp->mnt_data;
1454	nandfsdev = nmp->nm_nandfsdev;
1455
1456	error = vflush(mp, 0, flags | SKIPSYSTEM, curthread);
1457	if (error)
1458		return (error);
1459
1460	if (!(nmp->nm_ronly)) {
1461		nandfs_stop_cleaner(nandfsdev);
1462		stop_syncer(nmp);
1463	}
1464
1465	if (nmp->nm_ifile_node)
1466		NANDFS_UNSET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
1467
1468	/* Remove our mount point */
1469	STAILQ_REMOVE(&nandfsdev->nd_mounts, nmp, nandfsmount, nm_next_mount);
1470
1471	/* Unmount the device itself when we're the last one */
1472	nandfs_unmount_device(nandfsdev);
1473
1474	free_nandfs_mountinfo(mp);
1475
1476	/*
1477	 * Finally, throw away the null_mount structure
1478	 */
1479	mp->mnt_data = 0;
1480	MNT_ILOCK(mp);
1481	mp->mnt_flag &= ~MNT_LOCAL;
1482	MNT_IUNLOCK(mp);
1483
1484	return (0);
1485}
1486
1487static int
1488nandfs_statfs(struct mount *mp, struct statfs *sbp)
1489{
1490	struct nandfsmount *nmp;
1491	struct nandfs_device *nandfsdev;
1492	struct nandfs_fsdata *fsdata;
1493	struct nandfs_super_block *sb;
1494	struct nandfs_block_group_desc *groups;
1495	struct nandfs_node *ifile;
1496	struct nandfs_mdt *mdt;
1497	struct buf *bp;
1498	int i, error;
1499	uint32_t entries_per_group;
1500	uint64_t files = 0;
1501
1502	nmp = mp->mnt_data;
1503	nandfsdev = nmp->nm_nandfsdev;
1504	fsdata = &nandfsdev->nd_fsdata;
1505	sb = &nandfsdev->nd_super;
1506	ifile = nmp->nm_ifile_node;
1507	mdt = &nandfsdev->nd_ifile_mdt;
1508	entries_per_group = mdt->entries_per_group;
1509
1510	VOP_LOCK(NTOV(ifile), LK_SHARED);
1511	error = nandfs_bread(ifile, 0, NOCRED, 0, &bp);
1512	if (error) {
1513		brelse(bp);
1514		VOP_UNLOCK(NTOV(ifile), 0);
1515		return (error);
1516	}
1517
1518	groups = (struct nandfs_block_group_desc *)bp->b_data;
1519
1520	for (i = 0; i < mdt->groups_per_desc_block; i++)
1521		files += (entries_per_group - groups[i].bg_nfrees);
1522
1523	brelse(bp);
1524	VOP_UNLOCK(NTOV(ifile), 0);
1525
1526	sbp->f_bsize = nandfsdev->nd_blocksize;
1527	sbp->f_iosize = sbp->f_bsize;
1528	sbp->f_blocks = fsdata->f_blocks_per_segment * fsdata->f_nsegments;
1529	sbp->f_bfree = sb->s_free_blocks_count;
1530	sbp->f_bavail = sbp->f_bfree;
1531	sbp->f_files = files;
1532	sbp->f_ffree = 0;
1533	return (0);
1534}
1535
1536static int
1537nandfs_root(struct mount *mp, int flags, struct vnode **vpp)
1538{
1539	struct nandfsmount *nmp = VFSTONANDFS(mp);
1540	struct nandfs_node *node;
1541	int error;
1542
1543	error = nandfs_get_node(nmp, NANDFS_ROOT_INO, &node);
1544	if (error)
1545		return (error);
1546
1547	KASSERT(NTOV(node)->v_vflag & VV_ROOT,
1548	    ("root_vp->v_vflag & VV_ROOT"));
1549
1550	*vpp = NTOV(node);
1551
1552	return (error);
1553}
1554
1555static int
1556nandfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1557{
1558	struct nandfsmount *nmp = VFSTONANDFS(mp);
1559	struct nandfs_node *node;
1560	int error;
1561
1562	error = nandfs_get_node(nmp, ino, &node);
1563	if (node)
1564		*vpp = NTOV(node);
1565
1566	return (error);
1567}
1568
1569static int
1570nandfs_sync(struct mount *mp, int waitfor)
1571{
1572	struct nandfsmount *nmp = VFSTONANDFS(mp);
1573
1574	DPRINTF(SYNC, ("%s: mp %p waitfor %d\n", __func__, mp, waitfor));
1575
1576	/*
1577	 * XXX: A hack to be removed soon
1578	 */
1579	if (waitfor == MNT_LAZY)
1580		return (0);
1581	if (waitfor == MNT_SUSPEND)
1582		return (0);
1583	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_VFS_SYNC);
1584	return (0);
1585}
1586
/*
 * VFS operations vector for nandfs: maps each vfs_* hook to the
 * nandfs_* function that implements it.
 */
static struct vfsops nandfs_vfsops = {
	.vfs_init =		nandfs_init,
	.vfs_mount =		nandfs_mount,
	.vfs_root =		nandfs_root,
	.vfs_statfs =		nandfs_statfs,
	.vfs_uninit =		nandfs_uninit,
	.vfs_unmount =		nandfs_unmount,
	.vfs_vget =		nandfs_vget,
	.vfs_sync =		nandfs_sync,
};

/*
 * Declare the file system type "nandfs" with the vector above.
 * NOTE(review): VFCF_LOOPBACK is passed as the type flag; confirm this is
 * the intended flag for a file system mounted from a disk device.
 */
VFS_SET(nandfs_vfsops, nandfs, VFCF_LOOPBACK);
1599