1/*-
2 * Copyright (c) 2012 Semihalf
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD$");
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/namei.h>
32#include <sys/kernel.h>
33#include <sys/stat.h>
34#include <sys/buf.h>
35#include <sys/bio.h>
36#include <sys/proc.h>
37#include <sys/mount.h>
38#include <sys/vnode.h>
39#include <sys/signalvar.h>
40#include <sys/malloc.h>
41#include <sys/dirent.h>
42#include <sys/lockf.h>
43#include <sys/ktr.h>
44#include <sys/kdb.h>
45
46#include <vm/vm.h>
47#include <vm/vm_extern.h>
48#include <vm/vm_object.h>
49#include <vm/vnode_pager.h>
50
51#include <machine/_inttypes.h>
52
53#include <vm/vm.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_object.h>
56#include <vm/vnode_pager.h>
57
58#include "nandfs_mount.h"
59#include "nandfs.h"
60#include "nandfs_subr.h"
61#include "bmap.h"
62
/*
 * Forward declaration: bmap_getlbns() is defined at the end of this file
 * but is used by the mapping routines above it.
 */
static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t,
    struct nandfs_indir *, int *);
65
66int
67bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk)
68{
69	struct nandfs_inode *ip;
70	struct nandfs_indir a[NIADDR + 1], *ap;
71	nandfs_daddr_t daddr;
72	struct buf *bp;
73	int error;
74	int num, *nump;
75
76	DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk));
77	ip = &node->nn_inode;
78
79	ap = a;
80	nump = &num;
81
82	error = bmap_getlbns(node, lblk, ap, nump);
83	if (error)
84		return (error);
85
86	if (num == 0) {
87		*vblk = ip->i_db[lblk];
88		return (0);
89	}
90
91	DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__,
92	    node, lblk, ap->in_off));
93	daddr = ip->i_ib[ap->in_off];
94	for (bp = NULL, ++ap; --num; ap++) {
95		if (daddr == 0) {
96			DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with "
97			    "vblk 0\n", __func__, node, lblk));
98			*vblk = 0;
99			return (0);
100		}
101		if (ap->in_lbn == lblk) {
102			DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx "
103			    "returning address of indirect block (%jx)\n",
104			    __func__, node, lblk, ap->in_lbn, daddr));
105			*vblk = daddr;
106			return (0);
107		}
108
109		DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block "
110		    "ap->in_lbn=%jx\n", __func__, node, lblk, ap->in_lbn));
111
112		error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp);
113		if (error) {
114			brelse(bp);
115			return (error);
116		}
117
118		daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off];
119		brelse(bp);
120	}
121
122	DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__,
123	    node, lblk, daddr));
124	*vblk = daddr;
125
126	return (0);
127}
128
129int
130bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force)
131{
132	struct nandfs_indir a[NIADDR+1], *ap;
133#ifdef DEBUG
134	nandfs_daddr_t daddr;
135#endif
136	struct buf *bp;
137	int error;
138	int num, *nump;
139
140	DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk));
141
142	ap = a;
143	nump = &num;
144
145	error = bmap_getlbns(node, lblk, ap, nump);
146	if (error)
147		return (error);
148
149	/*
150	 * Direct block, nothing to do
151	 */
152	if (num == 0)
153		return (0);
154
155	DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node));
156
157	for (bp = NULL, ++ap; --num; ap++) {
158		error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp);
159		if (error) {
160			brelse(bp);
161			return (error);
162		}
163
164#ifdef DEBUG
165		daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off];
166		MPASS(daddr != 0 || node->nn_ino == 3);
167#endif
168
169		error = nandfs_dirty_buf_meta(bp, force);
170		if (error)
171			return (error);
172	}
173
174	return (0);
175}
176
177int
178bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk,
179    nandfs_daddr_t vblk)
180{
181	struct nandfs_inode *ip;
182	struct nandfs_indir a[NIADDR+1], *ap;
183	struct buf *bp;
184	nandfs_daddr_t daddr;
185	int error;
186	int num, *nump, i;
187
188	DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk,
189	    vblk));
190
191	ip = &node->nn_inode;
192
193	ap = a;
194	nump = &num;
195
196	error = bmap_getlbns(node, lblk, ap, nump);
197	if (error)
198		return (error);
199
200	DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__,
201	    node, lblk, vblk, num));
202
203	if (num == 0) {
204		DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__,
205		    node, lblk));
206		ip->i_db[lblk] = vblk;
207		return (0);
208	}
209
210	DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block level %d\n",
211	    __func__, node, lblk, ap->in_off));
212
213	if (num == 1) {
214		DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting "
215		    "%jx as vblk for indirect block %d\n", __func__, node,
216		    lblk, vblk, ap->in_off));
217		ip->i_ib[ap->in_off] = vblk;
218		return (0);
219	}
220
221	bp = NULL;
222	daddr = ip->i_ib[a[0].in_off];
223	for (i = 1; i < num; i++) {
224		if (bp)
225			brelse(bp);
226		if (daddr == 0) {
227			DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create "
228			    "block %jx %d\n", __func__, node, lblk, vblk,
229			    a[i].in_lbn, a[i].in_off));
230			error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED,
231			    0, &bp);
232			if (error)
233				return (error);
234		} else {
235			DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read "
236			    "block %jx %d\n", __func__, node, daddr, vblk,
237			    a[i].in_lbn, a[i].in_off));
238			error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0, &bp);
239			if (error) {
240				brelse(bp);
241				return (error);
242			}
243		}
244		daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off];
245	}
246	i--;
247
248	DPRINTF(BMAP,
249	    ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at "
250	    "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off,
251	    daddr));
252
253	if (!bp) {
254		nandfs_error("%s: cannot find indirect block\n", __func__);
255		return (-1);
256	}
257	((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk;
258
259	error = nandfs_dirty_buf_meta(bp, 0);
260	if (error) {
261		nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp);
262		return (error);
263	}
264	DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__,
265	    node, lblk, vblk));
266
267	return (error);
268}
269
/*
 * Only three indirection levels are named below, so refuse to build if
 * NIADDR is larger than that.
 */
CTASSERT(NIADDR <= 3);
#define SINGLE	0	/* index of single indirect block */
#define DOUBLE	1	/* index of double indirect block */
#define TRIPLE	2	/* index of triple indirect block */
274
275static __inline nandfs_lbn_t
276lbn_offset(struct nandfs_device *fsdev, int level)
277{
278	nandfs_lbn_t res;
279
280	for (res = 1; level > 0; level--)
281		res *= MNINDIR(fsdev);
282	return (res);
283}
284
285static nandfs_lbn_t
286blocks_inside(struct nandfs_device *fsdev, int level, struct nandfs_indir *nip)
287{
288	nandfs_lbn_t blocks;
289
290	for (blocks = 1; level >= SINGLE; level--, nip++) {
291		MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev));
292		blocks += nip->in_off * lbn_offset(fsdev, level);
293	}
294
295	return (blocks);
296}
297
298static int
299bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left,
300    int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp,
301    nandfs_daddr_t *copy)
302{
303	struct buf *bp;
304	nandfs_lbn_t i, lbn, nlbn, factor, tosub;
305	struct nandfs_device *fsdev;
306	int error, lcleaned, modified;
307
308	DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__,
309	    node, level, *left));
310
311	fsdev = node->nn_nandfsdev;
312
313	MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev));
314
315	factor = lbn_offset(fsdev, level);
316	lbn = ap->in_lbn;
317
318	error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
319	if (error) {
320		brelse(bp);
321		return (error);
322	}
323
324	bcopy(bp->b_data, copy, fsdev->nd_blocksize);
325	bqrelse(bp);
326
327	modified = 0;
328
329	i = ap->in_off;
330
331	if (ap != fp)
332		ap++;
333	for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--,
334	    nlbn += factor) {
335		lcleaned = 0;
336
337		DPRINTF(BMAP,
338		    ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n",
339		    __func__, node, i, nlbn, *left, ap, copy[i]));
340
341		if (copy[i] == 0) {
342			tosub = blocks_inside(fsdev, level - 1, ap);
343			if (tosub > *left)
344				tosub = 0;
345
346			*left -= tosub;
347		} else {
348			if (level > SINGLE) {
349				if (ap == fp)
350					ap->in_lbn = nlbn;
351
352				error = bmap_truncate_indirect(node, level - 1,
353				    left, &lcleaned, ap, fp,
354				    copy + MNINDIR(fsdev));
355				if (error)
356					return (error);
357			} else {
358				error = nandfs_bdestroy(node, copy[i]);
359				if (error)
360					return (error);
361				lcleaned = 1;
362				*left -= 1;
363			}
364		}
365
366		if (lcleaned) {
367			if (level > SINGLE) {
368				error = nandfs_vblock_end(fsdev, copy[i]);
369				if (error)
370					return (error);
371			}
372			copy[i] = 0;
373			modified++;
374		}
375
376		ap = fp;
377	}
378
379	if (i == -1)
380		*cleaned = 1;
381
382	error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
383	if (error) {
384		brelse(bp);
385		return (error);
386	}
387	if (modified)
388		bcopy(copy, bp->b_data, fsdev->nd_blocksize);
389
390	error = nandfs_dirty_buf_meta(bp, 0);
391	if (error)
392		return (error);
393
394	return (error);
395}
396
397int
398bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk,
399    nandfs_lbn_t todo)
400{
401	struct nandfs_inode *ip;
402	struct nandfs_indir a[NIADDR + 1], f[NIADDR], *ap;
403	nandfs_daddr_t indir_lbn[NIADDR];
404	nandfs_daddr_t *copy;
405	int error, level;
406	nandfs_lbn_t left, tosub;
407	struct nandfs_device *fsdev;
408	int cleaned, i;
409	int num, *nump;
410
411	DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__,
412	    node, lastblk, todo));
413
414	ip = &node->nn_inode;
415	fsdev = node->nn_nandfsdev;
416
417	ap = a;
418	nump = &num;
419
420	error = bmap_getlbns(node, lastblk, ap, nump);
421	if (error)
422		return (error);
423
424	indir_lbn[SINGLE] = -NDADDR;
425	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1;
426	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev)
427	    * MNINDIR(fsdev) - 1;
428
429	for (i = 0; i < NIADDR; i++) {
430		f[i].in_off = MNINDIR(fsdev) - 1;
431		f[i].in_lbn = 0xdeadbeef;
432	}
433
434	left = todo;
435
436#ifdef DEBUG
437	a[num].in_off = -1;
438#endif
439
440	ap++;
441	num -= 2;
442
443	if (num < 0)
444		goto direct;
445
446	copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1),
447	    M_NANDFSTEMP, M_WAITOK);
448
449	for (level = num; level >= SINGLE && left > 0; level--) {
450		cleaned = 0;
451
452		if (ip->i_ib[level] == 0) {
453			tosub = blocks_inside(fsdev, level, ap);
454			if (tosub > left)
455				left = 0;
456			else
457				left -= tosub;
458		} else {
459			if (ap == f)
460				ap->in_lbn = indir_lbn[level];
461			error = bmap_truncate_indirect(node, level, &left,
462			    &cleaned, ap, f, copy);
463			if (error) {
464				nandfs_error("%s: error %d when truncate "
465				    "at level %d\n", __func__, error, level);
466				return (error);
467			}
468		}
469
470		if (cleaned) {
471			nandfs_vblock_end(fsdev, ip->i_ib[level]);
472			ip->i_ib[level] = 0;
473		}
474
475		ap = f;
476	}
477
478	free(copy, M_NANDFSTEMP);
479
480direct:
481	if (num < 0)
482		i = lastblk;
483	else
484		i = NDADDR - 1;
485
486	for (; i >= 0 && left > 0; i--) {
487		if (ip->i_db[i] != 0) {
488			error = nandfs_bdestroy(node, ip->i_db[i]);
489			if (error) {
490				nandfs_error("%s: cannot destroy "
491				    "block %jx, error %d\n", __func__,
492				    (uintmax_t)ip->i_db[i], error);
493				return (error);
494			}
495			ip->i_db[i] = 0;
496		}
497
498		left--;
499	}
500
501	KASSERT(left == 0,
502	    ("truncated wrong number of blocks (%jd should be 0)", left));
503
504	return (error);
505}
506
507nandfs_lbn_t
508get_maxfilesize(struct nandfs_device *fsdev)
509{
510	struct nandfs_indir f[NIADDR];
511	nandfs_lbn_t max;
512	int i;
513
514	max = NDADDR;
515
516	for (i = 0; i < NIADDR; i++) {
517		f[i].in_off = MNINDIR(fsdev) - 1;
518		max += blocks_inside(fsdev, i, f);
519	}
520
521	max *= fsdev->nd_blocksize;
522
523	return (max);
524}
525
526/*
527 * This is ufs_getlbns with minor modifications.
528 */
529/*
530 * Create an array of logical block number/offset pairs which represent the
531 * path of indirect blocks required to access a data block.  The first "pair"
532 * contains the logical block number of the appropriate single, double or
533 * triple indirect block and the offset into the inode indirect block array.
534 * Note, the logical block number of the inode single/double/triple indirect
535 * block appears twice in the array, once with the offset into the i_ib and
536 * once with the offset into the page itself.
537 */
538static int
539bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn, struct nandfs_indir *ap, int *nump)
540{
541	nandfs_daddr_t blockcnt;
542	nandfs_lbn_t metalbn, realbn;
543	struct nandfs_device *fsdev;
544	int i, numlevels, off;
545
546	fsdev = node->nn_nandfsdev;
547
548	DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__,
549	    node, bn, MNINDIR(fsdev)));
550
551	if (nump)
552		*nump = 0;
553	numlevels = 0;
554	realbn = bn;
555
556	if (bn < 0)
557		bn = -bn;
558
559	/* The first NDADDR blocks are direct blocks. */
560	if (bn < NDADDR)
561		return (0);
562
563	/*
564	 * Determine the number of levels of indirection.  After this loop
565	 * is done, blockcnt indicates the number of data blocks possible
566	 * at the previous level of indirection, and NIADDR - i is the number
567	 * of levels of indirection needed to locate the requested block.
568	 */
569	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
570		DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__,
571		    blockcnt, i, bn));
572		if (i == 0)
573			return (EFBIG);
574		blockcnt *= MNINDIR(fsdev);
575		if (bn < blockcnt)
576			break;
577	}
578
579	/* Calculate the address of the first meta-block. */
580	if (realbn >= 0)
581		metalbn = -(realbn - bn + NIADDR - i);
582	else
583		metalbn = -(-realbn - bn + NIADDR - i);
584
585	/*
586	 * At each iteration, off is the offset into the bap array which is
587	 * an array of disk addresses at the current level of indirection.
588	 * The logical block number and the offset in that block are stored
589	 * into the argument array.
590	 */
591	ap->in_lbn = metalbn;
592	ap->in_off = off = NIADDR - i;
593
594	DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__,
595	    metalbn, off));
596
597	ap++;
598	for (++numlevels; i <= NIADDR; i++) {
599		/* If searching for a meta-data block, quit when found. */
600		if (metalbn == realbn)
601			break;
602
603		blockcnt /= MNINDIR(fsdev);
604		off = (bn / blockcnt) % MNINDIR(fsdev);
605
606		++numlevels;
607		ap->in_lbn = metalbn;
608		ap->in_off = off;
609
610		DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__,
611		    ap->in_lbn, ap->in_off));
612		++ap;
613
614		metalbn -= -1 + off * blockcnt;
615	}
616	if (nump)
617		*nump = numlevels;
618
619	DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels));
620
621	return (0);
622}
623