1/*
2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
23#include "meta_io.h"
24#include "quota.h"
25#include "rgrp.h"
26#include "trans.h"
27#include "dir.h"
28#include "util.h"
29#include "ops_address.h"
30
31/* This doesn't need to be that large as max 64 bit pointers in a 4k
32 * block is 512, so __u16 is fine for that. It saves stack space to
33 * keep it small.
34 */
35struct metapath {
36	__u16 mp_list[GFS2_MAX_META_HEIGHT];
37};
38
39typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
40			     struct buffer_head *bh, __be64 *top,
41			     __be64 *bottom, unsigned int height,
42			     void *data);
43
/*
 * State passed to do_strip() through recursive_scan():
 *   sm_first  - while non-zero, do_strip() skips the first pointer it is
 *               given and then clears the flag
 *   sm_height - do_strip() only acts on buffers at this height
 */
struct strip_mine {
	int sm_first;
	unsigned int sm_height;
};
48
49/**
50 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
51 * @ip: the inode
52 * @dibh: the dinode buffer
53 * @block: the block number that was allocated
54 * @private: any locked page held by the caller process
55 *
56 * Returns: errno
57 */
58
59static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
60			       u64 block, struct page *page)
61{
62	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
63	struct inode *inode = &ip->i_inode;
64	struct buffer_head *bh;
65	int release = 0;
66
67	if (!page || page->index) {
68		page = grab_cache_page(inode->i_mapping, 0);
69		if (!page)
70			return -ENOMEM;
71		release = 1;
72	}
73
74	if (!PageUptodate(page)) {
75		void *kaddr = kmap(page);
76
77		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
78		       ip->i_di.di_size);
79		memset(kaddr + ip->i_di.di_size, 0,
80		       PAGE_CACHE_SIZE - ip->i_di.di_size);
81		kunmap(page);
82
83		SetPageUptodate(page);
84	}
85
86	if (!page_has_buffers(page))
87		create_empty_buffers(page, 1 << inode->i_blkbits,
88				     (1 << BH_Uptodate));
89
90	bh = page_buffers(page);
91
92	if (!buffer_mapped(bh))
93		map_bh(bh, inode->i_sb, block);
94
95	set_buffer_uptodate(bh);
96	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
97		gfs2_trans_add_bh(ip->i_gl, bh, 0);
98	mark_buffer_dirty(bh);
99
100	if (release) {
101		unlock_page(page);
102		page_cache_release(page);
103	}
104
105	return 0;
106}
107
108/**
109 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
110 * @ip: The GFS2 inode to unstuff
111 * @unstuffer: the routine that handles unstuffing a non-zero length file
112 * @private: private data for the unstuffer
113 *
114 * This routine unstuffs a dinode and returns it to a "normal" state such
115 * that the height can be grown in the traditional way.
116 *
117 * Returns: errno
118 */
119
120int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
121{
122	struct buffer_head *bh, *dibh;
123	struct gfs2_dinode *di;
124	u64 block = 0;
125	int isdir = gfs2_is_dir(ip);
126	int error;
127
128	down_write(&ip->i_rw_mutex);
129
130	error = gfs2_meta_inode_buffer(ip, &dibh);
131	if (error)
132		goto out;
133
134	if (ip->i_di.di_size) {
135		/* Get a free block, fill it with the stuffed data,
136		   and write it out to disk */
137
138		if (isdir) {
139			block = gfs2_alloc_meta(ip);
140
141			error = gfs2_dir_get_new_buffer(ip, block, &bh);
142			if (error)
143				goto out_brelse;
144			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
145					      dibh, sizeof(struct gfs2_dinode));
146			brelse(bh);
147		} else {
148			block = gfs2_alloc_data(ip);
149
150			error = gfs2_unstuffer_page(ip, dibh, block, page);
151			if (error)
152				goto out_brelse;
153		}
154	}
155
156	/*  Set up the pointer to the new block  */
157
158	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
159	di = (struct gfs2_dinode *)dibh->b_data;
160	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
161
162	if (ip->i_di.di_size) {
163		*(__be64 *)(di + 1) = cpu_to_be64(block);
164		ip->i_di.di_blocks++;
165		gfs2_set_inode_blocks(&ip->i_inode);
166		di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
167	}
168
169	ip->i_di.di_height = 1;
170	di->di_height = cpu_to_be16(1);
171
172out_brelse:
173	brelse(dibh);
174out:
175	up_write(&ip->i_rw_mutex);
176	return error;
177}
178
179/**
180 * calc_tree_height - Calculate the height of a metadata tree
181 * @ip: The GFS2 inode
182 * @size: The proposed size of the file
183 *
184 * Work out how tall a metadata tree needs to be in order to accommodate a
185 * file of a particular size. If size is less than the current size of
186 * the inode, then the current size of the inode is used instead of the
187 * supplied one.
188 *
189 * Returns: the height the tree should be
190 */
191
192static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size)
193{
194	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
195	u64 *arr;
196	unsigned int max, height;
197
198	if (ip->i_di.di_size > size)
199		size = ip->i_di.di_size;
200
201	if (gfs2_is_dir(ip)) {
202		arr = sdp->sd_jheightsize;
203		max = sdp->sd_max_jheight;
204	} else {
205		arr = sdp->sd_heightsize;
206		max = sdp->sd_max_height;
207	}
208
209	for (height = 0; height < max; height++)
210		if (arr[height] >= size)
211			break;
212
213	return height;
214}
215
216/**
217 * build_height - Build a metadata tree of the requested height
218 * @ip: The GFS2 inode
219 * @height: The height to build to
220 *
221 *
222 * Returns: errno
223 */
224
225static int build_height(struct inode *inode, unsigned height)
226{
227	struct gfs2_inode *ip = GFS2_I(inode);
228	unsigned new_height = height - ip->i_di.di_height;
229	struct buffer_head *dibh;
230	struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
231	struct gfs2_dinode *di;
232	int error;
233	__be64 *bp;
234	u64 bn;
235	unsigned n;
236
237	if (height <= ip->i_di.di_height)
238		return 0;
239
240	error = gfs2_meta_inode_buffer(ip, &dibh);
241	if (error)
242		return error;
243
244	for(n = 0; n < new_height; n++) {
245		bn = gfs2_alloc_meta(ip);
246		blocks[n] = gfs2_meta_new(ip->i_gl, bn);
247		gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
248	}
249
250	n = 0;
251	bn = blocks[0]->b_blocknr;
252	if (new_height > 1) {
253		for(; n < new_height-1; n++) {
254			gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
255					  GFS2_FORMAT_IN);
256			gfs2_buffer_clear_tail(blocks[n],
257					       sizeof(struct gfs2_meta_header));
258			bp = (__be64 *)(blocks[n]->b_data +
259				     sizeof(struct gfs2_meta_header));
260			*bp = cpu_to_be64(blocks[n+1]->b_blocknr);
261			brelse(blocks[n]);
262			blocks[n] = NULL;
263		}
264	}
265	gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
266	gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
267			      dibh, sizeof(struct gfs2_dinode));
268	brelse(blocks[n]);
269	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
270	di = (struct gfs2_dinode *)dibh->b_data;
271	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
272	*(__be64 *)(di + 1) = cpu_to_be64(bn);
273	ip->i_di.di_height += new_height;
274	ip->i_di.di_blocks += new_height;
275	gfs2_set_inode_blocks(&ip->i_inode);
276	di->di_height = cpu_to_be16(ip->i_di.di_height);
277	di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
278	brelse(dibh);
279	return error;
280}
281
282/**
 * find_metapath - Find path through the metadata tree
 * @ip: The inode pointer
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 *
 *   This routine fills in a struct metapath structure that defines a path
 *   through the metadata of inode "ip" to get to block "block".
 *
 *   Example:
 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 *   filesystem with a blocksize of 4096.
 *
 *   For the block containing offset 101342453 in this height 3 file,
 *   find_metapath() would fill in the struct metapath structure with:
 *   mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
298 *
299 *   That means that in order to get to the block containing the byte at
300 *   offset 101342453, we would load the indirect block pointed to by pointer
301 *   0 in the dinode.  We would then load the indirect block pointed to by
302 *   pointer 48 in that indirect block.  We would then load the data block
303 *   pointed to by pointer 165 in that indirect block.
304 *
305 *             ----------------------------------------
306 *             | Dinode |                             |
307 *             |        |                            4|
308 *             |        |0 1 2 3 4 5                 9|
309 *             |        |                            6|
310 *             ----------------------------------------
311 *                       |
312 *                       |
313 *                       V
314 *             ----------------------------------------
315 *             | Indirect Block                       |
316 *             |                                     5|
317 *             |            4 4 4 4 4 5 5            1|
318 *             |0           5 6 7 8 9 0 1            2|
319 *             ----------------------------------------
320 *                                |
321 *                                |
322 *                                V
323 *             ----------------------------------------
324 *             | Indirect Block                       |
325 *             |                         1 1 1 1 1   5|
326 *             |                         6 6 6 6 6   1|
327 *             |0                        3 4 5 6 7   2|
328 *             ----------------------------------------
329 *                                           |
330 *                                           |
331 *                                           V
332 *             ----------------------------------------
333 *             | Data block containing offset         |
334 *             |            101342453                 |
335 *             |                                      |
336 *             |                                      |
337 *             ----------------------------------------
338 *
339 */
340
341static void find_metapath(struct gfs2_inode *ip, u64 block,
342			  struct metapath *mp)
343{
344	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
345	u64 b = block;
346	unsigned int i;
347
348	for (i = ip->i_di.di_height; i--;)
349		mp->mp_list[i] = do_div(b, sdp->sd_inptrs);
350
351}
352
353/**
354 * metapointer - Return pointer to start of metadata in a buffer
355 * @bh: The buffer
356 * @height: The metadata height (0 = dinode)
357 * @mp: The metapath
358 *
359 * Return a pointer to the block number of the next height of the metadata
360 * tree given a buffer containing the pointer to the current height of the
361 * metadata tree.
362 */
363
364static inline __be64 *metapointer(struct buffer_head *bh, int *boundary,
365			       unsigned int height, const struct metapath *mp)
366{
367	unsigned int head_size = (height > 0) ?
368		sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
369	__be64 *ptr;
370	*boundary = 0;
371	ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
372	if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size))
373		*boundary = 1;
374	return ptr;
375}
376
377/**
378 * lookup_block - Get the next metadata block in metadata tree
379 * @ip: The GFS2 inode
380 * @bh: Buffer containing the pointers to metadata blocks
381 * @height: The height of the tree (0 = dinode)
382 * @mp: The metapath
383 * @create: Non-zero if we may create a new meatdata block
384 * @new: Used to indicate if we did create a new metadata block
385 * @block: the returned disk block number
386 *
387 * Given a metatree, complete to a particular height, checks to see if the next
388 * height of the tree exists. If not the next height of the tree is created.
389 * The block number of the next height of the metadata tree is returned.
390 *
391 */
392
393static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
394			unsigned int height, struct metapath *mp, int create,
395			int *new, u64 *block)
396{
397	int boundary;
398	__be64 *ptr = metapointer(bh, &boundary, height, mp);
399
400	if (*ptr) {
401		*block = be64_to_cpu(*ptr);
402		return boundary;
403	}
404
405	*block = 0;
406
407	if (!create)
408		return 0;
409
410	if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip))
411		*block = gfs2_alloc_data(ip);
412	else
413		*block = gfs2_alloc_meta(ip);
414
415	gfs2_trans_add_bh(ip->i_gl, bh, 1);
416
417	*ptr = cpu_to_be64(*block);
418	ip->i_di.di_blocks++;
419	gfs2_set_inode_blocks(&ip->i_inode);
420
421	*new = 1;
422	return 0;
423}
424
425static inline void bmap_lock(struct inode *inode, int create)
426{
427	struct gfs2_inode *ip = GFS2_I(inode);
428	if (create)
429		down_write(&ip->i_rw_mutex);
430	else
431		down_read(&ip->i_rw_mutex);
432}
433
434static inline void bmap_unlock(struct inode *inode, int create)
435{
436	struct gfs2_inode *ip = GFS2_I(inode);
437	if (create)
438		up_write(&ip->i_rw_mutex);
439	else
440		up_read(&ip->i_rw_mutex);
441}
442
443/**
444 * gfs2_block_map - Map a block from an inode to a disk block
445 * @inode: The inode
446 * @lblock: The logical block number
447 * @bh_map: The bh to be mapped
448 *
449 * Find the block number on the current device which corresponds to an
450 * inode's block. If the block had to be created, "new" will be set.
451 *
452 * Returns: errno
453 */
454
455int gfs2_block_map(struct inode *inode, u64 lblock, int create,
456		   struct buffer_head *bh_map)
457{
458	struct gfs2_inode *ip = GFS2_I(inode);
459	struct gfs2_sbd *sdp = GFS2_SB(inode);
460	struct buffer_head *bh;
461	unsigned int bsize;
462	unsigned int height;
463	unsigned int end_of_metadata;
464	unsigned int x;
465	int error = 0;
466	int new = 0;
467	u64 dblock = 0;
468	int boundary;
469	unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
470	struct metapath mp;
471	u64 size;
472
473	BUG_ON(maxlen == 0);
474
475	if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
476		return 0;
477
478	bmap_lock(inode, create);
479	clear_buffer_mapped(bh_map);
480	clear_buffer_new(bh_map);
481	clear_buffer_boundary(bh_map);
482	bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
483	size = (lblock + 1) * bsize;
484
485	if (size > ip->i_di.di_size) {
486		height = calc_tree_height(ip, size);
487		if (ip->i_di.di_height < height) {
488			if (!create)
489				goto out_ok;
490
491			error = build_height(inode, height);
492			if (error)
493				goto out_fail;
494		}
495	}
496
497	find_metapath(ip, lblock, &mp);
498	end_of_metadata = ip->i_di.di_height - 1;
499	error = gfs2_meta_inode_buffer(ip, &bh);
500	if (error)
501		goto out_fail;
502
503	for (x = 0; x < end_of_metadata; x++) {
504		lookup_block(ip, bh, x, &mp, create, &new, &dblock);
505		brelse(bh);
506		if (!dblock)
507			goto out_ok;
508
509		error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
510		if (error)
511			goto out_fail;
512	}
513
514	boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock);
515	if (dblock) {
516		map_bh(bh_map, inode->i_sb, dblock);
517		if (boundary)
518			set_buffer_boundary(bh_map);
519		if (new) {
520			struct buffer_head *dibh;
521			error = gfs2_meta_inode_buffer(ip, &dibh);
522			if (!error) {
523				gfs2_trans_add_bh(ip->i_gl, dibh, 1);
524				gfs2_dinode_out(ip, dibh->b_data);
525				brelse(dibh);
526			}
527			set_buffer_new(bh_map);
528			goto out_brelse;
529		}
530		while(--maxlen && !buffer_boundary(bh_map)) {
531			u64 eblock;
532
533			mp.mp_list[end_of_metadata]++;
534			boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock);
535			if (eblock != ++dblock)
536				break;
537			bh_map->b_size += (1 << inode->i_blkbits);
538			if (boundary)
539				set_buffer_boundary(bh_map);
540		}
541	}
542out_brelse:
543	brelse(bh);
544out_ok:
545	error = 0;
546out_fail:
547	bmap_unlock(inode, create);
548	return error;
549}
550
551int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
552{
553	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
554	int ret;
555	int create = *new;
556
557	BUG_ON(!extlen);
558	BUG_ON(!dblock);
559	BUG_ON(!new);
560
561	bh.b_size = 1 << (inode->i_blkbits + 5);
562	ret = gfs2_block_map(inode, lblock, create, &bh);
563	*extlen = bh.b_size >> inode->i_blkbits;
564	*dblock = bh.b_blocknr;
565	if (buffer_new(&bh))
566		*new = 1;
567	else
568		*new = 0;
569	return ret;
570}
571
572/**
573 * recursive_scan - recursively scan through the end of a file
574 * @ip: the inode
575 * @dibh: the dinode buffer
576 * @mp: the path through the metadata to the point to start
577 * @height: the height the recursion is at
578 * @block: the indirect block to look at
579 * @first: 1 if this is the first block
580 * @bc: the call to make for each piece of metadata
581 * @data: data opaque to this function to pass to @bc
582 *
583 * When this is first called @height and @block should be zero and
584 * @first should be 1.
585 *
586 * Returns: errno
587 */
588
589static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
590			  struct metapath *mp, unsigned int height,
591			  u64 block, int first, block_call_t bc,
592			  void *data)
593{
594	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
595	struct buffer_head *bh = NULL;
596	__be64 *top, *bottom;
597	u64 bn;
598	int error;
599	int mh_size = sizeof(struct gfs2_meta_header);
600
601	if (!height) {
602		error = gfs2_meta_inode_buffer(ip, &bh);
603		if (error)
604			return error;
605		dibh = bh;
606
607		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
608		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
609	} else {
610		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
611		if (error)
612			return error;
613
614		top = (__be64 *)(bh->b_data + mh_size) +
615				  (first ? mp->mp_list[height] : 0);
616
617		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
618	}
619
620	error = bc(ip, dibh, bh, top, bottom, height, data);
621	if (error)
622		goto out;
623
624	if (height < ip->i_di.di_height - 1)
625		for (; top < bottom; top++, first = 0) {
626			if (!*top)
627				continue;
628
629			bn = be64_to_cpu(*top);
630
631			error = recursive_scan(ip, dibh, mp, height + 1, bn,
632					       first, bc, data);
633			if (error)
634				break;
635		}
636
637out:
638	brelse(bh);
639	return error;
640}
641
642/**
643 * do_strip - Look for a layer a particular layer of the file and strip it off
644 * @ip: the inode
645 * @dibh: the dinode buffer
646 * @bh: A buffer of pointers
647 * @top: The first pointer in the buffer
648 * @bottom: One more than the last pointer
649 * @height: the height this buffer is at
650 * @data: a pointer to a struct strip_mine
651 *
652 * Returns: errno
653 */
654
655static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
656		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
657		    unsigned int height, void *data)
658{
659	struct strip_mine *sm = data;
660	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
661	struct gfs2_rgrp_list rlist;
662	u64 bn, bstart;
663	u32 blen;
664	__be64 *p;
665	unsigned int rg_blocks = 0;
666	int metadata;
667	unsigned int revokes = 0;
668	int x;
669	int error;
670
671	if (!*top)
672		sm->sm_first = 0;
673
674	if (height != sm->sm_height)
675		return 0;
676
677	if (sm->sm_first) {
678		top++;
679		sm->sm_first = 0;
680	}
681
682	metadata = (height != ip->i_di.di_height - 1);
683	if (metadata)
684		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
685
686	error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
687	if (error)
688		return error;
689
690	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
691	bstart = 0;
692	blen = 0;
693
694	for (p = top; p < bottom; p++) {
695		if (!*p)
696			continue;
697
698		bn = be64_to_cpu(*p);
699
700		if (bstart + blen == bn)
701			blen++;
702		else {
703			if (bstart)
704				gfs2_rlist_add(sdp, &rlist, bstart);
705
706			bstart = bn;
707			blen = 1;
708		}
709	}
710
711	if (bstart)
712		gfs2_rlist_add(sdp, &rlist, bstart);
713	else
714		goto out; /* Nothing to do */
715
716	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
717
718	for (x = 0; x < rlist.rl_rgrps; x++) {
719		struct gfs2_rgrpd *rgd;
720		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
721		rg_blocks += rgd->rd_ri.ri_length;
722	}
723
724	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
725	if (error)
726		goto out_rlist;
727
728	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
729				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
730				 revokes);
731	if (error)
732		goto out_rg_gunlock;
733
734	down_write(&ip->i_rw_mutex);
735
736	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
737	gfs2_trans_add_bh(ip->i_gl, bh, 1);
738
739	bstart = 0;
740	blen = 0;
741
742	for (p = top; p < bottom; p++) {
743		if (!*p)
744			continue;
745
746		bn = be64_to_cpu(*p);
747
748		if (bstart + blen == bn)
749			blen++;
750		else {
751			if (bstart) {
752				if (metadata)
753					gfs2_free_meta(ip, bstart, blen);
754				else
755					gfs2_free_data(ip, bstart, blen);
756			}
757
758			bstart = bn;
759			blen = 1;
760		}
761
762		*p = 0;
763		if (!ip->i_di.di_blocks)
764			gfs2_consist_inode(ip);
765		ip->i_di.di_blocks--;
766		gfs2_set_inode_blocks(&ip->i_inode);
767	}
768	if (bstart) {
769		if (metadata)
770			gfs2_free_meta(ip, bstart, blen);
771		else
772			gfs2_free_data(ip, bstart, blen);
773	}
774
775	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
776
777	gfs2_dinode_out(ip, dibh->b_data);
778
779	up_write(&ip->i_rw_mutex);
780
781	gfs2_trans_end(sdp);
782
783out_rg_gunlock:
784	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
785out_rlist:
786	gfs2_rlist_free(&rlist);
787out:
788	gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
789	return error;
790}
791
792/**
793 * do_grow - Make a file look bigger than it is
794 * @ip: the inode
795 * @size: the size to set the file to
796 *
797 * Called with an exclusive lock on @ip.
798 *
799 * Returns: errno
800 */
801
802static int do_grow(struct gfs2_inode *ip, u64 size)
803{
804	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
805	struct gfs2_alloc *al;
806	struct buffer_head *dibh;
807	unsigned int h;
808	int error;
809
810	al = gfs2_alloc_get(ip);
811
812	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
813	if (error)
814		goto out;
815
816	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
817	if (error)
818		goto out_gunlock_q;
819
820	al->al_requested = sdp->sd_max_height + RES_DATA;
821
822	error = gfs2_inplace_reserve(ip);
823	if (error)
824		goto out_gunlock_q;
825
826	error = gfs2_trans_begin(sdp,
827			sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
828			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
829	if (error)
830		goto out_ipres;
831
832	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
833		if (gfs2_is_stuffed(ip)) {
834			error = gfs2_unstuff_dinode(ip, NULL);
835			if (error)
836				goto out_end_trans;
837		}
838
839		h = calc_tree_height(ip, size);
840		if (ip->i_di.di_height < h) {
841			down_write(&ip->i_rw_mutex);
842			error = build_height(&ip->i_inode, h);
843			up_write(&ip->i_rw_mutex);
844			if (error)
845				goto out_end_trans;
846		}
847	}
848
849	ip->i_di.di_size = size;
850	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
851
852	error = gfs2_meta_inode_buffer(ip, &dibh);
853	if (error)
854		goto out_end_trans;
855
856	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
857	gfs2_dinode_out(ip, dibh->b_data);
858	brelse(dibh);
859
860out_end_trans:
861	gfs2_trans_end(sdp);
862out_ipres:
863	gfs2_inplace_release(ip);
864out_gunlock_q:
865	gfs2_quota_unlock(ip);
866out:
867	gfs2_alloc_put(ip);
868	return error;
869}
870
871
872/**
873 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
874 *
875 * This is partly borrowed from ext3.
876 */
877static int gfs2_block_truncate_page(struct address_space *mapping)
878{
879	struct inode *inode = mapping->host;
880	struct gfs2_inode *ip = GFS2_I(inode);
881	struct gfs2_sbd *sdp = GFS2_SB(inode);
882	loff_t from = inode->i_size;
883	unsigned long index = from >> PAGE_CACHE_SHIFT;
884	unsigned offset = from & (PAGE_CACHE_SIZE-1);
885	unsigned blocksize, iblock, length, pos;
886	struct buffer_head *bh;
887	struct page *page;
888	void *kaddr;
889	int err;
890
891	page = grab_cache_page(mapping, index);
892	if (!page)
893		return 0;
894
895	blocksize = inode->i_sb->s_blocksize;
896	length = blocksize - (offset & (blocksize - 1));
897	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
898
899	if (!page_has_buffers(page))
900		create_empty_buffers(page, blocksize, 0);
901
902	/* Find the buffer that contains "offset" */
903	bh = page_buffers(page);
904	pos = blocksize;
905	while (offset >= pos) {
906		bh = bh->b_this_page;
907		iblock++;
908		pos += blocksize;
909	}
910
911	err = 0;
912
913	if (!buffer_mapped(bh)) {
914		gfs2_get_block(inode, iblock, bh, 0);
915		/* unmapped? It's a hole - nothing to do */
916		if (!buffer_mapped(bh))
917			goto unlock;
918	}
919
920	/* Ok, it's mapped. Make sure it's up-to-date */
921	if (PageUptodate(page))
922		set_buffer_uptodate(bh);
923
924	if (!buffer_uptodate(bh)) {
925		err = -EIO;
926		ll_rw_block(READ, 1, &bh);
927		wait_on_buffer(bh);
928		/* Uhhuh. Read error. Complain and punt. */
929		if (!buffer_uptodate(bh))
930			goto unlock;
931	}
932
933	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
934		gfs2_trans_add_bh(ip->i_gl, bh, 0);
935
936	kaddr = kmap_atomic(page, KM_USER0);
937	memset(kaddr + offset, 0, length);
938	flush_dcache_page(page);
939	kunmap_atomic(kaddr, KM_USER0);
940
941unlock:
942	unlock_page(page);
943	page_cache_release(page);
944	return err;
945}
946
947static int trunc_start(struct gfs2_inode *ip, u64 size)
948{
949	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
950	struct buffer_head *dibh;
951	int journaled = gfs2_is_jdata(ip);
952	int error;
953
954	error = gfs2_trans_begin(sdp,
955				 RES_DINODE + (journaled ? RES_JDATA : 0), 0);
956	if (error)
957		return error;
958
959	error = gfs2_meta_inode_buffer(ip, &dibh);
960	if (error)
961		goto out;
962
963	if (gfs2_is_stuffed(ip)) {
964		ip->i_di.di_size = size;
965		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
966		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
967		gfs2_dinode_out(ip, dibh->b_data);
968		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
969		error = 1;
970
971	} else {
972		if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
973			error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
974
975		if (!error) {
976			ip->i_di.di_size = size;
977			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
978			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
979			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
980			gfs2_dinode_out(ip, dibh->b_data);
981		}
982	}
983
984	brelse(dibh);
985
986out:
987	gfs2_trans_end(sdp);
988	return error;
989}
990
991static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
992{
993	unsigned int height = ip->i_di.di_height;
994	u64 lblock;
995	struct metapath mp;
996	int error;
997
998	if (!size)
999		lblock = 0;
1000	else
1001		lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift;
1002
1003	find_metapath(ip, lblock, &mp);
1004	gfs2_alloc_get(ip);
1005
1006	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1007	if (error)
1008		goto out;
1009
1010	while (height--) {
1011		struct strip_mine sm;
1012		sm.sm_first = !!size;
1013		sm.sm_height = height;
1014
1015		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
1016		if (error)
1017			break;
1018	}
1019
1020	gfs2_quota_unhold(ip);
1021
1022out:
1023	gfs2_alloc_put(ip);
1024	return error;
1025}
1026
1027static int trunc_end(struct gfs2_inode *ip)
1028{
1029	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1030	struct buffer_head *dibh;
1031	int error;
1032
1033	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1034	if (error)
1035		return error;
1036
1037	down_write(&ip->i_rw_mutex);
1038
1039	error = gfs2_meta_inode_buffer(ip, &dibh);
1040	if (error)
1041		goto out;
1042
1043	if (!ip->i_di.di_size) {
1044		ip->i_di.di_height = 0;
1045		ip->i_di.di_goal_meta =
1046			ip->i_di.di_goal_data =
1047			ip->i_num.no_addr;
1048		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1049	}
1050	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1051	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1052
1053	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1054	gfs2_dinode_out(ip, dibh->b_data);
1055	brelse(dibh);
1056
1057out:
1058	up_write(&ip->i_rw_mutex);
1059	gfs2_trans_end(sdp);
1060	return error;
1061}
1062
1063/**
1064 * do_shrink - make a file smaller
1065 * @ip: the inode
1066 * @size: the size to make the file
1067 * @truncator: function to truncate the last partial block
1068 *
1069 * Called with an exclusive lock on @ip.
1070 *
1071 * Returns: errno
1072 */
1073
1074static int do_shrink(struct gfs2_inode *ip, u64 size)
1075{
1076	int error;
1077
1078	error = trunc_start(ip, size);
1079	if (error < 0)
1080		return error;
1081	if (error > 0)
1082		return 0;
1083
1084	error = trunc_dealloc(ip, size);
1085	if (!error)
1086		error = trunc_end(ip);
1087
1088	return error;
1089}
1090
1091/**
1092 * gfs2_truncatei - make a file a given size
1093 * @ip: the inode
1094 * @size: the size to make the file
1095 * @truncator: function to truncate the last partial block
1096 *
1097 * The file size can grow, shrink, or stay the same size.
1098 *
1099 * Returns: errno
1100 */
1101
1102int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
1103{
1104	int error;
1105
1106	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
1107		return -EINVAL;
1108
1109	if (size > ip->i_di.di_size)
1110		error = do_grow(ip, size);
1111	else
1112		error = do_shrink(ip, size);
1113
1114	return error;
1115}
1116
1117int gfs2_truncatei_resume(struct gfs2_inode *ip)
1118{
1119	int error;
1120	error = trunc_dealloc(ip, ip->i_di.di_size);
1121	if (!error)
1122		error = trunc_end(ip);
1123	return error;
1124}
1125
1126int gfs2_file_dealloc(struct gfs2_inode *ip)
1127{
1128	return trunc_dealloc(ip, 0);
1129}
1130
1131/**
1132 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1133 * @ip: the file
1134 * @len: the number of bytes to be written to the file
1135 * @data_blocks: returns the number of data blocks required
1136 * @ind_blocks: returns the number of indirect blocks required
1137 *
1138 */
1139
1140void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1141			    unsigned int *data_blocks, unsigned int *ind_blocks)
1142{
1143	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1144	unsigned int tmp;
1145
1146	if (gfs2_is_dir(ip)) {
1147		*data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
1148		*ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1149	} else {
1150		*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1151		*ind_blocks = 3 * (sdp->sd_max_height - 1);
1152	}
1153
1154	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1155		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
1156		*ind_blocks += tmp;
1157	}
1158}
1159
1160/**
1161 * gfs2_write_alloc_required - figure out if a write will require an allocation
1162 * @ip: the file being written to
1163 * @offset: the offset to write to
1164 * @len: the number of bytes being written
1165 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1166 *
1167 * Returns: errno
1168 */
1169
1170int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1171			      unsigned int len, int *alloc_required)
1172{
1173	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1174	u64 lblock, lblock_stop, dblock;
1175	u32 extlen;
1176	int new = 0;
1177	int error = 0;
1178
1179	*alloc_required = 0;
1180
1181	if (!len)
1182		return 0;
1183
1184	if (gfs2_is_stuffed(ip)) {
1185		if (offset + len >
1186		    sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1187			*alloc_required = 1;
1188		return 0;
1189	}
1190
1191	if (gfs2_is_dir(ip)) {
1192		unsigned int bsize = sdp->sd_jbsize;
1193		lblock = offset;
1194		do_div(lblock, bsize);
1195		lblock_stop = offset + len + bsize - 1;
1196		do_div(lblock_stop, bsize);
1197	} else {
1198		unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1199		lblock = offset >> shift;
1200		lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1201	}
1202
1203	for (; lblock < lblock_stop; lblock += extlen) {
1204		error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
1205		if (error)
1206			return error;
1207
1208		if (!dblock) {
1209			*alloc_required = 1;
1210			return 0;
1211		}
1212	}
1213
1214	return 0;
1215}
1216