/*
 *  linux/fs/sysv/itree.c
 *
 *  Handling of indirect blocks' trees.
 *  AV, Sep--Dec 2000
 */

#include <linux/fs.h>
#include <linux/sysv_fs.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>

enum {DIRECT = 10, DEPTH = 4};	/* Have triple indirect */
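
/*
 * A sketch of the layout implied by the constants above: i_data[0..9]
 * hold the ten direct block pointers, i_data[10] the single-indirect
 * block, i_data[11] the double-indirect block and i_data[12] the
 * triple-indirect block, for a tree of depth at most DEPTH == 4.
 *
 * Worked example for block_to_path() below, assuming a 1024-byte
 * block size (256 pointers per indirect block): logical block 300 is
 * past the 10 direct slots and the 256 single-indirect slots, so with
 * block = 300 - 10 - 256 = 34 it maps to the double-indirect path
 * {DIRECT+1, 34 >> 8, 34 & 255} == {11, 0, 34}.
 */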

static inline void dirty_indirect(struct buffer_head *bh, struct inode *inode)
{
	mark_buffer_dirty_inode(bh, inode);
	if (IS_SYNC(inode)) {
		ll_rw_block(WRITE, 1, &bh);
		wait_on_buffer(bh);
	}
}

static int block_to_path(struct inode *inode, long block, int offsets[DEPTH])
{
	struct super_block *sb = inode->i_sb;
	int ptrs_bits = sb->sv_ind_per_block_bits;
	unsigned long	indirect_blocks = sb->sv_ind_per_block,
			double_blocks = sb->sv_ind_per_block_2;
	int n = 0;

	if (block < 0) {
		printk("sysv_block_map: block < 0\n");
	} else if (block < DIRECT) {
		offsets[n++] = block;
	} else if ((block -= DIRECT) < indirect_blocks) {
		offsets[n++] = DIRECT;
		offsets[n++] = block;
	} else if ((block -= indirect_blocks) < double_blocks) {
		offsets[n++] = DIRECT+1;
		offsets[n++] = block >> ptrs_bits;
		offsets[n++] = block & (indirect_blocks - 1);
	} else if (((block -= double_blocks) >> (ptrs_bits * 2)) < indirect_blocks) {
		offsets[n++] = DIRECT+2;
		offsets[n++] = block >> (ptrs_bits * 2);
		offsets[n++] = (block >> ptrs_bits) & (indirect_blocks - 1);
		offsets[n++] = block & (indirect_blocks - 1);
	} else {
		/* block is past the triple-indirect limit; leave n == 0 */;
	}
	return n;
}

static inline int block_to_cpu(struct super_block *sb, u32 nr)
{
	return sb->sv_block_base + fs32_to_cpu(sb, nr);
}

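/*
 * One link in a lookup chain: p points at the block-pointer slot (in
 * the inode's i_data or inside the indirect block held by bh) and key
 * caches the value read through it.  verify_chain() below re-checks
 * key against *p, so a truncate racing with the lookup is detected.
 */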
typedef struct {
	u32     *p;
	u32     key;
	struct buffer_head *bh;
} Indirect;

static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
{
	p->key = *(p->p = v);
	p->bh = bh;
}

static inline int verify_chain(Indirect *from, Indirect *to)
{
	while (from <= to && from->key == *from->p)
		from++;
	return (from > to);
}

static inline u32 *block_end(struct buffer_head *bh)
{
	return (u32*)((char*)bh->b_data + bh->b_size);
}

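/*
 * Walk the branch described by offsets[].  Returns NULL when every
 * link is present (chain[depth-1].key then holds the data block), or
 * a pointer to the first incomplete link otherwise; *err is 0 for a
 * simple hole, -EAGAIN when the chain changed under us, and -EIO when
 * an indirect block could not be read.
 */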
static Indirect *get_branch(struct inode *inode,
			    int depth,
			    int offsets[],
			    Indirect chain[],
			    int *err)
{
	struct super_block *sb = inode->i_sb;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	add_chain(chain, NULL, inode->u.sysv_i.i_data + *offsets);
	if (!p->key)
		goto no_block;
	while (--depth) {
		int block = block_to_cpu(sb, p->key);
		bh = sb_bread(sb, block);
		if (!bh)
			goto failure;
		if (!verify_chain(chain, p))
			goto changed;
		add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
		if (!p->key)
			goto no_block;
	}
	return NULL;

changed:
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	return p;
}

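/*
 * Allocate num new blocks for the missing tail of a branch and link
 * each one into its freshly zeroed parent on disk.  The topmost link
 * (branch[0]) is left unconnected on purpose: splice_branch() below
 * makes it visible, so a failure here can be rolled back without the
 * file ever seeing the partial branch.
 */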
static int alloc_branch(struct inode *inode,
			int num,
			int *offsets,
			Indirect *branch)
{
	int blocksize = inode->i_sb->s_blocksize;
	int n = 0;
	int i;

	branch[0].key = sysv_new_block(inode->i_sb);
	if (branch[0].key) for (n = 1; n < num; n++) {
		struct buffer_head *bh;
		int parent;
		/* Allocate the next block */
		branch[n].key = sysv_new_block(inode->i_sb);
		if (!branch[n].key)
			break;
		/*
		 * Get the buffer_head for the parent block, zero it out,
		 * set the pointer to the new block, then send the parent
		 * to disk.
		 */
		parent = block_to_cpu(inode->i_sb, branch[n-1].key);
		bh = sb_getblk(inode->i_sb, parent);
		lock_buffer(bh);
		memset(bh->b_data, 0, blocksize);
		branch[n].bh = bh;
		branch[n].p = (u32*) bh->b_data + offsets[n];
		*branch[n].p = branch[n].key;
		mark_buffer_uptodate(bh, 1);
		unlock_buffer(bh);
		dirty_indirect(bh, inode);
	}
	if (n == num)
		return 0;

	/* Allocation failed, free what we already allocated */
	for (i = 1; i < n; i++)
		bforget(branch[i].bh);
	for (i = 0; i < n; i++)
		sysv_free_block(inode->i_sb, branch[i].key);
	return -ENOSPC;
}

static inline int splice_branch(struct inode *inode,
				Indirect chain[],
				Indirect *where,
				int num)
{
	int i;

	/* Verify that the place we are splicing to is still there and vacant */
	if (!verify_chain(chain, where-1) || *where->p)
		goto changed;

	*where->p = where->key;
	inode->i_ctime = CURRENT_TIME;

	/* Had we spliced it onto an indirect block? */
	if (where->bh)
		dirty_indirect(where->bh, inode);

	if (IS_SYNC(inode))
		sysv_sync_inode(inode);
	else
		mark_inode_dirty(inode);
	return 0;

changed:
	for (i = 1; i < num; i++)
		bforget(where[i].bh);
	for (i = 0; i < num; i++)
		sysv_free_block(inode->i_sb, where[i].key);
	return -EAGAIN;
}

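/*
 * Map a logical block of the file to a disk block, allocating the
 * missing part of the branch when create is non-zero.  This is the
 * get_block_t callback used by the address_space operations at the
 * bottom of this file.
 */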
static int get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int offsets[DEPTH];
	Indirect chain[DEPTH];
	struct super_block *sb = inode->i_sb;
	Indirect *partial;
	int left;
	int depth = block_to_path(inode, iblock, offsets);

	if (depth == 0)
		goto out;

	lock_kernel();
reread:
	partial = get_branch(inode, depth, offsets, chain, &err);

	/* Simplest case - block found, no allocation needed */
	if (!partial) {
got_it:
		bh_result->b_dev = sb->s_dev;
		bh_result->b_blocknr = block_to_cpu(sb, chain[depth-1].key);
		bh_result->b_state |= (1UL << BH_Mapped);
		/* Clean up and exit */
		partial = chain+depth-1; /* the whole chain */
		goto cleanup;
	}

	/* Next simple case - plain lookup or failed read of an indirect block */
	if (!create || err == -EIO) {
cleanup:
		while (partial > chain) {
			brelse(partial->bh);
			partial--;
		}
		unlock_kernel();
out:
		return err;
	}

	/*
	 * The indirect block might have been removed by truncate while
	 * we were reading it.  Handling of that case (forget what we've
	 * got and reread) is taken out of the main path.
	 */
	if (err == -EAGAIN)
		goto changed;

	left = (chain + depth) - partial;
	err = alloc_branch(inode, left, offsets+(partial-chain), partial);
	if (err)
		goto cleanup;

	if (splice_branch(inode, chain, partial, left) < 0)
		goto changed;

	bh_result->b_state |= (1UL << BH_New);
	goto got_it;

changed:
	while (partial > chain) {
		brelse(partial->bh);
		partial--;
	}
	goto reread;
}

static inline int all_zeroes(u32 *p, u32 *q)
{
	while (p < q)
		if (*p++)
			return 0;
	return 1;
}

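/*
 * Find the branch shared between the part of the tree we keep and the
 * part we are about to free.  On return, *top holds the number of the
 * first block that is entirely ours to free (0 when the top of the
 * branch is still in use), and the returned position marks where the
 * surviving indirect blocks end.
 */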
static Indirect *find_shared(struct inode *inode,
				int depth,
				int offsets[],
				Indirect chain[],
				u32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = get_branch(inode, k, offsets, chain, &err);
	if (!partial)
		partial = chain + k - 1;
	/*
	 * If the branch acquired a continuation after we looked at it -
	 * fine, it should all survive and the (new) top doesn't belong
	 * to us.
	 */
	if (!partial->key && *partial->p)
		goto no_top;
	for (p = partial; p > chain && all_zeroes((u32*)p->bh->b_data, p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive.  The rest of
	 * our branch should be detached before unlocking.  However, if
	 * the rest of the branch is all ours and does not grow directly
	 * from the inode, it's easier to cheat and just decrement
	 * partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		*p->p = 0;
	}

	while (partial > p) {
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}

static inline void free_data(struct inode *inode, u32 *p, u32 *q)
{
	for ( ; p < q ; p++) {
		u32 nr = *p;
		if (nr) {
			*p = 0;
			sysv_free_block(inode->i_sb, nr);
			mark_inode_dirty(inode);
		}
	}
}

static void free_branches(struct inode *inode, u32 *p, u32 *q, int depth)
{
	struct buffer_head *bh;
	struct super_block *sb = inode->i_sb;

	if (depth--) {
		for ( ; p < q ; p++) {
			int block;
			u32 nr = *p;
			if (!nr)
				continue;
			*p = 0;
			block = block_to_cpu(sb, nr);
			bh = sb_bread(sb, block);
			if (!bh)
				continue;
			free_branches(inode, (u32*)bh->b_data,
					block_end(bh), depth);
			bforget(bh);
			sysv_free_block(sb, nr);
			mark_inode_dirty(inode);
		}
	} else
		free_data(inode, p, q);
}

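/*
 * Truncate the file to the current i_size: zero the tail of the last
 * retained block, free the no-longer-needed tail of the shared branch,
 * then free every deeper (whole) subtree.
 */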
void sysv_truncate(struct inode *inode)
{
	u32 *i_data = inode->u.sysv_i.i_data;
	int offsets[DEPTH];
	Indirect chain[DEPTH];
	Indirect *partial;
	u32 nr = 0;
	int n;
	long iblock;
	unsigned blocksize;

	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)))
		return;

	blocksize = inode->i_sb->s_blocksize;
	iblock = (inode->i_size + blocksize - 1)
					>> inode->i_sb->s_blocksize_bits;

	block_truncate_page(inode->i_mapping, inode->i_size, get_block);

	n = block_to_path(inode, iblock, offsets);
	if (n == 0)
		return;

	if (n == 1) {
		free_data(inode, i_data+offsets[0], i_data + DIRECT);
		goto do_indirects;
	}

	partial = find_shared(inode, n, offsets, chain, &nr);
	/* Kill the top of the shared branch (already detached) */
	if (nr) {
		if (partial == chain)
			mark_inode_dirty(inode);
		else
			dirty_indirect(partial->bh, inode);
		free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		free_branches(inode, partial->p + 1, block_end(partial->bh),
				(chain+n-1) - partial);
		dirty_indirect(partial->bh, inode);
		brelse(partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees rooted at the indirect slots */
	while (n < DEPTH) {
		nr = i_data[DIRECT + n - 1];
		if (nr) {
			i_data[DIRECT + n - 1] = 0;
			mark_inode_dirty(inode);
			free_branches(inode, &nr, &nr+1, n);
		}
		n++;
	}
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (IS_SYNC(inode))
		sysv_sync_inode(inode);
	else
		mark_inode_dirty(inode);
}

static int sysv_writepage(struct page *page)
{
	return block_write_full_page(page, get_block);
}

static int sysv_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, get_block);
}

static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
	return block_prepare_write(page, from, to, get_block);
}

static int sysv_bmap(struct address_space *mapping, long block)
{
	return generic_block_bmap(mapping, block, get_block);
}
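
/* Wire the helpers above into the generic page-cache machinery. */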
struct address_space_operations sysv_aops = {
	readpage:	sysv_readpage,
	writepage:	sysv_writepage,
	sync_page:	block_sync_page,
	prepare_write:	sysv_prepare_write,
	commit_write:	generic_commit_write,
	bmap:		sysv_bmap
};