1/*
2 *   Copyright (C) International Business Machines  Corp., 2000-2004
3 *
4 *   This program is free software;  you can redistribute it and/or modify
5 *   it under the terms of the GNU General Public License as published by
6 *   the Free Software Foundation; either version 2 of the License, or
7 *   (at your option) any later version.
8 *
9 *   This program is distributed in the hope that it will be useful,
10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12 *   the GNU General Public License for more details.
13 *
14 *   You should have received a copy of the GNU General Public License
15 *   along with this program;  if not, write to the Free Software
16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19#include <linux/fs.h>
20#include <linux/buffer_head.h>
21#include <linux/quotaops.h>
22#include "jfs_incore.h"
23#include "jfs_filsys.h"
24#include "jfs_metapage.h"
25#include "jfs_dinode.h"
26#include "jfs_imap.h"
27#include "jfs_dmap.h"
28#include "jfs_superblock.h"
29#include "jfs_txnmgr.h"
30#include "jfs_debug.h"
31
/* number of bits held by one PSIZE-sized page (PSIZE scaled by 8) */
#define BITSPERPAGE	(PSIZE << 3)
/* log2 of one megabyte, one megabyte, and 32 megabytes, in bytes */
#define L2MEGABYTE	20
#define MEGABYTE	(1 << L2MEGABYTE)
#define MEGABYTE32	(32 * MEGABYTE)

/*
 * convert block number to bmap file page number:
 * a fixed offset of 4 pages plus the block number scaled down
 * by 2^13, 2^23 and 2^33; the argument must be wide enough
 * for a 33-bit shift (it is evaluated three times)
 */
#define BLKTODMAPN(b)\
	(4 + ((b) >> 13) + ((b) >> 23) + ((b) >> 33))
40
41/*
42 *	jfs_extendfs()
43 *
44 * function: extend file system;
45 *
46 *   |-------------------------------|----------|----------|
47 *   file system space               fsck       inline log
48 *                                   workspace  space
49 *
50 * input:
51 *	new LVSize: in LV blocks (required)
52 *	new LogSize: in LV blocks (optional)
53 *	new FSSize: in LV blocks (optional)
54 *
55 * new configuration:
56 * 1. set new LogSize as specified or default from new LVSize;
57 * 2. compute new FSCKSize from new LVSize;
58 * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
59 *    assert(new FSSize >= old FSSize),
60 *    i.e., file system must not be shrinked;
61 */
62int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
63{
64	int rc = 0;
65	struct jfs_sb_info *sbi = JFS_SBI(sb);
66	struct inode *ipbmap = sbi->ipbmap;
67	struct inode *ipbmap2;
68	struct inode *ipimap = sbi->ipimap;
69	struct jfs_log *log = sbi->log;
70	struct bmap *bmp = sbi->bmap;
71	s64 newLogAddress, newFSCKAddress;
72	int newFSCKSize;
73	s64 newMapSize = 0, mapSize;
74	s64 XAddress, XSize, nblocks, xoff, xaddr, t64;
75	s64 oldLVSize;
76	s64 newFSSize;
77	s64 VolumeSize;
78	int newNpages = 0, nPages, newPage, xlen, t32;
79	int tid;
80	int log_formatted = 0;
81	struct inode *iplist[1];
82	struct jfs_superblock *j_sb, *j_sb2;
83	uint old_agsize;
84	int agsizechanged = 0;
85	struct buffer_head *bh, *bh2;
86
87	/* If the volume hasn't grown, get out now */
88
89	if (sbi->mntflag & JFS_INLINELOG)
90		oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd);
91	else
92		oldLVSize = addressPXD(&sbi->fsckpxd) +
93		    lengthPXD(&sbi->fsckpxd);
94
95	if (oldLVSize >= newLVSize) {
96		printk(KERN_WARNING
97		       "jfs_extendfs: volume hasn't grown, returning\n");
98		goto out;
99	}
100
101	VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
102
103	if (VolumeSize) {
104		if (newLVSize > VolumeSize) {
105			printk(KERN_WARNING "jfs_extendfs: invalid size\n");
106			rc = -EINVAL;
107			goto out;
108		}
109	} else {
110		/* check the device */
111		bh = sb_bread(sb, newLVSize - 1);
112		if (!bh) {
113			printk(KERN_WARNING "jfs_extendfs: invalid size\n");
114			rc = -EINVAL;
115			goto out;
116		}
117		bforget(bh);
118	}
119
120	/* Can't extend write-protected drive */
121
122	if (isReadOnly(ipbmap)) {
123		printk(KERN_WARNING "jfs_extendfs: read-only file system\n");
124		rc = -EROFS;
125		goto out;
126	}
127
128	/*
129	 *	reconfigure LV spaces
130	 *	---------------------
131	 *
132	 * validate new size, or, if not specified, determine new size
133	 */
134
135	/*
136	 * reconfigure inline log space:
137	 */
138	if ((sbi->mntflag & JFS_INLINELOG)) {
139		if (newLogSize == 0) {
140			/*
141			 * no size specified: default to 1/256 of aggregate
142			 * size; rounded up to a megabyte boundary;
143			 */
144			newLogSize = newLVSize >> 8;
145			t32 = (1 << (20 - sbi->l2bsize)) - 1;
146			newLogSize = (newLogSize + t32) & ~t32;
147			newLogSize =
148			    min(newLogSize, MEGABYTE32 >> sbi->l2bsize);
149		} else {
150			/*
151			 * convert the newLogSize to fs blocks.
152			 *
153			 * Since this is given in megabytes, it will always be
154			 * an even number of pages.
155			 */
156			newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize;
157		}
158
159	} else
160		newLogSize = 0;
161
162	newLogAddress = newLVSize - newLogSize;
163
164	/*
165	 * reconfigure fsck work space:
166	 *
167	 * configure it to the end of the logical volume regardless of
168	 * whether file system extends to the end of the aggregate;
169	 * Need enough 4k pages to cover:
170	 *  - 1 bit per block in aggregate rounded up to BPERDMAP boundary
171	 *  - 1 extra page to handle control page and intermediate level pages
172	 *  - 50 extra pages for the chkdsk service log
173	 */
174	t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP)
175	    << L2BPERDMAP;
176	t32 = DIV_ROUND_UP(t64, BITSPERPAGE) + 1 + 50;
177	newFSCKSize = t32 << sbi->l2nbperpage;
178	newFSCKAddress = newLogAddress - newFSCKSize;
179
180	/*
181	 * compute new file system space;
182	 */
183	newFSSize = newLVSize - newLogSize - newFSCKSize;
184
185	/* file system cannot be shrinked */
186	if (newFSSize < bmp->db_mapsize) {
187		rc = -EINVAL;
188		goto out;
189	}
190
191	/*
192	 * If we're expanding enough that the inline log does not overlap
193	 * the old one, we can format the new log before we quiesce the
194	 * filesystem.
195	 */
196	if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) {
197		if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
198			goto out;
199		log_formatted = 1;
200	}
201	/*
202	 *	quiesce file system
203	 *
204	 * (prepare to move the inline log and to prevent map update)
205	 *
206	 * block any new transactions and wait for completion of
207	 * all wip transactions and flush modified pages s.t.
208	 * on-disk file system is in consistent state and
209	 * log is not required for recovery.
210	 */
211	txQuiesce(sb);
212
213	/* Reset size of direct inode */
214	sbi->direct_inode->i_size =  sb->s_bdev->bd_inode->i_size;
215
216	if (sbi->mntflag & JFS_INLINELOG) {
217		/*
218		 * deactivate old inline log
219		 */
220		lmLogShutdown(log);
221
222		/*
223		 * mark on-disk super block for fs in transition;
224		 *
225		 * update on-disk superblock for the new space configuration
226		 * of inline log space and fsck work space descriptors:
227		 * N.B. FS descriptor is NOT updated;
228		 *
229		 * crash recovery:
230		 * logredo(): if FM_EXTENDFS, return to fsck() for cleanup;
231		 * fsck(): if FM_EXTENDFS, reformat inline log and fsck
232		 * workspace from superblock inline log descriptor and fsck
233		 * workspace descriptor;
234		 */
235
236		/* read in superblock */
237		if ((rc = readSuper(sb, &bh)))
238			goto error_out;
239		j_sb = (struct jfs_superblock *)bh->b_data;
240
241		/* mark extendfs() in progress */
242		j_sb->s_state |= cpu_to_le32(FM_EXTENDFS);
243		j_sb->s_xsize = cpu_to_le64(newFSSize);
244		PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress);
245		PXDlength(&j_sb->s_xfsckpxd, newFSCKSize);
246		PXDaddress(&j_sb->s_xlogpxd, newLogAddress);
247		PXDlength(&j_sb->s_xlogpxd, newLogSize);
248
249		/* synchronously update superblock */
250		mark_buffer_dirty(bh);
251		sync_dirty_buffer(bh);
252		brelse(bh);
253
254		/*
255		 * format new inline log synchronously;
256		 *
257		 * crash recovery: if log move in progress,
258		 * reformat log and exit success;
259		 */
260		if (!log_formatted)
261			if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
262				goto error_out;
263
264		/*
265		 * activate new log
266		 */
267		log->base = newLogAddress;
268		log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits);
269		if ((rc = lmLogInit(log)))
270			goto error_out;
271	}
272
273	/*
274	 *	extend block allocation map
275	 *	---------------------------
276	 *
277	 * extendfs() for new extension, retry after crash recovery;
278	 *
279	 * note: both logredo() and fsck() rebuild map from
280	 * the bitmap and configuration parameter from superblock
281	 * (disregarding all other control information in the map);
282	 *
283	 * superblock:
284	 *  s_size: aggregate size in physical blocks;
285	 */
286	/*
287	 *	compute the new block allocation map configuration
288	 *
289	 * map dinode:
290	 *  di_size: map file size in byte;
291	 *  di_nblocks: number of blocks allocated for map file;
292	 *  di_mapsize: number of blocks in aggregate (covered by map);
293	 * map control page:
294	 *  db_mapsize: number of blocks in aggregate (covered by map);
295	 */
296	newMapSize = newFSSize;
297	/* number of data pages of new bmap file:
298	 * roundup new size to full dmap page boundary and
299	 * add 1 extra dmap page for next extendfs()
300	 */
301	t64 = (newMapSize - 1) + BPERDMAP;
302	newNpages = BLKTODMAPN(t64) + 1;
303
304	/*
305	 *	extend map from current map (WITHOUT growing mapfile)
306	 *
307	 * map new extension with unmapped part of the last partial
308	 * dmap page, if applicable, and extra page(s) allocated
309	 * at end of bmap by mkfs() or previous extendfs();
310	 */
311      extendBmap:
312	/* compute number of blocks requested to extend */
313	mapSize = bmp->db_mapsize;
314	XAddress = mapSize;	/* eXtension Address */
315	XSize = newMapSize - mapSize;	/* eXtension Size */
316	old_agsize = bmp->db_agsize;	/* We need to know if this changes */
317
318	/* compute number of blocks that can be extended by current mapfile */
319	t64 = dbMapFileSizeToMapSize(ipbmap);
320	if (mapSize > t64) {
321		printk(KERN_ERR "jfs_extendfs: mapSize (0x%Lx) > t64 (0x%Lx)\n",
322		       (long long) mapSize, (long long) t64);
323		rc = -EIO;
324		goto error_out;
325	}
326	nblocks = min(t64 - mapSize, XSize);
327
328	/*
329	 * update map pages for new extension:
330	 *
331	 * update/init dmap and bubble up the control hierarchy
332	 * incrementally fold up dmaps into upper levels;
333	 * update bmap control page;
334	 */
335	if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
336		goto error_out;
337
338	agsizechanged |= (bmp->db_agsize != old_agsize);
339
340	/*
341	 * the map now has extended to cover additional nblocks:
342	 * dn_mapsize = oldMapsize + nblocks;
343	 */
344	/* ipbmap->i_mapsize += nblocks; */
345	XSize -= nblocks;
346
347	/*
348	 *	grow map file to cover remaining extension
349	 *	and/or one extra dmap page for next extendfs();
350	 *
351	 * allocate new map pages and its backing blocks, and
352	 * update map file xtree
353	 */
354	/* compute number of data pages of current bmap file */
355	nPages = ipbmap->i_size >> L2PSIZE;
356
357	/* need to grow map file ? */
358	if (nPages == newNpages)
359		goto finalizeBmap;
360
361	/*
362	 * grow bmap file for the new map pages required:
363	 *
364	 * allocate growth at the start of newly extended region;
365	 * bmap file only grows sequentially, i.e., both data pages
366	 * and possibly xtree index pages may grow in append mode,
367	 * s.t. logredo() can reconstruct pre-extension state
368	 * by washing away bmap file of pages outside s_size boundary;
369	 */
370	/*
371	 * journal map file growth as if a regular file growth:
372	 * (note: bmap is created with di_mode = IFJOURNAL|IFREG);
373	 *
374	 * journaling of bmap file growth is not required since
375	 * logredo() do/can not use log records of bmap file growth
376	 * but it provides careful write semantics, pmap update, etc.;
377	 */
378	/* synchronous write of data pages: bmap data pages are
379	 * cached in meta-data cache, and not written out
380	 * by txCommit();
381	 */
382	filemap_fdatawait(ipbmap->i_mapping);
383	filemap_write_and_wait(ipbmap->i_mapping);
384	diWriteSpecial(ipbmap, 0);
385
386	newPage = nPages;	/* first new page number */
387	xoff = newPage << sbi->l2nbperpage;
388	xlen = (newNpages - nPages) << sbi->l2nbperpage;
389	xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1);
390	xaddr = XAddress;
391
392	tid = txBegin(sb, COMMIT_FORCE);
393
394	if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) {
395		txEnd(tid);
396		goto error_out;
397	}
398	/* update bmap file size */
399	ipbmap->i_size += xlen << sbi->l2bsize;
400	inode_add_bytes(ipbmap, xlen << sbi->l2bsize);
401
402	iplist[0] = ipbmap;
403	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
404
405	txEnd(tid);
406
407	if (rc)
408		goto error_out;
409
410	/*
411	 * map file has been grown now to cover extension to further out;
412	 * di_size = new map file size;
413	 *
414	 * if huge extension, the previous extension based on previous
415	 * map file size may not have been sufficient to cover whole extension
416	 * (it could have been used up for new map pages),
417	 * but the newly grown map file now covers lot bigger new free space
418	 * available for further extension of map;
419	 */
420	/* any more blocks to extend ? */
421	if (XSize)
422		goto extendBmap;
423
424      finalizeBmap:
425	/* finalize bmap */
426	dbFinalizeBmap(ipbmap);
427
428	/*
429	 *	update inode allocation map
430	 *	---------------------------
431	 *
432	 * move iag lists from old to new iag;
433	 * agstart field is not updated for logredo() to reconstruct
434	 * iag lists if system crash occurs.
435	 * (computation of ag number from agstart based on agsize
436	 * will correctly identify the new ag);
437	 */
438	/* if new AG size the same as old AG size, done! */
439	if (agsizechanged) {
440		if ((rc = diExtendFS(ipimap, ipbmap)))
441			goto error_out;
442
443		/* finalize imap */
444		if ((rc = diSync(ipimap)))
445			goto error_out;
446	}
447
448	/*
449	 *	finalize
450	 *	--------
451	 *
452	 * extension is committed when on-disk super block is
453	 * updated with new descriptors: logredo will recover
454	 * crash before it to pre-extension state;
455	 */
456
457	/* sync log to skip log replay of bmap file growth transaction; */
458	/* lmLogSync(log, 1); */
459
460	/*
461	 * synchronous write bmap global control page;
462	 * for crash before completion of write
463	 * logredo() will recover to pre-extendfs state;
464	 * for crash after completion of write,
465	 * logredo() will recover post-extendfs state;
466	 */
467	if ((rc = dbSync(ipbmap)))
468		goto error_out;
469
470	/*
471	 * copy primary bmap inode to secondary bmap inode
472	 */
473
474	ipbmap2 = diReadSpecial(sb, BMAP_I, 1);
475	if (ipbmap2 == NULL) {
476		printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n");
477		goto error_out;
478	}
479	memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288);
480	ipbmap2->i_size = ipbmap->i_size;
481	ipbmap2->i_blocks = ipbmap->i_blocks;
482
483	diWriteSpecial(ipbmap2, 1);
484	diFreeSpecial(ipbmap2);
485
486	/*
487	 *	update superblock
488	 */
489	if ((rc = readSuper(sb, &bh)))
490		goto error_out;
491	j_sb = (struct jfs_superblock *)bh->b_data;
492
493	/* mark extendfs() completion */
494	j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS);
495	j_sb->s_size = cpu_to_le64(bmp->db_mapsize <<
496				   le16_to_cpu(j_sb->s_l2bfactor));
497	j_sb->s_agsize = cpu_to_le32(bmp->db_agsize);
498
499	/* update inline log space descriptor */
500	if (sbi->mntflag & JFS_INLINELOG) {
501		PXDaddress(&(j_sb->s_logpxd), newLogAddress);
502		PXDlength(&(j_sb->s_logpxd), newLogSize);
503	}
504
505	/* record log's mount serial number */
506	j_sb->s_logserial = cpu_to_le32(log->serial);
507
508	/* update fsck work space descriptor */
509	PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress);
510	PXDlength(&(j_sb->s_fsckpxd), newFSCKSize);
511	j_sb->s_fscklog = 1;
512	/* sb->s_fsckloglen remains the same */
513
514	/* Update secondary superblock */
515	bh2 = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits);
516	if (bh2) {
517		j_sb2 = (struct jfs_superblock *)bh2->b_data;
518		memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock));
519
520		mark_buffer_dirty(bh);
521		sync_dirty_buffer(bh2);
522		brelse(bh2);
523	}
524
525	/* write primary superblock */
526	mark_buffer_dirty(bh);
527	sync_dirty_buffer(bh);
528	brelse(bh);
529
530	goto resume;
531
532      error_out:
533	jfs_error(sb, "jfs_extendfs");
534
535      resume:
536	/*
537	 *	resume file system transactions
538	 */
539	txResume(sb);
540
541      out:
542	return rc;
543}
544