1/*
2 *   Copyright (C) International Business Machines Corp., 2000-2004
3 *
4 *   This program is free software;  you can redistribute it and/or modify
5 *   it under the terms of the GNU General Public License as published by
6 *   the Free Software Foundation; either version 2 of the License, or
7 *   (at your option) any later version.
8 *
9 *   This program is distributed in the hope that it will be useful,
10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12 *   the GNU General Public License for more details.
13 *
14 *   You should have received a copy of the GNU General Public License
15 *   along with this program;  if not, write to the Free Software
16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/fs.h>
20#include <linux/quotaops.h>
21#include "jfs_incore.h"
22#include "jfs_inode.h"
23#include "jfs_superblock.h"
24#include "jfs_dmap.h"
25#include "jfs_extent.h"
26#include "jfs_debug.h"
27
28/*
29 * forward references
30 */
31static int extBalloc(struct inode *, s64, s64 *, s64 *);
32#ifdef _NOTYET
33static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
34#endif
35static s64 extRoundDown(s64 nb);
36
/*
 * Debug print helpers built on printk().
 *
 * DPD/DPC/DPL/DPX (and their '1' variants) print the value of the
 * argument, labelled with the spelling of the argument expression in
 * parentheses.  The variants whose name ends in '1' finish with two
 * blanks instead of a newline so that several values can share a line.
 * DPS/DPS1/DPE/DPE1 print a caller supplied string.
 *
 * The label is produced with the preprocessor '#' (stringify) operator.
 * An ISO C preprocessor does not substitute macro parameters inside
 * string literals, so the former "(a): ..." format strings printed the
 * literal text "(a)" for every value.
 *
 * DPL/DPL1 print a 64-bit value in hex.  The value is passed as a single
 * unsigned long long that matches a single %llx conversion; the former
 * two-branch form handed one 64-bit argument to "%x%08x" (and a shifted
 * value to "%x"), which does not match the conversions it was paired
 * with.  Being single expressions, they are now safe in if/else bodies.
 */
#define DPD(a)          (printk("(" #a "): %d\n", (a)))
#define DPC(a)          (printk("(" #a "): %c\n", (a)))
#define DPL1(a)         (printk("(" #a "): %llx  ", (unsigned long long) (a)))
#define DPL(a)          (printk("(" #a "): %llx\n", (unsigned long long) (a)))

#define DPD1(a)         (printk("(" #a "): %d  ", (a)))
#define DPX(a)          (printk("(" #a "): %08x\n", (a)))
#define DPX1(a)         (printk("(" #a "): %08x  ", (a)))
#define DPS(a)          (printk("%s\n", (a)))
#define DPE(a)          (printk("\nENTERING: %s\n", (a)))
#define DPE1(a)         (printk("\nENTERING: %s", (a)))
#define DPS1(a)         (printk("  %s  ", (a)))
61
62
63/*
64 * NAME:	extAlloc()
65 *
66 * FUNCTION:    allocate an extent for a specified page range within a
67 *		file.
68 *
69 * PARAMETERS:
70 *	ip	- the inode of the file.
71 *	xlen	- requested extent length.
72 *	pno	- the starting page number with the file.
73 *	xp	- pointer to an xad.  on entry, xad describes an
74 *		  extent that is used as an allocation hint if the
75 *		  xaddr of the xad is non-zero.  on successful exit,
76 *		  the xad describes the newly allocated extent.
77 *	abnr	- bool indicating whether the newly allocated extent
78 *		  should be marked as allocated but not recorded.
79 *
80 * RETURN VALUES:
81 *      0       - success
82 *      -EIO	- i/o error.
83 *      -ENOSPC	- insufficient disk resources.
84 */
85int
86extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
87{
88	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
89	s64 nxlen, nxaddr, xoff, hint, xaddr = 0;
90	int rc;
91	int xflag;
92
93	/* This blocks if we are low on resources */
94	txBeginAnon(ip->i_sb);
95
96	/* Avoid race with jfs_commit_inode() */
97	mutex_lock(&JFS_IP(ip)->commit_mutex);
98
99	/* validate extent length */
100	if (xlen > MAXXLEN)
101		xlen = MAXXLEN;
102
103	/* get the page's starting extent offset */
104	xoff = pno << sbi->l2nbperpage;
105
106	/* check if an allocation hint was provided */
107	if ((hint = addressXAD(xp))) {
108		/* get the size of the extent described by the hint */
109		nxlen = lengthXAD(xp);
110
111		/* check if the hint is for the portion of the file
112		 * immediately previous to the current allocation
113		 * request and if hint extent has the same abnr
114		 * value as the current request.  if so, we can
115		 * extend the hint extent to include the current
116		 * extent if we can allocate the blocks immediately
117		 * following the hint extent.
118		 */
119		if (offsetXAD(xp) + nxlen == xoff &&
120		    abnr == ((xp->flag & XAD_NOTRECORDED) ? true : false))
121			xaddr = hint + nxlen;
122
123		/* adjust the hint to the last block of the extent */
124		hint += (nxlen - 1);
125	}
126
127	/* allocate the disk blocks for the extent.  initially, extBalloc()
128	 * will try to allocate disk blocks for the requested size (xlen).
129	 * if this fails (xlen contiguous free blocks not avaliable), it'll
130	 * try to allocate a smaller number of blocks (producing a smaller
131	 * extent), with this smaller number of blocks consisting of the
132	 * requested number of blocks rounded down to the next smaller
133	 * power of 2 number (i.e. 16 -> 8).  it'll continue to round down
134	 * and retry the allocation until the number of blocks to allocate
135	 * is smaller than the number of blocks per page.
136	 */
137	nxlen = xlen;
138	if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
139		mutex_unlock(&JFS_IP(ip)->commit_mutex);
140		return (rc);
141	}
142
143	/* Allocate blocks to quota. */
144	if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
145		dbFree(ip, nxaddr, (s64) nxlen);
146		mutex_unlock(&JFS_IP(ip)->commit_mutex);
147		return -EDQUOT;
148	}
149
150	/* determine the value of the extent flag */
151	xflag = abnr ? XAD_NOTRECORDED : 0;
152
153	/* if we can extend the hint extent to cover the current request,
154	 * extend it.  otherwise, insert a new extent to
155	 * cover the current request.
156	 */
157	if (xaddr && xaddr == nxaddr)
158		rc = xtExtend(0, ip, xoff, (int) nxlen, 0);
159	else
160		rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0);
161
162	/* if the extend or insert failed,
163	 * free the newly allocated blocks and return the error.
164	 */
165	if (rc) {
166		dbFree(ip, nxaddr, nxlen);
167		DQUOT_FREE_BLOCK(ip, nxlen);
168		mutex_unlock(&JFS_IP(ip)->commit_mutex);
169		return (rc);
170	}
171
172	/* set the results of the extent allocation */
173	XADaddress(xp, nxaddr);
174	XADlength(xp, nxlen);
175	XADoffset(xp, xoff);
176	xp->flag = xflag;
177
178	mark_inode_dirty(ip);
179
180	mutex_unlock(&JFS_IP(ip)->commit_mutex);
181	/*
182	 * COMMIT_SyncList flags an anonymous tlock on page that is on
183	 * sync list.
184	 * We need to commit the inode to get the page written disk.
185	 */
186	if (test_and_clear_cflag(COMMIT_Synclist,ip))
187		jfs_commit_inode(ip, 0);
188
189	return (0);
190}
191
192
193#ifdef _NOTYET
194/*
195 * NAME:        extRealloc()
196 *
197 * FUNCTION:    extend the allocation of a file extent containing a
198 *		partial back last page.
199 *
200 * PARAMETERS:
201 *	ip	- the inode of the file.
202 *	cp	- cbuf for the partial backed last page.
203 *	xlen	- request size of the resulting extent.
204 *	xp	- pointer to an xad. on successful exit, the xad
205 *		  describes the newly allocated extent.
206 *	abnr	- bool indicating whether the newly allocated extent
207 *		  should be marked as allocated but not recorded.
208 *
209 * RETURN VALUES:
210 *      0       - success
211 *      -EIO	- i/o error.
212 *      -ENOSPC	- insufficient disk resources.
213 */
214int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
215{
216	struct super_block *sb = ip->i_sb;
217	s64 xaddr, xlen, nxaddr, delta, xoff;
218	s64 ntail, nextend, ninsert;
219	int rc, nbperpage = JFS_SBI(sb)->nbperpage;
220	int xflag;
221
222	/* This blocks if we are low on resources */
223	txBeginAnon(ip->i_sb);
224
225	mutex_lock(&JFS_IP(ip)->commit_mutex);
226	/* validate extent length */
227	if (nxlen > MAXXLEN)
228		nxlen = MAXXLEN;
229
230	/* get the extend (partial) page's disk block address and
231	 * number of blocks.
232	 */
233	xaddr = addressXAD(xp);
234	xlen = lengthXAD(xp);
235	xoff = offsetXAD(xp);
236
237	/* if the extend page is abnr and if the request is for
238	 * the extent to be allocated and recorded,
239	 * make the page allocated and recorded.
240	 */
241	if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
242		xp->flag = 0;
243		if ((rc = xtUpdate(0, ip, xp)))
244			goto exit;
245	}
246
247	/* try to allocated the request number of blocks for the
248	 * extent.  dbRealloc() first tries to satisfy the request
249	 * by extending the allocation in place. otherwise, it will
250	 * try to allocate a new set of blocks large enough for the
251	 * request.  in satisfying a request, dbReAlloc() may allocate
252	 * less than what was request but will always allocate enough
253	 * space as to satisfy the extend page.
254	 */
255	if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
256		goto exit;
257
258	/* Allocat blocks to quota. */
259	if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
260		dbFree(ip, nxaddr, (s64) nxlen);
261		mutex_unlock(&JFS_IP(ip)->commit_mutex);
262		return -EDQUOT;
263	}
264
265	delta = nxlen - xlen;
266
267	/* check if the extend page is not abnr but the request is abnr
268	 * and the allocated disk space is for more than one page.  if this
269	 * is the case, there is a miss match of abnr between the extend page
270	 * and the one or more pages following the extend page.  as a result,
271	 * two extents will have to be manipulated. the first will be that
272	 * of the extent of the extend page and will be manipulated thru
273	 * an xtExtend() or an xtTailgate(), depending upon whether the
274	 * disk allocation occurred as an inplace extension.  the second
275	 * extent will be manipulated (created) through an xtInsert() and
276	 * will be for the pages following the extend page.
277	 */
278	if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) {
279		ntail = nbperpage;
280		nextend = ntail - xlen;
281		ninsert = nxlen - nbperpage;
282
283		xflag = XAD_NOTRECORDED;
284	} else {
285		ntail = nxlen;
286		nextend = delta;
287		ninsert = 0;
288
289		xflag = xp->flag;
290	}
291
292	/* if we were able to extend the disk allocation in place,
293	 * extend the extent.  otherwise, move the extent to a
294	 * new disk location.
295	 */
296	if (xaddr == nxaddr) {
297		/* extend the extent */
298		if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
299			dbFree(ip, xaddr + xlen, delta);
300			DQUOT_FREE_BLOCK(ip, nxlen);
301			goto exit;
302		}
303	} else {
304		/*
305		 * move the extent to a new location:
306		 *
307		 * xtTailgate() accounts for relocated tail extent;
308		 */
309		if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
310			dbFree(ip, nxaddr, nxlen);
311			DQUOT_FREE_BLOCK(ip, nxlen);
312			goto exit;
313		}
314	}
315
316
317	/* check if we need to also insert a new extent */
318	if (ninsert) {
319		/* perform the insert.  if it fails, free the blocks
320		 * to be inserted and make it appear that we only did
321		 * the xtExtend() or xtTailgate() above.
322		 */
323		xaddr = nxaddr + ntail;
324		if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert,
325			      &xaddr, 0)) {
326			dbFree(ip, xaddr, (s64) ninsert);
327			delta = nextend;
328			nxlen = ntail;
329			xflag = 0;
330		}
331	}
332
333	/* set the return results */
334	XADaddress(xp, nxaddr);
335	XADlength(xp, nxlen);
336	XADoffset(xp, xoff);
337	xp->flag = xflag;
338
339	mark_inode_dirty(ip);
340exit:
341	mutex_unlock(&JFS_IP(ip)->commit_mutex);
342	return (rc);
343}
344#endif			/* _NOTYET */
345
346
347/*
348 * NAME:        extHint()
349 *
350 * FUNCTION:    produce an extent allocation hint for a file offset.
351 *
352 * PARAMETERS:
353 *	ip	- the inode of the file.
354 *	offset  - file offset for which the hint is needed.
355 *	xp	- pointer to the xad that is to be filled in with
356 *		  the hint.
357 *
358 * RETURN VALUES:
359 *      0       - success
360 *      -EIO	- i/o error.
361 */
362int extHint(struct inode *ip, s64 offset, xad_t * xp)
363{
364	struct super_block *sb = ip->i_sb;
365	struct xadlist xadl;
366	struct lxdlist lxdl;
367	lxd_t lxd;
368	s64 prev;
369	int rc, nbperpage = JFS_SBI(sb)->nbperpage;
370
371	/* init the hint as "no hint provided" */
372	XADaddress(xp, 0);
373
374	/* determine the starting extent offset of the page previous
375	 * to the page containing the offset.
376	 */
377	prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage;
378
379	/* if the offsets in the first page of the file,
380	 * no hint provided.
381	 */
382	if (prev < 0)
383		return (0);
384
385	/* prepare to lookup the previous page's extent info */
386	lxdl.maxnlxd = 1;
387	lxdl.nlxd = 1;
388	lxdl.lxd = &lxd;
389	LXDoffset(&lxd, prev)
390	    LXDlength(&lxd, nbperpage);
391
392	xadl.maxnxad = 1;
393	xadl.nxad = 0;
394	xadl.xad = xp;
395
396	/* perform the lookup */
397	if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398		return (rc);
399
400	/* check if not extent exists for the previous page.
401	 * this is possible for sparse files.
402	 */
403	if (xadl.nxad == 0) {
404//              assert(ISSPARSE(ip));
405		return (0);
406	}
407
408	/* only preserve the abnr flag within the xad flags
409	 * of the returned hint.
410	 */
411	xp->flag &= XAD_NOTRECORDED;
412
413        if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
414		jfs_error(ip->i_sb, "extHint: corrupt xtree");
415		return -EIO;
416        }
417
418	return (0);
419}
420
421
422/*
423 * NAME:        extRecord()
424 *
425 * FUNCTION:    change a page with a file from not recorded to recorded.
426 *
427 * PARAMETERS:
428 *	ip	- inode of the file.
429 *	cp	- cbuf of the file page.
430 *
431 * RETURN VALUES:
432 *      0       - success
433 *      -EIO	- i/o error.
434 *      -ENOSPC	- insufficient disk resources.
435 */
436int extRecord(struct inode *ip, xad_t * xp)
437{
438	int rc;
439
440	txBeginAnon(ip->i_sb);
441
442	mutex_lock(&JFS_IP(ip)->commit_mutex);
443
444	/* update the extent */
445	rc = xtUpdate(0, ip, xp);
446
447	mutex_unlock(&JFS_IP(ip)->commit_mutex);
448	return rc;
449}
450
451
452#ifdef _NOTYET
453/*
454 * NAME:        extFill()
455 *
456 * FUNCTION:    allocate disk space for a file page that represents
457 *		a file hole.
458 *
459 * PARAMETERS:
460 *	ip	- the inode of the file.
461 *	cp	- cbuf of the file page represent the hole.
462 *
463 * RETURN VALUES:
464 *      0       - success
465 *      -EIO	- i/o error.
466 *      -ENOSPC	- insufficient disk resources.
467 */
468int extFill(struct inode *ip, xad_t * xp)
469{
470	int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
471	s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
472
473//      assert(ISSPARSE(ip));
474
475	/* initialize the extent allocation hint */
476	XADaddress(xp, 0);
477
478	/* allocate an extent to fill the hole */
479	if ((rc = extAlloc(ip, nbperpage, blkno, xp, false)))
480		return (rc);
481
482	assert(lengthPXD(xp) == nbperpage);
483
484	return (0);
485}
486#endif			/* _NOTYET */
487
488
489/*
490 * NAME:	extBalloc()
491 *
492 * FUNCTION:    allocate disk blocks to form an extent.
493 *
494 *		initially, we will try to allocate disk blocks for the
495 *		requested size (nblocks).  if this fails (nblocks
496 *		contiguous free blocks not avaliable), we'll try to allocate
497 *		a smaller number of blocks (producing a smaller extent), with
498 *		this smaller number of blocks consisting of the requested
499 *		number of blocks rounded down to the next smaller power of 2
500 *		number (i.e. 16 -> 8).  we'll continue to round down and
501 *		retry the allocation until the number of blocks to allocate
502 *		is smaller than the number of blocks per page.
503 *
504 * PARAMETERS:
505 *	ip	 - the inode of the file.
506 *	hint	 - disk block number to be used as an allocation hint.
507 *	*nblocks - pointer to an s64 value.  on entry, this value specifies
508 *		   the desired number of block to be allocated. on successful
509 *		   exit, this value is set to the number of blocks actually
510 *		   allocated.
511 *	blkno	 - pointer to a block address that is filled in on successful
512 *		   return with the starting block number of the newly
513 *		   allocated block range.
514 *
515 * RETURN VALUES:
516 *      0       - success
517 *      -EIO	- i/o error.
518 *      -ENOSPC	- insufficient disk resources.
519 */
520static int
521extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
522{
523	struct jfs_inode_info *ji = JFS_IP(ip);
524	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
525	s64 nb, nblks, daddr, max;
526	int rc, nbperpage = sbi->nbperpage;
527	struct bmap *bmp = sbi->bmap;
528	int ag;
529
530	/* get the number of blocks to initially attempt to allocate.
531	 * we'll first try the number of blocks requested unless this
532	 * number is greater than the maximum number of contiguous free
533	 * blocks in the map. in that case, we'll start off with the
534	 * maximum free.
535	 */
536	max = (s64) 1 << bmp->db_maxfreebud;
537	if (*nblocks >= max && *nblocks > nbperpage)
538		nb = nblks = (max > nbperpage) ? max : nbperpage;
539	else
540		nb = nblks = *nblocks;
541
542	/* try to allocate blocks */
543	while ((rc = dbAlloc(ip, hint, nb, &daddr)) != 0) {
544		/* if something other than an out of space error,
545		 * stop and return this error.
546		 */
547		if (rc != -ENOSPC)
548			return (rc);
549
550		/* decrease the allocation request size */
551		nb = min(nblks, extRoundDown(nb));
552
553		/* give up if we cannot cover a page */
554		if (nb < nbperpage)
555			return (rc);
556	}
557
558	*nblocks = nb;
559	*blkno = daddr;
560
561	if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
562		ag = BLKTOAG(daddr, sbi);
563		spin_lock_irq(&ji->ag_lock);
564		if (ji->active_ag == -1) {
565			atomic_inc(&bmp->db_active[ag]);
566			ji->active_ag = ag;
567		} else if (ji->active_ag != ag) {
568			atomic_dec(&bmp->db_active[ji->active_ag]);
569			atomic_inc(&bmp->db_active[ag]);
570			ji->active_ag = ag;
571		}
572		spin_unlock_irq(&ji->ag_lock);
573	}
574
575	return (0);
576}
577
578
579#ifdef _NOTYET
580/*
581 * NAME:	extBrealloc()
582 *
583 * FUNCTION:    attempt to extend an extent's allocation.
584 *
585 *		Initially, we will try to extend the extent's allocation
586 *		in place.  If this fails, we'll try to move the extent
587 *		to a new set of blocks.  If moving the extent, we initially
588 *		will try to allocate disk blocks for the requested size
589 *		(newnblks).  if this fails (new contiguous free blocks not
590 *		avaliable), we'll try to allocate a smaller number of
591 *		blocks (producing a smaller extent), with this smaller
592 *		number of blocks consisting of the requested number of
593 *		blocks rounded down to the next smaller power of 2
594 *		number (i.e. 16 -> 8).  We'll continue to round down and
595 *		retry the allocation until the number of blocks to allocate
596 *		is smaller than the number of blocks per page.
597 *
598 * PARAMETERS:
599 *	ip	 - the inode of the file.
600 *	blkno    - starting block number of the extents current allocation.
601 *	nblks    - number of blocks within the extents current allocation.
602 *	newnblks - pointer to a s64 value.  on entry, this value is the
603 *		   the new desired extent size (number of blocks).  on
604 *		   successful exit, this value is set to the extent's actual
605 *		   new size (new number of blocks).
606 *	newblkno - the starting block number of the extents new allocation.
607 *
608 * RETURN VALUES:
609 *      0       - success
610 *      -EIO	- i/o error.
611 *      -ENOSPC	- insufficient disk resources.
612 */
613static int
614extBrealloc(struct inode *ip,
615	    s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno)
616{
617	int rc;
618
619	/* try to extend in place */
620	if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) {
621		*newblkno = blkno;
622		return (0);
623	} else {
624		if (rc != -ENOSPC)
625			return (rc);
626	}
627
628	/* in place extension not possible.
629	 * try to move the extent to a new set of blocks.
630	 */
631	return (extBalloc(ip, blkno, newnblks, newblkno));
632}
633#endif			/* _NOTYET */
634
635
/*
 * NAME:	extRoundDown()
 *
 * FUNCTION:	round down a specified number of blocks to the next
 *		smaller power of 2 number, i.e. the largest power of 2
 *		that is strictly less than nb (16 -> 8, 17 -> 16).
 *
 * PARAMETERS:
 *	nb	- the number of blocks to round down.  callers are
 *		  expected to pass a non-negative count.
 *
 * RETURN VALUES:
 *	next smaller power of 2 number; 0 when nb is 0 or 1, since no
 *	smaller power of 2 exists.
 */
static s64 extRoundDown(s64 nb)
{
	int i;
	u64 m, k;

	/* with no bit set the scan below would run off the end (i == 64)
	 * and the shift count 63 - i would be negative, which is
	 * undefined.
	 */
	if (nb == 0)
		return (0);

	/* find the most significant set bit, scanning from bit 63 down */
	for (i = 0, m = (u64) 1 << 63; i < 64; i++, m >>= 1) {
		if (m & nb)
			break;
	}

	/* k is the power of 2 given by that bit */
	i = 63 - i;
	k = (u64) 1 << i;

	/* nb has lower bits set: k is already smaller than nb.
	 * nb is exactly k: step down to the next power of 2.
	 */
	k = ((k - 1) & nb) ? k : k >> 1;

	return (k);
}
664