1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 *    must display the following acknowledgement:
43 *	This product includes software developed by the University of
44 *	California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ffs_inode.c	8.13 (Berkeley) 4/21/95
62 */
63
64#include <rev_endian_fs.h>
65#include <vm/vm_pager.h>
66
67#include <sys/param.h>
68#include <sys/systm.h>
69#include <sys/mount_internal.h>
70#include <sys/proc_internal.h>	/* for accessing p_stats */
71#include <sys/file.h>
72#include <sys/buf_internal.h>
73#include <sys/vnode_internal.h>
74#include <sys/kernel.h>
75#include <sys/malloc.h>
76#include <sys/trace.h>
77#include <sys/resourcevar.h>
78#include <sys/ubc.h>
79#include <sys/quota.h>
80
81#include <sys/vm.h>
82
83#include <ufs/ufs/quota.h>
84#include <ufs/ufs/inode.h>
85#include <ufs/ufs/ufsmount.h>
86#include <ufs/ufs/ufs_extern.h>
87
88#include <ufs/ffs/fs.h>
89#include <ufs/ffs/ffs_extern.h>
90
91#if REV_ENDIAN_FS
92#include <ufs/ufs/ufs_byte_order.h>
93#include <libkern/OSByteOrder.h>
94#endif /* REV_ENDIAN_FS */
95#include <libkern/OSAtomic.h>
96
/*
 * Forward declaration of the indirect-block truncation helper defined
 * later in this file.  Arguments, in order: inode, logical block number
 * of the indirect block, its on-disk block address, last block to keep,
 * indirection level, and an out-parameter receiving the released count.
 */
static int ffs_indirtrunc(struct inode *, ufs_daddr_t, ufs_daddr_t,
	    ufs_daddr_t, int, long *);
99
100/*
101 * Update the access, modified, and inode change times as specified by the
102 * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is
103 * used to specify that the inode needs to be updated but that the times have
104 * already been set. The access and modified times are taken from the second
105 * and third parameters; the inode change time is always taken from the current
106 * time. If waitfor is set, then wait for the disk write of the inode to
107 * complete.
108 */
109int
110ffs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor)
111{
112	register struct fs *fs;
113	struct buf *bp;
114	struct inode *ip;
115	struct timeval tv;
116	errno_t error;
117#if REV_ENDIAN_FS
118	struct mount *mp=(vp)->v_mount;
119	int rev_endian=(mp->mnt_flag & MNT_REVEND);
120#endif /* REV_ENDIAN_FS */
121
122	ip = VTOI(vp);
123	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
124		ip->i_flag &=
125		    ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
126		return (0);
127	}
128	if ((ip->i_flag &
129	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
130		return (0);
131	if (ip->i_flag & IN_ACCESS)
132		ip->i_atime = access->tv_sec;
133	if (ip->i_flag & IN_UPDATE) {
134		ip->i_mtime = modify->tv_sec;
135		ip->i_modrev++;
136	}
137	if (ip->i_flag & IN_CHANGE) {
138		microtime(&tv);
139		ip->i_ctime = tv.tv_sec;
140	}
141	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
142	fs = ip->i_fs;
143	/*
144	 * Ensure that uid and gid are correct. This is a temporary
145	 * fix until fsck has been changed to do the update.
146	 */
147	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
148		ip->i_din.di_ouid = ip->i_uid;		/* XXX */
149		ip->i_din.di_ogid = ip->i_gid;		/* XXX */
150	}						/* XXX */
151	if (error = buf_bread(ip->i_devvp,
152			      (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
153		(int)fs->fs_bsize, NOCRED, &bp)) {
154		buf_brelse(bp);
155		return ((int)error);
156	}
157#if REV_ENDIAN_FS
158	if (rev_endian)
159		byte_swap_inode_out(ip, ((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)));
160	else {
161#endif /* REV_ENDIAN_FS */
162	*((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
163#if REV_ENDIAN_FS
164	}
165#endif /* REV_ENDIAN_FS */
166
167	if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
168		return ((int)buf_bwrite(bp));
169	else {
170		buf_bdwrite(bp);
171		return (0);
172	}
173}
174
175
/*
 * Indirection levels.  The truncate code below uses these as indices
 * into the inode's indirect block pointers (i_ib[]) and into its
 * per-level bookkeeping arrays.
 */
#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
179
180int
181ffs_truncate_internal(vnode_t ovp, off_t length, int flags, ucred_t cred)
182{
183	struct inode	*oip;
184	struct fs	*fs;
185	ufs_daddr_t lastblock;
186	ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
187	ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
188	buf_t	bp;
189	int	offset, size, level, i;
190	long	count, nblocks, vflags, blocksreleased = 0;
191	struct	timeval tv;
192	int	aflags, error, allerror;
193	off_t	osize;
194	int	devBlockSize=0;
195#if QUOTA
196	int64_t change;   /* in bytes */
197#endif /* QUOTA */
198
199	if (length < 0)
200		return (EINVAL);
201
202	oip = VTOI(ovp);
203	fs = oip->i_fs;
204
205	if (length > fs->fs_maxfilesize)
206	        return (EFBIG);
207
208	microtime(&tv);
209	if (ovp->v_type == VLNK &&
210	    oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
211#if DIAGNOSTIC
212		if (length != 0)
213			panic("ffs_truncate: partial truncate of symlink");
214#endif
215		bzero((char *)&oip->i_shortlink, (u_int)oip->i_size);
216		oip->i_size = 0;
217		oip->i_flag |= IN_CHANGE | IN_UPDATE;
218		return (ffs_update(ovp, &tv, &tv, 1));
219	}
220
221	if (oip->i_size == length) {
222		oip->i_flag |= IN_CHANGE | IN_UPDATE;
223		return (ffs_update(ovp, &tv, &tv, 0));
224	}
225#if QUOTA
226	if (error = getinoquota(oip))
227		return (error);
228#endif
229	osize = oip->i_size;
230
231	/*
232	 * Lengthen the size of the file. We must ensure that the
233	 * last byte of the file is allocated. Since the smallest
234	 * value of osize is 0, length will be at least 1.
235	 */
236	if (osize < length) {
237		offset = blkoff(fs, length - 1);
238		lbn = lblkno(fs, length - 1);
239		aflags = B_CLRBUF;
240		if (flags & IO_SYNC)
241			aflags |= B_SYNC;
242		if (error = ffs_balloc(oip, lbn, offset + 1, cred, &bp, aflags, 0))
243			return (error);
244		oip->i_size = length;
245
246		if (UBCINFOEXISTS(ovp)) {
247			buf_markinvalid(bp);
248			buf_bwrite(bp);
249			ubc_setsize(ovp, (off_t)length);
250		} else {
251			if (aflags & B_SYNC)
252				buf_bwrite(bp);
253			else
254				buf_bawrite(bp);
255		}
256		oip->i_flag |= IN_CHANGE | IN_UPDATE;
257		return (ffs_update(ovp, &tv, &tv, 1));
258	}
259	/*
260	 * Shorten the size of the file. If the file is not being
261	 * truncated to a block boundry, the contents of the
262	 * partial block following the end of the file must be
263	 * zero'ed in case it ever become accessable again because
264	 * of subsequent file growth.
265	 */
266	if (UBCINFOEXISTS(ovp))
267		ubc_setsize(ovp, (off_t)length);
268
269	vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META;
270
271	if (vflags & BUF_WRITE_DATA)
272	        ffs_fsync_internal(ovp, MNT_WAIT);
273	allerror = buf_invalidateblks(ovp, vflags, 0, 0);
274
275	offset = blkoff(fs, length);
276	if (offset == 0) {
277		oip->i_size = length;
278	} else {
279		lbn = lblkno(fs, length);
280		aflags = B_CLRBUF;
281		if (flags & IO_SYNC)
282			aflags |= B_SYNC;
283		if (error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags, 0))
284			return (error);
285		oip->i_size = length;
286		size = blksize(fs, oip, lbn);
287		bzero((char *)buf_dataptr(bp) + offset, (u_int)(size - offset));
288		allocbuf(bp, size);
289		if (UBCINFOEXISTS(ovp)) {
290			buf_markinvalid(bp);
291			buf_bwrite(bp);
292		} else {
293			if (aflags & B_SYNC)
294				buf_bwrite(bp);
295			else
296				buf_bawrite(bp);
297		}
298	}
299	/*
300	 * Calculate index into inode's block list of
301	 * last direct and indirect blocks (if any)
302	 * which we want to keep.  Lastblock is -1 when
303	 * the file is truncated to 0.
304	 */
305	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
306	lastiblock[SINGLE] = lastblock - NDADDR;
307	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
308	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
309
310	devBlockSize = vfs_devblocksize(vnode_mount(ovp));
311	nblocks = btodb(fs->fs_bsize, devBlockSize);
312
313	/*
314	 * Update file and block pointers on disk before we start freeing
315	 * blocks.  If we crash before free'ing blocks below, the blocks
316	 * will be returned to the free list.  lastiblock values are also
317	 * normalized to -1 for calls to ffs_indirtrunc below.
318	 */
319	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
320	for (level = TRIPLE; level >= SINGLE; level--)
321		if (lastiblock[level] < 0) {
322			oip->i_ib[level] = 0;
323			lastiblock[level] = -1;
324		}
325	for (i = NDADDR - 1; i > lastblock; i--)
326		oip->i_db[i] = 0;
327	oip->i_flag |= IN_CHANGE | IN_UPDATE;
328	if (error = ffs_update(ovp, &tv, &tv, MNT_WAIT))
329		allerror = error;
330	/*
331	 * Having written the new inode to disk, save its new configuration
332	 * and put back the old block pointers long enough to process them.
333	 * Note that we save the new block configuration so we can check it
334	 * when we are done.
335	 */
336	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
337	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
338	oip->i_size = osize;
339
340	vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META;
341
342	if (vflags & BUF_WRITE_DATA)
343	        ffs_fsync_internal(ovp, MNT_WAIT);
344	allerror = buf_invalidateblks(ovp, vflags, 0, 0);
345
346	/*
347	 * Indirect blocks first.
348	 */
349	indir_lbn[SINGLE] = -NDADDR;
350	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
351	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
352	for (level = TRIPLE; level >= SINGLE; level--) {
353		bn = oip->i_ib[level];
354		if (bn != 0) {
355			error = ffs_indirtrunc(oip, indir_lbn[level],
356			    fsbtodb(fs, bn), lastiblock[level], level, &count);
357			if (error)
358				allerror = error;
359			blocksreleased += count;
360			if (lastiblock[level] < 0) {
361				oip->i_ib[level] = 0;
362				ffs_blkfree(oip, bn, fs->fs_bsize);
363				blocksreleased += nblocks;
364			}
365		}
366		if (lastiblock[level] >= 0)
367			goto done;
368	}
369
370	/*
371	 * All whole direct blocks or frags.
372	 */
373	for (i = NDADDR - 1; i > lastblock; i--) {
374		register long bsize;
375
376		bn = oip->i_db[i];
377		if (bn == 0)
378			continue;
379		oip->i_db[i] = 0;
380		bsize = blksize(fs, oip, i);
381		ffs_blkfree(oip, bn, bsize);
382		blocksreleased += btodb(bsize, devBlockSize);
383	}
384	if (lastblock < 0)
385		goto done;
386
387	/*
388	 * Finally, look for a change in size of the
389	 * last direct block; release any frags.
390	 */
391	bn = oip->i_db[lastblock];
392	if (bn != 0) {
393		long oldspace, newspace;
394
395		/*
396		 * Calculate amount of space we're giving
397		 * back as old block size minus new block size.
398		 */
399		oldspace = blksize(fs, oip, lastblock);
400		oip->i_size = length;
401		newspace = blksize(fs, oip, lastblock);
402		if (newspace == 0)
403			panic("itrunc: newspace");
404		if (oldspace - newspace > 0) {
405			/*
406			 * Block number of space to be free'd is
407			 * the old block # plus the number of frags
408			 * required for the storage we're keeping.
409			 */
410			bn += numfrags(fs, newspace);
411			ffs_blkfree(oip, bn, oldspace - newspace);
412			blocksreleased += btodb(oldspace - newspace, devBlockSize);
413		}
414	}
415done:
416#if DIAGNOSTIC
417	for (level = SINGLE; level <= TRIPLE; level++)
418		if (newblks[NDADDR + level] != oip->i_ib[level])
419			panic("itrunc1");
420	for (i = 0; i < NDADDR; i++)
421		if (newblks[i] != oip->i_db[i])
422			panic("itrunc2");
423	if (length == 0 &&
424	    (vnode_hasdirtyblks(ovp) || vnode_hascleanblks(ovp)))
425		panic("itrunc3");
426#endif /* DIAGNOSTIC */
427	/*
428	 * Put back the real size.
429	 */
430	oip->i_size = length;
431	oip->i_blocks -= blocksreleased;
432	if (oip->i_blocks < 0)			/* sanity */
433		oip->i_blocks = 0;
434	oip->i_flag |= IN_CHANGE;
435#if QUOTA
436	change = dbtob((int64_t)blocksreleased,devBlockSize);
437	(void) chkdq(oip, -change, NOCRED, 0);
438#endif
439	return (allerror);
440}
441
442/*
443 * Release blocks associated with the inode ip and stored in the indirect
444 * block bn.  Blocks are free'd in LIFO order up to (but not including)
445 * lastbn.  If level is greater than SINGLE, the block is an indirect block
446 * and recursive calls to indirtrunc must be used to cleanse other indirect
447 * blocks.
448 *
449 * NB: triple indirect blocks are untested.
450 */
451static int
452ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
453	register struct inode *ip;
454	ufs_daddr_t lbn, lastbn;
455	ufs_daddr_t dbn;
456	int level;
457	long *countp;
458{
459	register int i;
460	struct buf *bp;
461	struct buf *tbp;
462	register struct fs *fs = ip->i_fs;
463	register ufs_daddr_t *bap;
464	struct vnode *vp=ITOV(ip);
465	ufs_daddr_t *copy, nb, nlbn, last;
466	long blkcount, factor;
467	int nblocks, blocksreleased = 0;
468	errno_t error = 0, allerror = 0;
469	int devBlockSize=0;
470	struct mount *mp=vp->v_mount;
471#if REV_ENDIAN_FS
472	int rev_endian=(mp->mnt_flag & MNT_REVEND);
473#endif /* REV_ENDIAN_FS */
474
475	/*
476	 * Calculate index in current block of last
477	 * block to be kept.  -1 indicates the entire
478	 * block so we need not calculate the index.
479	 */
480	factor = 1;
481	for (i = SINGLE; i < level; i++)
482		factor *= NINDIR(fs);
483	last = lastbn;
484	if (lastbn > 0)
485		last /= factor;
486
487	devBlockSize = vfs_devblocksize(mp);
488	nblocks = btodb(fs->fs_bsize, devBlockSize);
489
490	/* Doing a MALLOC here is asking for trouble. We can still
491	 * deadlock on pagerfile lock, in case we are running
492	 * low on memory and block in MALLOC
493	 */
494
495	tbp = buf_geteblk(fs->fs_bsize);
496	copy = (ufs_daddr_t *)buf_dataptr(tbp);
497
498	/*
499	 * Get buffer of block pointers, zero those entries corresponding
500	 * to blocks to be free'd, and update on disk copy first.  Since
501	 * double(triple) indirect before single(double) indirect, calls
502	 * to bmap on these blocks will fail.  However, we already have
503	 * the on disk address, so we have to set the blkno field
504	 * explicitly instead of letting buf_bread do everything for us.
505	 */
506
507	vp = ITOV(ip);
508	bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, 0, 0, BLK_META);
509
510	if (buf_valid(bp)) {
511		/* Braces must be here in case trace evaluates to nothing. */
512		trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
513	} else {
514		trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn);
515		OSIncrementAtomic(&current_proc()->p_stats->p_ru.ru_inblock);	/* pay for read */
516		buf_setflags(bp,  B_READ);
517		if (buf_count(bp) > buf_size(bp))
518			panic("ffs_indirtrunc: bad buffer size");
519		buf_setblkno(bp, (daddr64_t)((unsigned)dbn));
520		VNOP_STRATEGY(bp);
521		error = buf_biowait(bp);
522	}
523	if (error) {
524		buf_brelse(bp);
525		*countp = 0;
526		buf_brelse(tbp);
527		return ((int)error);
528	}
529
530	bap = (ufs_daddr_t *)buf_dataptr(bp);
531	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
532	bzero((caddr_t)&bap[last + 1],
533	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t));
534	if (last == -1)
535		buf_markinvalid(bp);
536	if (last != -1 && (vp)->v_mount->mnt_flag & MNT_ASYNC) {
537		error = 0;
538		buf_bdwrite(bp);
539	} else {
540		error = buf_bwrite(bp);
541		if (error)
542			allerror = error;
543	}
544	bap = copy;
545
546	/*
547	 * Recursively free totally unused blocks.
548	 */
549	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
550	    i--, nlbn += factor) {
551#if	REV_ENDIAN_FS
552		if (rev_endian)
553			nb = OSSwapInt32(bap[i]);
554		else {
555#endif	/* REV_ENDIAN_FS */
556			nb = bap[i];
557#if	REV_ENDIAN_FS
558		}
559#endif	/* REV_ENDIAN_FS */
560		if (nb == 0)
561			continue;
562		if (level > SINGLE) {
563			if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
564			    (ufs_daddr_t)-1, level - 1, &blkcount))
565				allerror = error;
566			blocksreleased += blkcount;
567		}
568		ffs_blkfree(ip, nb, fs->fs_bsize);
569		blocksreleased += nblocks;
570	}
571
572	/*
573	 * Recursively free last partial block.
574	 */
575	if (level > SINGLE && lastbn >= 0) {
576		last = lastbn % factor;
577#if	REV_ENDIAN_FS
578		if (rev_endian)
579			nb = OSSwapInt32(bap[i]);
580		else {
581#endif	/* REV_ENDIAN_FS */
582			nb = bap[i];
583#if	REV_ENDIAN_FS
584		}
585#endif	/* REV_ENDIAN_FS */
586		if (nb != 0) {
587			if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
588			    last, level - 1, &blkcount))
589				allerror = error;
590			blocksreleased += blkcount;
591		}
592	}
593	buf_brelse(tbp);
594	*countp = blocksreleased;
595	return ((int)allerror);
596}
597
598