/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <rev_endian_fs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/socket.h>
#include <sys/mount_internal.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/file.h>
#include <sys/disk.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/ubc.h>
#include <sys/quota.h>

#include <miscfs/specfs/specdev.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#if REV_ENDIAN_FS
#include <ufs/ufs/ufs_byte_order.h>
#include <libkern/OSByteOrder.h>
#endif /* REV_ENDIAN_FS */

int ffs_sbupdate(struct ufsmount *, int);

struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_vfs_getattr,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_sysctl,
	ffs_vfs_setattr,
	{0}
};
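
/*
 * Note: the initializers above are positional and must stay in the slot
 * order defined by struct vfsops (mount, start, unmount, root, quotactl,
 * getattr, sync, vget, fhtovp, vptofh, init, sysctl, setattr); the
 * trailing {0} zeroes the reserved entries.
 */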

extern u_long nextgennumber;

union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}
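
/*
 * SETHIGH/SETLOW store a 32-bit value into the high or low word of a
 * 64-bit quantity by round-tripping it through the _qcvt union;
 * _QUAD_HIGHWORD/_QUAD_LOWWORD select the correct array slot for the
 * host byte order.  For example, ffs_vget_internal() below uses them to
 * pack the mount-time seconds and scaled microseconds into ip->i_modrev:
 *
 *	SETHIGH(ip->i_modrev, tv.tv_sec);
 *	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
 */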

/*
 * Called by main() when ufs is going to be mounted as root.
 */
int
ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
{
	struct proc *p = current_proc();	/* XXX */
	int	error;

	/* Set asynchronous flag by default */
	vfs_setflags(mp, MNT_ASYNC);

	if (error = ffs_mountfs(rvp, mp, context))
		return (error);

	(void)ffs_statfs(mp, vfs_statfs(mp), NULL);

	return (0);
}

/*
 * VFS Operations.
 *
 * mount system call
 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;
	int error = 0, flags;
	mode_t accessmode;
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly for later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* restore fs_ronly after the reload */
		fs->fs_ronly = ronly;
		/*
		 * Do not upgrade the file system to read-write if the user
		 * booted single-user and then tries to mount -uw without
		 * running fsck first.
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return (EPERM);
			}
		}

		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		if (devvp == 0) {
			return (0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		ufs_ihashinit();
		error = ffs_mountfs(devvp, mp, context);
	} else {
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}


struct ffs_reload_cargs {
	struct vnode	*devvp;
	kauth_cred_t	cred;
	struct fs	*fs;
	struct proc	*p;
	int		error;
#if REV_ENDIAN_FS
	int		rev_endian;
#endif /* REV_ENDIAN_FS */
};
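
/*
 * vnode_iterate() hands its callback only (vp, arg), so the state that
 * ffs_reload_callback needs (device vnode, credential, superblock, proc)
 * is bundled into this struct and passed through the opaque pointer;
 * the callback reports failures back through 'error'.
 */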


static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf   *bp;
	struct fs    *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
					 (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	if (args->rev_endian) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
				    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
		ip->i_din = *((struct dinode *)buf_dataptr(bp) +
			      ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}


/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 */
int
ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
{
	register struct vnode *devvp;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	int i, blks, size, error;
	u_int64_t maxfilesize;					/* XXX */
	int32_t *lp;
	struct ffs_reload_cargs args;
#if REV_ENDIAN_FS
	int rev_endian = (mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mountp)->um_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("ffs_reload: dirty1");
	/*
	 * Step 2: re-read superblock from disk.
	 */
	size = vfs_devblocksize(mountp);

	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED, &bp)) {
		buf_brelse(bp);
		return (error);
	}
	newfs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	if (rev_endian) {
		error = byte_swap_sbin(newfs);
		if (error) {
			buf_brelse(bp);
			return (error);
		}
	}
#endif /* REV_ENDIAN_FS */
	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(newfs);
#endif /* REV_ENDIAN_FS */

		buf_brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	fs = VFSTOUFS(mountp)->um_fs;
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_sbout(newfs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat(fs);
	maxfilesize = 0x100000000ULL;    /* 4GB */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
					   NOCRED, &bp)) {
			buf_brelse(bp);
			return (error);
		}
#if REV_ENDIAN_FS
		if (rev_endian) {
			/* csum swaps */
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian) {
			/* csum swaps */
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}
#if REV_ENDIAN_FS
	args.rev_endian = rev_endian;
#endif /* REV_ENDIAN_FS */
	args.devvp = devvp;
	args.cred = cred;
	args.fs = fs;
	args.p = p;
	args.error = 0;
	/*
	 * ffs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);

	return (args.error);
}

/*
 * Common code for mount and mountroot
 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian = 0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
	    SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		if (error = byte_swap_sbin(fs))
			goto out;

		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;		/* XXX needs translation */
			goto out;
		}
		rev_endian = 1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}

	if (fs->fs_sbsize < 0 || fs->fs_sbsize > SBSIZE) {
		error = EINVAL;
		goto out;
	}

	/*
	 * The buffer cache does not handle multiple pages in a buf when
	 * invalidating an incore buffer in pageout.  There are no locks
	 * in the pageout path, so there is a danger of losing data when
	 * block allocation happens at the same time as a pageout of a
	 * buddy page: incore() returns a buf covering both pages, which
	 * leads vnode-pageout to incorrectly flush the entire buf.
	 * Until the low-level FFS code is modified to deal with this,
	 * do not mount any filesystem with a block size larger than 4K.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the device block size the file system was built with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if (dbsize <= 0) {
		kprintf("device blocksize computation failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
				FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}
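
	/*
	 * Note on the computation above: NSPF(fs) is the superblock's
	 * sectors-per-fragment count, so fs_fsize / NSPF(fs) recovers the
	 * sector size the filesystem was created with (e.g. a 1024-byte
	 * fragment with NSPF == 2 implies 512-byte device blocks), and
	 * DKIOCSETBLOCKSIZE switches the device to match it.
	 */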

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;          /* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0) {
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
					   (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp, fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)) {
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp, fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
				howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp, fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	if (fs->fs_cssize < 1 || fs->fs_fsize < 1 || fs->fs_ncg < 1) {
		error = EINVAL;
		goto out;
	}
	if (fs->fs_frag < 1 || fs->fs_frag > MAXFRAG) {
		error = EINVAL;
		goto out;
	}

	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0) {
		if (fs->fs_ncg > INT_MAX / sizeof(int32_t) || size > INT_MAX - fs->fs_ncg * sizeof(int32_t)) {
			error = EINVAL;
			goto out;
		}
		size += fs->fs_ncg * sizeof(int32_t);
	}
	if (fs->fs_ncg > INT_MAX / sizeof(u_int8_t) || size > INT_MAX - fs->fs_ncg * sizeof(u_int8_t)) {
		error = EINVAL;
		goto out;
	}
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
					   size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *)space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;    /* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	if (ronly == 0) {
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}

/*
 * Sanity checks for old file systems.
 *
 * XXX - goes away some day.
 */
int
ffs_oldfscompat(fs)
	struct fs *fs;
{
	int i;

	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		fs->fs_nrpos = 8;				/* XXX */
	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
								/* XXX */
		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
		for (i = 0; i < NIADDR; i++) {			/* XXX */
			sizepb *= NINDIR(fs);			/* XXX */
			fs->fs_maxfilesize += sizepb;		/* XXX */
		}						/* XXX */
		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
	}							/* XXX */
	return (0);
}
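
/*
 * For reference, the pre-FS_44INODEFMT maxfilesize computed above is
 * NDADDR direct blocks plus NIADDR levels of indirection:
 *
 *	maxfilesize = bsize * NDADDR - 1
 *	            + bsize * NINDIR + bsize * NINDIR^2 + bsize * NINDIR^3
 *
 * e.g. with an 8K block size (NINDIR == bsize / sizeof(int32_t) == 2048)
 * that comes to roughly 70 TB, far above the 4GB clamp applied by
 * ffs_mountfs() and ffs_reload().
 */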

/*
 * unmount system call
 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	if ((error = ffs_flushfiles(mp, flags, p)) && !force)
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	if (fs->fs_ronly == 0) {
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			fs->fs_clean = 0;
#ifdef notyet
			/*
			 * We could at least clean up here, since the media may be
			 * write-protected and mount does not check for write failures.
			 * FIXME LATER: the correct fix would be for mount to detect
			 * write-protected media and downgrade to a read-only mount.
			 * For now, just return the error.
			 */
			return (error);
#endif /* notyet */
		}
	}
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, p)
	register struct mount *mp;
	int flags;
	struct proc *p;
{
	register struct ufsmount *ump;
	int i, error;

	ump = VFSTOUFS(mp);

#if QUOTA
	/*
	 * NOTE: The open quota files have an indirect reference
	 * on the root directory vnode.  We must account for this
	 * extra reference when doing the initial vflush.
	 */
	if (mp->mnt_flag & MNT_QUOTA) {
		struct vnode *rootvp = NULLVP;
		int quotafilecnt = 0;

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (ump->um_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/*
		 * Check if the root vnode is in our inode hash
		 * (so we can skip over it).
		 */
		rootvp = ufs_ihashget(ump->um_dev, ROOTINO);

		error = vflush(mp, rootvp, SKIPSYSTEM|flags);

		if (rootvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the ufs_ihashget call above.
			 */
			if ((error == 0) &&
			    (rootvp->v_usecount > (1 + quotafilecnt))) {
				error = EBUSY;  /* root dir is still open */
			}
			vnode_put(rootvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		for (i = 0; i < MAXQUOTAS; i++) {
			if (ump->um_qfiles[i].qf_vp == NULLVP)
				continue;
			quotaoff(mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	error = vflush(mp, NULLVP, SKIPSWAP|flags);
	error = vflush(mp, NULLVP, flags);
	return (error);
}

/*
 * Get file system statistics.
 */
int
ffs_statfs(mp, sbp, context)
	struct mount *mp;
	register struct vfsstatfs *sbp;
	vfs_context_t context;
{
	register struct ufsmount *ump;
	register struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_MAGIC)
		panic("ffs_statfs");
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
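	/*
	 * By FFS convention f_bsize reports the fragment size (the unit in
	 * which free space is counted and allocated) while f_iosize reports
	 * the full block size, the preferred transfer size for I/O.
	 */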
	sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
	sbp->f_bfree = (uint64_t)((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
		fs->fs_cstotal.cs_nffree));
	sbp->f_bavail = (uint64_t)((unsigned long)freespace(fs, fs->fs_minfree));
	sbp->f_files = (uint64_t)((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
	sbp->f_ffree = (uint64_t)((unsigned long)fs->fs_cstotal.cs_nifree);
	return (0);
}

int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			if (mp->mnt_flag & MNT_REVEND)
				length = OSSwapInt16(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
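		/*
		 * 'capabilities' advertises what this FFS instance actually
		 * supports; the 'valid' arrays further below record which
		 * capability bits the filesystem knows how to report at all,
		 * so a bit missing from 'capabilities' but present in 'valid'
		 * is known to be unsupported rather than merely unreported.
		 */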
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_HIDDEN_FILES;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE |
		    VOL_CAP_FMT_OPENDENYMODES |
		    VOL_CAP_FMT_HIDDEN_FILES;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK |
		    VOL_CAP_INT_MANLOCK;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}


int
ffs_vfs_setattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	kauth_cred_t cred;
	char *offset;
	int bs, error;


	ump = VFSTOUFS(mp);
	cred = vfs_context_ucred(context);

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);
		if (error = buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		/* Validate the label structure; init if not valid */
		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;
		if (!ufs_label_check(ulp))
			ufs_label_init(ulp);

		/* Copy new name over existing name */
		ulp->ul_namelen = strlen(fsap->f_vol_name);
		bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
		ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
		ulp->ul_name[ulp->ul_namelen] = '\0';

#if REV_ENDIAN_FS
		if (mp->mnt_flag & MNT_REVEND)
			ulp->ul_namelen = OSSwapInt16(ulp->ul_namelen);
#endif

		/* Update the checksum */
		ulp->ul_checksum = 0;
		ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));

		/* Write the label back to disk */
		buf_bwrite(bp);
		bp = NULL;

		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	return (0);
}

struct ffs_sync_cargs {
	vfs_context_t	context;
	int		waitfor;
	int		error;
};
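
/*
 * Same pattern as ffs_reload_cargs: vnode_iterate() only passes (vp, arg),
 * so the sync context and wait mode travel to ffs_sync_callback through
 * this struct, and any fsync failure is recorded in 'error'.
 */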


static int
ffs_sync_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct ffs_sync_cargs *args;
	int error;

	args = (struct ffs_sync_cargs *)cargs;

	ip = VTOI(vp);

	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
		error = VNOP_FSYNC(vp, args->waitfor, args->context);

		if (error)
			args->error = error;

	}
	return (VNODE_RETURNED);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ffs_sync(mp, waitfor, context)
	struct mount *mp;
	int waitfor;
	vfs_context_t context;
{
	struct vnode *nvp, *vp;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	struct timeval tv;
	int error, allerror = 0;
	struct ffs_sync_cargs args;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("update: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	args.context = context;
	args.waitfor = waitfor;
	args.error = 0;
	/*
	 * ffs_sync_callback will be called for each vnode
	 * hung off of this mount point... the vnode will be
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);

	if (args.error)
		allerror = args.error;

	/*
	 * Force stale file system control information to be flushed.
	 */
	if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
		allerror = error;
#if QUOTA
	qsync(mp);
#endif
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0) {
		fs->fs_fmod = 0;
		microtime(&tv);
		fs->fs_time = tv.tv_sec;
		if (error = ffs_sbupdate(ump, waitfor))
			allerror = error;
	}
	return (allerror);
}

/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 */
int
ffs_vget(mp, ino, vpp, context)
	mount_t	mp;
	ino64_t ino;
	vnode_t *vpp;
	vfs_context_t context;
{
	return (ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
}


int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t	mp;
	ino_t	ino;
	vnode_t	*vpp;
	vnode_t	dvp;
	struct	componentname *cnp;
	int	mode;
	int	fhwanted;
{
	struct proc *p = current_proc();		/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump  = VFSTOUFS(mp);
	dev  = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;
		return (0);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
//	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
//	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
				   (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)), ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	if (vtype == VFIFO)
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
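	/*
	 * i_modrev packs the current seconds into the high 32 bits and the
	 * microseconds, scaled by 4294 (~2^32 / 1,000,000) to spread them
	 * across the full 32-bit range, into the low 32 bits, giving a
	 * monotonically increasing modification revision for NFS.
	 */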

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call vget to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 */
int
ffs_fhtovp(mp, fhlen, fhp, vpp, context)
	register struct mount *mp;
	int fhlen;
	unsigned char *fhp;
	struct vnode **vpp;
	vfs_context_t context;
{
	register struct ufid *ufhp;
	register struct inode *ip;
	struct vnode *nvp;
	struct fs *fs;
	int error;
	ino_t	  ino;

	if (fhlen < (int)sizeof(struct ufid))
		return (EINVAL);
	ufhp = (struct ufid *)fhp;
	fs = VFSTOUFS(mp)->um_fs;
	ino = ntohl(ufhp->ufid_ino);
	if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	error = ffs_vget_internal(mp, ino, &nvp, NULL, NULL, 0, 1);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	ip = VTOI(nvp);
	if (ip->i_mode == 0 || ip->i_gen != ntohl(ufhp->ufid_gen)) {
		vnode_put(nvp);
		*vpp = NULLVP;
		return (ESTALE);
	}
	*vpp = nvp;
	return (0);
}

/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
int
ffs_vptofh(vp, fhlenp, fhp, context)
	struct vnode *vp;
	int *fhlenp;
	unsigned char *fhp;
	vfs_context_t context;
{
	register struct inode *ip;
	register struct ufid *ufhp;

	if (*fhlenp < (int)sizeof(struct ufid))
		return (EOVERFLOW);
	ip = VTOI(vp);
	ufhp = (struct ufid *)fhp;
	ufhp->ufid_ino = htonl(ip->i_number);
	ufhp->ufid_gen = htonl(ip->i_gen);
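	/*
	 * The inode number and generation are stored big-endian (htonl) so
	 * exported file handles are byte-order independent; ffs_fhtovp()
	 * above converts them back with ntohl() and uses the generation to
	 * reject handles that refer to a since-reused inode.
	 */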
	*fhlenp = sizeof(struct ufid);
	return (0);
}

/*
 * Initialize the filesystem; just use ufs_init.
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{

	return (ufs_init(vfsp));
}

/*
 * fast filesystem related variables.
 */
int
ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
	   user_addr_t newp, size_t newlen, vfs_context_t context)
{
	extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;

	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case FFS_CLUSTERREAD:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &doclusterread));
	case FFS_CLUSTERWRITE:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &doclusterwrite));
	case FFS_REALLOCBLKS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &doreallocblks));
	case FFS_ASYNCFREE:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
	default:
		return (ENOTSUP);
	}
	/* NOTREACHED */
}

/*
 * Write a superblock and associated information back to disk.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize = 0;
#if REV_ENDIAN_FS
	int rev_endian = (mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
				size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems.		   XXX */
	dfs = (struct fs *)buf_dataptr(bp);			/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		dfs->fs_nrpos = -1;				/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swap bytes here so that, for inode formats older than
	 * FS_44INODEFMT, the field rotation below operates on the
	 * already-swapped superblock and moves the right fields.
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
		int32_t *lp, tmp;				/* XXX */
								/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
		tmp = lp[4];					/* XXX */
		for (i = 4; i > 0; i--)				/* XXX */
			lp[i] = lp[i-1];			/* XXX */
		lp[0] = tmp;					/* XXX */
	}							/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = OSSwapInt64(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}