ffs_vfsops.c revision 131551
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 131551 2004-07-04 08:52:35Z phk $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/mac.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/disk.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <ufs/ufs/extattr.h>
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <vm/vm.h>
65#include <vm/uma.h>
66#include <vm/vm_page.h>
67
68uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
69
70static int	ffs_sbupdate(struct ufsmount *, int);
71       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
72static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
73static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
74		    ufs2_daddr_t);
75static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
76static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
77static vfs_init_t ffs_init;
78static vfs_uninit_t ffs_uninit;
79static vfs_extattrctl_t ffs_extattrctl;
80
81static struct vfsops ufs_vfsops = {
82	.vfs_extattrctl =	ffs_extattrctl,
83	.vfs_fhtovp =		ffs_fhtovp,
84	.vfs_init =		ffs_init,
85	.vfs_mount =		ffs_mount,
86	.vfs_quotactl =		ufs_quotactl,
87	.vfs_root =		ufs_root,
88	.vfs_start =		ufs_start,
89	.vfs_statfs =		ffs_statfs,
90	.vfs_sync =		ffs_sync,
91	.vfs_uninit =		ffs_uninit,
92	.vfs_unmount =		ffs_unmount,
93	.vfs_vget =		ffs_vget,
94	.vfs_vptofh =		ffs_vptofh,
95};
96
97VFS_SET(ufs_vfsops, ufs, 0);
98
99/*
100 * ffs_mount
101 *
102 * Called when mounting local physical media
103 *
104 * PARAMETERS:
105 *		mountroot
106 *			mp	mount point structure
107 *			path	NULL (flag for root mount!!!)
108 *			data	<unused>
109 *			ndp	<unused>
110 *			p	process (user credentials check [statfs])
111 *
112 *		mount
113 *			mp	mount point structure
114 *			path	path to mount point
115 *			data	pointer to argument struct in user space
116 *			ndp	mount point namei() return (used for
117 *				credentials on reload), reused to look
118 *				up block device.
119 *			p	process (user credentials check)
120 *
121 * RETURNS:	0	Success
122 *		!0	error number (errno.h)
123 *
124 * LOCK STATE:
125 *
126 *		ENTRY
127 *			mount point is locked
128 *		EXIT
129 *			mount point is locked
130 *
131 * NOTES:
132 *		A NULL path can be used for a flag since the mount
133 *		system call will fail with EFAULT in copyinstr in
134 *		namei() if it is a genuine NULL from the user.
135 */
136int
137ffs_mount(mp, path, data, ndp, td)
138        struct mount		*mp;	/* mount struct pointer*/
139        char			*path;	/* path to mount point*/
140        caddr_t			data;	/* arguments to FS specific mount*/
141        struct nameidata	*ndp;	/* mount point credentials*/
142        struct thread		*td;	/* process requesting mount*/
143{
144	size_t size;
145	struct vnode *devvp;
146	struct ufs_args args;
147	struct ufsmount *ump = 0;
148	struct fs *fs;
149	int error, flags;
150	mode_t accessmode;
151
152	if (uma_inode == NULL) {
153		uma_inode = uma_zcreate("FFS inode",
154		    sizeof(struct inode), NULL, NULL, NULL, NULL,
155		    UMA_ALIGN_PTR, 0);
156		uma_ufs1 = uma_zcreate("FFS1 dinode",
157		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
158		    UMA_ALIGN_PTR, 0);
159		uma_ufs2 = uma_zcreate("FFS2 dinode",
160		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
161		    UMA_ALIGN_PTR, 0);
162	}
163	/*
164	 * Use NULL path to indicate we are mounting the root filesystem.
165	 */
166	if (path == NULL) {
167		if ((error = bdevvp(rootdev, &rootvp))) {
168			printf("ffs_mountroot: can't find rootvp\n");
169			return (error);
170		}
171
172		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
173			return (error);
174		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
175		return (0);
176	}
177
178	/*
179	 * Mounting non-root filesystem or updating a filesystem
180	 */
181	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
182		return (error);
183
184	/*
185	 * If updating, check whether changing from read-only to
186	 * read/write; if there is no device name, that's all we do.
187	 */
188	if (mp->mnt_flag & MNT_UPDATE) {
189		ump = VFSTOUFS(mp);
190		fs = ump->um_fs;
191		devvp = ump->um_devvp;
192		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
193			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
194				return (error);
195			/*
196			 * Flush any dirty data.
197			 */
198			if ((error = VFS_SYNC(mp, MNT_WAIT,
199			    td->td_ucred, td)) != 0) {
200				vn_finished_write(mp);
201				return (error);
202			}
203			/*
204			 * Check for and optionally get rid of files open
205			 * for writing.
206			 */
207			flags = WRITECLOSE;
208			if (mp->mnt_flag & MNT_FORCE)
209				flags |= FORCECLOSE;
210			if (mp->mnt_flag & MNT_SOFTDEP) {
211				error = softdep_flushfiles(mp, flags, td);
212			} else {
213				error = ffs_flushfiles(mp, flags, td);
214			}
215			if (error) {
216				vn_finished_write(mp);
217				return (error);
218			}
219			if (fs->fs_pendingblocks != 0 ||
220			    fs->fs_pendinginodes != 0) {
221				printf("%s: %s: blocks %jd files %d\n",
222				    fs->fs_fsmnt, "update error",
223				    (intmax_t)fs->fs_pendingblocks,
224				    fs->fs_pendinginodes);
225				fs->fs_pendingblocks = 0;
226				fs->fs_pendinginodes = 0;
227			}
228			fs->fs_ronly = 1;
229			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
230				fs->fs_clean = 1;
231			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
232				fs->fs_ronly = 0;
233				fs->fs_clean = 0;
234				vn_finished_write(mp);
235				return (error);
236			}
237			vn_finished_write(mp);
238		}
239		if ((mp->mnt_flag & MNT_RELOAD) &&
240		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
241			return (error);
242		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
243			/*
244			 * If upgrade to read-write by non-root, then verify
245			 * that user has necessary permissions on the device.
246			 */
247			if (suser(td)) {
248				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
249				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
250				    td->td_ucred, td)) != 0) {
251					VOP_UNLOCK(devvp, 0, td);
252					return (error);
253				}
254				VOP_UNLOCK(devvp, 0, td);
255			}
256			fs->fs_flags &= ~FS_UNCLEAN;
257			if (fs->fs_clean == 0) {
258				fs->fs_flags |= FS_UNCLEAN;
259				if ((mp->mnt_flag & MNT_FORCE) ||
260				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
261				     (fs->fs_flags & FS_DOSOFTDEP))) {
262					printf("WARNING: %s was not %s\n",
263					   fs->fs_fsmnt, "properly dismounted");
264				} else {
265					printf(
266"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
267					    fs->fs_fsmnt);
268					return (EPERM);
269				}
270			}
271			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
272				return (error);
273			fs->fs_ronly = 0;
274			fs->fs_clean = 0;
275			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
276				vn_finished_write(mp);
277				return (error);
278			}
279			/* check to see if we need to start softdep */
280			if ((fs->fs_flags & FS_DOSOFTDEP) &&
281			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
282				vn_finished_write(mp);
283				return (error);
284			}
285			if (fs->fs_snapinum[0] != 0)
286				ffs_snapshot_mount(mp);
287			vn_finished_write(mp);
288		}
289		/*
290		 * Soft updates is incompatible with "async",
291		 * so if we are doing softupdates stop the user
292		 * from setting the async flag in an update.
293		 * Softdep_mount() clears it in an initial mount
294		 * or ro->rw remount.
295		 */
296		if (mp->mnt_flag & MNT_SOFTDEP)
297			mp->mnt_flag &= ~MNT_ASYNC;
298		/*
299		 * If not updating name, process export requests.
300		 */
301		if (args.fspec == 0)
302			return (vfs_export(mp, &args.export));
303		/*
304		 * If this is a snapshot request, take the snapshot.
305		 */
306		if (mp->mnt_flag & MNT_SNAPSHOT)
307			return (ffs_snapshot(mp, args.fspec));
308	}
309
310	/*
311	 * Not an update, or updating the name: look up the name
312	 * and verify that it refers to a sensible disk device.
313	 */
314	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
315	if ((error = namei(ndp)) != 0)
316		return (error);
317	NDFREE(ndp, NDF_ONLY_PNBUF);
318	devvp = ndp->ni_vp;
319	if (!vn_isdisk(devvp, &error)) {
320		vrele(devvp);
321		return (error);
322	}
323
324	/*
325	 * If mount by non-root, then verify that user has necessary
326	 * permissions on the device.
327	 */
328	if (suser(td)) {
329		accessmode = VREAD;
330		if ((mp->mnt_flag & MNT_RDONLY) == 0)
331			accessmode |= VWRITE;
332		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
333		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
334			vput(devvp);
335			return (error);
336		}
337		VOP_UNLOCK(devvp, 0, td);
338	}
339
340	if (mp->mnt_flag & MNT_UPDATE) {
341		/*
342		 * Update only
343		 *
344		 * If it's not the same vnode, or at least the same device
345		 * then it's not correct.
346		 */
347
348		if (devvp != ump->um_devvp &&
349		    devvp->v_rdev != ump->um_devvp->v_rdev)
350			error = EINVAL;	/* needs translation */
351		vrele(devvp);
352		if (error)
353			return (error);
354	} else {
355		/*
356		 * New mount
357		 *
358		 * We need the name for the mount point (also used for
359		 * "last mounted on") copied in. If an error occurs,
360		 * the mount point is discarded by the upper level code.
361		 * Note that vfs_mount() populates f_mntonname for us.
362		 */
363		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
364			vrele(devvp);
365			return (error);
366		}
367	}
368	/*
369	 * Save "mounted from" device name info for mount point (NULL pad).
370	 */
371	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
372	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
373	/*
374	 * Initialize filesystem stat information in mount struct.
375	 */
376	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
377	return (0);
378}
379
380/*
381 * Reload all incore data for a filesystem (used after running fsck on
382 * the root filesystem and finding things to fix). The filesystem must
383 * be mounted read-only.
384 *
385 * Things to do to update the mount:
386 *	1) invalidate all cached meta-data.
387 *	2) re-read superblock from disk.
388 *	3) re-read summary information from disk.
389 *	4) invalidate all inactive vnodes.
390 *	5) invalidate all cached file data.
391 *	6) re-read inode data for all active vnodes.
392 */
393int
394ffs_reload(mp, cred, td)
395	struct mount *mp;
396	struct ucred *cred;
397	struct thread *td;
398{
399	struct vnode *vp, *nvp, *devvp;
400	struct inode *ip;
401	void *space;
402	struct buf *bp;
403	struct fs *fs, *newfs;
404	ufs2_daddr_t sblockloc;
405	int i, blks, size, error;
406	int32_t *lp;
407
408	if ((mp->mnt_flag & MNT_RDONLY) == 0)
409		return (EINVAL);
410	/*
411	 * Step 1: invalidate all cached meta-data.
412	 */
413	devvp = VFSTOUFS(mp)->um_devvp;
414	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
415	if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0)
416		panic("ffs_reload: dirty1");
417	/*
418	 * Only VMIO the backing device if the backing device is a real
419	 * disk device.  See ffs_mountfs() for more details.
420	 */
421	if (vn_isdisk(devvp, NULL))
422		vfs_object_create(devvp, td, td->td_ucred);
423	VOP_UNLOCK(devvp, 0, td);
424
425	/*
426	 * Step 2: re-read superblock from disk.
427	 */
428	fs = VFSTOUFS(mp)->um_fs;
429	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
430	    NOCRED, &bp)) != 0)
431		return (error);
432	newfs = (struct fs *)bp->b_data;
433	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
434	     newfs->fs_magic != FS_UFS2_MAGIC) ||
435	    newfs->fs_bsize > MAXBSIZE ||
436	    newfs->fs_bsize < sizeof(struct fs)) {
437			brelse(bp);
438			return (EIO);		/* XXX needs translation */
439	}
440	/*
441	 * Copy pointer fields back into superblock before copying in	XXX
442	 * new superblock. These should really be in the ufsmount.	XXX
443	 * Note that important parameters (eg fs_ncg) are unchanged.
444	 */
445	newfs->fs_csp = fs->fs_csp;
446	newfs->fs_maxcluster = fs->fs_maxcluster;
447	newfs->fs_contigdirs = fs->fs_contigdirs;
448	newfs->fs_active = fs->fs_active;
449	/* The file system is still read-only. */
450	newfs->fs_ronly = 1;
451	sblockloc = fs->fs_sblockloc;
452	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
453	brelse(bp);
454	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
455	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
456	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
457		printf("%s: reload pending error: blocks %jd files %d\n",
458		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
459		    fs->fs_pendinginodes);
460		fs->fs_pendingblocks = 0;
461		fs->fs_pendinginodes = 0;
462	}
463
464	/*
465	 * Step 3: re-read summary information from disk.
466	 */
467	blks = howmany(fs->fs_cssize, fs->fs_fsize);
468	space = fs->fs_csp;
469	for (i = 0; i < blks; i += fs->fs_frag) {
470		size = fs->fs_bsize;
471		if (i + fs->fs_frag > blks)
472			size = (blks - i) * fs->fs_fsize;
473		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
474		    NOCRED, &bp);
475		if (error)
476			return (error);
477		bcopy(bp->b_data, space, (u_int)size);
478		space = (char *)space + size;
479		brelse(bp);
480	}
481	/*
482	 * We no longer know anything about clusters per cylinder group.
483	 */
484	if (fs->fs_contigsumsize > 0) {
485		lp = fs->fs_maxcluster;
486		for (i = 0; i < fs->fs_ncg; i++)
487			*lp++ = fs->fs_contigsumsize;
488	}
489
490loop:
491	MNT_ILOCK(mp);
492	MNT_VNODE_FOREACH(vp, mp, nvp) {
493		VI_LOCK(vp);
494		if (vp->v_iflag & VI_XLOCK) {
495			VI_UNLOCK(vp);
496			continue;
497		}
498		MNT_IUNLOCK(mp);
499		/*
500		 * Step 4: invalidate all inactive vnodes.
501		 */
502		if (vp->v_usecount == 0) {
503			vgonel(vp, td);
504			goto loop;
505		}
506		/*
507		 * Step 5: invalidate all cached file data.
508		 */
509		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
510			goto loop;
511		}
512		if (vinvalbuf(vp, 0, cred, td, 0, 0))
513			panic("ffs_reload: dirty2");
514		/*
515		 * Step 6: re-read inode data for all active vnodes.
516		 */
517		ip = VTOI(vp);
518		error =
519		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
520		    (int)fs->fs_bsize, NOCRED, &bp);
521		if (error) {
522			VOP_UNLOCK(vp, 0, td);
523			vrele(vp);
524			return (error);
525		}
526		ffs_load_inode(bp, ip, fs, ip->i_number);
527		ip->i_effnlink = ip->i_nlink;
528		brelse(bp);
529		VOP_UNLOCK(vp, 0, td);
530		vrele(vp);
531		MNT_ILOCK(mp);
532	}
533	MNT_IUNLOCK(mp);
534	return (0);
535}
536
537/*
538 * Possible superblock locations ordered from most to least likely.
539 */
540static int sblock_try[] = SBLOCKSEARCH;
541
542/*
543 * Common code for mount and mountroot
544 */
545static int
546ffs_mountfs(devvp, mp, td)
547	struct vnode *devvp;
548	struct mount *mp;
549	struct thread *td;
550{
551	struct ufsmount *ump;
552	struct buf *bp;
553	struct fs *fs;
554	struct cdev *dev;
555	void *space;
556	ufs2_daddr_t sblockloc;
557	int error, i, blks, size, ronly;
558	int32_t *lp;
559	struct ucred *cred;
560	size_t strsize;
561
562	dev = devvp->v_rdev;
563	cred = td ? td->td_ucred : NOCRED;
564	/*
565	 * Disallow multiple mounts of the same device.
566	 * Disallow mounting of a device that is currently in use
567	 * (except for root, which might share swap device for miniroot).
568	 * Flush out any old buffers remaining from a previous use.
569	 */
570	error = vfs_mountedon(devvp);
571	if (error)
572		return (error);
573	if (vcount(devvp) > 1 && devvp != rootvp)
574		return (EBUSY);
575	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
576	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
577	if (error) {
578		VOP_UNLOCK(devvp, 0, td);
579		return (error);
580	}
581
582	/*
583	 * Only VMIO the backing device if the backing device is a real
584	 * disk device.
585	 * Note that it is optional that the backing device be VMIOed.  This
586	 * increases the opportunity for metadata caching.
587	 */
588	if (vn_isdisk(devvp, NULL))
589		vfs_object_create(devvp, td, cred);
590
591	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
592	/*
593	 * XXX: open the device with read and write access even if only
594	 * read access is needed now.  Write access is needed if the
595	 * filesystem is ever mounted read/write, and we don't change the
596	 * access mode for remounts.
597	 */
598#ifdef notyet
599	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1);
600#else
601	error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1);
602#endif
603	VOP_UNLOCK(devvp, 0, td);
604	if (error)
605		return (error);
606	if (devvp->v_rdev->si_iosize_max != 0)
607		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
608	if (mp->mnt_iosize_max > MAXPHYS)
609		mp->mnt_iosize_max = MAXPHYS;
610
611	bp = NULL;
612	ump = NULL;
613	fs = NULL;
614	sblockloc = 0;
615	/*
616	 * Try reading the superblock in each of its possible locations.
617	 */
618	for (i = 0; sblock_try[i] != -1; i++) {
619		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
620		    cred, &bp)) != 0)
621			goto out;
622		fs = (struct fs *)bp->b_data;
623		sblockloc = sblock_try[i];
624		if ((fs->fs_magic == FS_UFS1_MAGIC ||
625		     (fs->fs_magic == FS_UFS2_MAGIC &&
626		      (fs->fs_sblockloc == sblockloc ||
627		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
628		    fs->fs_bsize <= MAXBSIZE &&
629		    fs->fs_bsize >= sizeof(struct fs))
630			break;
631		brelse(bp);
632		bp = NULL;
633	}
634	if (sblock_try[i] == -1) {
635		error = EINVAL;		/* XXX needs translation */
636		goto out;
637	}
638	fs->fs_fmod = 0;
639	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
640	fs->fs_flags &= ~FS_UNCLEAN;
641	if (fs->fs_clean == 0) {
642		fs->fs_flags |= FS_UNCLEAN;
643		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
644		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
645		     (fs->fs_flags & FS_DOSOFTDEP))) {
646			printf(
647"WARNING: %s was not properly dismounted\n",
648			    fs->fs_fsmnt);
649		} else {
650			printf(
651"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
652			    fs->fs_fsmnt);
653			error = EPERM;
654			goto out;
655		}
656		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
657		    (mp->mnt_flag & MNT_FORCE)) {
658			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
659			    (intmax_t)fs->fs_pendingblocks,
660			    fs->fs_pendinginodes);
661			fs->fs_pendingblocks = 0;
662			fs->fs_pendinginodes = 0;
663		}
664	}
665	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
666		printf("%s: mount pending error: blocks %jd files %d\n",
667		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
668		    fs->fs_pendinginodes);
669		fs->fs_pendingblocks = 0;
670		fs->fs_pendinginodes = 0;
671	}
672	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
673	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
674	    M_WAITOK);
675	if (fs->fs_magic == FS_UFS1_MAGIC) {
676		ump->um_fstype = UFS1;
677		ump->um_balloc = ffs_balloc_ufs1;
678	} else {
679		ump->um_fstype = UFS2;
680		ump->um_balloc = ffs_balloc_ufs2;
681	}
682	ump->um_blkatoff = ffs_blkatoff;
683	ump->um_truncate = ffs_truncate;
684	ump->um_update = ffs_update;
685	ump->um_valloc = ffs_valloc;
686	ump->um_vfree = ffs_vfree;
687	ump->um_ifree = ffs_ifree;
688	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
689	if (fs->fs_sbsize < SBLOCKSIZE)
690		bp->b_flags |= B_INVAL | B_NOCACHE;
691	brelse(bp);
692	bp = NULL;
693	fs = ump->um_fs;
694	ffs_oldfscompat_read(fs, ump, sblockloc);
695	fs->fs_ronly = ronly;
696	size = fs->fs_cssize;
697	blks = howmany(size, fs->fs_fsize);
698	if (fs->fs_contigsumsize > 0)
699		size += fs->fs_ncg * sizeof(int32_t);
700	size += fs->fs_ncg * sizeof(u_int8_t);
701	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
702	fs->fs_csp = space;
703	for (i = 0; i < blks; i += fs->fs_frag) {
704		size = fs->fs_bsize;
705		if (i + fs->fs_frag > blks)
706			size = (blks - i) * fs->fs_fsize;
707		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
708		    cred, &bp)) != 0) {
709			free(fs->fs_csp, M_UFSMNT);
710			goto out;
711		}
712		bcopy(bp->b_data, space, (u_int)size);
713		space = (char *)space + size;
714		brelse(bp);
715		bp = NULL;
716	}
717	if (fs->fs_contigsumsize > 0) {
718		fs->fs_maxcluster = lp = space;
719		for (i = 0; i < fs->fs_ncg; i++)
720			*lp++ = fs->fs_contigsumsize;
721		space = lp;
722	}
723	size = fs->fs_ncg * sizeof(u_int8_t);
724	fs->fs_contigdirs = (u_int8_t *)space;
725	bzero(fs->fs_contigdirs, size);
726	fs->fs_active = NULL;
727	mp->mnt_data = (qaddr_t)ump;
728	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
729	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
730	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
731	    vfs_getvfs(&mp->mnt_stat.f_fsid))
732		vfs_getnewfsid(mp);
733	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
734	mp->mnt_flag |= MNT_LOCAL;
735	if ((fs->fs_flags & FS_MULTILABEL) != 0)
736#ifdef MAC
737		mp->mnt_flag |= MNT_MULTILABEL;
738#else
739		printf(
740"WARNING: %s: multilabel flag on fs but no MAC support\n",
741		    fs->fs_fsmnt);
742#endif
743	if ((fs->fs_flags & FS_ACLS) != 0)
744#ifdef UFS_ACL
745		mp->mnt_flag |= MNT_ACLS;
746#else
747		printf(
748"WARNING: %s: ACLs flag on fs but no ACLs support\n",
749		    fs->fs_fsmnt);
750#endif
751	ump->um_mountp = mp;
752	ump->um_dev = dev;
753	ump->um_devvp = devvp;
754	ump->um_nindir = fs->fs_nindir;
755	ump->um_bptrtodb = fs->fs_fsbtodb;
756	ump->um_seqinc = fs->fs_frag;
757	for (i = 0; i < MAXQUOTAS; i++)
758		ump->um_quotas[i] = NULLVP;
759#ifdef UFS_EXTATTR
760	ufs_extattr_uepm_init(&ump->um_extattr);
761#endif
762	devvp->v_rdev->si_mountpoint = mp;
763
764	/*
765	 * Set FS local "last mounted on" information (NULL pad)
766	 */
767	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
768			fs->fs_fsmnt,			/* copy area*/
769			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
770			&strsize);			/* real size*/
771	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
772
773	if( mp->mnt_flag & MNT_ROOTFS) {
774		/*
775		 * Root mount; update timestamp in mount structure.
776		 * this will be used by the common root mount code
777		 * to update the system clock.
778		 */
779		mp->mnt_time = fs->fs_time;
780	}
781
782	if (ronly == 0) {
783		if ((fs->fs_flags & FS_DOSOFTDEP) &&
784		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
785			free(fs->fs_csp, M_UFSMNT);
786			goto out;
787		}
788		if (fs->fs_snapinum[0] != 0)
789			ffs_snapshot_mount(mp);
790		fs->fs_fmod = 1;
791		fs->fs_clean = 0;
792		(void) ffs_sbupdate(ump, MNT_WAIT);
793	}
794#ifdef UFS_EXTATTR
795#ifdef UFS_EXTATTR_AUTOSTART
796	/*
797	 *
798	 * Auto-starting does the following:
799	 *	- check for /.attribute in the fs, and extattr_start if so
800	 *	- for each file in .attribute, enable that file with
801	 * 	  an attribute of the same name.
802	 * Not clear how to report errors -- probably eat them.
803	 * This would all happen while the filesystem was busy/not
804	 * available, so would effectively be "atomic".
805	 */
806	(void) ufs_extattr_autostart(mp, td);
807#endif /* !UFS_EXTATTR_AUTOSTART */
808#endif /* !UFS_EXTATTR */
809	return (0);
810out:
811	devvp->v_rdev->si_mountpoint = NULL;
812	if (bp)
813		brelse(bp);
814	/* XXX: see comment above VOP_OPEN. */
815#ifdef notyet
816	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td);
817#else
818	(void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td);
819#endif
820	if (ump) {
821		free(ump->um_fs, M_UFSMNT);
822		free(ump, M_UFSMNT);
823		mp->mnt_data = (qaddr_t)0;
824	}
825	return (error);
826}
827
828#include <sys/sysctl.h>
829int bigcgs = 0;
830SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
831
832/*
833 * Sanity checks for loading old filesystem superblocks.
834 * See ffs_oldfscompat_write below for unwound actions.
835 *
836 * XXX - Parts get retired eventually.
837 * Unfortunately new bits get added.
838 */
839static void
840ffs_oldfscompat_read(fs, ump, sblockloc)
841	struct fs *fs;
842	struct ufsmount *ump;
843	ufs2_daddr_t sblockloc;
844{
845	off_t maxfilesize;
846
847	/*
848	 * If not yet done, update fs_flags location and value of fs_sblockloc.
849	 */
850	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
851		fs->fs_flags = fs->fs_old_flags;
852		fs->fs_old_flags |= FS_FLAGS_UPDATED;
853		fs->fs_sblockloc = sblockloc;
854	}
855	/*
856	 * If not yet done, update UFS1 superblock with new wider fields.
857	 */
858	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
859		fs->fs_maxbsize = fs->fs_bsize;
860		fs->fs_time = fs->fs_old_time;
861		fs->fs_size = fs->fs_old_size;
862		fs->fs_dsize = fs->fs_old_dsize;
863		fs->fs_csaddr = fs->fs_old_csaddr;
864		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
865		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
866		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
867		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
868	}
869	if (fs->fs_magic == FS_UFS1_MAGIC &&
870	    fs->fs_old_inodefmt < FS_44INODEFMT) {
871		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
872		fs->fs_qbmask = ~fs->fs_bmask;
873		fs->fs_qfmask = ~fs->fs_fmask;
874	}
875	if (fs->fs_magic == FS_UFS1_MAGIC) {
876		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
877		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
878		if (fs->fs_maxfilesize > maxfilesize)
879			fs->fs_maxfilesize = maxfilesize;
880	}
881	/* Compatibility for old filesystems */
882	if (fs->fs_avgfilesize <= 0)
883		fs->fs_avgfilesize = AVFILESIZ;
884	if (fs->fs_avgfpdir <= 0)
885		fs->fs_avgfpdir = AFPDIR;
886	if (bigcgs) {
887		fs->fs_save_cgsize = fs->fs_cgsize;
888		fs->fs_cgsize = fs->fs_bsize;
889	}
890}
891
892/*
893 * Unwinding superblock updates for old filesystems.
894 * See ffs_oldfscompat_read above for details.
895 *
896 * XXX - Parts get retired eventually.
897 * Unfortunately new bits get added.
898 */
899static void
900ffs_oldfscompat_write(fs, ump)
901	struct fs *fs;
902	struct ufsmount *ump;
903{
904
905	/*
906	 * Copy back UFS2 updated fields that UFS1 inspects.
907	 */
908	if (fs->fs_magic == FS_UFS1_MAGIC) {
909		fs->fs_old_time = fs->fs_time;
910		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
911		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
912		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
913		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
914		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
915	}
916	if (bigcgs) {
917		fs->fs_cgsize = fs->fs_save_cgsize;
918		fs->fs_save_cgsize = 0;
919	}
920}
921
922/*
923 * unmount system call
924 */
925int
926ffs_unmount(mp, mntflags, td)
927	struct mount *mp;
928	int mntflags;
929	struct thread *td;
930{
931	struct ufsmount *ump = VFSTOUFS(mp);
932	struct fs *fs;
933	int error, flags;
934
935	flags = 0;
936	if (mntflags & MNT_FORCE) {
937		flags |= FORCECLOSE;
938	}
939#ifdef UFS_EXTATTR
940	if ((error = ufs_extattr_stop(mp, td))) {
941		if (error != EOPNOTSUPP)
942			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
943			    error);
944	} else {
945		ufs_extattr_uepm_destroy(&ump->um_extattr);
946	}
947#endif
948	if (mp->mnt_flag & MNT_SOFTDEP) {
949		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
950			return (error);
951	} else {
952		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
953			return (error);
954	}
955	fs = ump->um_fs;
956	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
957		printf("%s: unmount pending error: blocks %jd files %d\n",
958		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
959		    fs->fs_pendinginodes);
960		fs->fs_pendingblocks = 0;
961		fs->fs_pendinginodes = 0;
962	}
963	if (fs->fs_ronly == 0) {
964		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
965		error = ffs_sbupdate(ump, MNT_WAIT);
966		if (error) {
967			fs->fs_clean = 0;
968			return (error);
969		}
970	}
971	ump->um_devvp->v_rdev->si_mountpoint = NULL;
972
973	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
974	/* XXX: see comment above VOP_OPEN. */
975#ifdef notyet
976	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
977	    NOCRED, td);
978#else
979	error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td);
980#endif
981	vrele(ump->um_devvp);
982	free(fs->fs_csp, M_UFSMNT);
983	free(fs, M_UFSMNT);
984	free(ump, M_UFSMNT);
985	mp->mnt_data = (qaddr_t)0;
986	mp->mnt_flag &= ~MNT_LOCAL;
987	return (error);
988}
989
990/*
991 * Flush out all the files in a filesystem.
992 */
993int
994ffs_flushfiles(mp, flags, td)
995	struct mount *mp;
996	int flags;
997	struct thread *td;
998{
999	struct ufsmount *ump;
1000	int error;
1001
1002	ump = VFSTOUFS(mp);
1003#ifdef QUOTA
1004	if (mp->mnt_flag & MNT_QUOTA) {
1005		int i;
1006		error = vflush(mp, 0, SKIPSYSTEM|flags);
1007		if (error)
1008			return (error);
1009		for (i = 0; i < MAXQUOTAS; i++) {
1010			if (ump->um_quotas[i] == NULLVP)
1011				continue;
1012			quotaoff(td, mp, i);
1013		}
1014		/*
1015		 * Here we fall through to vflush again to ensure
1016		 * that we have gotten rid of all the system vnodes.
1017		 */
1018	}
1019#endif
1020	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1021	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1022		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1023			return (error);
1024		ffs_snapshot_unmount(mp);
1025		/*
1026		 * Here we fall through to vflush again to ensure
1027		 * that we have gotten rid of all the system vnodes.
1028		 */
1029	}
1030        /*
1031	 * Flush all the files.
1032	 */
1033	if ((error = vflush(mp, 0, flags)) != 0)
1034		return (error);
1035	/*
1036	 * Flush filesystem metadata.
1037	 */
1038	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1039	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1040	VOP_UNLOCK(ump->um_devvp, 0, td);
1041	return (error);
1042}
1043
1044/*
1045 * Get filesystem statistics.
1046 */
1047int
1048ffs_statfs(mp, sbp, td)
1049	struct mount *mp;
1050	struct statfs *sbp;
1051	struct thread *td;
1052{
1053	struct ufsmount *ump;
1054	struct fs *fs;
1055
1056	ump = VFSTOUFS(mp);
1057	fs = ump->um_fs;
1058	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1059		panic("ffs_statfs");
1060	sbp->f_version = STATFS_VERSION;
1061	sbp->f_bsize = fs->fs_fsize;
1062	sbp->f_iosize = fs->fs_bsize;
1063	sbp->f_blocks = fs->fs_dsize;
1064	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1065	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1066	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1067	    dbtofsb(fs, fs->fs_pendingblocks);
1068	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1069	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1070	sbp->f_namemax = NAME_MAX;
1071	if (sbp != &mp->mnt_stat) {
1072		sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1073		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1074		sbp->f_syncwrites = mp->mnt_stat.f_syncwrites;
1075		sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites;
1076		sbp->f_syncreads = mp->mnt_stat.f_syncreads;
1077		sbp->f_asyncreads = mp->mnt_stat.f_asyncreads;
1078		sbp->f_owner = mp->mnt_stat.f_owner;
1079		sbp->f_fsid = mp->mnt_stat.f_fsid;
1080		bcopy((caddr_t)mp->mnt_stat.f_fstypename,
1081			(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1082		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1083			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1084		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1085			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1086	}
1087	return (0);
1088}
1089
1090/*
1091 * Go through the disk queues to initiate sandbagged IO;
1092 * go through the inodes to write those that have been modified;
1093 * initiate the writing of the super block if it has been modified.
1094 *
1095 * Note: we are always called with the filesystem marked `MPBUSY'.
1096 */
1097int
1098ffs_sync(mp, waitfor, cred, td)
1099	struct mount *mp;
1100	int waitfor;
1101	struct ucred *cred;
1102	struct thread *td;
1103{
1104	struct vnode *nvp, *vp, *devvp;
1105	struct inode *ip;
1106	struct ufsmount *ump = VFSTOUFS(mp);
1107	struct fs *fs;
1108	int error, count, wait, lockreq, allerror = 0;
1109
1110	fs = ump->um_fs;
1111	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1112		printf("fs = %s\n", fs->fs_fsmnt);
1113		panic("ffs_sync: rofs mod");
1114	}
1115	/*
1116	 * Write back each (modified) inode.
1117	 */
1118	wait = 0;
1119	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1120	if (waitfor == MNT_WAIT) {
1121		wait = 1;
1122		lockreq = LK_EXCLUSIVE;
1123	}
1124	lockreq |= LK_INTERLOCK;
1125	MNT_ILOCK(mp);
1126loop:
1127	MNT_VNODE_FOREACH(vp, mp, nvp) {
1128		/*
1129		 * Depend on the mntvnode_slock to keep things stable enough
1130		 * for a quick test.  Since there might be hundreds of
1131		 * thousands of vnodes, we cannot afford even a subroutine
1132		 * call unless there's a good chance that we have work to do.
1133		 */
1134		VI_LOCK(vp);
1135		if (vp->v_iflag & VI_XLOCK) {
1136			VI_UNLOCK(vp);
1137			continue;
1138		}
1139		ip = VTOI(vp);
1140		if (vp->v_type == VNON || ((ip->i_flag &
1141		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1142		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1143			VI_UNLOCK(vp);
1144			continue;
1145		}
1146		MNT_IUNLOCK(mp);
1147		if ((error = vget(vp, lockreq, td)) != 0) {
1148			MNT_ILOCK(mp);
1149			if (error == ENOENT)
1150				goto loop;
1151			continue;
1152		}
1153		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1154			allerror = error;
1155		VOP_UNLOCK(vp, 0, td);
1156		vrele(vp);
1157		MNT_ILOCK(mp);
1158	}
1159	MNT_IUNLOCK(mp);
1160	/*
1161	 * Force stale filesystem control information to be flushed.
1162	 */
1163	if (waitfor == MNT_WAIT) {
1164		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1165			allerror = error;
1166		/* Flushed work items may create new vnodes to clean */
1167		if (allerror == 0 && count) {
1168			MNT_ILOCK(mp);
1169			goto loop;
1170		}
1171	}
1172#ifdef QUOTA
1173	qsync(mp);
1174#endif
1175	devvp = ump->um_devvp;
1176	VI_LOCK(devvp);
1177	if (waitfor != MNT_LAZY &&
1178	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1179		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1180		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1181			allerror = error;
1182		VOP_UNLOCK(devvp, 0, td);
1183		if (allerror == 0 && waitfor == MNT_WAIT) {
1184			MNT_ILOCK(mp);
1185			goto loop;
1186		}
1187	} else
1188		VI_UNLOCK(devvp);
1189	/*
1190	 * Write back modified superblock.
1191	 */
1192	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1193		allerror = error;
1194	return (allerror);
1195}
1196
1197int
1198ffs_vget(mp, ino, flags, vpp)
1199	struct mount *mp;
1200	ino_t ino;
1201	int flags;
1202	struct vnode **vpp;
1203{
1204	struct thread *td = curthread; 		/* XXX */
1205	struct fs *fs;
1206	struct inode *ip;
1207	struct ufsmount *ump;
1208	struct buf *bp;
1209	struct vnode *vp;
1210	struct cdev *dev;
1211	int error;
1212
1213	ump = VFSTOUFS(mp);
1214	dev = ump->um_dev;
1215
1216	/*
1217	 * We do not lock vnode creation as it is believed to be too
1218	 * expensive for such rare case as simultaneous creation of vnode
1219	 * for same ino by different processes. We just allow them to race
1220	 * and check later to decide who wins. Let the race begin!
1221	 */
1222	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1223		return (error);
1224	if (*vpp != NULL)
1225		return (0);
1226
1227	/*
1228	 * If this MALLOC() is performed after the getnewvnode()
1229	 * it might block, leaving a vnode with a NULL v_data to be
1230	 * found by ffs_sync() if a sync happens to fire right then,
1231	 * which will cause a panic because ffs_sync() blindly
1232	 * dereferences vp->v_data (as well it should).
1233	 */
1234	ip = uma_zalloc(uma_inode, M_WAITOK);
1235
1236	/* Allocate a new vnode/inode. */
1237	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1238	if (error) {
1239		*vpp = NULL;
1240		uma_zfree(uma_inode, ip);
1241		return (error);
1242	}
1243	bzero((caddr_t)ip, sizeof(struct inode));
1244	/*
1245	 * FFS supports recursive locking.
1246	 */
1247	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1248	vp->v_data = ip;
1249	ip->i_vnode = vp;
1250	ip->i_ump = ump;
1251	ip->i_fs = fs = ump->um_fs;
1252	ip->i_dev = dev;
1253	ip->i_number = ino;
1254#ifdef QUOTA
1255	{
1256		int i;
1257		for (i = 0; i < MAXQUOTAS; i++)
1258			ip->i_dquot[i] = NODQUOT;
1259	}
1260#endif
1261	/*
1262	 * Exclusively lock the vnode before adding to hash. Note, that we
1263	 * must not release nor downgrade the lock (despite flags argument
1264	 * says) till it is fully initialized.
1265	 */
1266	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1267
1268	/*
1269	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1270	 * duplicate of vnode being created and add it to the hash. If a
1271	 * duplicate vnode was found, it will be vget()ed from hash for us.
1272	 */
1273	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1274		vput(vp);
1275		*vpp = NULL;
1276		return (error);
1277	}
1278
1279	/* We lost the race, then throw away our vnode and return existing */
1280	if (*vpp != NULL) {
1281		vput(vp);
1282		return (0);
1283	}
1284
1285	/* Read in the disk contents for the inode, copy into the inode. */
1286	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1287	    (int)fs->fs_bsize, NOCRED, &bp);
1288	if (error) {
1289		/*
1290		 * The inode does not contain anything useful, so it would
1291		 * be misleading to leave it on its hash chain. With mode
1292		 * still zero, it will be unlinked and returned to the free
1293		 * list by vput().
1294		 */
1295		brelse(bp);
1296		vput(vp);
1297		*vpp = NULL;
1298		return (error);
1299	}
1300	if (ip->i_ump->um_fstype == UFS1)
1301		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1302	else
1303		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1304	ffs_load_inode(bp, ip, fs, ino);
1305	if (DOINGSOFTDEP(vp))
1306		softdep_load_inodeblock(ip);
1307	else
1308		ip->i_effnlink = ip->i_nlink;
1309	bqrelse(bp);
1310
1311	/*
1312	 * Initialize the vnode from the inode, check for aliases.
1313	 * Note that the underlying vnode may have changed.
1314	 */
1315	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1316	if (error) {
1317		vput(vp);
1318		*vpp = NULL;
1319		return (error);
1320	}
1321	/*
1322	 * Finish inode initialization.
1323	 */
1324	VREF(ip->i_devvp);
1325	/*
1326	 * Set up a generation number for this inode if it does not
1327	 * already have one. This should only happen on old filesystems.
1328	 */
1329	if (ip->i_gen == 0) {
1330		ip->i_gen = arc4random() / 2 + 1;
1331		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1332			ip->i_flag |= IN_MODIFIED;
1333			DIP(ip, i_gen) = ip->i_gen;
1334		}
1335	}
1336	/*
1337	 * Ensure that uid and gid are correct. This is a temporary
1338	 * fix until fsck has been changed to do the update.
1339	 */
1340	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1341	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1342		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1343		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1344	}						/* XXX */
1345
1346#ifdef MAC
1347	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1348		/*
1349		 * If this vnode is already allocated, and we're running
1350		 * multi-label, attempt to perform a label association
1351		 * from the extended attributes on the inode.
1352		 */
1353		error = mac_associate_vnode_extattr(mp, vp);
1354		if (error) {
1355			/* ufs_inactive will release ip->i_devvp ref. */
1356			vput(vp);
1357			*vpp = NULL;
1358			return (error);
1359		}
1360	}
1361#endif
1362
1363	*vpp = vp;
1364	return (0);
1365}
1366
1367/*
1368 * File handle to vnode
1369 *
1370 * Have to be really careful about stale file handles:
1371 * - check that the inode number is valid
1372 * - call ffs_vget() to get the locked inode
1373 * - check for an unallocated inode (i_mode == 0)
1374 * - check that the given client host has export rights and return
1375 *   those rights via. exflagsp and credanonp
1376 */
1377int
1378ffs_fhtovp(mp, fhp, vpp)
1379	struct mount *mp;
1380	struct fid *fhp;
1381	struct vnode **vpp;
1382{
1383	struct ufid *ufhp;
1384	struct fs *fs;
1385
1386	ufhp = (struct ufid *)fhp;
1387	fs = VFSTOUFS(mp)->um_fs;
1388	if (ufhp->ufid_ino < ROOTINO ||
1389	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1390		return (ESTALE);
1391	return (ufs_fhtovp(mp, ufhp, vpp));
1392}
1393
1394/*
1395 * Vnode pointer to File handle
1396 */
1397/* ARGSUSED */
1398int
1399ffs_vptofh(vp, fhp)
1400	struct vnode *vp;
1401	struct fid *fhp;
1402{
1403	struct inode *ip;
1404	struct ufid *ufhp;
1405
1406	ip = VTOI(vp);
1407	ufhp = (struct ufid *)fhp;
1408	ufhp->ufid_len = sizeof(struct ufid);
1409	ufhp->ufid_ino = ip->i_number;
1410	ufhp->ufid_gen = ip->i_gen;
1411	return (0);
1412}
1413
/*
 * Initialize the filesystem: set up soft updates first, then run the
 * common UFS initialization and return its result.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1425
/*
 * Undo the work of ffs_init(), in reverse order: tear down the common
 * UFS state, then soft updates.  The result of ufs_uninit() is returned.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1439
1440/*
1441 * Write a superblock and associated information back to disk.
1442 */
1443static int
1444ffs_sbupdate(mp, waitfor)
1445	struct ufsmount *mp;
1446	int waitfor;
1447{
1448	struct fs *fs = mp->um_fs;
1449	struct buf *bp;
1450	int blks;
1451	void *space;
1452	int i, size, error, allerror = 0;
1453
1454	if (fs->fs_ronly == 1 &&
1455	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1456	    (MNT_RDONLY | MNT_UPDATE))
1457		panic("ffs_sbupdate: write read-only filesystem");
1458	/*
1459	 * First write back the summary information.
1460	 */
1461	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1462	space = fs->fs_csp;
1463	for (i = 0; i < blks; i += fs->fs_frag) {
1464		size = fs->fs_bsize;
1465		if (i + fs->fs_frag > blks)
1466			size = (blks - i) * fs->fs_fsize;
1467		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1468		    size, 0, 0, 0);
1469		bcopy(space, bp->b_data, (u_int)size);
1470		space = (char *)space + size;
1471		if (waitfor != MNT_WAIT)
1472			bawrite(bp);
1473		else if ((error = bwrite(bp)) != 0)
1474			allerror = error;
1475	}
1476	/*
1477	 * Now write back the superblock itself. If any errors occurred
1478	 * up to this point, then fail so that the superblock avoids
1479	 * being written out as clean.
1480	 */
1481	if (allerror)
1482		return (allerror);
1483	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1484	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1485		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1486		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1487		fs->fs_sblockloc = SBLOCK_UFS1;
1488	}
1489	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1490	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1491		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1492		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1493		fs->fs_sblockloc = SBLOCK_UFS2;
1494	}
1495	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1496	    0, 0, 0);
1497	fs->fs_fmod = 0;
1498	fs->fs_time = time_second;
1499	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1500	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1501	if (waitfor != MNT_WAIT)
1502		bawrite(bp);
1503	else if ((error = bwrite(bp)) != 0)
1504		allerror = error;
1505	return (allerror);
1506}
1507
/*
 * Extended attribute control entry point.  Kernels built with
 * UFS_EXTATTR hand the request to ufs_extattrctl(); all others hand it
 * to vfs_stdextattrctl().  The callee's result is returned unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1521
1522static void
1523ffs_ifree(struct ufsmount *ump, struct inode *ip)
1524{
1525
1526	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1527		uma_zfree(uma_ufs1, ip->i_din1);
1528	else if (ip->i_din2 != NULL)
1529		uma_zfree(uma_ufs2, ip->i_din2);
1530	uma_zfree(uma_inode, ip);
1531}
1532