mfs_vfsops.c revision 1.55
1/*	$NetBSD: mfs_vfsops.c,v 1.55 2004/03/24 15:34:56 atatat Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)mfs_vfsops.c	8.11 (Berkeley) 6/19/95
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(0, "$NetBSD: mfs_vfsops.c,v 1.55 2004/03/24 15:34:56 atatat Exp $");
36
37#if defined(_KERNEL_OPT)
38#include "opt_compat_netbsd.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/sysctl.h>
44#include <sys/time.h>
45#include <sys/kernel.h>
46#include <sys/proc.h>
47#include <sys/buf.h>
48#include <sys/mount.h>
49#include <sys/signalvar.h>
50#include <sys/vnode.h>
51#include <sys/malloc.h>
52
53#include <miscfs/syncfs/syncfs.h>
54
55#include <ufs/ufs/quota.h>
56#include <ufs/ufs/inode.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/ufs_extern.h>
59
60#include <ufs/ffs/fs.h>
61#include <ufs/ffs/ffs_extern.h>
62
63#include <ufs/mfs/mfsnode.h>
64#include <ufs/mfs/mfs_extern.h>
65
/*
 * Location and extent of the mini-root image.  Both are filled in by
 * mfs_initminiroot() and consumed by mfs_mountroot().
 */
caddr_t	mfs_rootbase;	/* address of mini-root in kernel virtual memory */
u_long	mfs_rootsize;	/* size of mini-root in bytes */

/*
 * Next minor number to hand out; it is paired with major 255 in
 * makedev() and bumped after each use.
 */
static	int mfs_minor;	/* used for building internal dev_t */

/* Vnode operations vector installed on MFS device vnodes. */
extern int (**mfs_vnodeop_p) __P((void *));

/* Malloc type used for the struct mfsnode allocations in this file. */
MALLOC_DEFINE(M_MFSNODE, "MFS node", "MFS vnode private part");
74
75/*
76 * mfs vfs operations.
77 */
78
/* Descriptor for the MFS vnode operations vector; defined elsewhere. */
extern const struct vnodeopv_desc mfs_vnodeop_opv_desc;

/*
 * NULL-terminated list of vnode operation vector descriptors for MFS.
 * It is referenced from the last slot of mfs_vfsops below.
 */
const struct vnodeopv_desc * const mfs_vnodeopv_descs[] = {
	&mfs_vnodeop_opv_desc,
	NULL,
};
85
/*
 * VFS operations table for MFS.
 *
 * The initializer is positional, so the order of entries must match the
 * member order of struct vfsops.  MFS supplies its own mount, start,
 * statfs, init, reinit and done routines; every other populated slot
 * reuses the FFS/UFS implementation.
 */
struct vfsops mfs_vfsops = {
	MOUNT_MFS,		/* file system type name */
	mfs_mount,
	mfs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	mfs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	mfs_init,
	mfs_reinit,
	mfs_done,
	/*
	 * NOTE(review): the next two slots are left empty; which hooks they
	 * correspond to is not visible here -- confirm against struct vfsops
	 * in sys/mount.h.
	 */
	NULL,
	NULL,
	ufs_check_export,
	mfs_vnodeopv_descs,	/* vnode operation vector descriptors */
};
106
107SYSCTL_SETUP(sysctl_vfs_mfs_setup, "sysctl vfs.mfs subtree setup")
108{
109
110	sysctl_createv(clog, 0, NULL, NULL,
111		       CTLFLAG_PERMANENT,
112		       CTLTYPE_NODE, "vfs", NULL,
113		       NULL, 0, NULL, 0,
114		       CTL_VFS, CTL_EOL);
115	sysctl_createv(clog, 0, NULL, NULL,
116		       CTLFLAG_PERMANENT|CTLFLAG_ALIAS,
117		       CTLTYPE_NODE, "mfs", NULL,
118		       NULL, 1, NULL, 0,
119		       CTL_VFS, 3, CTL_EOL);
120	/*
121	 * XXX the "1" and the "3" above could be dynamic, thereby
122	 * eliminating one more instance of the "number to vfs"
123	 * mapping problem, but they are in order as taken from
124	 * sys/mount.h
125	 */
126}
127
128/*
129 * Memory based filesystem initialization.
130 */
void
mfs_init()
{
#if defined(_LKM)
	/* Loadable-module builds register the MFS malloc type here. */
	malloc_type_attach(M_MFSNODE);
#endif

	/*
	 * MFS shares its state with FFS.  ffs_init() takes care of
	 * setting up the required resources only once, so calling it
	 * from here is safe.
	 */
	ffs_init();
}
143
void
mfs_reinit()
{

	/* MFS has nothing of its own to redo; hand off to FFS. */
	ffs_reinit();
}
149
void
mfs_done()
{

	/*
	 * ffs_done() releases the shared resources only once, when
	 * they are no longer needed, so it is safe to call it here.
	 */
	ffs_done();

#if defined(_LKM)
	/* Loadable-module builds unregister the MFS malloc type here. */
	malloc_type_detach(M_MFSNODE);
#endif
}
162
163/*
164 * Called by main() when mfs is going to be mounted as root.
165 */
166
167int
168mfs_mountroot()
169{
170	struct fs *fs;
171	struct mount *mp;
172	struct proc *p = curproc;	/* XXX */
173	struct ufsmount *ump;
174	struct mfsnode *mfsp;
175	int error = 0;
176
177	/*
178	 * Get vnodes for rootdev.
179	 */
180	if (bdevvp(rootdev, &rootvp)) {
181		printf("mfs_mountroot: can't setup bdevvp's");
182		return (error);
183	}
184
185	if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) {
186		vrele(rootvp);
187		return (error);
188	}
189
190	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
191	rootvp->v_data = mfsp;
192	rootvp->v_op = mfs_vnodeop_p;
193	rootvp->v_tag = VT_MFS;
194	mfsp->mfs_baseoff = mfs_rootbase;
195	mfsp->mfs_size = mfs_rootsize;
196	mfsp->mfs_vnode = rootvp;
197	mfsp->mfs_proc = NULL;		/* indicate kernel space */
198	mfsp->mfs_shutdown = 0;
199	bufq_alloc(&mfsp->mfs_buflist, BUFQ_FCFS);
200	if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
201		mp->mnt_op->vfs_refcount--;
202		vfs_unbusy(mp);
203		bufq_free(&mfsp->mfs_buflist);
204		free(mp, M_MOUNT);
205		free(mfsp, M_MFSNODE);
206		vrele(rootvp);
207		return (error);
208	}
209	simple_lock(&mountlist_slock);
210	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
211	simple_unlock(&mountlist_slock);
212	mp->mnt_vnodecovered = NULLVP;
213	ump = VFSTOUFS(mp);
214	fs = ump->um_fs;
215	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
216	(void)ffs_statfs(mp, &mp->mnt_stat, p);
217	vfs_unbusy(mp);
218	inittodr((time_t)0);
219	return (0);
220}
221
222/*
223 * This is called early in boot to set the base address and size
224 * of the mini-root.
225 */
226int
227mfs_initminiroot(base)
228	caddr_t base;
229{
230	struct fs *fs = (struct fs *)(base + SBLOCK_UFS1);
231
232	/* check for valid super block */
233	if (fs->fs_magic != FS_UFS1_MAGIC || fs->fs_bsize > MAXBSIZE ||
234	    fs->fs_bsize < sizeof(struct fs))
235		return (0);
236	mountroot = mfs_mountroot;
237	mfs_rootbase = base;
238	mfs_rootsize = fs->fs_fsize * fs->fs_size;
239	rootdev = makedev(255, mfs_minor);
240	mfs_minor++;
241	return (mfs_rootsize);
242}
243
244/*
245 * VFS Operations.
246 *
247 * mount system call
248 */
249/* ARGSUSED */
250int
251mfs_mount(mp, path, data, ndp, p)
252	struct mount *mp;
253	const char *path;
254	void *data;
255	struct nameidata *ndp;
256	struct proc *p;
257{
258	struct vnode *devvp;
259	struct mfs_args args;
260	struct ufsmount *ump;
261	struct fs *fs;
262	struct mfsnode *mfsp;
263	int flags, error;
264
265	if (mp->mnt_flag & MNT_GETARGS) {
266		struct vnode *vp;
267		struct mfsnode *mfsp;
268
269		ump = VFSTOUFS(mp);
270		if (ump == NULL)
271			return EIO;
272
273		vp = ump->um_devvp;
274		if (vp == NULL)
275			return EIO;
276
277		mfsp = VTOMFS(vp);
278		if (mfsp == NULL)
279			return EIO;
280
281		args.fspec = NULL;
282		vfs_showexport(mp, &args.export, &ump->um_export);
283		args.base = mfsp->mfs_baseoff;
284		args.size = mfsp->mfs_size;
285		return copyout(&args, data, sizeof(args));
286	}
287	/*
288	 * XXX turn off async to avoid hangs when writing lots of data.
289	 * the problem is that MFS needs to allocate pages to clean pages,
290	 * so if we wait until the last minute to clean pages then there
291	 * may not be any pages available to do the cleaning.
292	 * ... and since the default partially-synchronous mode turns out
293	 * to not be sufficient under heavy load, make it full synchronous.
294	 */
295	mp->mnt_flag &= ~MNT_ASYNC;
296	mp->mnt_flag |= MNT_SYNCHRONOUS;
297
298	error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args));
299	if (error)
300		return (error);
301
302	/*
303	 * If updating, check whether changing from read-only to
304	 * read/write; if there is no device name, that's all we do.
305	 */
306	if (mp->mnt_flag & MNT_UPDATE) {
307		ump = VFSTOUFS(mp);
308		fs = ump->um_fs;
309		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
310			flags = WRITECLOSE;
311			if (mp->mnt_flag & MNT_FORCE)
312				flags |= FORCECLOSE;
313			error = ffs_flushfiles(mp, flags, p);
314			if (error)
315				return (error);
316		}
317		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR))
318			fs->fs_ronly = 0;
319		if (args.fspec == 0)
320			return (vfs_export(mp, &ump->um_export, &args.export));
321		return (0);
322	}
323	error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
324	if (error)
325		return (error);
326	devvp->v_type = VBLK;
327	if (checkalias(devvp, makedev(255, mfs_minor), (struct mount *)0))
328		panic("mfs_mount: dup dev");
329	mfs_minor++;
330	mfsp = (struct mfsnode *)malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
331	devvp->v_data = mfsp;
332	mfsp->mfs_baseoff = args.base;
333	mfsp->mfs_size = args.size;
334	mfsp->mfs_vnode = devvp;
335	mfsp->mfs_proc = p;
336	mfsp->mfs_shutdown = 0;
337	bufq_alloc(&mfsp->mfs_buflist, BUFQ_FCFS);
338	if ((error = ffs_mountfs(devvp, mp, p)) != 0) {
339		mfsp->mfs_shutdown = 1;
340		vrele(devvp);
341		return (error);
342	}
343	ump = VFSTOUFS(mp);
344	fs = ump->um_fs;
345	error = set_statfs_info(path, UIO_USERSPACE, args.fspec,
346	    UIO_USERSPACE, mp, p);
347	(void)memcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
348	    sizeof(mp->mnt_stat.f_mntonname));
349	return error;
350}
351
/* Sleep priority passed to tsleep() by mfs_start() while it waits for I/O. */
int	mfs_pri = PWAIT | PCATCH;		/* XXX prob. temp */
353
354/*
355 * Used to grab the process and keep it in the kernel to service
356 * memory filesystem I/O requests.
357 *
358 * Loop servicing I/O requests.
359 * Copy the requested data into or out of the memory filesystem
360 * address space.
361 */
362/* ARGSUSED */
363int
364mfs_start(mp, flags, p)
365	struct mount *mp;
366	int flags;
367	struct proc *p;
368{
369	struct vnode *vp = VFSTOUFS(mp)->um_devvp;
370	struct mfsnode *mfsp = VTOMFS(vp);
371	struct buf *bp;
372	caddr_t base;
373	int sleepreturn = 0;
374	struct lwp *l; /* XXX NJWLWP */
375
376	/* XXX NJWLWP the vnode interface again gives us a proc in a
377	 * place where we want a execution context. Cheat.
378	 */
379	KASSERT(curproc == p);
380	l = curlwp;
381	base = mfsp->mfs_baseoff;
382	while (mfsp->mfs_shutdown != 1) {
383		while ((bp = BUFQ_GET(&mfsp->mfs_buflist)) != NULL) {
384			mfs_doio(bp, base);
385			wakeup((caddr_t)bp);
386		}
387		/*
388		 * If a non-ignored signal is received, try to unmount.
389		 * If that fails, or the filesystem is already in the
390		 * process of being unmounted, clear the signal (it has been
391		 * "processed"), otherwise we will loop here, as tsleep
392		 * will always return EINTR/ERESTART.
393		 */
394		if (sleepreturn != 0) {
395			/*
396			 * XXX Freeze syncer.  Must do this before locking
397			 * the mount point.  See dounmount() for details.
398			 */
399			lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
400			if (vfs_busy(mp, LK_NOWAIT, 0) != 0)
401				lockmgr(&syncer_lock, LK_RELEASE, NULL);
402			else if (dounmount(mp, 0, p) != 0)
403				CLRSIG(p, CURSIG(l));
404			sleepreturn = 0;
405			continue;
406		}
407
408		sleepreturn = tsleep(vp, mfs_pri, "mfsidl", 0);
409	}
410	KASSERT(BUFQ_PEEK(&mfsp->mfs_buflist) == NULL);
411	bufq_free(&mfsp->mfs_buflist);
412	return (sleepreturn);
413}
414
415/*
416 * Get file system statistics.
417 */
418int
419mfs_statfs(mp, sbp, p)
420	struct mount *mp;
421	struct statfs *sbp;
422	struct proc *p;
423{
424	int error;
425
426	error = ffs_statfs(mp, sbp, p);
427#ifdef COMPAT_09
428	sbp->f_type = 3;
429#else
430	sbp->f_type = 0;
431#endif
432	strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN);
433	return (error);
434}
435