mfs_vfsops.c revision 1.52
1/*	$NetBSD: mfs_vfsops.c,v 1.52 2003/08/07 16:34:41 agc Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)mfs_vfsops.c	8.11 (Berkeley) 6/19/95
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(0, "$NetBSD: mfs_vfsops.c,v 1.52 2003/08/07 16:34:41 agc Exp $");
36
37#if defined(_KERNEL_OPT)
38#include "opt_compat_netbsd.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/time.h>
44#include <sys/kernel.h>
45#include <sys/proc.h>
46#include <sys/buf.h>
47#include <sys/mount.h>
48#include <sys/signalvar.h>
49#include <sys/vnode.h>
50#include <sys/malloc.h>
51
52#include <miscfs/syncfs/syncfs.h>
53
54#include <ufs/ufs/quota.h>
55#include <ufs/ufs/inode.h>
56#include <ufs/ufs/ufsmount.h>
57#include <ufs/ufs/ufs_extern.h>
58
59#include <ufs/ffs/fs.h>
60#include <ufs/ffs/ffs_extern.h>
61
62#include <ufs/mfs/mfsnode.h>
63#include <ufs/mfs/mfs_extern.h>
64
65caddr_t	mfs_rootbase;	/* address of mini-root in kernel virtual memory */
66u_long	mfs_rootsize;	/* size of mini-root in bytes */
67
68static	int mfs_minor;	/* used for building internal dev_t */
69
70extern int (**mfs_vnodeop_p) __P((void *));
71
72MALLOC_DEFINE(M_MFSNODE, "MFS node", "MFS vnode private part");
73
74/*
75 * mfs vfs operations.
76 */
77
78extern const struct vnodeopv_desc mfs_vnodeop_opv_desc;
79
80const struct vnodeopv_desc * const mfs_vnodeopv_descs[] = {
81	&mfs_vnodeop_opv_desc,
82	NULL,
83};
84
85struct vfsops mfs_vfsops = {
86	MOUNT_MFS,
87	mfs_mount,
88	mfs_start,
89	ffs_unmount,
90	ufs_root,
91	ufs_quotactl,
92	mfs_statfs,
93	ffs_sync,
94	ffs_vget,
95	ffs_fhtovp,
96	ffs_vptofh,
97	mfs_init,
98	mfs_reinit,
99	mfs_done,
100	ffs_sysctl,
101	NULL,
102	ufs_check_export,
103	mfs_vnodeopv_descs,
104};
105
/*
 * Memory based filesystem initialization.
 *
 * When built as a loadable kernel module (_LKM) the M_MFSNODE malloc
 * type is attached first.  Everything else is delegated to ffs_init().
 */
void
mfs_init()
{

#ifdef _LKM
	malloc_type_attach(M_MFSNODE);
#endif

	/* ffs_init() takes care to set up its resources only once. */
	ffs_init();
}
121
/*
 * Memory based filesystem reinitialization; simply forwards to
 * ffs_reinit().
 */
void
mfs_reinit()
{

	ffs_reinit();
}
127
/*
 * Memory based filesystem teardown, the counterpart of mfs_init().
 *
 * ffs_done() is called first; when built as a loadable kernel module
 * (_LKM) the M_MFSNODE malloc type is detached afterwards.
 */
void
mfs_done()
{

	/*
	 * ffs_done() takes care to release its resources only once,
	 * when they are no longer needed.
	 */
	ffs_done();

#ifdef _LKM
	malloc_type_detach(M_MFSNODE);
#endif
}
140
141/*
142 * Called by main() when mfs is going to be mounted as root.
143 */
144
145int
146mfs_mountroot()
147{
148	struct fs *fs;
149	struct mount *mp;
150	struct proc *p = curproc;	/* XXX */
151	struct ufsmount *ump;
152	struct mfsnode *mfsp;
153	int error = 0;
154
155	/*
156	 * Get vnodes for rootdev.
157	 */
158	if (bdevvp(rootdev, &rootvp)) {
159		printf("mfs_mountroot: can't setup bdevvp's");
160		return (error);
161	}
162
163	if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) {
164		vrele(rootvp);
165		return (error);
166	}
167
168	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
169	rootvp->v_data = mfsp;
170	rootvp->v_op = mfs_vnodeop_p;
171	rootvp->v_tag = VT_MFS;
172	mfsp->mfs_baseoff = mfs_rootbase;
173	mfsp->mfs_size = mfs_rootsize;
174	mfsp->mfs_vnode = rootvp;
175	mfsp->mfs_proc = NULL;		/* indicate kernel space */
176	mfsp->mfs_shutdown = 0;
177	bufq_alloc(&mfsp->mfs_buflist, BUFQ_FCFS);
178	if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
179		mp->mnt_op->vfs_refcount--;
180		vfs_unbusy(mp);
181		bufq_free(&mfsp->mfs_buflist);
182		free(mp, M_MOUNT);
183		free(mfsp, M_MFSNODE);
184		vrele(rootvp);
185		return (error);
186	}
187	simple_lock(&mountlist_slock);
188	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
189	simple_unlock(&mountlist_slock);
190	mp->mnt_vnodecovered = NULLVP;
191	ump = VFSTOUFS(mp);
192	fs = ump->um_fs;
193	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
194	(void)ffs_statfs(mp, &mp->mnt_stat, p);
195	vfs_unbusy(mp);
196	inittodr((time_t)0);
197	return (0);
198}
199
200/*
201 * This is called early in boot to set the base address and size
202 * of the mini-root.
203 */
204int
205mfs_initminiroot(base)
206	caddr_t base;
207{
208	struct fs *fs = (struct fs *)(base + SBLOCK_UFS1);
209
210	/* check for valid super block */
211	if (fs->fs_magic != FS_UFS1_MAGIC || fs->fs_bsize > MAXBSIZE ||
212	    fs->fs_bsize < sizeof(struct fs))
213		return (0);
214	mountroot = mfs_mountroot;
215	mfs_rootbase = base;
216	mfs_rootsize = fs->fs_fsize * fs->fs_size;
217	rootdev = makedev(255, mfs_minor);
218	mfs_minor++;
219	return (mfs_rootsize);
220}
221
222/*
223 * VFS Operations.
224 *
225 * mount system call
226 */
227/* ARGSUSED */
228int
229mfs_mount(mp, path, data, ndp, p)
230	struct mount *mp;
231	const char *path;
232	void *data;
233	struct nameidata *ndp;
234	struct proc *p;
235{
236	struct vnode *devvp;
237	struct mfs_args args;
238	struct ufsmount *ump;
239	struct fs *fs;
240	struct mfsnode *mfsp;
241	int flags, error;
242
243	if (mp->mnt_flag & MNT_GETARGS) {
244		struct vnode *vp;
245		struct mfsnode *mfsp;
246
247		ump = VFSTOUFS(mp);
248		if (ump == NULL)
249			return EIO;
250
251		vp = ump->um_devvp;
252		if (vp == NULL)
253			return EIO;
254
255		mfsp = VTOMFS(vp);
256		if (mfsp == NULL)
257			return EIO;
258
259		args.fspec = NULL;
260		vfs_showexport(mp, &args.export, &ump->um_export);
261		args.base = mfsp->mfs_baseoff;
262		args.size = mfsp->mfs_size;
263		return copyout(&args, data, sizeof(args));
264	}
265	/*
266	 * XXX turn off async to avoid hangs when writing lots of data.
267	 * the problem is that MFS needs to allocate pages to clean pages,
268	 * so if we wait until the last minute to clean pages then there
269	 * may not be any pages available to do the cleaning.
270	 * ... and since the default partially-synchronous mode turns out
271	 * to not be sufficient under heavy load, make it full synchronous.
272	 */
273	mp->mnt_flag &= ~MNT_ASYNC;
274	mp->mnt_flag |= MNT_SYNCHRONOUS;
275
276	error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args));
277	if (error)
278		return (error);
279
280	/*
281	 * If updating, check whether changing from read-only to
282	 * read/write; if there is no device name, that's all we do.
283	 */
284	if (mp->mnt_flag & MNT_UPDATE) {
285		ump = VFSTOUFS(mp);
286		fs = ump->um_fs;
287		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
288			flags = WRITECLOSE;
289			if (mp->mnt_flag & MNT_FORCE)
290				flags |= FORCECLOSE;
291			error = ffs_flushfiles(mp, flags, p);
292			if (error)
293				return (error);
294		}
295		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
296			fs->fs_ronly = 0;
297		if (args.fspec == 0)
298			return (vfs_export(mp, &ump->um_export, &args.export));
299		return (0);
300	}
301	error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
302	if (error)
303		return (error);
304	devvp->v_type = VBLK;
305	if (checkalias(devvp, makedev(255, mfs_minor), (struct mount *)0))
306		panic("mfs_mount: dup dev");
307	mfs_minor++;
308	mfsp = (struct mfsnode *)malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
309	devvp->v_data = mfsp;
310	mfsp->mfs_baseoff = args.base;
311	mfsp->mfs_size = args.size;
312	mfsp->mfs_vnode = devvp;
313	mfsp->mfs_proc = p;
314	mfsp->mfs_shutdown = 0;
315	bufq_alloc(&mfsp->mfs_buflist, BUFQ_FCFS);
316	if ((error = ffs_mountfs(devvp, mp, p)) != 0) {
317		mfsp->mfs_shutdown = 1;
318		vrele(devvp);
319		return (error);
320	}
321	ump = VFSTOUFS(mp);
322	fs = ump->um_fs;
323	error = set_statfs_info(path, UIO_USERSPACE, args.fspec,
324	    UIO_USERSPACE, mp, p);
325	(void)memcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
326	    sizeof(mp->mnt_stat.f_mntonname));
327	return error;
328}
329
330int	mfs_pri = PWAIT | PCATCH;		/* XXX prob. temp */
331
332/*
333 * Used to grab the process and keep it in the kernel to service
334 * memory filesystem I/O requests.
335 *
336 * Loop servicing I/O requests.
337 * Copy the requested data into or out of the memory filesystem
338 * address space.
339 */
340/* ARGSUSED */
341int
342mfs_start(mp, flags, p)
343	struct mount *mp;
344	int flags;
345	struct proc *p;
346{
347	struct vnode *vp = VFSTOUFS(mp)->um_devvp;
348	struct mfsnode *mfsp = VTOMFS(vp);
349	struct buf *bp;
350	caddr_t base;
351	int sleepreturn = 0;
352	struct lwp *l; /* XXX NJWLWP */
353
354	/* XXX NJWLWP the vnode interface again gives us a proc in a
355	 * place where we want a execution context. Cheat.
356	 */
357	KASSERT(curproc == p);
358	l = curlwp;
359	base = mfsp->mfs_baseoff;
360	while (mfsp->mfs_shutdown != 1) {
361		while ((bp = BUFQ_GET(&mfsp->mfs_buflist)) != NULL) {
362			mfs_doio(bp, base);
363			wakeup((caddr_t)bp);
364		}
365		/*
366		 * If a non-ignored signal is received, try to unmount.
367		 * If that fails, or the filesystem is already in the
368		 * process of being unmounted, clear the signal (it has been
369		 * "processed"), otherwise we will loop here, as tsleep
370		 * will always return EINTR/ERESTART.
371		 */
372		if (sleepreturn != 0) {
373			/*
374			 * XXX Freeze syncer.  Must do this before locking
375			 * the mount point.  See dounmount() for details.
376			 */
377			lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
378			if (vfs_busy(mp, LK_NOWAIT, 0) != 0)
379				lockmgr(&syncer_lock, LK_RELEASE, NULL);
380			else if (dounmount(mp, 0, p) != 0)
381				CLRSIG(p, CURSIG(l));
382			sleepreturn = 0;
383			continue;
384		}
385
386		sleepreturn = tsleep(vp, mfs_pri, "mfsidl", 0);
387	}
388	KASSERT(BUFQ_PEEK(&mfsp->mfs_buflist) == NULL);
389	bufq_free(&mfsp->mfs_buflist);
390	return (sleepreturn);
391}
392
393/*
394 * Get file system statistics.
395 */
396int
397mfs_statfs(mp, sbp, p)
398	struct mount *mp;
399	struct statfs *sbp;
400	struct proc *p;
401{
402	int error;
403
404	error = ffs_statfs(mp, sbp, p);
405#ifdef COMPAT_09
406	sbp->f_type = 3;
407#else
408	sbp->f_type = 0;
409#endif
410	strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN);
411	return (error);
412}
413