mfs_vfsops.c revision 1.86
1/*	$NetBSD: mfs_vfsops.c,v 1.86 2008/01/24 17:32:57 ad Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)mfs_vfsops.c	8.11 (Berkeley) 6/19/95
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(0, "$NetBSD: mfs_vfsops.c,v 1.86 2008/01/24 17:32:57 ad Exp $");
36
37#if defined(_KERNEL_OPT)
38#include "opt_compat_netbsd.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/sysctl.h>
44#include <sys/time.h>
45#include <sys/kernel.h>
46#include <sys/proc.h>
47#include <sys/buf.h>
48#include <sys/bufq.h>
49#include <sys/mount.h>
50#include <sys/signalvar.h>
51#include <sys/vnode.h>
52#include <sys/malloc.h>
53
54#include <miscfs/syncfs/syncfs.h>
55
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/inode.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <ufs/mfs/mfsnode.h>
65#include <ufs/mfs/mfs_extern.h>
66
/*
 * Location and extent of the mini-root image.  Both values are recorded
 * by mfs_initminiroot() and later consumed by mfs_mountroot().
 */
void *	mfs_rootbase;	/* address of mini-root in kernel virtual memory */
u_long	mfs_rootsize;	/* size of mini-root in bytes */

/*
 * Counter bumped by mfs_initminiroot() and by every mfs_mount() call
 * that creates a new file system.
 */
static	int mfs_minor;	/* used for building internal dev_t */

/* Vnode operations vector installed on MFS backing-device vnodes. */
extern int (**mfs_vnodeop_p)(void *);

/* Malloc type used for every struct mfsnode allocated in this file. */
MALLOC_JUSTDEFINE(M_MFSNODE, "MFS node", "MFS vnode private part");
75
/*
 * mfs vfs operations.
 */

/* Descriptor of the MFS vnode operations; defined outside this file. */
extern const struct vnodeopv_desc mfs_vnodeop_opv_desc;

/*
 * NULL-terminated list of the vnode operation descriptors that belong
 * to MFS.  It is handed to the VFS layer through mfs_vfsops.
 */
const struct vnodeopv_desc * const mfs_vnodeopv_descs[] = {
	&mfs_vnodeop_opv_desc,
	NULL,
};
86
/*
 * VFS operations table for the memory file system.
 *
 * The initializer is positional, so the order of the entries must match
 * the declaration of struct vfsops exactly.  Only mount, start, statvfs
 * and the init/reinit/done hooks are MFS-specific; everything else is
 * the FFS/UFS implementation or a generic stub.
 */
struct vfsops mfs_vfsops = {
	MOUNT_MFS,			/* file system type name */
	sizeof (struct mfs_args),	/* size of the mount argument block */
	mfs_mount,			/* MFS-specific */
	mfs_start,			/* MFS-specific: I/O service loop */
	ffs_unmount,			/* shared with FFS */
	ufs_root,			/* shared with UFS */
	ufs_quotactl,			/* shared with UFS */
	mfs_statvfs,			/* MFS-specific wrapper around FFS */
	ffs_sync,			/* shared with FFS */
	ffs_vget,			/* shared with FFS */
	ffs_fhtovp,			/* shared with FFS */
	ffs_vptofh,			/* shared with FFS */
	mfs_init,			/* MFS-specific */
	mfs_reinit,			/* MFS-specific */
	mfs_done,			/* MFS-specific */
	NULL,				/* no handler supplied for this slot */
	/* Operation not supported; the cast only adapts the stub's type. */
	(int (*)(struct mount *, struct vnode *, struct timespec *)) eopnotsupp,
	vfs_stdextattrctl,		/* generic implementation */
	(void *)eopnotsupp,	/* vfs_suspendctl */
	mfs_vnodeopv_descs,		/* vnode operation descriptors */
	0,				/* vfs_refcount starts at zero */
	{ NULL, NULL },			/* list linkage, initially empty */
};
/* Register the table with the VFS framework. */
VFS_ATTACH(mfs_vfsops);
112
113SYSCTL_SETUP(sysctl_vfs_mfs_setup, "sysctl vfs.mfs subtree setup")
114{
115
116	sysctl_createv(clog, 0, NULL, NULL,
117		       CTLFLAG_PERMANENT,
118		       CTLTYPE_NODE, "vfs", NULL,
119		       NULL, 0, NULL, 0,
120		       CTL_VFS, CTL_EOL);
121	sysctl_createv(clog, 0, NULL, NULL,
122		       CTLFLAG_PERMANENT|CTLFLAG_ALIAS,
123		       CTLTYPE_NODE, "mfs",
124		       SYSCTL_DESCR("Memory based file system"),
125		       NULL, 1, NULL, 0,
126		       CTL_VFS, 3, CTL_EOL);
127	/*
128	 * XXX the "1" and the "3" above could be dynamic, thereby
129	 * eliminating one more instance of the "number to vfs"
130	 * mapping problem, but they are in order as taken from
131	 * sys/mount.h
132	 */
133}
134
135/*
136 * Memory based filesystem initialization.
137 */
138void
139mfs_init(void)
140{
141
142	malloc_type_attach(M_MFSNODE);
143	/*
144	 * ffs_init() ensures to initialize necessary resources
145	 * only once.
146	 */
147	ffs_init();
148}
149
/*
 * Reinitialization hook.  MFS keeps no state of its own that needs
 * refreshing here, so the request is simply passed on to FFS.
 */
void
mfs_reinit(void)
{

	ffs_reinit();
}
155
156void
157mfs_done(void)
158{
159	/*
160	 * ffs_done() ensures to free necessary resources
161	 * only once, when it's no more needed.
162	 */
163	ffs_done();
164	malloc_type_detach(M_MFSNODE);
165}
166
167/*
168 * Called by main() when mfs is going to be mounted as root.
169 */
170
171int
172mfs_mountroot(void)
173{
174	struct fs *fs;
175	struct mount *mp;
176	struct lwp *l = curlwp;		/* XXX */
177	struct ufsmount *ump;
178	struct mfsnode *mfsp;
179	int error = 0;
180
181	if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) {
182		vrele(rootvp);
183		return (error);
184	}
185
186	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
187	rootvp->v_data = mfsp;
188	rootvp->v_op = mfs_vnodeop_p;
189	rootvp->v_tag = VT_MFS;
190	mfsp->mfs_baseoff = mfs_rootbase;
191	mfsp->mfs_size = mfs_rootsize;
192	mfsp->mfs_vnode = rootvp;
193	mfsp->mfs_proc = NULL;		/* indicate kernel space */
194	mfsp->mfs_shutdown = 0;
195	bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
196	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
197		mp->mnt_op->vfs_refcount--;
198		vfs_unbusy(mp);
199		bufq_free(mfsp->mfs_buflist);
200		vfs_destroy(mp);
201		free(mfsp, M_MFSNODE);
202		return (error);
203	}
204	mutex_enter(&mountlist_lock);
205	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
206	mutex_exit(&mountlist_lock);
207	mp->mnt_vnodecovered = NULLVP;
208	ump = VFSTOUFS(mp);
209	fs = ump->um_fs;
210	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
211	(void)ffs_statvfs(mp, &mp->mnt_stat);
212	vfs_unbusy(mp);
213	return (0);
214}
215
216/*
217 * This is called early in boot to set the base address and size
218 * of the mini-root.
219 */
220int
221mfs_initminiroot(void *base)
222{
223	struct fs *fs = (struct fs *)((char *)base + SBLOCK_UFS1);
224
225	/* check for valid super block */
226	if (fs->fs_magic != FS_UFS1_MAGIC || fs->fs_bsize > MAXBSIZE ||
227	    fs->fs_bsize < sizeof(struct fs))
228		return (0);
229	mountroot = mfs_mountroot;
230	mfs_rootbase = base;
231	mfs_rootsize = fs->fs_fsize * fs->fs_size;
232	rootdev = makedev(255, mfs_minor);
233	mfs_minor++;
234	return (mfs_rootsize);
235}
236
237/*
238 * VFS Operations.
239 *
240 * mount system call
241 */
242/* ARGSUSED */
243int
244mfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
245{
246	struct lwp *l = curlwp;
247	struct vnode *devvp;
248	struct mfs_args *args = data;
249	struct ufsmount *ump;
250	struct fs *fs;
251	struct mfsnode *mfsp;
252	struct proc *p;
253	int flags, error = 0;
254
255	if (*data_len < sizeof *args)
256		return EINVAL;
257
258	p = l->l_proc;
259	if (mp->mnt_flag & MNT_GETARGS) {
260		struct vnode *vp;
261
262		ump = VFSTOUFS(mp);
263		if (ump == NULL)
264			return EIO;
265
266		vp = ump->um_devvp;
267		if (vp == NULL)
268			return EIO;
269
270		mfsp = VTOMFS(vp);
271		if (mfsp == NULL)
272			return EIO;
273
274		args->fspec = NULL;
275		args->base = mfsp->mfs_baseoff;
276		args->size = mfsp->mfs_size;
277		*data_len = sizeof *args;
278		return 0;
279	}
280	/*
281	 * XXX turn off async to avoid hangs when writing lots of data.
282	 * the problem is that MFS needs to allocate pages to clean pages,
283	 * so if we wait until the last minute to clean pages then there
284	 * may not be any pages available to do the cleaning.
285	 * ... and since the default partially-synchronous mode turns out
286	 * to not be sufficient under heavy load, make it full synchronous.
287	 */
288	mp->mnt_flag &= ~MNT_ASYNC;
289	mp->mnt_flag |= MNT_SYNCHRONOUS;
290
291	/*
292	 * If updating, check whether changing from read-only to
293	 * read/write; if there is no device name, that's all we do.
294	 */
295	if (mp->mnt_flag & MNT_UPDATE) {
296		ump = VFSTOUFS(mp);
297		fs = ump->um_fs;
298		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
299			flags = WRITECLOSE;
300			if (mp->mnt_flag & MNT_FORCE)
301				flags |= FORCECLOSE;
302			error = ffs_flushfiles(mp, flags, l);
303			if (error)
304				return (error);
305		}
306		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR))
307			fs->fs_ronly = 0;
308		if (args->fspec == NULL)
309			return EINVAL;
310		return (0);
311	}
312	error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
313	if (error)
314		return (error);
315	devvp->v_type = VBLK;
316	mfs_minor++;
317	mfsp = (struct mfsnode *)malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
318	devvp->v_data = mfsp;
319	mfsp->mfs_baseoff = args->base;
320	mfsp->mfs_size = args->size;
321	mfsp->mfs_vnode = devvp;
322	mfsp->mfs_proc = p;
323	mfsp->mfs_shutdown = 0;
324	bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
325	if ((error = ffs_mountfs(devvp, mp, l)) != 0) {
326		mfsp->mfs_shutdown = 1;
327		vrele(devvp);
328		return (error);
329	}
330	ump = VFSTOUFS(mp);
331	fs = ump->um_fs;
332	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
333	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
334	if (error)
335		return error;
336	(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
337		sizeof(fs->fs_fsmnt));
338	fs->fs_fsmnt[sizeof(fs->fs_fsmnt) - 1] = '\0';
339	/* XXX: cleanup on error */
340	return 0;
341}
342
343int	mfs_pri = PWAIT | PCATCH;		/* XXX prob. temp */
344
345/*
346 * Used to grab the process and keep it in the kernel to service
347 * memory filesystem I/O requests.
348 *
349 * Loop servicing I/O requests.
350 * Copy the requested data into or out of the memory filesystem
351 * address space.
352 */
353/* ARGSUSED */
354int
355mfs_start(struct mount *mp, int flags)
356{
357	struct lwp *l = curlwp;
358	struct vnode *vp = VFSTOUFS(mp)->um_devvp;
359	struct mfsnode *mfsp = VTOMFS(vp);
360	struct proc *p;
361	struct buf *bp;
362	void *base;
363	int sleepreturn = 0;
364	ksiginfoq_t kq;
365
366	base = mfsp->mfs_baseoff;
367	while (mfsp->mfs_shutdown != 1) {
368		while ((bp = BUFQ_GET(mfsp->mfs_buflist)) != NULL) {
369			mfs_doio(bp, base);
370			wakeup((void *)bp);
371		}
372		/*
373		 * If a non-ignored signal is received, try to unmount.
374		 * If that fails, or the filesystem is already in the
375		 * process of being unmounted, clear the signal (it has been
376		 * "processed"), otherwise we will loop here, as tsleep
377		 * will always return EINTR/ERESTART.
378		 */
379		if (sleepreturn != 0) {
380			/*
381			 * XXX Freeze syncer.  Must do this before locking
382			 * the mount point.  See dounmount() for details.
383			 */
384			mutex_enter(&syncer_mutex);
385			if (vfs_busy(mp, LK_NOWAIT, 0) != 0)
386				mutex_exit(&syncer_mutex);
387			else if (dounmount(mp, 0, l) != 0) {
388				p = l->l_proc;
389				ksiginfo_queue_init(&kq);
390				mutex_enter(&p->p_smutex);
391				sigclearall(p, NULL, &kq);
392				mutex_exit(&p->p_smutex);
393				ksiginfo_queue_drain(&kq);
394			}
395			sleepreturn = 0;
396			continue;
397		}
398
399		sleepreturn = tsleep(vp, mfs_pri, "mfsidl", 0);
400	}
401	KASSERT(BUFQ_PEEK(mfsp->mfs_buflist) == NULL);
402	bufq_free(mfsp->mfs_buflist);
403	return (sleepreturn);
404}
405
406/*
407 * Get file system statistics.
408 */
409int
410mfs_statvfs(struct mount *mp, struct statvfs *sbp)
411{
412	int error;
413
414	error = ffs_statvfs(mp, sbp);
415	if (error)
416		return error;
417	(void)strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name,
418	    sizeof(sbp->f_fstypename));
419	sbp->f_fstypename[sizeof(sbp->f_fstypename) - 1] = '\0';
420	return 0;
421}
422