1/*	$NetBSD: mfs_vfsops.c,v 1.103 2011/06/12 03:36:01 rmind Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)mfs_vfsops.c	8.11 (Berkeley) 6/19/95
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(0, "$NetBSD: mfs_vfsops.c,v 1.103 2011/06/12 03:36:01 rmind Exp $");
36
37#if defined(_KERNEL_OPT)
38#include "opt_compat_netbsd.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/sysctl.h>
44#include <sys/time.h>
45#include <sys/kernel.h>
46#include <sys/proc.h>
47#include <sys/buf.h>
48#include <sys/bufq.h>
49#include <sys/mount.h>
50#include <sys/signalvar.h>
51#include <sys/vnode.h>
52#include <sys/kmem.h>
53#include <sys/module.h>
54
55#include <miscfs/genfs/genfs.h>
56#include <miscfs/specfs/specdev.h>
57
58#include <ufs/ufs/quota.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufsmount.h>
61#include <ufs/ufs/ufs_extern.h>
62
63#include <ufs/ffs/fs.h>
64#include <ufs/ffs/ffs_extern.h>
65
66#include <ufs/mfs/mfsnode.h>
67#include <ufs/mfs/mfs_extern.h>
68
69MODULE(MODULE_CLASS_VFS, mfs, "ffs");
70
71kmutex_t mfs_lock;	/* global lock */
72
73/* used for building internal dev_t, minor == 0 reserved for miniroot */
74static int mfs_minor = 1;
75static int mfs_initcnt;
76
77extern int (**mfs_vnodeop_p)(void *);
78
79static struct sysctllog *mfs_sysctl_log;
80
81/*
82 * mfs vfs operations.
83 */
84
85extern const struct vnodeopv_desc mfs_vnodeop_opv_desc;
86
87const struct vnodeopv_desc * const mfs_vnodeopv_descs[] = {
88	&mfs_vnodeop_opv_desc,
89	NULL,
90};
91
92struct vfsops mfs_vfsops = {
93	MOUNT_MFS,
94	sizeof (struct mfs_args),
95	mfs_mount,
96	mfs_start,
97	ffs_unmount,
98	ufs_root,
99	ufs_quotactl,
100	mfs_statvfs,
101	ffs_sync,
102	ffs_vget,
103	ffs_fhtovp,
104	ffs_vptofh,
105	mfs_init,
106	mfs_reinit,
107	mfs_done,
108	NULL,
109	(int (*)(struct mount *, struct vnode *, struct timespec *)) eopnotsupp,
110	vfs_stdextattrctl,
111	(void *)eopnotsupp,	/* vfs_suspendctl */
112	genfs_renamelock_enter,
113	genfs_renamelock_exit,
114	(void *)eopnotsupp,
115	mfs_vnodeopv_descs,
116	0,
117	{ NULL, NULL },
118};
119
120static int
121mfs_modcmd(modcmd_t cmd, void *arg)
122{
123	int error;
124
125	switch (cmd) {
126	case MODULE_CMD_INIT:
127		error = vfs_attach(&mfs_vfsops);
128		if (error != 0)
129			break;
130		sysctl_createv(&mfs_sysctl_log, 0, NULL, NULL,
131			       CTLFLAG_PERMANENT,
132			       CTLTYPE_NODE, "vfs", NULL,
133			       NULL, 0, NULL, 0,
134			       CTL_VFS, CTL_EOL);
135		sysctl_createv(&mfs_sysctl_log, 0, NULL, NULL,
136			       CTLFLAG_PERMANENT|CTLFLAG_ALIAS,
137			       CTLTYPE_NODE, "mfs",
138			       SYSCTL_DESCR("Memory based file system"),
139			       NULL, 1, NULL, 0,
140			       CTL_VFS, 3, CTL_EOL);
141		/*
142		 * XXX the "1" and the "3" above could be dynamic, thereby
143		 * eliminating one more instance of the "number to vfs"
144		 * mapping problem, but they are in order as taken from
145		 * sys/mount.h
146		 */
147		break;
148	case MODULE_CMD_FINI:
149		error = vfs_detach(&mfs_vfsops);
150		if (error != 0)
151			break;
152		sysctl_teardown(&mfs_sysctl_log);
153		break;
154	default:
155		error = ENOTTY;
156		break;
157	}
158
159	return (error);
160}
161
162/*
163 * Memory based filesystem initialization.
164 */
165void
166mfs_init(void)
167{
168
169	if (mfs_initcnt++ == 0) {
170		mutex_init(&mfs_lock, MUTEX_DEFAULT, IPL_NONE);
171		ffs_init();
172	}
173}
174
/*
 * Reinitialization hook: mfs keeps no state of its own to rebuild here
 * and simply forwards to ffs_reinit().
 */
void
mfs_reinit(void)
{

	ffs_reinit();
}
181
182void
183mfs_done(void)
184{
185
186	if (--mfs_initcnt == 0) {
187		ffs_done();
188		mutex_destroy(&mfs_lock);
189	}
190}
191
192/*
193 * Called by main() when mfs is going to be mounted as root.
194 */
195
196int
197mfs_mountroot(void)
198{
199	struct fs *fs;
200	struct mount *mp;
201	struct lwp *l = curlwp;		/* XXX */
202	struct ufsmount *ump;
203	struct mfsnode *mfsp;
204	int error = 0;
205
206	if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) {
207		vrele(rootvp);
208		return (error);
209	}
210
211	mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP);
212	rootvp->v_data = mfsp;
213	rootvp->v_op = mfs_vnodeop_p;
214	rootvp->v_tag = VT_MFS;
215	mfsp->mfs_baseoff = mfs_rootbase;
216	mfsp->mfs_size = mfs_rootsize;
217	mfsp->mfs_vnode = rootvp;
218	mfsp->mfs_proc = NULL;		/* indicate kernel space */
219	mfsp->mfs_shutdown = 0;
220	cv_init(&mfsp->mfs_cv, "mfs");
221	mfsp->mfs_refcnt = 1;
222	bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
223	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
224		vfs_unbusy(mp, false, NULL);
225		bufq_free(mfsp->mfs_buflist);
226		vfs_destroy(mp);
227		kmem_free(mfsp, sizeof(*mfsp));
228		return (error);
229	}
230	mutex_enter(&mountlist_lock);
231	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
232	mutex_exit(&mountlist_lock);
233	mp->mnt_vnodecovered = NULLVP;
234	ump = VFSTOUFS(mp);
235	fs = ump->um_fs;
236	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
237	(void)ffs_statvfs(mp, &mp->mnt_stat);
238	vfs_unbusy(mp, false, NULL);
239	return (0);
240}
241
242/*
243 * VFS Operations.
244 *
245 * mount system call
246 */
247/* ARGSUSED */
248int
249mfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
250{
251	struct lwp *l = curlwp;
252	struct vnode *devvp;
253	struct mfs_args *args = data;
254	struct ufsmount *ump;
255	struct fs *fs;
256	struct mfsnode *mfsp;
257	struct proc *p;
258	int flags, error = 0;
259
260	if (args == NULL)
261		return EINVAL;
262	if (*data_len < sizeof *args)
263		return EINVAL;
264
265	p = l->l_proc;
266	if (mp->mnt_flag & MNT_GETARGS) {
267		struct vnode *vp;
268
269		ump = VFSTOUFS(mp);
270		if (ump == NULL)
271			return EIO;
272
273		vp = ump->um_devvp;
274		if (vp == NULL)
275			return EIO;
276
277		mfsp = VTOMFS(vp);
278		if (mfsp == NULL)
279			return EIO;
280
281		args->fspec = NULL;
282		args->base = mfsp->mfs_baseoff;
283		args->size = mfsp->mfs_size;
284		*data_len = sizeof *args;
285		return 0;
286	}
287	/*
288	 * XXX turn off async to avoid hangs when writing lots of data.
289	 * the problem is that MFS needs to allocate pages to clean pages,
290	 * so if we wait until the last minute to clean pages then there
291	 * may not be any pages available to do the cleaning.
292	 * ... and since the default partially-synchronous mode turns out
293	 * to not be sufficient under heavy load, make it full synchronous.
294	 */
295	mp->mnt_flag &= ~MNT_ASYNC;
296	mp->mnt_flag |= MNT_SYNCHRONOUS;
297
298	/*
299	 * If updating, check whether changing from read-only to
300	 * read/write; if there is no device name, that's all we do.
301	 */
302	if (mp->mnt_flag & MNT_UPDATE) {
303		ump = VFSTOUFS(mp);
304		fs = ump->um_fs;
305		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
306			flags = WRITECLOSE;
307			if (mp->mnt_flag & MNT_FORCE)
308				flags |= FORCECLOSE;
309			error = ffs_flushfiles(mp, flags, l);
310			if (error)
311				return (error);
312		}
313		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR))
314			fs->fs_ronly = 0;
315		if (args->fspec == NULL)
316			return EINVAL;
317		return (0);
318	}
319	error = getnewvnode(VT_MFS, NULL, mfs_vnodeop_p, NULL, &devvp);
320	if (error)
321		return (error);
322	devvp->v_vflag |= VV_MPSAFE;
323	devvp->v_type = VBLK;
324	spec_node_init(devvp, makedev(255, mfs_minor));
325	mfs_minor++;
326	mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP);
327	devvp->v_data = mfsp;
328	mfsp->mfs_baseoff = args->base;
329	mfsp->mfs_size = args->size;
330	mfsp->mfs_vnode = devvp;
331	mfsp->mfs_proc = p;
332	mfsp->mfs_shutdown = 0;
333	cv_init(&mfsp->mfs_cv, "mfsidl");
334	mfsp->mfs_refcnt = 1;
335	bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
336	if ((error = ffs_mountfs(devvp, mp, l)) != 0) {
337		mfsp->mfs_shutdown = 1;
338		vrele(devvp);
339		return (error);
340	}
341	ump = VFSTOUFS(mp);
342	fs = ump->um_fs;
343	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
344	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
345	if (error)
346		return error;
347	(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
348		sizeof(fs->fs_fsmnt));
349	fs->fs_fsmnt[sizeof(fs->fs_fsmnt) - 1] = '\0';
350	/* XXX: cleanup on error */
351	return 0;
352}
353
354/*
355 * Used to grab the process and keep it in the kernel to service
356 * memory filesystem I/O requests.
357 *
358 * Loop servicing I/O requests.
359 * Copy the requested data into or out of the memory filesystem
360 * address space.
361 */
362/* ARGSUSED */
363int
364mfs_start(struct mount *mp, int flags)
365{
366	struct vnode *vp;
367	struct mfsnode *mfsp;
368	struct proc *p;
369	struct buf *bp;
370	void *base;
371	int sleepreturn = 0, refcnt, error;
372	ksiginfoq_t kq;
373
374	/*
375	 * Ensure that file system is still mounted when getting mfsnode.
376	 * Add a reference to the mfsnode to prevent it disappearing in
377	 * this routine.
378	 */
379	if ((error = vfs_busy(mp, NULL)) != 0)
380		return error;
381	vp = VFSTOUFS(mp)->um_devvp;
382	mfsp = VTOMFS(vp);
383	mutex_enter(&mfs_lock);
384	mfsp->mfs_refcnt++;
385	mutex_exit(&mfs_lock);
386	vfs_unbusy(mp, false, NULL);
387
388	base = mfsp->mfs_baseoff;
389	mutex_enter(&mfs_lock);
390	while (mfsp->mfs_shutdown != 1) {
391		while ((bp = bufq_get(mfsp->mfs_buflist)) != NULL) {
392			mutex_exit(&mfs_lock);
393			mfs_doio(bp, base);
394			mutex_enter(&mfs_lock);
395		}
396		/*
397		 * If a non-ignored signal is received, try to unmount.
398		 * If that fails, or the filesystem is already in the
399		 * process of being unmounted, clear the signal (it has been
400		 * "processed"), otherwise we will loop here, as tsleep
401		 * will always return EINTR/ERESTART.
402		 */
403		if (sleepreturn != 0) {
404			mutex_exit(&mfs_lock);
405			if (dounmount(mp, 0, curlwp) != 0) {
406				p = curproc;
407				ksiginfo_queue_init(&kq);
408				mutex_enter(p->p_lock);
409				sigclearall(p, NULL, &kq);
410				mutex_exit(p->p_lock);
411				ksiginfo_queue_drain(&kq);
412			}
413			sleepreturn = 0;
414			mutex_enter(&mfs_lock);
415			continue;
416		}
417
418		sleepreturn = cv_wait_sig(&mfsp->mfs_cv, &mfs_lock);
419	}
420	KASSERT(bufq_peek(mfsp->mfs_buflist) == NULL);
421	refcnt = --mfsp->mfs_refcnt;
422	mutex_exit(&mfs_lock);
423	if (refcnt == 0) {
424		bufq_free(mfsp->mfs_buflist);
425		cv_destroy(&mfsp->mfs_cv);
426		kmem_free(mfsp, sizeof(*mfsp));
427	}
428	return (sleepreturn);
429}
430
431/*
432 * Get file system statistics.
433 */
434int
435mfs_statvfs(struct mount *mp, struct statvfs *sbp)
436{
437	int error;
438
439	error = ffs_statvfs(mp, sbp);
440	if (error)
441		return error;
442	(void)strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name,
443	    sizeof(sbp->f_fstypename));
444	sbp->f_fstypename[sizeof(sbp->f_fstypename) - 1] = '\0';
445	return 0;
446}
447