sdev_vfsops.c revision 3898:c788126f2a20
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * This is the /dev (hence, the sdev_ prefix) filesystem.
30 */
31
32#include <sys/types.h>
33#include <sys/param.h>
34#include <sys/sysmacros.h>
35#include <sys/systm.h>
36#include <sys/kmem.h>
37#include <sys/time.h>
38#include <sys/pathname.h>
39#include <sys/vfs.h>
40#include <sys/vfs_opreg.h>
41#include <sys/vnode.h>
42#include <sys/file.h>
43#include <sys/stat.h>
44#include <sys/uio.h>
45#include <sys/stat.h>
46#include <sys/errno.h>
47#include <sys/cmn_err.h>
48#include <sys/cred.h>
49#include <sys/statvfs.h>
50#include <sys/policy.h>
51#include <sys/mount.h>
52#include <sys/debug.h>
53#include <sys/modctl.h>
54#include <sys/mkdev.h>
55#include <fs/fs_subr.h>
56#include <sys/fs/sdev_impl.h>
57#include <sys/fs/sdev_node.h>
58#include <sys/fs/snode.h>
59#include <sys/fs/dv_node.h>
60#include <sys/sunndi.h>
61#include <sys/mntent.h>
62
63/*
64 * /dev vfs operations.
65 */
66
67/*
68 * globals
69 */
70struct sdev_data *sdev_origins; /* mount info for origins under /dev */
71kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */
72
73/*
74 * static
75 */
76static major_t devmajor;	/* the fictitious major we live on */
77static major_t devminor;	/* the fictitious minor of this instance */
78static struct sdev_data *sdev_mntinfo = NULL;	/* linked list of instances */
79
80/* LINTED E_STATIC_UNUSED */		/* useful for debugging */
81static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
82
/* forward declarations of the routines private to this file */
static int devinit(int fstype, char *name);
static int sdev_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int sdev_root(struct vfs *vfsp, struct vnode **vpp);
static int sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static void sdev_insert_mntinfo(struct sdev_data *data);
90
91static vfsdef_t sdev_vfssw = {
92	VFSDEF_VERSION,
93	"dev",		/* type name string */
94	devinit,	/* init routine */
95	VSW_CANREMOUNT,	/* flags */
96	NULL		/* mount options table prototype */
97};
98
99
100/*
101 * Module linkage information
102 */
103static struct modlfs modlfs = {
104	&mod_fsops, "/dev filesystem %I%", &sdev_vfssw
105};
106
107static struct modlinkage modlinkage = {
108	MODREV_1, (void *)&modlfs, NULL
109};
110
111int
112_init(void)
113{
114	int e;
115
116	mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
117	sdev_node_cache_init();
118	sdev_devfsadm_lockinit();
119	if ((e = mod_install(&modlinkage)) != 0) {
120		sdev_devfsadm_lockdestroy();
121		sdev_node_cache_fini();
122		mutex_destroy(&sdev_lock);
123		return (e);
124	}
125	return (0);
126}
127
128/*
129 * dev module remained loaded for the global /dev instance
130 */
131int
132_fini(void)
133{
134	return (EBUSY);
135}
136
137int
138_info(struct modinfo *modinfop)
139{
140	return (mod_info(&modlinkage, modinfop));
141}
142
143/*ARGSUSED*/
144static int
145devinit(int fstype, char *name)
146{
147	static const fs_operation_def_t dev_vfsops_tbl[] = {
148		VFSNAME_MOUNT,		{ .vfs_mount = sdev_mount },
149		VFSNAME_UNMOUNT,	{ .vfs_unmount = sdev_unmount },
150		VFSNAME_ROOT, 		{ .vfs_root = sdev_root },
151		VFSNAME_STATVFS,	{ .vfs_statvfs = sdev_statvfs },
152		NULL,			NULL
153	};
154
155	int	error;
156	extern major_t getudev(void);
157
158	devtype = fstype;
159
160	error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
161	if (error != 0) {
162		cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
163		return (error);
164	}
165
166	error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
167	if (error != 0) {
168		(void) vfs_freevfsops_by_type(fstype);
169		cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
170		return (error);
171	}
172
173	if ((devmajor = getudev()) == (major_t)-1) {
174		cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
175		return (1);
176	}
177
178	/* initialize negative cache */
179	sdev_ncache_init();
180
181	return (0);
182}
183
184/*
185 * Both mount point and backing store directory name are
186 * passed in from userland
187 */
188static int
189sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
190    struct cred *cr)
191{
192	struct sdev_data *sdev_data;
193	struct vnode *avp;
194	struct sdev_node *dv;
195	struct sdev_mountargs *args = NULL;
196	int	error = 0;
197	dev_t	devdev;
198
199	/*
200	 * security check
201	 */
202	if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
203	    (secpolicy_sys_devices(cr) != 0))
204		return (EPERM);
205
206	/*
207	 * Sanity check the mount point
208	 */
209	if (mvp->v_type != VDIR)
210		return (ENOTDIR);
211
212	/*
213	 * Sanity Check for overlay mount.
214	 */
215	mutex_enter(&mvp->v_lock);
216	if ((uap->flags & MS_OVERLAY) == 0 &&
217	    (uap->flags & MS_REMOUNT) == 0 &&
218	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
219		mutex_exit(&mvp->v_lock);
220		return (EBUSY);
221	}
222	mutex_exit(&mvp->v_lock);
223
224	args = kmem_zalloc(sizeof (*args), KM_SLEEP);
225
226	if ((uap->flags & MS_DATA) &&
227	    (uap->datalen != 0 && uap->dataptr != NULL)) {
228		/* copy in the arguments */
229		if (error = sdev_copyin_mountargs(uap, args))
230			goto cleanup;
231	}
232
233	/*
234	 * Sanity check the backing store
235	 */
236	if (args->sdev_attrdir) {
237		/* user supplied an attribute store */
238		if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
239		    UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
240			cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
241			    "directory %s failed",
242			    (char *)(uintptr_t)args->sdev_attrdir);
243			goto cleanup;
244		}
245
246		if (avp->v_type != VDIR) {
247			VN_RELE(avp);
248			error = ENOTDIR;
249			goto cleanup;
250		}
251	} else {
252		/* use mountp as the attribute store */
253		avp = mvp;
254		VN_HOLD(avp);
255	}
256
257	mutex_enter(&sdev_lock);
258
259	/*
260	 * handling installation
261	 */
262	if (uap->flags & MS_REMOUNT) {
263		sdev_data = (struct sdev_data *)vfsp->vfs_data;
264		ASSERT(sdev_data);
265
266		dv = sdev_data->sdev_root;
267		ASSERT(dv == dv->sdev_dotdot);
268
269		/*
270		 * mark all existing sdev_nodes (except root node) stale
271		 */
272		sdev_stale(dv);
273
274		/* Reset previous mountargs */
275		if (sdev_data->sdev_mountargs) {
276			kmem_free(sdev_data->sdev_mountargs,
277			    sizeof (struct sdev_mountargs));
278		}
279		sdev_data->sdev_mountargs = args;
280		args = NULL;		/* so it won't be freed below */
281
282		sdev_stale_attrvp = dv->sdev_attrvp;
283		dv->sdev_attrvp = avp;
284		vfsp->vfs_mtime = ddi_get_time();
285
286		mutex_exit(&sdev_lock);
287		goto cleanup;				/* we're done */
288	}
289
290	/*
291	 * Create and initialize the vfs-private data.
292	 */
293	devdev = makedevice(devmajor, devminor);
294	while (vfs_devismounted(devdev)) {
295		devminor = (devminor + 1) & MAXMIN32;
296
297		/*
298		 * All the minor numbers are used up.
299		 */
300		if (devminor == 0) {
301			mutex_exit(&sdev_lock);
302			VN_RELE(avp);
303			error = ENODEV;
304			goto cleanup;
305		}
306
307		devdev = makedevice(devmajor, devminor);
308	}
309
310	dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
311	sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
312	vfsp->vfs_dev = devdev;
313	vfsp->vfs_data = (caddr_t)sdev_data;
314	vfsp->vfs_fstype = devtype;
315	vfsp->vfs_bsize = DEV_BSIZE;
316	vfsp->vfs_mtime = ddi_get_time();
317	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
318
319	ASSERT(dv == dv->sdev_dotdot);
320
321	sdev_data->sdev_vfsp = vfsp;
322	sdev_data->sdev_root = dv;
323	sdev_data->sdev_mountargs = args;
324
325	/* get acl flavor from attribute dir */
326	if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
327	    kcred) != 0 || sdev_data->sdev_acl_flavor == 0)
328		sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
329
330	args = NULL;			/* so it won't be freed below */
331	sdev_insert_mntinfo(sdev_data);
332	mutex_exit(&sdev_lock);
333
334	if (!SDEV_IS_GLOBAL(dv)) {
335		ASSERT(sdev_origins);
336		dv->sdev_flags &= ~SDEV_GLOBAL;
337		dv->sdev_origin = sdev_origins->sdev_root;
338	} else {
339		sdev_ncache_setup();
340		rw_enter(&dv->sdev_contents, RW_WRITER);
341		sdev_filldir_dynamic(dv);
342		rw_exit(&dv->sdev_contents);
343	}
344
345	sdev_update_timestamps(dv->sdev_attrvp,
346		cr, AT_CTIME|AT_MTIME|AT_ATIME);
347
348cleanup:
349	if (args)
350		kmem_free(args, sizeof (*args));
351	return (error);
352}
353
354/*
355 * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
356 */
357static int
358sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
359{
360	struct sdev_node *dv;
361	int error;
362	struct sdev_data *sdev_data, *prev, *next;
363
364	/*
365	 * enforce the security policies
366	 */
367	if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
368	    (secpolicy_sys_devices(cr) != 0))
369		return (EPERM);
370
371	if (flag & MS_FORCE)
372		return (ENOTSUP);
373
374	mutex_enter(&sdev_lock);
375	dv = VFSTOSDEVFS(vfsp)->sdev_root;
376	ASSERT(dv == dv->sdev_dotdot);
377	if (SDEVTOV(dv)->v_count > 1) {
378		mutex_exit(&sdev_lock);
379		return (EBUSY);
380	}
381
382	/*
383	 * global instance remains mounted
384	 */
385	if (SDEV_IS_GLOBAL(dv)) {
386		mutex_exit(&sdev_lock);
387		return (EBUSY);
388	}
389	mutex_exit(&sdev_lock);
390
391	/* verify the v_count */
392	if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
393		return (error);
394	}
395	ASSERT(SDEVTOV(dv)->v_count == 1);
396
397	/* release hold on root node and destroy it */
398	SDEV_RELE(dv);
399	dv->sdev_nlink -= 2;
400	sdev_nodedestroy(dv, 0);
401
402	sdev_data = (struct sdev_data *)vfsp->vfs_data;
403	vfsp->vfs_data = (caddr_t)0;
404
405	/*
406	 * XXX separate it into sdev_delete_mntinfo() if useful
407	 */
408	mutex_enter(&sdev_lock);
409	prev = sdev_data->sdev_prev;
410	next = sdev_data->sdev_next;
411	if (prev)
412		prev->sdev_next = next;
413	else
414		sdev_mntinfo = next;
415	if (next)
416		next->sdev_prev = prev;
417	mutex_exit(&sdev_lock);
418
419	if (sdev_data->sdev_mountargs) {
420		kmem_free(sdev_data->sdev_mountargs,
421		    sizeof (struct sdev_mountargs));
422	}
423	kmem_free(sdev_data, sizeof (struct sdev_data));
424	return (0);
425}
426
427/*
428 * return root vnode for given vfs
429 */
430static int
431sdev_root(struct vfs *vfsp, struct vnode **vpp)
432{
433	*vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
434	VN_HOLD(*vpp);
435	return (0);
436}
437
438/*
439 * return 'generic superblock' information to userland.
440 *
441 * not much that we can usefully admit to here
442 */
443static int
444sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
445{
446	dev32_t d32;
447
448	bzero(sbp, sizeof (*sbp));
449	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
450	sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
451
452	/* no illusions that free/avail files is relevant to dev */
453	sbp->f_ffree = 0;
454	sbp->f_favail = 0;
455
456	/* no illusions that blocks are relevant to devfs */
457	sbp->f_bfree = 0;
458	sbp->f_bavail = 0;
459	sbp->f_blocks = 0;
460
461	(void) cmpldev(&d32, vfsp->vfs_dev);
462	sbp->f_fsid = d32;
463	(void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
464	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
465	sbp->f_namemax = MAXNAMELEN - 1;
466	(void) strcpy(sbp->f_fstr, "dev");
467
468	return (0);
469}
470
471int
472sdev_module_register(char *mod_name, struct devname_ops *dev_ops)
473{
474	struct devname_nsmap *map = NULL;
475
476	if (strcmp(mod_name, DEVNAME_NSCONFIG) == 0) {
477		devname_ns_ops = dev_ops;
478		return (0);
479	}
480
481	map = sdev_get_nsmap_by_module(mod_name);
482	if (map == NULL)
483		return (EFAULT);
484
485	rw_enter(&map->dir_lock, RW_WRITER);
486	map->dir_ops = dev_ops;
487	rw_exit(&map->dir_lock);
488	return (0);
489}
490
491static void
492sdev_insert_mntinfo(struct sdev_data *data)
493{
494	ASSERT(mutex_owned(&sdev_lock));
495	data->sdev_next = sdev_mntinfo;
496	data->sdev_prev = NULL;
497	if (sdev_mntinfo) {
498		sdev_mntinfo->sdev_prev = data;
499	} else {
500		sdev_origins = data;
501	}
502	sdev_mntinfo = data;
503}
504
505struct sdev_data *
506sdev_find_mntinfo(char *mntpt)
507{
508	struct sdev_data *mntinfo;
509
510	mutex_enter(&sdev_lock);
511	mntinfo = sdev_mntinfo;
512	while (mntinfo) {
513		if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
514			SDEVTOV(mntinfo->sdev_root)->v_count++;
515			break;
516		}
517		mntinfo = mntinfo->sdev_next;
518	}
519	mutex_exit(&sdev_lock);
520	return (mntinfo);
521}
522
523void
524sdev_mntinfo_rele(struct sdev_data *mntinfo)
525{
526	mutex_enter(&sdev_lock);
527	SDEVTOV(mntinfo->sdev_root)->v_count--;
528	mutex_exit(&sdev_lock);
529}
530