sdev_subr.c revision 6065:b05c5c670963
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * utility routines for the /dev fs
30 */
31
32#include <sys/types.h>
33#include <sys/param.h>
34#include <sys/t_lock.h>
35#include <sys/systm.h>
36#include <sys/sysmacros.h>
37#include <sys/user.h>
38#include <sys/time.h>
39#include <sys/vfs.h>
40#include <sys/vnode.h>
41#include <sys/file.h>
42#include <sys/fcntl.h>
43#include <sys/flock.h>
44#include <sys/kmem.h>
45#include <sys/uio.h>
46#include <sys/errno.h>
47#include <sys/stat.h>
48#include <sys/cred.h>
49#include <sys/dirent.h>
50#include <sys/pathname.h>
51#include <sys/cmn_err.h>
52#include <sys/debug.h>
53#include <sys/mode.h>
54#include <sys/policy.h>
55#include <fs/fs_subr.h>
56#include <sys/mount.h>
57#include <sys/fs/snode.h>
58#include <sys/fs/dv_node.h>
59#include <sys/fs/sdev_impl.h>
60#include <sys/fs/sdev_node.h>
61#include <sys/sunndi.h>
62#include <sys/sunmdi.h>
63#include <sys/conf.h>
64#include <sys/proc.h>
65#include <sys/user.h>
66#include <sys/modctl.h>
67
68#ifdef DEBUG
69int sdev_debug = 0x00000001;
70int sdev_debug_cache_flags = 0;
71#endif
72
73/*
74 * globals
75 */
76/* prototype memory vattrs */
77vattr_t sdev_vattr_dir = {
78	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
79	VDIR,					/* va_type */
80	SDEV_DIRMODE_DEFAULT,			/* va_mode */
81	SDEV_UID_DEFAULT,			/* va_uid */
82	SDEV_GID_DEFAULT,			/* va_gid */
83	0,					/* va_fsid */
84	0,					/* va_nodeid */
85	0,					/* va_nlink */
86	0,					/* va_size */
87	0,					/* va_atime */
88	0,					/* va_mtime */
89	0,					/* va_ctime */
90	0,					/* va_rdev */
91	0,					/* va_blksize */
92	0,					/* va_nblocks */
93	0					/* va_vcode */
94};
95
96vattr_t sdev_vattr_lnk = {
97	AT_TYPE|AT_MODE,			/* va_mask */
98	VLNK,					/* va_type */
99	SDEV_LNKMODE_DEFAULT,			/* va_mode */
100	SDEV_UID_DEFAULT,			/* va_uid */
101	SDEV_GID_DEFAULT,			/* va_gid */
102	0,					/* va_fsid */
103	0,					/* va_nodeid */
104	0,					/* va_nlink */
105	0,					/* va_size */
106	0,					/* va_atime */
107	0,					/* va_mtime */
108	0,					/* va_ctime */
109	0,					/* va_rdev */
110	0,					/* va_blksize */
111	0,					/* va_nblocks */
112	0					/* va_vcode */
113};
114
115vattr_t sdev_vattr_blk = {
116	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
117	VBLK,					/* va_type */
118	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
119	SDEV_UID_DEFAULT,			/* va_uid */
120	SDEV_GID_DEFAULT,			/* va_gid */
121	0,					/* va_fsid */
122	0,					/* va_nodeid */
123	0,					/* va_nlink */
124	0,					/* va_size */
125	0,					/* va_atime */
126	0,					/* va_mtime */
127	0,					/* va_ctime */
128	0,					/* va_rdev */
129	0,					/* va_blksize */
130	0,					/* va_nblocks */
131	0					/* va_vcode */
132};
133
134vattr_t sdev_vattr_chr = {
135	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
136	VCHR,					/* va_type */
137	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
138	SDEV_UID_DEFAULT,			/* va_uid */
139	SDEV_GID_DEFAULT,			/* va_gid */
140	0,					/* va_fsid */
141	0,					/* va_nodeid */
142	0,					/* va_nlink */
143	0,					/* va_size */
144	0,					/* va_atime */
145	0,					/* va_mtime */
146	0,					/* va_ctime */
147	0,					/* va_rdev */
148	0,					/* va_blksize */
149	0,					/* va_nblocks */
150	0					/* va_vcode */
151};
152
153kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
154int		devtype;		/* fstype */
155
156struct devname_ops *devname_ns_ops;	/* default name service directory ops */
157kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */
158
159/* static */
160static struct devname_nsmap *devname_nsmaps = NULL;
161				/* contents from /etc/dev/devname_master */
162static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
163
164static struct vnodeops *sdev_get_vop(struct sdev_node *);
165static void sdev_set_no_nocache(struct sdev_node *);
166static int sdev_get_moduleops(struct sdev_node *);
167static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
168static void sdev_free_vtab(fs_operation_def_t *);
169
170static void
171sdev_prof_free(struct sdev_node *dv)
172{
173	ASSERT(!SDEV_IS_GLOBAL(dv));
174	if (dv->sdev_prof.dev_name)
175		nvlist_free(dv->sdev_prof.dev_name);
176	if (dv->sdev_prof.dev_map)
177		nvlist_free(dv->sdev_prof.dev_map);
178	if (dv->sdev_prof.dev_symlink)
179		nvlist_free(dv->sdev_prof.dev_symlink);
180	if (dv->sdev_prof.dev_glob_incdir)
181		nvlist_free(dv->sdev_prof.dev_glob_incdir);
182	if (dv->sdev_prof.dev_glob_excdir)
183		nvlist_free(dv->sdev_prof.dev_glob_excdir);
184	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
185}
186
187/*
188 * sdev_node cache constructor
189 */
190/*ARGSUSED1*/
191static int
192i_sdev_node_ctor(void *buf, void *cfarg, int flag)
193{
194	struct sdev_node *dv = (struct sdev_node *)buf;
195	struct vnode *vp;
196
197	ASSERT(flag == KM_SLEEP);
198
199	bzero(buf, sizeof (struct sdev_node));
200	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
201	dv->sdev_vnode = vn_alloc(KM_SLEEP);
202	vp = SDEVTOV(dv);
203	vp->v_data = (caddr_t)dv;
204	return (0);
205}
206
207/* sdev_node destructor for kmem cache */
208/*ARGSUSED1*/
209static void
210i_sdev_node_dtor(void *buf, void *arg)
211{
212	struct sdev_node *dv = (struct sdev_node *)buf;
213	struct vnode *vp = SDEVTOV(dv);
214
215	rw_destroy(&dv->sdev_contents);
216	vn_free(vp);
217}
218
219/* initialize sdev_node cache */
220void
221sdev_node_cache_init()
222{
223	int flags = 0;
224
225#ifdef	DEBUG
226	flags = sdev_debug_cache_flags;
227	if (flags)
228		sdcmn_err(("cache debug flags 0x%x\n", flags));
229#endif	/* DEBUG */
230
231	ASSERT(sdev_node_cache == NULL);
232	sdev_node_cache = kmem_cache_create("sdev_node_cache",
233	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
234	    NULL, NULL, NULL, flags);
235}
236
237/* destroy sdev_node cache */
238void
239sdev_node_cache_fini()
240{
241	ASSERT(sdev_node_cache != NULL);
242	kmem_cache_destroy(sdev_node_cache);
243	sdev_node_cache = NULL;
244}
245
246void
247sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
248{
249	ASSERT(dv);
250	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
251	dv->sdev_state = state;
252}
253
254static void
255sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
256{
257	timestruc_t now;
258
259	ASSERT(vap);
260
261	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
262	*dv->sdev_attr = *vap;
263
264	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
265
266	gethrestime(&now);
267	dv->sdev_attr->va_atime = now;
268	dv->sdev_attr->va_mtime = now;
269	dv->sdev_attr->va_ctime = now;
270}
271
272/* alloc and initialize a sdev_node */
273int
274sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
275    vattr_t *vap)
276{
277	struct sdev_node *dv = NULL;
278	struct vnode *vp;
279	size_t nmlen, len;
280	devname_handle_t  *dhl;
281
282	nmlen = strlen(nm) + 1;
283	if (nmlen > MAXNAMELEN) {
284		sdcmn_err9(("sdev_nodeinit: node name %s"
285		    " too long\n", nm));
286		*newdv = NULL;
287		return (ENAMETOOLONG);
288	}
289
290	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
291
292	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
293	bcopy(nm, dv->sdev_name, nmlen);
294	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
295	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
296	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
297	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
298	/* overwritten for VLNK nodes */
299	dv->sdev_symlink = NULL;
300
301	vp = SDEVTOV(dv);
302	vn_reinit(vp);
303	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
304	if (vap)
305		vp->v_type = vap->va_type;
306
307	/*
308	 * initialized to the parent's vnodeops.
309	 * may be overwritten for a VDIR
310	 */
311	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
312	vn_exists(vp);
313
314	dv->sdev_dotdot = NULL;
315	dv->sdev_dot = NULL;
316	dv->sdev_next = NULL;
317	dv->sdev_attrvp = NULL;
318	if (vap) {
319		sdev_attrinit(dv, vap);
320	} else {
321		dv->sdev_attr = NULL;
322	}
323
324	dv->sdev_ino = sdev_mkino(dv);
325	dv->sdev_nlink = 0;		/* updated on insert */
326	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
327	dv->sdev_flags |= SDEV_BUILD;
328	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
329	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
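	/*
	 * Global /dev nodes carry a devname namespace handle and are
	 * checked against the no-cache list; non-global (per-zone) nodes
	 * instead track their global origin and a device profile.
	 */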
330	if (SDEV_IS_GLOBAL(ddv)) {
331		dv->sdev_flags |= SDEV_GLOBAL;
332		dv->sdev_mapinfo = NULL;
333		dhl = &(dv->sdev_handle);
334		dhl->dh_data = dv;
335		dhl->dh_spec = DEVNAME_NS_NONE;
336		dhl->dh_args = NULL;
337		sdev_set_no_nocache(dv);
338		dv->sdev_gdir_gen = 0;
339	} else {
340		dv->sdev_flags &= ~SDEV_GLOBAL;
341		dv->sdev_origin = NULL; /* set later */
342		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
343		dv->sdev_ldir_gen = 0;
344		dv->sdev_devtree_gen = 0;
345	}
346
347	rw_enter(&dv->sdev_contents, RW_WRITER);
348	sdev_set_nodestate(dv, SDEV_INIT);
349	rw_exit(&dv->sdev_contents);
350	*newdv = dv;
351
352	return (0);
353}
354
355/*
356 * transition a sdev_node into SDEV_READY state
357 */
358int
359sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
360    void *args, struct cred *cred)
361{
362	int error = 0;
363	struct vnode *vp = SDEVTOV(dv);
364	vtype_t type;
365
366	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
367
368	type = vap->va_type;
369	vp->v_type = type;
370	vp->v_rdev = vap->va_rdev;
371	rw_enter(&dv->sdev_contents, RW_WRITER);
372	if (type == VDIR) {
373		dv->sdev_nlink = 2;
374		dv->sdev_flags &= ~SDEV_PERSIST;
375		dv->sdev_flags &= ~SDEV_DYNAMIC;
376		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
377		error = sdev_get_moduleops(dv); /* from plug-in module */
378		ASSERT(dv->sdev_dotdot);
379		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
380		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
381	} else if (type == VLNK) {
382		ASSERT(args);
383		dv->sdev_nlink = 1;
384		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
385	} else {
386		dv->sdev_nlink = 1;
387	}
388
389	if (!(SDEV_IS_GLOBAL(dv))) {
390		dv->sdev_origin = (struct sdev_node *)args;
391		dv->sdev_flags &= ~SDEV_PERSIST;
392	}
393
394	/*
395	 * the shadow node is created here; if that fails
396	 * (indicated by dv->sdev_attrvp == NULL), it is
397	 * created later in sdev_setattr
398	 */
399	if (avp) {
400		dv->sdev_attrvp = avp;
401	} else {
402		if (dv->sdev_attr == NULL)
403			sdev_attrinit(dv, vap);
404		else
405			*dv->sdev_attr = *vap;
406
407		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
408		    ((SDEVTOV(dv)->v_type == VDIR) &&
409		    (dv->sdev_attrvp == NULL)))
410			error = sdev_shadow_node(dv, cred);
411	}
412
413	/* transition to READY state */
414	sdev_set_nodestate(dv, SDEV_READY);
415	sdev_nc_node_exists(dv);
416	rw_exit(&dv->sdev_contents);
417	return (error);
418}
419
420/*
421 * setting ZOMBIE state
422 */
423static int
424sdev_nodezombied(struct sdev_node *dv)
425{
426	rw_enter(&dv->sdev_contents, RW_WRITER);
427	sdev_set_nodestate(dv, SDEV_ZOMBIE);
428	rw_exit(&dv->sdev_contents);
429	return (0);
430}
431
432/*
433 * Build the VROOT sdev_node.
434 */
435/*ARGSUSED*/
436struct sdev_node *
437sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
438    struct vnode *avp, struct cred *cred)
439{
440	struct sdev_node *dv;
441	struct vnode *vp;
442	char devdir[] = "/dev";
443
444	ASSERT(sdev_node_cache != NULL);
445	ASSERT(avp);
446	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
447	vp = SDEVTOV(dv);
448	vn_reinit(vp);
449	vp->v_flag |= VROOT;
450	vp->v_vfsp = vfsp;
451	vp->v_type = VDIR;
452	vp->v_rdev = devdev;
453	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
454	vn_exists(vp);
455
456	if (vfsp->vfs_mntpt)
457		dv->sdev_name = i_ddi_strdup(
458		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
459	else
460		/* vfs_mountdev1 sets the mount point later */
461		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
462	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
463	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
464	dv->sdev_ino = SDEV_ROOTINO;
465	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
466	dv->sdev_dotdot = dv;		/* .. == self */
467	dv->sdev_attrvp = avp;
468	dv->sdev_attr = NULL;
469	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
470	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
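	/*
	 * The global /dev root is marked GLOBAL and PERSIST and initializes
	 * the devname_nsmaps lock; any other /dev root (typically a
	 * non-global zone's /dev) is not persisted and carries only a
	 * device profile.
	 */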
471	if (strcmp(dv->sdev_name, "/dev") == 0) {
472		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
473		dv->sdev_mapinfo = NULL;
474		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
475		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
476		dv->sdev_gdir_gen = 0;
477	} else {
478		dv->sdev_flags = SDEV_BUILD;
479		dv->sdev_flags &= ~SDEV_PERSIST;
480		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
481		dv->sdev_ldir_gen = 0;
482		dv->sdev_devtree_gen = 0;
483	}
484
485	rw_enter(&dv->sdev_contents, RW_WRITER);
486	sdev_set_nodestate(dv, SDEV_READY);
487	rw_exit(&dv->sdev_contents);
488	sdev_nc_node_exists(dv);
489	return (dv);
490}
491
492/*
493 *  1. load the module
494 *  2. modload invokes sdev_module_register, which in turn sets
495 *     the dv->sdev_mapinfo->dir_ops
496 *
497 * note: locking order:
498 *	dv->sdev_contents -> map->dir_lock
499 */
500static int
501sdev_get_moduleops(struct sdev_node *dv)
502{
503	int error = 0;
504	struct devname_nsmap *map = NULL;
505	char *module;
506	char *path;
507	int load = 1;
508
509	ASSERT(SDEVTOV(dv)->v_type == VDIR);
510
511	if (devname_nsmaps == NULL)
512		return (0);
513
514	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
515		return (0);
516
517
518	path = dv->sdev_path;
519	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
520		rw_enter(&map->dir_lock, RW_READER);
521		if (map->dir_invalid) {
522			if (map->dir_module && map->dir_newmodule &&
523			    (strcmp(map->dir_module,
524			    map->dir_newmodule) == 0)) {
525				load = 0;
526			}
527			sdev_replace_nsmap(map, map->dir_newmodule,
528			    map->dir_newmap);
529		}
530
531		module = map->dir_module;
532		if (module && load) {
533			sdcmn_err6(("sdev_get_moduleops: "
534			    "load module %s", module));
535			rw_exit(&map->dir_lock);
536			error = modload("devname", module);
537			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
538			if (error < 0) {
539				return (-1);
540			}
541		} else if (module == NULL) {
542			/*
543			 * loading the module ops for name services
544			 */
545			if (devname_ns_ops == NULL) {
546				sdcmn_err6((
547				    "sdev_get_moduleops: modload default\n"));
548				error = modload("devname", DEVNAME_NSCONFIG);
549				sdcmn_err6((
550				    "sdev_get_moduleops: error %d\n", error));
551				if (error < 0) {
552					return (-1);
553				}
554			}
555
556			if (!rw_tryupgrade(&map->dir_lock)) {
557				rw_exit(&map->dir_lock);
558				rw_enter(&map->dir_lock, RW_WRITER);
559			}
560			ASSERT(devname_ns_ops);
561			map->dir_ops = devname_ns_ops;
562			rw_exit(&map->dir_lock);
563		}
564	}
565
566	dv->sdev_mapinfo = map;
567	return (0);
568}
569
570/* directory dependent vop table */
571struct sdev_vop_table {
572	char *vt_name;				/* subdirectory name */
573	const fs_operation_def_t *vt_service;	/* vnodeops table */
574	struct vnodeops *vt_vops;		/* constructed vop */
575	struct vnodeops **vt_global_vops;	/* global container for vop */
576	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
577	int vt_flags;
578};
579
580/*
581 * A nice improvement would be to provide a plug-in mechanism
582 * for this table instead of a const table.
583 */
584static struct sdev_vop_table vtab[] =
585{
586	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
587	SDEV_DYNAMIC | SDEV_VTOR },
588
589	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
590
591	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
592	SDEV_DYNAMIC | SDEV_VTOR },
593
594	{ NULL, NULL, NULL, NULL, NULL, 0}
595};
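/*
 * Illustrative sketch only (no such entry exists in this file): a new
 * subdirectory supplying its own vnodeops and validator would use the
 * same shape, with hypothetical names:
 *
 *	{ "foo", devfoo_vnodeops_tbl, NULL, &devfoo_vnodeops,
 *	devfoo_validate, SDEV_DYNAMIC | SDEV_VTOR },
 */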
596
597
598/*
599 *  return the vnodeops for a directory if it is listed in the vtab
600 */
601static struct vnodeops *
602sdev_get_vop(struct sdev_node *dv)
603{
604	int i;
605	char *path;
606
607	path = dv->sdev_path;
608	ASSERT(path);
609
610	/* get the path relative to /dev/ */
611	path += 5;
612
613	/* find the matching vtab entry, if any */
614	for (i = 0; vtab[i].vt_name; i++) {
615		if (strcmp(vtab[i].vt_name, path) != 0)
616			continue;
617		dv->sdev_flags |= vtab[i].vt_flags;
618
619		if (vtab[i].vt_vops) {
620			if (vtab[i].vt_global_vops)
621				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
622			return (vtab[i].vt_vops);
623		}
624
625		if (vtab[i].vt_service) {
626			fs_operation_def_t *templ;
627			templ = sdev_merge_vtab(vtab[i].vt_service);
628			if (vn_make_ops(vtab[i].vt_name,
629			    (const fs_operation_def_t *)templ,
630			    &vtab[i].vt_vops) != 0) {
631				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
632				    vtab[i].vt_name);
633				/*NOTREACHED*/
634			}
635			if (vtab[i].vt_global_vops) {
636				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
637			}
638			sdev_free_vtab(templ);
639			return (vtab[i].vt_vops);
640		}
641		return (sdev_vnodeops);
642	}
643
644	/* child inherits the persistence of the parent */
645	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
646		dv->sdev_flags |= SDEV_PERSIST;
647
648	return (sdev_vnodeops);
649}
650
651static void
652sdev_set_no_nocache(struct sdev_node *dv)
653{
654	int i;
655	char *path;
656
657	ASSERT(dv->sdev_path);
658	path = dv->sdev_path + strlen("/dev/");
659
660	for (i = 0; vtab[i].vt_name; i++) {
661		if (strcmp(vtab[i].vt_name, path) == 0) {
662			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
663				dv->sdev_flags |= SDEV_NO_NCACHE;
664			break;
665		}
666	}
667}
668
669void *
670sdev_get_vtor(struct sdev_node *dv)
671{
672	int i;
673
674	for (i = 0; vtab[i].vt_name; i++) {
675		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
676			continue;
677		return ((void *)vtab[i].vt_vtor);
678	}
679	return (NULL);
680}
681
682/*
683 * Generate an inode number for an sdev_node
684 */
685ino_t
686sdev_mkino(struct sdev_node *dv)
687{
688	ino_t	ino;
689
690	/*
691	 * for now, follow the lead of tmpfs here
692	 * need to someday understand the requirements here
693	 */
694	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
695	ino += SDEV_ROOTINO + 1;
696
697	return (ino);
698}
699
700static int
701sdev_getlink(struct vnode *linkvp, char **link)
702{
703	int err;
704	char *buf;
705	struct uio uio = {0};
706	struct iovec iov = {0};
707
708	if (linkvp == NULL)
709		return (ENOENT);
710	ASSERT(linkvp->v_type == VLNK);
711
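	/* read the link target into a MAXPATHLEN scratch buffer */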
712	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
713	iov.iov_base = buf;
714	iov.iov_len = MAXPATHLEN;
715	uio.uio_iov = &iov;
716	uio.uio_iovcnt = 1;
717	uio.uio_resid = MAXPATHLEN;
718	uio.uio_segflg = UIO_SYSSPACE;
719	uio.uio_llimit = MAXOFFSET_T;
720
721	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
722	if (err) {
723		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
724		kmem_free(buf, MAXPATHLEN);
725		return (ENOENT);
726	}
727
728	/* mission complete */
729	*link = i_ddi_strdup(buf, KM_SLEEP);
730	kmem_free(buf, MAXPATHLEN);
731	return (0);
732}
733
734/*
735 * A convenient wrapper for the readlink() of a /dev symlink:
736 * resolve the target of the symlink node and
737 * place the link into dv->sdev_symlink
738 */
739static int
740sdev_follow_link(struct sdev_node *dv)
741{
742	int err;
743	struct vnode *linkvp;
744	char *link = NULL;
745
746	linkvp = SDEVTOV(dv);
747	if (linkvp == NULL)
748		return (ENOENT);
749	ASSERT(linkvp->v_type == VLNK);
750	err = sdev_getlink(linkvp, &link);
751	if (err) {
752		(void) sdev_nodezombied(dv);
753		dv->sdev_symlink = NULL;
754		return (ENOENT);
755	}
756
757	ASSERT(link != NULL);
758	dv->sdev_symlink = link;
759	return (0);
760}
761
762static int
763sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
764{
765	vtype_t otype = SDEVTOV(dv)->v_type;
766
767	/*
768	 * existing sdev_node has a different type.
769	 */
770	if (otype != nvap->va_type) {
771		sdcmn_err9(("sdev_node_check: existing node "
772		    "  %s type %d does not match new node type %d\n",
773		    dv->sdev_name, otype, nvap->va_type));
774		return (EEXIST);
775	}
776
777	/*
778	 * For a symlink, the target should be the same.
779	 */
780	if (otype == VLNK) {
781		ASSERT(nargs != NULL);
782		ASSERT(dv->sdev_symlink != NULL);
783		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
784			sdcmn_err9(("sdev_node_check: existing node "
785			    " %s has a different symlink %s than new node "
786			    " %s\n", dv->sdev_name, dv->sdev_symlink,
787			    (char *)nargs));
788			return (EEXIST);
789		}
790	}
791
792	return (0);
793}
794
795/*
796 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
797 *
798 * arguments:
799 *	- ddv (parent)
800 *	- nm (child name)
801 *	- newdv (sdev_node for nm is returned here)
802 *	- vap (vattr for the node to be created, va_type should be set.
803 *	  the defaults should be used if unknown)
804 *	- cred
805 *	- args
806 *	    . tnm (for VLNK)
807 *	    . global sdev_node (for !SDEV_GLOBAL)
808 * 	- state: SDEV_INIT, SDEV_READY
809 *
810 * only ddv, nm, newdv, vap, cred are required for sdev_mknode(SDEV_INIT)
811 *
812 * NOTE:  directory contents writers lock needs to be held before
813 *	  calling this routine.
814 */
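/*
 * Illustrative usage sketch (an addition, not part of the original
 * source), following the pattern used by sdev_filldir_dynamic() below;
 * "foo" is a hypothetical name:
 *
 *	struct sdev_node *dv = NULL;
 *	int error;
 *
 *	rw_enter(&ddv->sdev_contents, RW_WRITER);
 *	error = sdev_mknode(ddv, "foo", &dv, sdev_getdefault_attr(VDIR),
 *	    NULL, NULL, kcred, SDEV_READY);
 *	if (error == 0)
 *		SDEV_SIMPLE_RELE(dv);
 *	rw_exit(&ddv->sdev_contents);
 */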
815int
816sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
817    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
818    sdev_node_state_t state)
819{
820	int error = 0;
821	sdev_node_state_t node_state;
822	struct sdev_node *dv = NULL;
823
824	ASSERT(state != SDEV_ZOMBIE);
825	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
826
827	if (*newdv) {
828		dv = *newdv;
829	} else {
830		/* allocate and initialize a sdev_node */
831		if (ddv->sdev_state == SDEV_ZOMBIE) {
832			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
833			    ddv->sdev_path));
834			return (ENOENT);
835		}
836
837		error = sdev_nodeinit(ddv, nm, &dv, vap);
838		if (error != 0) {
839			sdcmn_err9(("sdev_mknode: error %d,"
840			    " name %s can not be initialized\n",
841			    error, nm));
842			return (ENOENT);
843		}
844		ASSERT(dv);
845
846		/* insert into the directory cache */
847		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
848		if (error) {
849			sdcmn_err9(("sdev_mknode: node %s can not"
850			    " be added into directory cache\n", nm));
851			return (ENOENT);
852		}
853	}
854
855	ASSERT(dv);
856	node_state = dv->sdev_state;
857	ASSERT(node_state != SDEV_ZOMBIE);
858
859	if (state == SDEV_READY) {
860		switch (node_state) {
861		case SDEV_INIT:
862			error = sdev_nodeready(dv, vap, avp, args, cred);
863			/*
864			 * masking the errors with ENOENT
865			 */
866			if (error) {
867				sdcmn_err9(("sdev_mknode: node %s can NOT"
868				    " be transitioned into READY state, "
869				    "error %d\n", nm, error));
870				error = ENOENT;
871			}
872			break;
873		case SDEV_READY:
874			/*
875			 * Do some sanity checking to make sure
876			 * the existing sdev_node is what has been
877			 * asked for.
878			 */
879			error = sdev_node_check(dv, vap, args);
880			break;
881		default:
882			break;
883		}
884	}
885
886	if (!error) {
887		*newdv = dv;
888		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
889	} else {
890		SDEV_SIMPLE_RELE(dv);
891		*newdv = NULL;
892	}
893
894	return (error);
895}
896
897/*
898 * convenient wrapper to change vp's ATIME, CTIME and MTIME
899 */
900void
901sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
902{
903	struct vattr attr;
904	timestruc_t now;
905	int err;
906
907	ASSERT(vp);
908	gethrestime(&now);
909	if (mask & AT_CTIME)
910		attr.va_ctime = now;
911	if (mask & AT_MTIME)
912		attr.va_mtime = now;
913	if (mask & AT_ATIME)
914		attr.va_atime = now;
915
916	attr.va_mask = (mask & AT_TIMES);
917	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
918	if (err && (err != EROFS)) {
919		sdcmn_err(("update timestamps error %d\n", err));
920	}
921}
922
923/*
924 * the backing store vnode is released here
925 */
926/*ARGSUSED1*/
927void
928sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
929{
930	/* no references */
931	ASSERT(dv->sdev_nlink == 0);
932
933	if (dv->sdev_attrvp != NULLVP) {
934		VN_RELE(dv->sdev_attrvp);
935		/*
936		 * reset the attrvp so that no more
937		 * references can be made on this already
938		 * vn_rele()'d vnode
939		 */
940		dv->sdev_attrvp = NULLVP;
941	}
942
943	if (dv->sdev_attr != NULL) {
944		kmem_free(dv->sdev_attr, sizeof (struct vattr));
945		dv->sdev_attr = NULL;
946	}
947
948	if (dv->sdev_name != NULL) {
949		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
950		dv->sdev_name = NULL;
951	}
952
953	if (dv->sdev_symlink != NULL) {
954		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
955		dv->sdev_symlink = NULL;
956	}
957
958	if (dv->sdev_path) {
959		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
960		dv->sdev_path = NULL;
961	}
962
963	if (!SDEV_IS_GLOBAL(dv))
964		sdev_prof_free(dv);
965
966	mutex_destroy(&dv->sdev_lookup_lock);
967	cv_destroy(&dv->sdev_lookup_cv);
968
969	/* return node to initial state as per constructor */
970	(void) memset((void *)&dv->sdev_instance_data, 0,
971	    sizeof (dv->sdev_instance_data));
972	vn_invalid(SDEVTOV(dv));
973	kmem_cache_free(sdev_node_cache, dv);
974}
975
976/*
977 * DIRECTORY CACHE lookup
978 */
979struct sdev_node *
980sdev_findbyname(struct sdev_node *ddv, char *nm)
981{
982	struct sdev_node *dv;
983	size_t	nmlen = strlen(nm);
984
985	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
986	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
987		if (dv->sdev_namelen != nmlen) {
988			continue;
989		}
990
991		/*
992		 * Can't lookup stale nodes
993		 */
994		if (dv->sdev_flags & SDEV_STALE) {
995			sdcmn_err9((
996			    "sdev_findbyname: skipped stale node: %s\n",
997			    dv->sdev_name));
998			continue;
999		}
1000
1001		if (strcmp(dv->sdev_name, nm) == 0) {
1002			SDEV_HOLD(dv);
1003			return (dv);
1004		}
1005	}
1006	return (NULL);
1007}
1008
1009/*
1010 * Inserts a new sdev_node in a parent directory
1011 */
1012void
1013sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1014{
1015	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1016	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1017	ASSERT(ddv->sdev_nlink >= 2);
1018	ASSERT(dv->sdev_nlink == 0);
1019
1020	dv->sdev_dotdot = ddv;
1021	dv->sdev_next = ddv->sdev_dot;
1022	ddv->sdev_dot = dv;
1023	ddv->sdev_nlink++;
1024}
1025
1026/*
1027 * The following check is needed because while sdev_nodes are linked
1028 * in SDEV_INIT state, they have their link counts incremented only
1029 * in SDEV_READY state.
1030 */
1031static void
1032decr_link(struct sdev_node *dv)
1033{
1034	if (dv->sdev_state != SDEV_INIT)
1035		dv->sdev_nlink--;
1036	else
1037		ASSERT(dv->sdev_nlink == 0);
1038}
1039
1040/*
1041 * Delete an existing dv from directory cache
1042 *
1043 * If the node is still held (non-zero reference count),
1044 *     the node is put into ZOMBIE state. Once the reference count
1045 *     reaches "0", the node is unlinked and destroyed
1046 *     in sdev_inactive().
1047 */
1048static int
1049sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1050{
1051	struct sdev_node *idv;
1052	struct sdev_node *prev = NULL;
1053	struct vnode *vp;
1054
1055	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1056
1057	vp = SDEVTOV(dv);
1058	mutex_enter(&vp->v_lock);
1059
1060	/* dv is held still */
1061	if (vp->v_count > 1) {
1062		rw_enter(&dv->sdev_contents, RW_WRITER);
1063		if (dv->sdev_state == SDEV_READY) {
1064			sdcmn_err9((
1065			    "sdev_delete: node %s busy with count %d\n",
1066			    dv->sdev_name, vp->v_count));
1067			dv->sdev_state = SDEV_ZOMBIE;
1068		}
1069		rw_exit(&dv->sdev_contents);
1070		--vp->v_count;
1071		mutex_exit(&vp->v_lock);
1072		return (EBUSY);
1073	}
1074	ASSERT(vp->v_count == 1);
1075
1076	/* unlink from the memory cache */
1077	ddv->sdev_nlink--;	/* .. to above */
1078	if (vp->v_type == VDIR) {
1079		decr_link(dv);		/* . to self */
1080	}
1081
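	/* walk the parent's child list to find dv and unlink it */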
1082	for (idv = ddv->sdev_dot; idv && idv != dv;
1083	    prev = idv, idv = idv->sdev_next)
1084		;
1085	ASSERT(idv == dv);	/* node to be deleted must exist */
1086	if (prev == NULL)
1087		ddv->sdev_dot = dv->sdev_next;
1088	else
1089		prev->sdev_next = dv->sdev_next;
1090	dv->sdev_next = NULL;
1091	decr_link(dv);	/* name, back to zero */
1092	vp->v_count--;
1093	mutex_exit(&vp->v_lock);
1094
1095	/* destroy the node */
1096	sdev_nodedestroy(dv, 0);
1097	return (0);
1098}
1099
1100/*
1101 * check if the source is an ancestor of (in the path of) the target
1102 *
1103 * source and target are known to be different
1104 */
1105/*ARGSUSED2*/
1106static int
1107sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1108{
1109	int error = 0;
1110	struct sdev_node *dotdot, *dir;
1111
1112	dotdot = tdv->sdev_dotdot;
1113	ASSERT(dotdot);
1114
1115	/* fs root */
1116	if (dotdot == tdv) {
1117		return (0);
1118	}
1119
1120	for (;;) {
1121		/*
1122		 * avoid error cases like
1123		 *	mv a a/b
1124		 *	mv a a/b/c
1125		 *	etc.
1126		 */
1127		if (dotdot == sdv) {
1128			error = EINVAL;
1129			break;
1130		}
1131
1132		dir = dotdot;
1133		dotdot = dir->sdev_dotdot;
1134
1135		/* done checking because root is reached */
1136		if (dir == dotdot) {
1137			break;
1138		}
1139	}
1140	return (error);
1141}
1142
1143int
1144sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1145    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1146    struct cred *cred)
1147{
1148	int error = 0;
1149	struct vnode *ovp = SDEVTOV(odv);
1150	struct vnode *nvp;
1151	struct vattr vattr;
1152	int doingdir = (ovp->v_type == VDIR);
1153	char *link = NULL;
1154	int samedir = (oddv == nddv) ? 1 : 0;
1155	int bkstore = 0;
1156	struct sdev_node *idv = NULL;
1157	struct sdev_node *ndv = NULL;
1158	timestruc_t now;
1159
1160	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1161	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1162	if (error)
1163		return (error);
1164
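	/* lock both directories; a same-directory rename needs only one */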
1165	if (!samedir)
1166		rw_enter(&oddv->sdev_contents, RW_WRITER);
1167	rw_enter(&nddv->sdev_contents, RW_WRITER);
1168
1169	/*
1170	 * the source may have been deleted by another thread before
1171	 * we get here.
1172	 */
1173	if (odv->sdev_state != SDEV_READY) {
1174		error = ENOENT;
1175		goto err_out;
1176	}
1177
1178	if (doingdir && (odv == nddv)) {
1179		error = EINVAL;
1180		goto err_out;
1181	}
1182
1183	/*
1184	 * If renaming a directory, and the parents are different (".." must be
1185	 * changed) then the source dir must not be in the dir hierarchy above
1186	 * the target since it would orphan everything below the source dir.
1187	 */
1188	if (doingdir && (oddv != nddv)) {
1189		error = sdev_checkpath(odv, nddv, cred);
1190		if (error)
1191			goto err_out;
1192	}
1193
1194	/* destination existing */
1195	if (*ndvp) {
1196		nvp = SDEVTOV(*ndvp);
1197		ASSERT(nvp);
1198
1199		/* handling renaming to itself */
1200		if (odv == *ndvp) {
1201			error = 0;
1202			goto err_out;
1203		}
1204
1205		if (nvp->v_type == VDIR) {
1206			if (!doingdir) {
1207				error = EISDIR;
1208				goto err_out;
1209			}
1210
1211			if (vn_vfswlock(nvp)) {
1212				error = EBUSY;
1213				goto err_out;
1214			}
1215
1216			if (vn_mountedvfs(nvp) != NULL) {
1217				vn_vfsunlock(nvp);
1218				error = EBUSY;
1219				goto err_out;
1220			}
1221
1222			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1223			if ((*ndvp)->sdev_nlink > 2) {
1224				vn_vfsunlock(nvp);
1225				error = EEXIST;
1226				goto err_out;
1227			}
1228			vn_vfsunlock(nvp);
1229
1230			(void) sdev_dirdelete(nddv, *ndvp);
1231			*ndvp = NULL;
1232			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1233			    nddv->sdev_attrvp, cred, NULL, 0);
1234			if (error)
1235				goto err_out;
1236		} else {
1237			if (doingdir) {
1238				error = ENOTDIR;
1239				goto err_out;
1240			}
1241
1242			if (SDEV_IS_PERSIST((*ndvp))) {
1243				bkstore = 1;
1244			}
1245
1246			/*
1247			 * remove the node from the directory cache;
1248			 * note, in case EBUSY is returned, the ZOMBIE
1249			 * node is taken care of in sdev_mknode.
1250			 */
1251			(void) sdev_dirdelete(nddv, *ndvp);
1252			*ndvp = NULL;
1253			if (bkstore) {
1254				error = VOP_REMOVE(nddv->sdev_attrvp,
1255				    nnm, cred, NULL, 0);
1256				if (error)
1257					goto err_out;
1258			}
1259		}
1260	}
1261
1262	/* fix the source for a symlink */
1263	if (vattr.va_type == VLNK) {
1264		if (odv->sdev_symlink == NULL) {
1265			error = sdev_follow_link(odv);
1266			if (error) {
1267				error = ENOENT;
1268				goto err_out;
1269			}
1270		}
1271		ASSERT(odv->sdev_symlink);
1272		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1273	}
1274
1275	/*
1276	 * make a fresh node from the source attrs
1277	 */
1278	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1279	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1280	    NULL, (void *)link, cred, SDEV_READY);
1281
1282	if (link)
1283		kmem_free(link, strlen(link) + 1);
1284
1285	if (error)
1286		goto err_out;
1287	ASSERT(*ndvp);
1288	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1289
1290	/* move dir contents */
1291	if (doingdir) {
1292		for (idv = odv->sdev_dot; idv; idv = idv->sdev_next) {
1293			error = sdev_rnmnode(odv, idv,
1294			    (struct sdev_node *)(*ndvp), &ndv,
1295			    idv->sdev_name, cred);
1296
1297			if (error)
1298				goto err_out;
1299			ndv = NULL;
1300		}
1301
1302	}
1303
1304	if ((*ndvp)->sdev_attrvp) {
1305		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1306		    AT_CTIME|AT_ATIME);
1307	} else {
1308		ASSERT((*ndvp)->sdev_attr);
1309		gethrestime(&now);
1310		(*ndvp)->sdev_attr->va_ctime = now;
1311		(*ndvp)->sdev_attr->va_atime = now;
1312	}
1313
1314	if (nddv->sdev_attrvp) {
1315		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1316		    AT_MTIME|AT_ATIME);
1317	} else {
1318		ASSERT(nddv->sdev_attr);
1319		gethrestime(&now);
1320		nddv->sdev_attr->va_mtime = now;
1321		nddv->sdev_attr->va_atime = now;
1322	}
1323	rw_exit(&nddv->sdev_contents);
1324	if (!samedir)
1325		rw_exit(&oddv->sdev_contents);
1326
1327	SDEV_RELE(*ndvp);
1328	return (error);
1329
1330err_out:
1331	rw_exit(&nddv->sdev_contents);
1332	if (!samedir)
1333		rw_exit(&oddv->sdev_contents);
1334	return (error);
1335}
1336
1337/*
1338 * Merge sdev_node specific information into an attribute structure.
1339 *
1340 * note: sdev_node is not locked here
1341 */
1342void
1343sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1344{
1345	struct vnode *vp = SDEVTOV(dv);
1346
1347	vap->va_nlink = dv->sdev_nlink;
1348	vap->va_nodeid = dv->sdev_ino;
1349	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1350	vap->va_type = vp->v_type;
1351
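	/* adjust va_rdev, va_fsid and the S_IFMT mode bits by vnode type */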
1352	if (vp->v_type == VDIR) {
1353		vap->va_rdev = 0;
1354		vap->va_fsid = vp->v_rdev;
1355	} else if (vp->v_type == VLNK) {
1356		vap->va_rdev = 0;
1357		vap->va_mode  &= ~S_IFMT;
1358		vap->va_mode |= S_IFLNK;
1359	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1360		vap->va_rdev = vp->v_rdev;
1361		vap->va_mode &= ~S_IFMT;
1362		if (vap->va_type == VCHR)
1363			vap->va_mode |= S_IFCHR;
1364		else
1365			vap->va_mode |= S_IFBLK;
1366	} else {
1367		vap->va_rdev = 0;
1368	}
1369}
1370
1371static struct vattr *
1372sdev_getdefault_attr(enum vtype type)
1373{
1374	if (type == VDIR)
1375		return (&sdev_vattr_dir);
1376	else if (type == VCHR)
1377		return (&sdev_vattr_chr);
1378	else if (type == VBLK)
1379		return (&sdev_vattr_blk);
1380	else if (type == VLNK)
1381		return (&sdev_vattr_lnk);
1382	else
1383		return (NULL);
1384}
1385int
1386sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1387{
1388	int rv = 0;
1389	struct vnode *vp = SDEVTOV(dv);
1390
1391	switch (vp->v_type) {
1392	case VCHR:
1393	case VBLK:
1394		/*
1395		 * If vnode is a device, return special vnode instead
1396		 * (though it knows all about -us- via sp->s_realvp)
1397		 */
1398		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1399		VN_RELE(vp);
1400		if (*vpp == NULLVP)
1401			rv = ENOSYS;
1402		break;
1403	default:	/* most types are returned as is */
1404		*vpp = vp;
1405		break;
1406	}
1407	return (rv);
1408}
1409
1410/*
1411 * loopback into sdev_lookup()
1412 */
1413static struct vnode *
1414devname_find_by_devpath(char *devpath, struct vattr *vattr)
1415{
1416	int error = 0;
1417	struct vnode *vp;
1418
1419	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp);
1420	if (error) {
1421		return (NULL);
1422	}
1423
1424	if (vattr)
1425		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1426	return (vp);
1427}
1428
1429/*
1430 * the junction between devname and devfs
1431 */
1432static struct vnode *
1433devname_configure_by_path(char *physpath, struct vattr *vattr)
1434{
1435	int error = 0;
1436	struct vnode *vp;
1437
1438	ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1)
1439	    == 0);
1440
1441	error = devfs_lookupname(physpath + sizeof ("/devices/") - 1,
1442	    NULLVPP, &vp);
1443	if (error != 0) {
1444		if (error == ENODEV) {
1445			cmn_err(CE_CONT, "%s: not found (line %d)\n",
1446			    physpath, __LINE__);
1447		}
1448
1449		return (NULL);
1450	}
1451
1452	if (vattr)
1453		(void) VOP_GETATTR(vp, vattr, 0, kcred, NULL);
1454	return (vp);
1455}
1456
1457/*
1458 * junction between devname and root file system, e.g. ufs
1459 */
1460int
1461devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1462{
1463	struct vnode *rdvp = ddv->sdev_attrvp;
1464	int rval = 0;
1465
1466	ASSERT(rdvp);
1467
1468	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1469	    NULL);
1470	return (rval);
1471}
1472
1473static int
1474sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1475{
1476	struct sdev_node *dv = NULL;
1477	char	*nm;
1478	struct vnode *dirvp;
1479	int	error;
1480	vnode_t	*vp;
1481	int eof;
1482	struct iovec iov;
1483	struct uio uio;
1484	struct dirent64 *dp;
1485	dirent64_t *dbuf;
1486	size_t dbuflen;
1487	struct vattr vattr;
1488	char *link = NULL;
1489
1490	if (ddv->sdev_attrvp == NULL)
1491		return (0);
1492	if (!(ddv->sdev_flags & SDEV_BUILD))
1493		return (0);
1494
1495	dirvp = ddv->sdev_attrvp;
1496	VN_HOLD(dirvp);
1497	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1498
1499	uio.uio_iov = &iov;
1500	uio.uio_iovcnt = 1;
1501	uio.uio_segflg = UIO_SYSSPACE;
1502	uio.uio_fmode = 0;
1503	uio.uio_extflg = UIO_COPY_CACHED;
1504	uio.uio_loffset = 0;
1505	uio.uio_llimit = MAXOFFSET_T;
1506
1507	eof = 0;
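	/*
	 * Read the backing directory in dlen-sized chunks with VOP_READDIR
	 * and create an sdev_node for each entry not already cached.
	 */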
1508	error = 0;
1509	while (!error && !eof) {
1510		uio.uio_resid = dlen;
1511		iov.iov_base = (char *)dbuf;
1512		iov.iov_len = dlen;
1513		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1514		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1515		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1516
1517		dbuflen = dlen - uio.uio_resid;
1518		if (error || dbuflen == 0)
1519			break;
1520
1521		if (!(ddv->sdev_flags & SDEV_BUILD)) {
1522			error = 0;
1523			break;
1524		}
1525
1526		for (dp = dbuf; ((intptr_t)dp <
1527		    (intptr_t)dbuf + dbuflen);
1528		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1529			nm = dp->d_name;
1530
1531			if (strcmp(nm, ".") == 0 ||
1532			    strcmp(nm, "..") == 0)
1533				continue;
1534
1535			vp = NULLVP;
1536			dv = sdev_cache_lookup(ddv, nm);
1537			if (dv) {
1538				if (dv->sdev_state != SDEV_ZOMBIE) {
1539					SDEV_SIMPLE_RELE(dv);
1540				} else {
1541					/*
1542					 * A ZOMBIE node may not have been
1543					 * cleaned up from the backing store;
1544					 * bypass this entry in this case,
1545					 * and clean it up from the directory
1546					 * cache if this is the last call.
1547					 */
1548					(void) sdev_dirdelete(ddv, dv);
1549				}
1550				continue;
1551			}
1552
1553			/* refill the cache if not already */
1554			error = devname_backstore_lookup(ddv, nm, &vp);
1555			if (error)
1556				continue;
1557
1558			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
1559			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1560			if (error)
1561				continue;
1562
1563			if (vattr.va_type == VLNK) {
1564				error = sdev_getlink(vp, &link);
1565				if (error) {
1566					continue;
1567				}
1568				ASSERT(link != NULL);
1569			}
1570
1571			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1572				rw_exit(&ddv->sdev_contents);
1573				rw_enter(&ddv->sdev_contents, RW_WRITER);
1574			}
1575			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1576			    cred, SDEV_READY);
1577			rw_downgrade(&ddv->sdev_contents);
1578
1579			if (link != NULL) {
1580				kmem_free(link, strlen(link) + 1);
1581				link = NULL;
1582			}
1583
1584			if (!error) {
1585				ASSERT(dv);
1586				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1587				SDEV_SIMPLE_RELE(dv);
1588			}
1589			vp = NULL;
1590			dv = NULL;
1591		}
1592	}
1593
1594done:
1595	VN_RELE(dirvp);
1596	kmem_free(dbuf, dlen);
1597
1598	return (error);
1599}
1600
1601void
1602sdev_filldir_dynamic(struct sdev_node *ddv)
1603{
1604	int error;
1605	int i;
1606	struct vattr *vap;
1607	char *nm = NULL;
1608	struct sdev_node *dv = NULL;
1609
1610	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1611	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1612
1613	vap = sdev_getdefault_attr(VDIR);
1614	for (i = 0; vtab[i].vt_name != NULL; i++) {
1615		nm = vtab[i].vt_name;
1616		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1617		dv = NULL;
1618		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1619		    NULL, kcred, SDEV_READY);
1620		if (error) {
1621			cmn_err(CE_WARN, "%s/%s: error %d\n",
1622			    ddv->sdev_name, nm, error);
1623		} else {
1624			ASSERT(dv);
1625			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1626			SDEV_SIMPLE_RELE(dv);
1627		}
1628	}
1629}
1630
1631/*
1632 * Creating a backing store entry based on sdev_attr.
1633 * This is called either as part of node creation in a persistent directory
1634 * or from setattr/setsecattr to persist access attributes across reboot.
1635 */
1636int
1637sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1638{
1639	int error = 0;
1640	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1641	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1642	struct vattr *vap = dv->sdev_attr;
1643	char *nm = dv->sdev_name;
1644	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1645
1646	ASSERT(dv && dv->sdev_name && rdvp);
1647	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1648
1649lookup:
1650	/* try to find it in the backing store */
1651	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1652	    NULL);
1653	if (error == 0) {
1654		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1655			VN_HOLD(rrvp);
1656			VN_RELE(*rvp);
1657			*rvp = rrvp;
1658		}
1659
1660		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1661		dv->sdev_attr = NULL;
1662		dv->sdev_attrvp = *rvp;
1663		return (0);
1664	}
1665
1666	/* let's try to persist the node */
1667	gethrestime(&vap->va_atime);
1668	vap->va_mtime = vap->va_atime;
1669	vap->va_ctime = vap->va_atime;
1670	vap->va_mask |= AT_TYPE|AT_MODE;
1671	switch (vap->va_type) {
1672	case VDIR:
1673		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1674		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1675		    (void *)(*rvp), error));
1676		break;
1677	case VCHR:
1678	case VBLK:
1679	case VREG:
1680	case VDOOR:
1681		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1682		    rvp, cred, 0, NULL, NULL);
1683		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1684		    (void *)(*rvp), error));
1685		if (!error)
1686			VN_RELE(*rvp);
1687		break;
1688	case VLNK:
1689		ASSERT(dv->sdev_symlink);
1690		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1691		    NULL, 0);
1692		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1693		    error));
1694		break;
1695	default:
1696		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1697		    "create\n", nm);
1698		/*NOTREACHED*/
1699	}
1700
1701	/* go back to lookup to factor out spec node and set attrvp */
1702	if (error == 0)
1703		goto lookup;
1704
1705	return (error);
1706}
1707
1708static int
1709sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1710{
1711	int error = 0;
1712	struct sdev_node *dup = NULL;
1713
1714	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1715	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1716		sdev_direnter(ddv, *dv);
1717	} else {
1718		if (dup->sdev_state == SDEV_ZOMBIE) {
1719			error = sdev_dirdelete(ddv, dup);
1720			/*
1721			 * The ZOMBIE node is still hanging
1722			 * around with more than one reference.
1723			 * Fail the new node creation so that
1724			 * the directory cache won't have
1725			 * duplicate entries for the same named node
1726			 */
1727			if (error == EBUSY) {
1728				SDEV_SIMPLE_RELE(*dv);
1729				sdev_nodedestroy(*dv, 0);
1730				*dv = NULL;
1731				return (error);
1732			}
1733			sdev_direnter(ddv, *dv);
1734		} else {
1735			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1736			SDEV_SIMPLE_RELE(*dv);
1737			sdev_nodedestroy(*dv, 0);
1738			*dv = dup;
1739		}
1740	}
1741
1742	return (0);
1743}
1744
1745static int
1746sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1747{
1748	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1749	return (sdev_dirdelete(ddv, *dv));
1750}
1751
1752/*
1753 * update the in-core directory cache
1754 */
1755int
1756sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1757    sdev_cache_ops_t ops)
1758{
1759	int error = 0;
1760
1761	ASSERT((SDEV_HELD(*dv)));
1762
1763	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1764	switch (ops) {
1765	case SDEV_CACHE_ADD:
1766		error = sdev_cache_add(ddv, dv, nm);
1767		break;
1768	case SDEV_CACHE_DELETE:
1769		error = sdev_cache_delete(ddv, dv);
1770		break;
1771	default:
1772		break;
1773	}
1774
1775	return (error);
1776}
1777
1778/*
1779 * retrieve the named entry from the directory cache
1780 */
1781struct sdev_node *
1782sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1783{
1784	struct sdev_node *dv = NULL;
1785
1786	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1787	dv = sdev_findbyname(ddv, nm);
1788
1789	return (dv);
1790}
1791
1792/*
1793 * Implicit reconfig for nodes constructed by a link generator
1794 * Start devfsadm if needed, or if devfsadm is in progress,
1795 * prepare to block on devfsadm either completing or
1796 * constructing the desired node.  As devfsadmd is global
1797 * in scope, constructing all necessary nodes, we only
1798 * need to initiate it once.
1799 */
1800static int
1801sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1802{
1803	int error = 0;
1804
1805	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1806		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1807		    ddv->sdev_name, nm, devfsadm_state));
1808		mutex_enter(&dv->sdev_lookup_lock);
1809		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1810		mutex_exit(&dv->sdev_lookup_lock);
1811		error = 0;
1812	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1813		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1814		    ddv->sdev_name, nm, devfsadm_state));
1815
1816		sdev_devfsadmd_thread(ddv, dv, kcred);
1817		mutex_enter(&dv->sdev_lookup_lock);
1818		SDEV_BLOCK_OTHERS(dv,
1819		    (SDEV_LOOKUP | SDEV_LGWAITING));
1820		mutex_exit(&dv->sdev_lookup_lock);
1821		error = 0;
1822	} else {
1823		error = -1;
1824	}
1825
1826	return (error);
1827}
1828
1829static int
1830sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1831    int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred)
1832{
1833	struct vnode *rvp = NULL;
1834	int error = 0;
1835	struct vattr *vap;
1836	devname_spec_t spec;
1837	devname_handle_t *hdl;
1838	void *args = NULL;
1839	struct sdev_node *dv = *dvp;
1840
1841	ASSERT(dv && ddv);
1842	hdl = &(dv->sdev_handle);
1843	ASSERT(hdl->dh_data == dv);
1844	mutex_enter(&dv->sdev_lookup_lock);
1845	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1846	mutex_exit(&dv->sdev_lookup_lock);
1847	error = (*fn)(nm, hdl, cred);
1848	if (error) {
1849		return (error);
1850	}
1851
1852	spec = hdl->dh_spec;
1853	args = hdl->dh_args;
1854	ASSERT(args);
1855
1856	switch (spec) {
1857	case DEVNAME_NS_PATH:
1858		/*
1859		 * symlink of:
1860		 *	/dev/dir/nm -> /devices/...
1861		 */
1862		rvp = devname_configure_by_path((char *)args, NULL);
1863		break;
1864	case DEVNAME_NS_DEV:
1865		/*
1866		 * symlink of:
1867		 *	/dev/dir/nm -> /dev/...
1868		 */
1869		rvp = devname_find_by_devpath((char *)args, NULL);
1870		break;
1871	default:
1872		if (args)
1873			kmem_free((char *)args, strlen(args) + 1);
1874		return (ENOENT);
1875
1876	}
1877
1878	if (rvp == NULL) {
1879		if (args)
1880			kmem_free((char *)args, strlen(args) + 1);
1881		return (ENOENT);
1882	} else {
1883		vap = sdev_getdefault_attr(VLNK);
1884		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1885		/*
1886		 * Could sdev_mknode return a different dv_node
1887		 * once the lock is dropped?
1888		 */
1889		if (!rw_tryupgrade(&ddv->sdev_contents)) {
1890			rw_exit(&ddv->sdev_contents);
1891			rw_enter(&ddv->sdev_contents, RW_WRITER);
1892		}
1893		error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred,
1894		    SDEV_READY);
1895		rw_downgrade(&ddv->sdev_contents);
1896		if (error) {
1897			if (args)
1898				kmem_free((char *)args, strlen(args) + 1);
1899			return (error);
1900		} else {
1901			mutex_enter(&dv->sdev_lookup_lock);
1902			SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1903			mutex_exit(&dv->sdev_lookup_lock);
1904			error = 0;
1905		}
1906	}
1907
1908	if (args)
1909		kmem_free((char *)args, strlen(args) + 1);
1910
1911	*dvp = dv;
1912	return (0);
1913}
1914
1915/*
1916 *  Support for specialized device naming construction mechanisms
1917 */
1918static int
1919sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1920    int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1921    void *, char *), int flags, struct cred *cred)
1922{
1923	int rv = 0;
1924	char *physpath = NULL;
1925	struct vnode *rvp = NULL;
1926	struct vattr vattr;
1927	struct vattr *vap;
1928	struct sdev_node *dv = *dvp;
1929
1930	mutex_enter(&dv->sdev_lookup_lock);
1931	SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP);
1932	mutex_exit(&dv->sdev_lookup_lock);
1933
1934	/* for non-devfsadm devices */
1935	if (flags & SDEV_PATH) {
1936		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1937		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1938		    NULL);
1939		if (rv) {
1940			kmem_free(physpath, MAXPATHLEN);
1941			return (-1);
1942		}
1943
1944		ASSERT(physpath);
1945		rvp = devname_configure_by_path(physpath, NULL);
1946		if (rvp == NULL) {
1947			sdcmn_err3(("devname_configure_by_path: "
1948			    "failed for /dev/%s/%s\n",
1949			    ddv->sdev_name, nm));
1950			kmem_free(physpath, MAXPATHLEN);
1951			rv = -1;
1952		} else {
1953			vap = sdev_getdefault_attr(VLNK);
1954			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1955
1956			/*
1957			 * sdev_mknode may return a different sdev_node
1958			 * that was created by another thread that
1959			 * raced to the directory cache before this thread.
1960			 *
1961			 * With the current directory cache mechanism
1962			 * (a linked list with the sdev_node name as
1963			 * the entity key), this is a way to make sure
1964			 * only one entry exists for the same name
1965			 * in the same directory. The outcome is that
1966			 * the winner of the race is kept.
1967			 */
1968			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1969				rw_exit(&ddv->sdev_contents);
1970				rw_enter(&ddv->sdev_contents, RW_WRITER);
1971			}
1972			rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1973			    (void *)physpath, cred, SDEV_READY);
1974			rw_downgrade(&ddv->sdev_contents);
1975			kmem_free(physpath, MAXPATHLEN);
1976			if (rv) {
1977				return (rv);
1978			} else {
1979				mutex_enter(&dv->sdev_lookup_lock);
1980				SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
1981				mutex_exit(&dv->sdev_lookup_lock);
1982				return (0);
1983			}
1984		}
1985	} else if (flags & SDEV_VNODE) {
1986		/*
1987		 * DBNR has its own way to create the device
1988		 * and return a backing store vnode in rvp
1989		 */
1990		ASSERT(callback);
1991		rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL);
1992		if (rv || (rvp == NULL)) {
1993			sdcmn_err3(("devname_lookup_func: SDEV_VNODE "
1994			    "callback failed \n"));
1995			return (-1);
1996		}
1997		vap = sdev_getdefault_attr(rvp->v_type);
1998		if (vap == NULL)
1999			return (-1);
2000
2001		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2002		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2003			rw_exit(&ddv->sdev_contents);
2004			rw_enter(&ddv->sdev_contents, RW_WRITER);
2005		}
2006		rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL,
2007		    cred, SDEV_READY);
2008		rw_downgrade(&ddv->sdev_contents);
2009		if (rv)
2010			return (rv);
2011
2012		mutex_enter(&dv->sdev_lookup_lock);
2013		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2014		mutex_exit(&dv->sdev_lookup_lock);
2015		return (0);
2016	} else if (flags & SDEV_VATTR) {
2017		/*
2018		 * /dev/pts
2019		 *
2020		 * DBNR has its own way to create the device;
2021		 * "0" is returned upon success.
2022		 *
2023		 * the callback is responsible for setting the basic
2024		 * attributes, e.g. va_type/va_uid/va_gid, and
2025		 * dev_t if VCHR or VBLK
2026		 */
2027		ASSERT(callback);
2028		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
2029		if (rv) {
2030			sdcmn_err3(("devname_lookup_func: SDEV_VATTR "
2031			    "callback failed \n"));
2032			return (-1);
2033		}
2034
2035		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2036		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2037			rw_exit(&ddv->sdev_contents);
2038			rw_enter(&ddv->sdev_contents, RW_WRITER);
2039		}
2040		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
2041		    cred, SDEV_READY);
2042		rw_downgrade(&ddv->sdev_contents);
2043
2044		if (rv)
2045			return (rv);
2046
2047		mutex_enter(&dv->sdev_lookup_lock);
2048		SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2049		mutex_exit(&dv->sdev_lookup_lock);
2050		return (0);
2051	} else {
2052		impossible(("lookup: %s/%s by %s not supported (%d)\n",
2053		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
2054		    __LINE__));
2055		rv = -1;
2056	}
2057
2058	*dvp = dv;
2059	return (rv);
2060}
2061
2062static int
2063is_devfsadm_thread(char *exec_name)
2064{
2065	/*
2066	 * note: because devfsadmd is a link to /usr/sbin/devfsadm,
2067	 * it is safe to use "devfsadm" to capture the lookups
2068	 * from devfsadm and its daemon version.
2069	 */
2070	if (strcmp(exec_name, "devfsadm") == 0)
2071		return (1);
2072	return (0);
2073}
2074
2075
2076/*
2077 * Lookup Order:
2078 *	sdev_node cache;
2079 *	backing store (SDEV_PERSIST);
2080 *	DBNR: a. dir_ops implemented in the loadable modules;
2081 *	      b. vnode ops in vtab.
2082 */
2083int
2084devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
2085    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
2086    struct cred *, void *, char *), int flags)
2087{
2088	int rv = 0, nmlen;
2089	struct vnode *rvp = NULL;
2090	struct sdev_node *dv = NULL;
2091	int	retried = 0;
2092	int	error = 0;
2093	struct devname_nsmap *map = NULL;
2094	struct devname_ops *dirops = NULL;
2095	int (*fn)(char *, devname_handle_t *, struct cred *) = NULL;
2096	struct vattr vattr;
2097	char *lookup_thread = curproc->p_user.u_comm;
2098	int failed_flags = 0;
2099	int (*vtor)(struct sdev_node *) = NULL;
2100	int state;
2101	int parent_state;
2102	char *link = NULL;
2103
2104	if (SDEVTOV(ddv)->v_type != VDIR)
2105		return (ENOTDIR);
2106
2107	/*
2108	 * Empty name or ., return node itself.
2109	 */
2110	nmlen = strlen(nm);
2111	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
2112		*vpp = SDEVTOV(ddv);
2113		VN_HOLD(*vpp);
2114		return (0);
2115	}
2116
2117	/*
2118	 * .., return the parent directory
2119	 */
2120	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
2121		*vpp = SDEVTOV(ddv->sdev_dotdot);
2122		VN_HOLD(*vpp);
2123		return (0);
2124	}
2125
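	/*
	 * Hold the directory contents as reader across the cache and
	 * backing store lookups; upgrade to writer only when a node
	 * must be created.
	 */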
2126	rw_enter(&ddv->sdev_contents, RW_READER);
2127	if (ddv->sdev_flags & SDEV_VTOR) {
2128		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2129		ASSERT(vtor);
2130	}
2131
2132tryagain:
2133	/*
2134	 * (a) directory cache lookup:
2135	 */
2136	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2137	parent_state = ddv->sdev_state;
2138	dv = sdev_cache_lookup(ddv, nm);
2139	if (dv) {
2140		state = dv->sdev_state;
2141		switch (state) {
2142		case SDEV_INIT:
2143			if (is_devfsadm_thread(lookup_thread))
2144				break;
2145
2146			/* ZOMBIED parent won't allow node creation */
2147			if (parent_state == SDEV_ZOMBIE) {
2148				SD_TRACE_FAILED_LOOKUP(ddv, nm,
2149				    retried);
2150				goto nolock_notfound;
2151			}
2152
2153			mutex_enter(&dv->sdev_lookup_lock);
2154			/* compensate for threads started after devfsadm */
2155			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2156			    !(SDEV_IS_LOOKUP(dv)))
2157				SDEV_BLOCK_OTHERS(dv,
2158				    (SDEV_LOOKUP | SDEV_LGWAITING));
2159
2160			if (SDEV_IS_LOOKUP(dv)) {
2161				failed_flags |= SLF_REBUILT;
2162				rw_exit(&ddv->sdev_contents);
2163				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
2164				mutex_exit(&dv->sdev_lookup_lock);
2165				rw_enter(&ddv->sdev_contents, RW_READER);
2166
2167				if (error != 0) {
2168					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2169					    retried);
2170					goto nolock_notfound;
2171				}
2172
2173				state = dv->sdev_state;
2174				if (state == SDEV_INIT) {
2175					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2176					    retried);
2177					goto nolock_notfound;
2178				} else if (state == SDEV_READY) {
2179					goto found;
2180				} else if (state == SDEV_ZOMBIE) {
2181					rw_exit(&ddv->sdev_contents);
2182					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2183					    retried);
2184					SDEV_RELE(dv);
2185					goto lookup_failed;
2186				}
2187			} else {
2188				mutex_exit(&dv->sdev_lookup_lock);
2189			}
2190			break;
2191		case SDEV_READY:
2192			goto found;
2193		case SDEV_ZOMBIE:
2194			rw_exit(&ddv->sdev_contents);
2195			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2196			SDEV_RELE(dv);
2197			goto lookup_failed;
2198		default:
2199			rw_exit(&ddv->sdev_contents);
2200			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2201			sdev_lookup_failed(ddv, nm, failed_flags);
2202			*vpp = NULLVP;
2203			return (ENOENT);
2204		}
2205	}
2206	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2207
2208	/*
2209	 * ZOMBIED parent does not allow new node creation.
2210	 * bail out early
2211	 */
2212	if (parent_state == SDEV_ZOMBIE) {
2213		rw_exit(&ddv->sdev_contents);
2214		*vpp = NULL;
2215		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2216		return (ENOENT);
2217	}
2218
2219	/*
2220	 * (b0): backing store lookup
2221	 *	SDEV_PERSIST is the default except for:
2222	 *		1) pts nodes
2223	 *		2) non-chmod'ed local nodes
2224	 */
2225	if (SDEV_IS_PERSIST(ddv)) {
2226		error = devname_backstore_lookup(ddv, nm, &rvp);
2227
2228		if (!error) {
2229			sdcmn_err3(("devname_backstore_lookup: "
2230			    "found attrvp %p for %s\n", (void *)rvp, nm));
2231
2232			vattr.va_mask = AT_MODE|AT_UID|AT_GID;
2233			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2234			if (error) {
2235				rw_exit(&ddv->sdev_contents);
2236				if (dv)
2237					SDEV_RELE(dv);
2238				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2239				sdev_lookup_failed(ddv, nm, failed_flags);
2240				*vpp = NULLVP;
2241				return (ENOENT);
2242			}
2243
2244			if (vattr.va_type == VLNK) {
2245				error = sdev_getlink(rvp, &link);
2246				if (error) {
2247					rw_exit(&ddv->sdev_contents);
2248					if (dv)
2249						SDEV_RELE(dv);
2250					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2251					    retried);
2252					sdev_lookup_failed(ddv, nm,
2253					    failed_flags);
2254					*vpp = NULLVP;
2255					return (ENOENT);
2256				}
2257				ASSERT(link != NULL);
2258			}
2259
2260			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2261				rw_exit(&ddv->sdev_contents);
2262				rw_enter(&ddv->sdev_contents, RW_WRITER);
2263			}
2264			error = sdev_mknode(ddv, nm, &dv, &vattr,
2265			    rvp, link, cred, SDEV_READY);
2266			rw_downgrade(&ddv->sdev_contents);
2267
2268			if (link != NULL) {
2269				kmem_free(link, strlen(link) + 1);
2270				link = NULL;
2271			}
2272
2273			if (error) {
2274				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2275				rw_exit(&ddv->sdev_contents);
2276				if (dv)
2277					SDEV_RELE(dv);
2278				goto lookup_failed;
2279			} else {
2280				goto found;
2281			}
2282		} else if (retried) {
2283			rw_exit(&ddv->sdev_contents);
2284			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2285			    ddv->sdev_name, nm));
2286			if (dv)
2287				SDEV_RELE(dv);
2288			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2289			sdev_lookup_failed(ddv, nm, failed_flags);
2290			*vpp = NULLVP;
2291			return (ENOENT);
2292		}
2293	}
2294
2295
2296	/* first thread that is doing the lookup on this node */
2297	if (!dv) {
2298		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2299			rw_exit(&ddv->sdev_contents);
2300			rw_enter(&ddv->sdev_contents, RW_WRITER);
2301		}
2302		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2303		    cred, SDEV_INIT);
2304		if (!dv) {
2305			rw_exit(&ddv->sdev_contents);
2306			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2307			sdev_lookup_failed(ddv, nm, failed_flags);
2308			*vpp = NULLVP;
2309			return (ENOENT);
2310		}
2311		rw_downgrade(&ddv->sdev_contents);
2312	}
2313	ASSERT(dv);
2314	ASSERT(SDEV_HELD(dv));
2315
2316	if (SDEV_IS_NO_NCACHE(dv)) {
2317		failed_flags |= SLF_NO_NCACHE;
2318	}
2319
2320	if (SDEV_IS_GLOBAL(ddv)) {
2321		map = sdev_get_map(ddv, 1);
2322		dirops = map ? map->dir_ops : NULL;
2323		fn = dirops ? dirops->devnops_lookup : NULL;
2324	}
2325
2326	/*
2327	 * (b1) invoking devfsadm once per lifetime for devfsadm nodes
2328	 */
2329	if ((fn == NULL) && !callback) {
2330
2331		if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2332		    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2333		    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2334			ASSERT(SDEV_HELD(dv));
2335			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2336			goto nolock_notfound;
2337		}
2338
2339		/*
2340		 * Filter out known non-existent devices recorded
2341		 * during the initial reconfiguration boot; for these,
2342		 * reconfig should not be done and the lookup can
2343		 * be short-circuited now.
2344		 */
2345		if (sdev_lookup_filter(ddv, nm)) {
2346			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2347			goto nolock_notfound;
2348		}
2349
2350		/* bypassing devfsadm internal nodes */
2351		if (is_devfsadm_thread(lookup_thread)) {
2352			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2353			goto nolock_notfound;
2354		}
2355
2356		if (sdev_reconfig_disable) {
2357			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2358			goto nolock_notfound;
2359		}
2360
2361		error = sdev_call_devfsadmd(ddv, dv, nm);
2362		if (error == 0) {
2363			sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2364			    ddv->sdev_name, nm, curproc->p_user.u_comm));
2365			if (sdev_reconfig_verbose) {
2366				cmn_err(CE_CONT,
2367				    "?lookup of %s/%s by %s: reconfig\n",
2368				    ddv->sdev_name, nm, curproc->p_user.u_comm);
2369			}
2370			retried = 1;
2371			failed_flags |= SLF_REBUILT;
2372			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2373			SDEV_SIMPLE_RELE(dv);
2374			goto tryagain;
2375		} else {
2376			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2377			goto nolock_notfound;
2378		}
2379	}
2380
2381	/*
2382	 * (b2) Directory Based Name Resolution (DBNR):
2383	 *	ddv	- parent
2384	 *	nm	- /dev/(ddv->sdev_name)/nm
2385	 *
2386	 *	note: module vnode ops take precedence over the built-in ones
2387	 */
2388	if (fn) {
2389		error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred);
2390		if (error) {
2391			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2392			goto notfound;
2393		} else {
2394			goto found;
2395		}
2396	} else if (callback) {
2397		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2398		    flags, cred);
2399		if (error == 0) {
2400			goto found;
2401		} else {
2402			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2403			goto notfound;
2404		}
2405	}
2406	ASSERT(rvp);
2407
2408found:
2409	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2410	ASSERT(dv->sdev_state == SDEV_READY);
2411	if (vtor) {
2412		/*
2413		 * Check validity of returned node
2414		 */
2415		switch (vtor(dv)) {
2416		case SDEV_VTOR_VALID:
2417			break;
2418		case SDEV_VTOR_INVALID:
2419			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2420			sdcmn_err7(("lookup: destroy invalid "
2421			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2422			goto nolock_notfound;
2423		case SDEV_VTOR_SKIP:
2424			sdcmn_err7(("lookup: node not applicable - "
2425			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2426			rw_exit(&ddv->sdev_contents);
2427			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2428			SDEV_RELE(dv);
2429			goto lookup_failed;
2430		default:
2431			cmn_err(CE_PANIC,
2432			    "dev fs: validator failed: %s(%p)\n",
2433			    dv->sdev_name, (void *)dv);
2434			break;
2435			/*NOTREACHED*/
2436		}
2437	}
2438
2439	if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) {
2440		rw_enter(&dv->sdev_contents, RW_READER);
2441		(void) sdev_get_map(dv, 1);
2442		rw_exit(&dv->sdev_contents);
2443	}
2444	rw_exit(&ddv->sdev_contents);
2445	rv = sdev_to_vp(dv, vpp);
2446	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2447	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2448	    dv->sdev_state, nm, rv));
2449	return (rv);
2450
2451notfound:
2452	mutex_enter(&dv->sdev_lookup_lock);
2453	SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP);
2454	mutex_exit(&dv->sdev_lookup_lock);
2455nolock_notfound:
2456	/*
2457	 * Destroy the node that was created for synchronization purposes.
2458	 */
2459	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2460	    nm, dv->sdev_state));
2461	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2462	if (dv->sdev_state == SDEV_INIT) {
2463		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2464			rw_exit(&ddv->sdev_contents);
2465			rw_enter(&ddv->sdev_contents, RW_WRITER);
2466		}
2467
2468		/*
2469		 * Node state may have changed during the lock
2470		 * changes. Re-check.
2471		 */
2472		if (dv->sdev_state == SDEV_INIT) {
2473			(void) sdev_dirdelete(ddv, dv);
2474			rw_exit(&ddv->sdev_contents);
2475			sdev_lookup_failed(ddv, nm, failed_flags);
2476			*vpp = NULL;
2477			return (ENOENT);
2478		}
2479	}
2480
2481	rw_exit(&ddv->sdev_contents);
2482	SDEV_RELE(dv);
2483
2484lookup_failed:
2485	sdev_lookup_failed(ddv, nm, failed_flags);
2486	*vpp = NULL;
2487	return (ENOENT);
2488}
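/*
 * Usage sketch (illustrative; the callback name is hypothetical): a
 * directory-specific VOP_LOOKUP implementation typically wraps
 * devname_lookup_func(), passing its own DBNR callback and flag.
 *
 *	struct vnode *vp = NULL;
 *	int error;
 *
 *	error = devname_lookup_func(ddv, nm, &vp, cred,
 *	    example_pts_create_cb, SDEV_VATTR);
 *	if (error == 0) {
 *		(use vp, then VN_RELE(vp) when done)
 *	}
 */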
2489
2490/*
2491 * Given a directory node, mark all nodes beneath as
2492 * STALE, i.e. nodes that don't exist as far as new
2493 * consumers are concerned
2494 */
2495void
2496sdev_stale(struct sdev_node *ddv)
2497{
2498	struct sdev_node *dv;
2499	struct vnode *vp;
2500
2501	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2502
2503	rw_enter(&ddv->sdev_contents, RW_WRITER);
2504	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) {
2505		vp = SDEVTOV(dv);
2506		if (vp->v_type == VDIR)
2507			sdev_stale(dv);
2508
2509		sdcmn_err9(("sdev_stale: setting stale %s\n",
2510		    dv->sdev_name));
2511		dv->sdev_flags |= SDEV_STALE;
2512	}
2513	ddv->sdev_flags |= SDEV_BUILD;
2514	rw_exit(&ddv->sdev_contents);
2515}
2516
2517/*
2518 * Given a directory node, clean out all the nodes beneath.
2519 * If expr is specified, clean only nodes with names matching expr.
2520 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2521 *	so they are excluded from future lookups.
2522 */
2523int
2524sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2525{
2526	int error = 0;
2527	int busy = 0;
2528	struct vnode *vp;
2529	struct sdev_node *dv, *next = NULL;
2530	int bkstore = 0;
2531	int len = 0;
2532	char *bks_name = NULL;
2533
2534	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2535
2536	/*
2537	 * We try our best to destroy all unused sdev_nodes.
2538	 */
2539	rw_enter(&ddv->sdev_contents, RW_WRITER);
2540	for (dv = ddv->sdev_dot; dv; dv = next) {
2541		next = dv->sdev_next;
2542		vp = SDEVTOV(dv);
2543
2544		if (expr && gmatch(dv->sdev_name, expr) == 0)
2545			continue;
2546
2547		if (vp->v_type == VDIR &&
2548		    sdev_cleandir(dv, NULL, flags) != 0) {
2549			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2550			    dv->sdev_name));
2551			busy++;
2552			continue;
2553		}
2554
2555		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2556			sdcmn_err9(("sdev_cleandir: node %s busy\n",
2557			    dv->sdev_name));
2558			busy++;
2559			continue;
2560		}
2561
2562		/*
2563		 * at this point, either dv is not held or SDEV_ENFORCE
2564		 * is specified. In either case, dv needs to be deleted
2565		 */
2566		SDEV_HOLD(dv);
2567
2568		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2569		if (bkstore && (vp->v_type == VDIR))
2570			bkstore += 1;
2571
2572		if (bkstore) {
2573			len = strlen(dv->sdev_name) + 1;
2574			bks_name = kmem_alloc(len, KM_SLEEP);
2575			bcopy(dv->sdev_name, bks_name, len);
2576		}
2577
2578		error = sdev_dirdelete(ddv, dv);
2579
2580		if (error == EBUSY) {
2581			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2582			busy++;
2583		}
2584
2585		/* take care of the backing store cleanup */
2586		if (bkstore && (error == 0)) {
2587			ASSERT(bks_name);
2588			ASSERT(ddv->sdev_attrvp);
2589
2590			if (bkstore == 1) {
2591				error = VOP_REMOVE(ddv->sdev_attrvp,
2592				    bks_name, kcred, NULL, 0);
2593			} else if (bkstore == 2) {
2594				error = VOP_RMDIR(ddv->sdev_attrvp,
2595				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2596			}
2597
2598			/* do not propagate the backing store errors */
2599			if (error) {
2600				sdcmn_err9(("sdev_cleandir: backing store "
2601				    "not cleaned\n"));
2602				error = 0;
2603			}
2604
2605			bkstore = 0;
2606			kmem_free(bks_name, len);
2607			bks_name = NULL;
2608			len = 0;
2609		}
2610	}
2611
2612	ddv->sdev_flags |= SDEV_BUILD;
2613	rw_exit(&ddv->sdev_contents);
2614
2615	if (busy) {
2616		error = EBUSY;
2617	}
2618
2619	return (error);
2620}
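/*
 * Usage sketch (illustrative; the pattern is hypothetical): clean out a
 * directory, touching only entries whose names match a gmatch() pattern
 * and forcing busy nodes stale.
 *
 *	if (sdev_cleandir(ddv, "pts*", SDEV_ENFORCE) == EBUSY) {
 *		sdcmn_err9(("some nodes under %s are still busy\n",
 *		    ddv->sdev_name));
 *	}
 */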
2621
2622/*
2623 * a convenient wrapper for readdir() funcs
2624 */
2625size_t
2626add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2627{
2628	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2629	if (reclen > size)
2630		return (0);
2631
2632	de->d_ino = (ino64_t)ino;
2633	de->d_off = (off64_t)off + 1;
2634	de->d_reclen = (ushort_t)reclen;
2635	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2636	return (reclen);
2637}
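/*
 * Usage sketch (illustrative; "buf", "bufsize" and the entry values are
 * hypothetical): filling a caller-supplied dirent64 buffer with
 * add_dir_entry().  A return of 0 means the entry did not fit.
 *
 *	dirent64_t *dp = buf;
 *	size_t left = bufsize;
 *	size_t n;
 *
 *	n = add_dir_entry(dp, "zero", left, (ino_t)300, (offset_t)2);
 *	if (n != 0) {
 *		left -= n;
 *		dp = (dirent64_t *)((char *)dp + n);
 *	}
 */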
2638
2639/*
2640 * sdev_mount service routines
2641 */
2642int
2643sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2644{
2645	int	error;
2646
2647	if (uap->datalen != sizeof (*args))
2648		return (EINVAL);
2649
2650	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2651		cmn_err(CE_WARN, "sdev_copyin_mountargs: cannot "
2652		    "get user data. error %d\n", error);
2653		return (EFAULT);
2654	}
2655
2656	return (0);
2657}
2658
2659#ifdef nextdp
2660#undef nextdp
2661#endif
2662#define	nextdp(dp)	((struct dirent64 *) \
2663			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
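/*
 * Consumption sketch (illustrative; "dbuf" and "dbuflen" are
 * hypothetical): stepping through a buffer of dirent64 records with
 * nextdp(), equivalent to the pointer arithmetic done in
 * sdev_modctl_readdir() below.
 *
 *	dirent64_t *dp;
 *
 *	for (dp = dbuf; (char *)dp < (char *)dbuf + dbuflen;
 *	    dp = nextdp(dp)) {
 *		sdcmn_err3(("entry %s\n", dp->d_name));
 *	}
 */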
2664
2665/*
2666 * readdir helper func
2667 */
2668int
2669devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2670    int flags)
2671{
2672	struct sdev_node *ddv = VTOSDEV(vp);
2673	struct sdev_node *dv;
2674	dirent64_t	*dp;
2675	ulong_t		outcount = 0;
2676	size_t		namelen;
2677	ulong_t		alloc_count;
2678	void		*outbuf;
2679	struct iovec	*iovp;
2680	int		error = 0;
2681	size_t		reclen;
2682	offset_t	diroff;
2683	offset_t	soff;
2684	int		this_reclen;
2685	struct devname_nsmap	*map = NULL;
2686	struct devname_ops	*dirops = NULL;
2687	int (*fn)(devname_handle_t *, struct cred *) = NULL;
2688	int (*vtor)(struct sdev_node *) = NULL;
2689	struct vattr attr;
2690	timestruc_t now;
2691
2692	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2693	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2694
2695	if (uiop->uio_loffset >= MAXOFF_T) {
2696		if (eofp)
2697			*eofp = 1;
2698		return (0);
2699	}
2700
2701	if (uiop->uio_iovcnt != 1)
2702		return (EINVAL);
2703
2704	if (vp->v_type != VDIR)
2705		return (ENOTDIR);
2706
2707	if (ddv->sdev_flags & SDEV_VTOR) {
2708		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2709		ASSERT(vtor);
2710	}
2711
2712	if (eofp != NULL)
2713		*eofp = 0;
2714
2715	soff = uiop->uio_loffset;
2716	iovp = uiop->uio_iov;
2717	alloc_count = iovp->iov_len;
2718	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2719	outcount = 0;
2720
2721	if (ddv->sdev_state == SDEV_ZOMBIE)
2722		goto get_cache;
2723
2724	if (SDEV_IS_GLOBAL(ddv)) {
2725		map = sdev_get_map(ddv, 0);
2726		dirops = map ? map->dir_ops : NULL;
2727		fn = dirops ? dirops->devnops_readdir : NULL;
2728
2729		if (map && map->dir_map) {
2730			/*
2731			 * load the name mapping rule database
2732			 * by invoking devfsadm, and symlink
2733			 * all the entries in the map
2734			 */
2735			devname_rdr_result_t rdr_result;
2736			int do_thread = 0;
2737
2738			rw_enter(&map->dir_lock, RW_READER);
2739			do_thread = map->dir_maploaded ? 0 : 1;
2740			rw_exit(&map->dir_lock);
2741
2742			if (do_thread) {
2743				mutex_enter(&ddv->sdev_lookup_lock);
2744				SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR);
2745				mutex_exit(&ddv->sdev_lookup_lock);
2746
2747				sdev_dispatch_to_nsrdr_thread(ddv,
2748				    map->dir_map, &rdr_result);
2749			}
2750		} else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2751		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2752		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2753		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2754		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2755		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2756		    !sdev_reconfig_disable) {
2757			/*
2758			 * invoking "devfsadm" to do system device reconfig
2759			 */
2760			mutex_enter(&ddv->sdev_lookup_lock);
2761			SDEV_BLOCK_OTHERS(ddv,
2762			    (SDEV_READDIR|SDEV_LGWAITING));
2763			mutex_exit(&ddv->sdev_lookup_lock);
2764
2765			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2766			    ddv->sdev_path, curproc->p_user.u_comm));
2767			if (sdev_reconfig_verbose) {
2768				cmn_err(CE_CONT,
2769				    "?readdir of %s by %s: reconfig\n",
2770				    ddv->sdev_path, curproc->p_user.u_comm);
2771			}
2772
2773			sdev_devfsadmd_thread(ddv, NULL, kcred);
2774		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2775			/*
2776			 * compensate for an "ls" started after "devfsadm"
2777			 */
2778			mutex_enter(&ddv->sdev_lookup_lock);
2779			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2780			mutex_exit(&ddv->sdev_lookup_lock);
2781		}
2782
2783		/*
2784		 * release the contents lock so that
2785		 * the cache may be updated by devfsadmd
2786		 */
2787		rw_exit(&ddv->sdev_contents);
2788		mutex_enter(&ddv->sdev_lookup_lock);
2789		if (SDEV_IS_READDIR(ddv))
2790			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2791		mutex_exit(&ddv->sdev_lookup_lock);
2792		rw_enter(&ddv->sdev_contents, RW_READER);
2793
2794		sdcmn_err4(("readdir of directory %s by %s\n",
2795		    ddv->sdev_name, curproc->p_user.u_comm));
2796		if (ddv->sdev_flags & SDEV_BUILD) {
2797			if (SDEV_IS_PERSIST(ddv)) {
2798				error = sdev_filldir_from_store(ddv,
2799				    alloc_count, cred);
2800			}
2801			ddv->sdev_flags &= ~SDEV_BUILD;
2802		}
2803	}
2804
2805get_cache:
2806	/* handle "." and ".." */
2807	diroff = 0;
2808	if (soff == 0) {
2809		/* first time */
2810		this_reclen = DIRENT64_RECLEN(1);
2811		if (alloc_count < this_reclen) {
2812			error = EINVAL;
2813			goto done;
2814		}
2815
2816		dp->d_ino = (ino64_t)ddv->sdev_ino;
2817		dp->d_off = (off64_t)1;
2818		dp->d_reclen = (ushort_t)this_reclen;
2819
2820		(void) strncpy(dp->d_name, ".",
2821		    DIRENT64_NAMELEN(this_reclen));
2822		outcount += dp->d_reclen;
2823		dp = nextdp(dp);
2824	}
2825
2826	diroff++;
2827	if (soff <= 1) {
2828		this_reclen = DIRENT64_RECLEN(2);
2829		if (alloc_count < outcount + this_reclen) {
2830			error = EINVAL;
2831			goto done;
2832		}
2833
2834		dp->d_reclen = (ushort_t)this_reclen;
2835		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2836		dp->d_off = (off64_t)2;
2837
2838		(void) strncpy(dp->d_name, "..",
2839		    DIRENT64_NAMELEN(this_reclen));
2840		outcount += dp->d_reclen;
2841
2842		dp = nextdp(dp);
2843	}
2844
2845
2846	/* walk the directory cache */
2847	diroff++;
2848	for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next, diroff++) {
2849		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2850		    diroff, soff, dv->sdev_name));
2851
2852		/* bypassing premature nodes */
2853		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2854			sdcmn_err3(("sdev_readdir: premature node "
2855			    "%s\n", dv->sdev_name));
2856			continue;
2857		}
2858
2859		/* don't list stale nodes */
2860		if (dv->sdev_flags & SDEV_STALE) {
2861			sdcmn_err4(("sdev_readdir: STALE node  "
2862			    "%s\n", dv->sdev_name));
2863			continue;
2864		}
2865
2866		/*
2867		 * Check validity of node
2868		 */
2869		if (vtor) {
2870			switch (vtor(dv)) {
2871			case SDEV_VTOR_VALID:
2872				break;
2873			case SDEV_VTOR_INVALID:
2874			case SDEV_VTOR_SKIP:
2875				continue;
2876			default:
2877				cmn_err(CE_PANIC,
2878				    "dev fs: validator failed: %s(%p)\n",
2879				    dv->sdev_name, (void *)dv);
2880				break;
2881			/*NOTREACHED*/
2882			}
2883		}
2884
2885		/*
2886		 * call back into the module for the validity/bookkeeping
2887		 * of this entry
2888		 */
2889		if (fn) {
2890			error = (*fn)(&(dv->sdev_handle), cred);
2891			if (error) {
2892				sdcmn_err4(("sdev_readdir: module did not "
2893				    "validate %s\n", dv->sdev_name));
2894				continue;
2895			}
2896		}
2897
2898		namelen = strlen(dv->sdev_name);
2899		reclen = DIRENT64_RECLEN(namelen);
2900		if (outcount + reclen > alloc_count) {
2901			goto full;
2902		}
2903		dp->d_reclen = (ushort_t)reclen;
2904		dp->d_ino = (ino64_t)dv->sdev_ino;
2905		dp->d_off = (off64_t)diroff + 1;
2906		(void) strncpy(dp->d_name, dv->sdev_name,
2907		    DIRENT64_NAMELEN(reclen));
2908		outcount += reclen;
2909		dp = nextdp(dp);
2910	}
2911
2912full:
2913	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2914	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2915	    (void *)dv));
2916
2917	if (outcount)
2918		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2919
2920	if (!error) {
2921		uiop->uio_loffset = diroff;
2922		if (eofp)
2923			*eofp = dv ? 0 : 1;
2924	}
2925
2926
2927	if (ddv->sdev_attrvp) {
2928		gethrestime(&now);
2929		attr.va_ctime = now;
2930		attr.va_atime = now;
2931		attr.va_mask = AT_CTIME|AT_ATIME;
2932
2933		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2934	}
2935done:
2936	kmem_free(outbuf, alloc_count);
2937	return (error);
2938}
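/*
 * Usage sketch (illustrative): a VOP_READDIR entry point calls
 * devname_readdir_func() with the directory contents lock held as
 * reader, as asserted at the top of the function; SDEV_BROWSE is one
 * of the flags the function checks.
 *
 *	rw_enter(&ddv->sdev_contents, RW_READER);
 *	error = devname_readdir_func(vp, uiop, cred, eofp, SDEV_BROWSE);
 *	rw_exit(&ddv->sdev_contents);
 */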
2939
2940
2941static int
2942sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2943{
2944	vnode_t *vp;
2945	vnode_t *cvp;
2946	struct sdev_node *svp;
2947	char *nm;
2948	struct pathname pn;
2949	int error;
2950	int persisted = 0;
2951
2952	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2953		return (error);
2954	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2955
2956	vp = rootdir;
2957	VN_HOLD(vp);
2958
2959	while (pn_pathleft(&pn)) {
2960		ASSERT(vp->v_type == VDIR);
2961		(void) pn_getcomponent(&pn, nm);
2962		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2963		    NULL, NULL);
2964		VN_RELE(vp);
2965
2966		if (error)
2967			break;
2968
2969		/* traverse mount points encountered on our journey */
2970		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2971			VN_RELE(cvp);
2972			break;
2973		}
2974
2975		/*
2976		 * Direct the operation to the persisting filesystem
2977		 * underlying /dev.  Bail if we encounter a
2978		 * non-persistent dev entity here.
2979		 */
2980		if (cvp->v_vfsp->vfs_fstype == devtype) {
2981
2982			if (VTOSDEV(cvp) == NULL) {
2983				error = ENOENT;
2984				VN_RELE(cvp);
2985				break;
2986			}
2987
2988			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2989				error = ENOENT;
2990				VN_RELE(cvp);
2991				break;
2992			}
2993			svp = VTOSDEV(cvp);
2994			if ((vp = svp->sdev_attrvp) == NULL) {
2995				error = ENOENT;
2996				VN_RELE(cvp);
2997				break;
2998			}
2999			persisted = 1;
3000			VN_HOLD(vp);
3001			VN_RELE(cvp);
3002			cvp = vp;
3003		}
3004
3005		vp = cvp;
3006		pn_skipslash(&pn);
3007	}
3008
3009	kmem_free(nm, MAXNAMELEN);
3010	pn_free(&pn);
3011
3012	if (error)
3013		return (error);
3014
3015	/*
3016	 * Only return persisted nodes in the filesystem underlying /dev.
3017	 */
3018	if (!persisted) {
3019		VN_RELE(vp);
3020		return (ENOENT);
3021	}
3022
3023	*r_vp = vp;
3024	return (0);
3025}
3026
3027int
3028sdev_modctl_readdir(const char *dir, char ***dirlistp,
3029    int *npathsp, int *npathsp_alloc, int checking_empty)
3030{
3031	char	**pathlist = NULL;
3032	char	**newlist = NULL;
3033	int	npaths = 0;
3034	int	npaths_alloc = 0;
3035	dirent64_t *dbuf = NULL;
3036	int	n;
3037	char	*s;
3038	int error;
3039	vnode_t *vp;
3040	int eof;
3041	struct iovec iov;
3042	struct uio uio;
3043	struct dirent64 *dp;
3044	size_t dlen;
3045	size_t dbuflen;
3046	int ndirents = 64;
3047	char *nm;
3048
3049	error = sdev_modctl_lookup(dir, &vp);
3050	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
3051	    dir, curproc->p_user.u_comm,
3052	    (error == 0) ? "ok" : "failed"));
3053	if (error)
3054		return (error);
3055
3056	dlen = ndirents * (sizeof (*dbuf));
3057	dbuf = kmem_alloc(dlen, KM_SLEEP);
3058
3059	uio.uio_iov = &iov;
3060	uio.uio_iovcnt = 1;
3061	uio.uio_segflg = UIO_SYSSPACE;
3062	uio.uio_fmode = 0;
3063	uio.uio_extflg = UIO_COPY_CACHED;
3064	uio.uio_loffset = 0;
3065	uio.uio_llimit = MAXOFFSET_T;
3066
3067	eof = 0;
3068	error = 0;
3069	while (!error && !eof) {
3070		uio.uio_resid = dlen;
3071		iov.iov_base = (char *)dbuf;
3072		iov.iov_len = dlen;
3073
3074		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3075		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
3076		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3077
3078		dbuflen = dlen - uio.uio_resid;
3079
3080		if (error || dbuflen == 0)
3081			break;
3082
3083		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
3084		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
3085
3086			nm = dp->d_name;
3087
3088			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
3089				continue;
3090			if (npaths == npaths_alloc) {
3091				npaths_alloc += 64;
3092				newlist = (char **)
3093				    kmem_zalloc((npaths_alloc + 1) *
3094				    sizeof (char *), KM_SLEEP);
3095				if (pathlist) {
3096					bcopy(pathlist, newlist,
3097					    npaths * sizeof (char *));
3098					kmem_free(pathlist,
3099					    (npaths + 1) * sizeof (char *));
3100				}
3101				pathlist = newlist;
3102			}
3103			n = strlen(nm) + 1;
3104			s = kmem_alloc(n, KM_SLEEP);
3105			bcopy(nm, s, n);
3106			pathlist[npaths++] = s;
3107			sdcmn_err11(("  %s/%s\n", dir, s));
3108
3109			/* if checking empty, one entry is as good as many */
3110			if (checking_empty) {
3111				eof = 1;
3112				break;
3113			}
3114		}
3115	}
3116
3117exit:
3118	VN_RELE(vp);
3119
3120	if (dbuf)
3121		kmem_free(dbuf, dlen);
3122
3123	if (error)
3124		return (error);
3125
3126	*dirlistp = pathlist;
3127	*npathsp = npaths;
3128	*npathsp_alloc = npaths_alloc;
3129
3130	return (0);
3131}
3132
3133void
3134sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
3135{
3136	int	i, n;
3137
3138	for (i = 0; i < npaths; i++) {
3139		n = strlen(pathlist[i]) + 1;
3140		kmem_free(pathlist[i], n);
3141	}
3142
3143	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
3144}
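/*
 * Usage sketch (illustrative; the path is an example):
 * sdev_modctl_readdir() and sdev_modctl_readdir_free() are used as a
 * pair; npaths and npaths_alloc must be handed back unchanged so the
 * list can be freed correctly.
 *
 *	char **list;
 *	int npaths, npaths_alloc;
 *
 *	if (sdev_modctl_readdir("/dev/dsk", &list, &npaths,
 *	    &npaths_alloc, 0) == 0) {
 *		(consume list[0] .. list[npaths - 1])
 *		sdev_modctl_readdir_free(list, npaths, npaths_alloc);
 *	}
 */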
3145
3146int
3147sdev_modctl_devexists(const char *path)
3148{
3149	vnode_t *vp;
3150	int error;
3151
3152	error = sdev_modctl_lookup(path, &vp);
3153	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
3154	    path, curproc->p_user.u_comm,
3155	    (error == 0) ? "ok" : "failed"));
3156	if (error == 0)
3157		VN_RELE(vp);
3158
3159	return (error);
3160}
3161
3162void
3163sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname)
3164{
3165	rw_enter(&map->dir_lock, RW_WRITER);
3166	if (module) {
3167		ASSERT(map->dir_newmodule == NULL);
3168		map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP);
3169	}
3170	if (mapname) {
3171		ASSERT(map->dir_newmap == NULL);
3172		map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP);
3173	}
3174
3175	map->dir_invalid = 1;
3176	rw_exit(&map->dir_lock);
3177}
3178
3179void
3180sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname)
3181{
3182	char *old_module = NULL;
3183	char *old_map = NULL;
3184
3185	ASSERT(RW_LOCK_HELD(&map->dir_lock));
3186	if (!rw_tryupgrade(&map->dir_lock)) {
3187		rw_exit(&map->dir_lock);
3188		rw_enter(&map->dir_lock, RW_WRITER);
3189	}
3190
3191	old_module = map->dir_module;
3192	if (module) {
3193		if (old_module && strcmp(old_module, module) != 0) {
3194			kmem_free(old_module, strlen(old_module) + 1);
3195		}
3196		map->dir_module = module;
3197		map->dir_newmodule = NULL;
3198	}
3199
3200	old_map = map->dir_map;
3201	if (mapname) {
3202		if (old_map && strcmp(old_map, mapname) != 0) {
3203			kmem_free(old_map, strlen(old_map) + 1);
3204		}
3205
3206		map->dir_map = mapname;
3207		map->dir_newmap = NULL;
3208	}
3209	map->dir_maploaded = 0;
3210	map->dir_invalid = 0;
3211	rw_downgrade(&map->dir_lock);
3212}
3213
3214/*
3215 * dir_name must have at least one attribute:
3216 *	dir_module,
3217 *	dir_map,
3218 *	or both.
3219 * The caller must hold devname_nsmaps_lock.
3220 */
3221void
3222sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map)
3223{
3224	struct devname_nsmap *map;
3225	int len = 0;
3226
3227	ASSERT(dir_name);
3228	ASSERT(dir_module || dir_map);
3229	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3230
3231	if (map = sdev_get_nsmap_by_dir(dir_name, 1)) {
3232		sdev_update_newnsmap(map, dir_module, dir_map);
3233		return;
3234	}
3235
3236	map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP);
3237	map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP);
3238	if (dir_module) {
3239		map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP);
3240	}
3241
3242	if (dir_map) {
3243		if (dir_map[0] != '/') {
3244			len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2;
3245			map->dir_map = kmem_zalloc(len, KM_SLEEP);
3246			(void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR,
3247			    dir_map);
3248		} else {
3249			map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP);
3250		}
3251	}
3252
3253	map->dir_ops = NULL;
3254	map->dir_maploaded = 0;
3255	map->dir_invalid = 0;
3256	rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL);
3257
3258	map->next = devname_nsmaps;
3259	map->prev = NULL;
3260	if (devname_nsmaps) {
3261		devname_nsmaps->prev = map;
3262	}
3263	devname_nsmaps = map;
3264}
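/*
 * Usage sketch (illustrative; the directory, module and map names are
 * hypothetical): the caller serializes on devname_nsmaps_lock, per the
 * ASSERT above; a relative dir_map is prefixed with ETC_DEV_DIR.
 *
 *	mutex_enter(&devname_nsmaps_lock);
 *	sdev_insert_nsmap("/dev/example", "devname_example", "example_map");
 *	mutex_exit(&devname_nsmaps_lock);
 */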
3265
3266struct devname_nsmap *
3267sdev_get_nsmap_by_dir(char *dir_path, int locked)
3268{
3269	struct devname_nsmap *map = NULL;
3270
3271	if (!locked)
3272		mutex_enter(&devname_nsmaps_lock);
3273	for (map = devname_nsmaps; map; map = map->next) {
3274		sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name));
3275		if (strcmp(map->dir_name, dir_path) == 0) {
3276			if (!locked)
3277				mutex_exit(&devname_nsmaps_lock);
3278			return (map);
3279		}
3280	}
3281	if (!locked)
3282		mutex_exit(&devname_nsmaps_lock);
3283	return (NULL);
3284}
3285
3286struct devname_nsmap *
3287sdev_get_nsmap_by_module(char *mod_name)
3288{
3289	struct devname_nsmap *map = NULL;
3290
3291	mutex_enter(&devname_nsmaps_lock);
3292	for (map = devname_nsmaps; map; map = map->next) {
3293		sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n",
3294		    map->dir_module));
3295		if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) {
3296			mutex_exit(&devname_nsmaps_lock);
3297			return (map);
3298		}
3299	}
3300	mutex_exit(&devname_nsmaps_lock);
3301	return (NULL);
3302}
3303
3304void
3305sdev_invalidate_nsmaps()
3306{
3307	struct devname_nsmap *map = NULL;
3308
3309	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3310
3311	if (devname_nsmaps == NULL)
3312		return;
3313
3314	for (map = devname_nsmaps; map; map = map->next) {
3315		rw_enter(&map->dir_lock, RW_WRITER);
3316		map->dir_invalid = 1;
3317		rw_exit(&map->dir_lock);
3318	}
3319	devname_nsmaps_invalidated = 1;
3320}
3321
3322
3323int
3324sdev_nsmaps_loaded()
3325{
3326	int ret = 0;
3327
3328	mutex_enter(&devname_nsmaps_lock);
3329	if (devname_nsmaps_loaded)
3330		ret = 1;
3331
3332	mutex_exit(&devname_nsmaps_lock);
3333	return (ret);
3334}
3335
3336int
3337sdev_nsmaps_reloaded()
3338{
3339	int ret = 0;
3340
3341	mutex_enter(&devname_nsmaps_lock);
3342	if (devname_nsmaps_invalidated)
3343		ret = 1;
3344
3345	mutex_exit(&devname_nsmaps_lock);
3346	return (ret);
3347}
3348
3349static void
3350sdev_free_nsmap(struct devname_nsmap *map)
3351{
3352	ASSERT(map);
3353	if (map->dir_name)
3354		kmem_free(map->dir_name, strlen(map->dir_name) + 1);
3355	if (map->dir_module)
3356		kmem_free(map->dir_module, strlen(map->dir_module) + 1);
3357	if (map->dir_map)
3358		kmem_free(map->dir_map, strlen(map->dir_map) + 1);
3359	rw_destroy(&map->dir_lock);
3360	kmem_free(map, sizeof (*map));
3361}
3362
3363void
3364sdev_validate_nsmaps()
3365{
3366	struct devname_nsmap *map = NULL;
3367	struct devname_nsmap *oldmap = NULL;
3368
3369	ASSERT(MUTEX_HELD(&devname_nsmaps_lock));
3370	map = devname_nsmaps;
3371	while (map) {
3372		rw_enter(&map->dir_lock, RW_READER);
3373		if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) &&
3374		    (map->dir_newmap == NULL)) {
3375			oldmap = map;
3376			rw_exit(&map->dir_lock);
3377			if (map->prev)
3378				map->prev->next = oldmap->next;
3379			if (map == devname_nsmaps)
3380				devname_nsmaps = oldmap->next;
3381
3382			map = oldmap->next;
3383			if (map)
3384				map->prev = oldmap->prev;
3385			sdev_free_nsmap(oldmap);
3386			oldmap = NULL;
3387		} else {
3388			rw_exit(&map->dir_lock);
3389			map = map->next;
3390		}
3391	}
3392	devname_nsmaps_invalidated = 0;
3393}
3394
3395static int
3396sdev_map_is_invalid(struct devname_nsmap *map)
3397{
3398	int ret = 0;
3399
3400	ASSERT(map);
3401	rw_enter(&map->dir_lock, RW_READER);
3402	if (map->dir_invalid)
3403		ret = 1;
3404	rw_exit(&map->dir_lock);
3405	return (ret);
3406}
3407
3408static int
3409sdev_check_map(struct devname_nsmap *map)
3410{
3411	struct devname_nsmap *mapp;
3412
3413	mutex_enter(&devname_nsmaps_lock);
3414	if (devname_nsmaps == NULL) {
3415		mutex_exit(&devname_nsmaps_lock);
3416		return (1);
3417	}
3418
3419	for (mapp = devname_nsmaps; mapp; mapp = mapp->next) {
3420		if (mapp == map) {
3421			mutex_exit(&devname_nsmaps_lock);
3422			return (0);
3423		}
3424	}
3425
3426	mutex_exit(&devname_nsmaps_lock);
3427	return (1);
3428
3429}
3430
3431struct devname_nsmap *
3432sdev_get_map(struct sdev_node *dv, int validate)
3433{
3434	struct devname_nsmap *map;
3435	int error;
3436
3437	ASSERT(RW_READ_HELD(&dv->sdev_contents));
3438	map = dv->sdev_mapinfo;
3439	if (map && sdev_check_map(map)) {
3440		if (!rw_tryupgrade(&dv->sdev_contents)) {
3441			rw_exit(&dv->sdev_contents);
3442			rw_enter(&dv->sdev_contents, RW_WRITER);
3443		}
3444		dv->sdev_mapinfo = NULL;
3445		rw_downgrade(&dv->sdev_contents);
3446		return (NULL);
3447	}
3448
3449	if (validate && (!map || sdev_map_is_invalid(map))) {
3450		if (!rw_tryupgrade(&dv->sdev_contents)) {
3451			rw_exit(&dv->sdev_contents);
3452			rw_enter(&dv->sdev_contents, RW_WRITER);
3453		}
3454		error = sdev_get_moduleops(dv);
3455		if (!error)
3456			map = dv->sdev_mapinfo;
3457		rw_downgrade(&dv->sdev_contents);
3458	}
3459	return (map);
3460}
3461
3462extern int sdev_vnodeops_tbl_size;
3463
3464/*
3465 * construct a new template with overrides from vtab
3466 */
3467static fs_operation_def_t *
3468sdev_merge_vtab(const fs_operation_def_t tab[])
3469{
3470	fs_operation_def_t *new;
3471	const fs_operation_def_t *tab_entry;
3472
3473	/* make a copy of standard vnode ops table */
3474	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
3475	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
3476
3477	/* replace the overrides from tab */
3478	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
3479		fs_operation_def_t *std_entry = new;
3480		while (std_entry->name) {
3481			if (strcmp(tab_entry->name, std_entry->name) == 0) {
3482				std_entry->func = tab_entry->func;
3483				break;
3484			}
3485			std_entry++;
3486		}
3487		if (std_entry->name == NULL)
3488			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
3489			    tab_entry->name);
3490	}
3491
3492	return (new);
3493}
3494
3495/* free memory allocated by sdev_merge_vtab */
3496static void
3497sdev_free_vtab(fs_operation_def_t *new)
3498{
3499	kmem_free(new, sdev_vnodeops_tbl_size);
3500}
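/*
 * Usage sketch (illustrative; the table and vnodeops names are
 * hypothetical): build a vnodeops vector from the standard sdev
 * template plus per-directory overrides, then release the merged
 * template once vn_make_ops() has consumed it.
 *
 *	fs_operation_def_t *tmpl;
 *	vnodeops_t *example_vnodeops;
 *
 *	tmpl = sdev_merge_vtab(example_vnodeops_tbl);
 *	if (vn_make_ops("dev_example", tmpl, &example_vnodeops) != 0)
 *		cmn_err(CE_WARN, "unable to create vnode ops vector");
 *	sdev_free_vtab(tmpl);
 */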
3501
3502void
3503devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp)
3504{
3505	struct sdev_node *dv = hdl->dh_data;
3506
3507	ASSERT(dv);
3508
3509	rw_enter(&dv->sdev_contents, RW_READER);
3510	*vpp = SDEVTOV(dv);
3511	rw_exit(&dv->sdev_contents);
3512}
3513
3514int
3515devname_get_path(devname_handle_t *hdl, char **path)
3516{
3517	struct sdev_node *dv = hdl->dh_data;
3518
3519	ASSERT(dv);
3520
3521	rw_enter(&dv->sdev_contents, RW_READER);
3522	*path = dv->sdev_path;
3523	rw_exit(&dv->sdev_contents);
3524	return (0);
3525}
3526
3527int
3528devname_get_name(devname_handle_t *hdl, char **entry)
3529{
3530	struct sdev_node *dv = hdl->dh_data;
3531
3532	ASSERT(dv);
3533	rw_enter(&dv->sdev_contents, RW_READER);
3534	*entry = dv->sdev_name;
3535	rw_exit(&dv->sdev_contents);
3536	return (0);
3537}
3538
3539void
3540devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp)
3541{
3542	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3543
3544	ASSERT(dv);
3545
3546	rw_enter(&dv->sdev_contents, RW_READER);
3547	*vpp = SDEVTOV(dv);
3548	rw_exit(&dv->sdev_contents);
3549}
3550
3551int
3552devname_get_dir_path(devname_handle_t *hdl, char **path)
3553{
3554	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3555
3556	ASSERT(dv);
3557	rw_enter(&dv->sdev_contents, RW_READER);
3558	*path = dv->sdev_path;
3559	rw_exit(&dv->sdev_contents);
3560	return (0);
3561}
3562
3563int
3564devname_get_dir_name(devname_handle_t *hdl, char **entry)
3565{
3566	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3567
3568	ASSERT(dv);
3569	rw_enter(&dv->sdev_contents, RW_READER);
3570	*entry = dv->sdev_name;
3571	rw_exit(&dv->sdev_contents);
3572	return (0);
3573}
3574
3575int
3576devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map)
3577{
3578	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3579
3580	ASSERT(dv);
3581	rw_enter(&dv->sdev_contents, RW_READER);
3582	*map = dv->sdev_mapinfo;
3583	rw_exit(&dv->sdev_contents);
3584	return (0);
3585}
3586
3587int
3588devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl)
3589{
3590	struct sdev_node *dv = hdl->dh_data->sdev_dotdot;
3591
3592	ASSERT(dv);
3593	rw_enter(&dv->sdev_contents, RW_READER);
3594	*dir_hdl = &(dv->sdev_handle);
3595	rw_exit(&dv->sdev_contents);
3596	return (0);
3597}
3598
3599void
3600devname_set_nodetype(devname_handle_t *hdl, void *args, int spec)
3601{
3602	struct sdev_node *dv = hdl->dh_data;
3603
3604	ASSERT(dv);
3605	rw_enter(&dv->sdev_contents, RW_WRITER);
3606	hdl->dh_spec = (devname_spec_t)spec;
3607	hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP);
3608	rw_exit(&dv->sdev_contents);
3609}
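/*
 * Usage sketch (illustrative; "hdl" is assumed to come from the
 * framework): a plug-in directory module can recover the node and
 * directory names through the accessors above.
 *
 *	char *nm, *dirpath;
 *
 *	(void) devname_get_name(hdl, &nm);
 *	(void) devname_get_dir_path(hdl, &dirpath);
 *	sdcmn_err6(("resolving %s under %s\n", nm, dirpath));
 */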
3610
3611/*
3612 * a generic setattr() function
3613 *
3614 * note: the protocol argument supports only AT_UID and AT_GID.
3615 *	 Future enhancements can be done for other types, e.g. AT_MODE.
3616 */
3617int
3618devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3619    struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3620    int), int protocol)
3621{
3622	struct sdev_node	*dv = VTOSDEV(vp);
3623	struct sdev_node	*parent = dv->sdev_dotdot;
3624	struct vattr		*get;
3625	uint_t			mask = vap->va_mask;
3626	int 			error;
3627
3628	/* some sanity checks */
3629	if (vap->va_mask & AT_NOSET)
3630		return (EINVAL);
3631
3632	if (vap->va_mask & AT_SIZE) {
3633		if (vp->v_type == VDIR) {
3634			return (EISDIR);
3635		}
3636	}
3637
3638	/* no need to set attribute, but do not fail either */
3639	ASSERT(parent);
3640	rw_enter(&parent->sdev_contents, RW_READER);
3641	if (dv->sdev_state == SDEV_ZOMBIE) {
3642		rw_exit(&parent->sdev_contents);
3643		return (0);
3644	}
3645
3646	/* If backing store exists, just set it. */
3647	if (dv->sdev_attrvp) {
3648		rw_exit(&parent->sdev_contents);
3649		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3650	}
3651
3652	/*
3653	 * Otherwise, for persistent nodes, create the backing store.
3654	 */
3655	ASSERT(dv->sdev_attr);
3656	if (SDEV_IS_PERSIST(dv) ||
3657	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3658		sdev_vattr_merge(dv, vap);
3659		rw_enter(&dv->sdev_contents, RW_WRITER);
3660		error = sdev_shadow_node(dv, cred);
3661		rw_exit(&dv->sdev_contents);
3662		rw_exit(&parent->sdev_contents);
3663
3664		if (error)
3665			return (error);
3666		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3667	}
3668
3669
3670	/*
3671	 * sdev_attr was allocated in sdev_mknode
3672	 */
3673	rw_enter(&dv->sdev_contents, RW_WRITER);
3674	error = secpolicy_vnode_setattr(cred, vp, vap,
3675	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3676	if (error) {
3677		rw_exit(&dv->sdev_contents);
3678		rw_exit(&parent->sdev_contents);
3679		return (error);
3680	}
3681
3682	get = dv->sdev_attr;
3683	if (mask & AT_MODE) {
3684		get->va_mode &= S_IFMT;
3685		get->va_mode |= vap->va_mode & ~S_IFMT;
3686	}
3687
3688	if ((mask & AT_UID) || (mask & AT_GID)) {
3689		if (mask & AT_UID)
3690			get->va_uid = vap->va_uid;
3691		if (mask & AT_GID)
3692			get->va_gid = vap->va_gid;
3693		/*
3694		 * a callback must be provided if the protocol is set
3695		 */
3696		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3697			ASSERT(callback);
3698			error = callback(dv, get, protocol);
3699			if (error) {
3700				rw_exit(&dv->sdev_contents);
3701				rw_exit(&parent->sdev_contents);
3702				return (error);
3703			}
3704		}
3705	}
3706
3707	if (mask & AT_ATIME)
3708		get->va_atime = vap->va_atime;
3709	if (mask & AT_MTIME)
3710		get->va_mtime = vap->va_mtime;
3711	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3712		gethrestime(&get->va_ctime);
3713	}
3714
3715	sdev_vattr_merge(dv, get);
3716	rw_exit(&dv->sdev_contents);
3717	rw_exit(&parent->sdev_contents);
3718	return (0);
3719}
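/*
 * Usage sketch (illustrative; the callback name is hypothetical): a
 * directory-specific setattr entry point delegates to
 * devname_setattr_func(), requesting the ownership callback for
 * chown/chgrp.
 *
 *	static int
 *	example_setattr(struct vnode *vp, struct vattr *vap, int flags,
 *	    struct cred *cred)
 *	{
 *		return (devname_setattr_func(vp, vap, flags, cred,
 *		    example_chown_cb, AT_UID | AT_GID));
 *	}
 */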
3720
3721/*
3722 * a generic inactive() function
3723 */
3724void
3725devname_inactive_func(struct vnode *vp, struct cred *cred,
3726    void (*callback)(struct vnode *))
3727{
3728	int clean;
3729	struct sdev_node *dv = VTOSDEV(vp);
3730	struct sdev_node *ddv = dv->sdev_dotdot;
3731	struct sdev_node *idv;
3732	struct sdev_node *prev = NULL;
3733	int state;
3734	struct devname_nsmap *map = NULL;
3735	struct devname_ops *dirops = NULL;
3736	void (*fn)(devname_handle_t *, struct cred *) = NULL;
3737
3738	rw_enter(&ddv->sdev_contents, RW_WRITER);
3739	state = dv->sdev_state;
3740
3741	mutex_enter(&vp->v_lock);
3742	ASSERT(vp->v_count >= 1);
3743
3744	if (vp->v_count == 1 && callback != NULL)
3745		callback(vp);
3746
3747	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3748
3749	/*
3750	 * The last reference on the ZOMBIE node has been released.
3751	 * Clean up the sdev_node, and
3752	 * release the hold on the backing store node so that
3753	 * the ZOMBIE backing store is also cleaned out.
3754	 */
3755	if (clean) {
3756		ASSERT(ddv);
3757		if (SDEV_IS_GLOBAL(dv)) {
3758			map = ddv->sdev_mapinfo;
3759			dirops = map ? map->dir_ops : NULL;
3760			if (dirops && (fn = dirops->devnops_inactive))
3761				(*fn)(&(dv->sdev_handle), cred);
3762		}
3763
3764		ddv->sdev_nlink--;
3765		if (vp->v_type == VDIR) {
3766			dv->sdev_nlink--;
3767		}
3768		for (idv = ddv->sdev_dot; idv && idv != dv;
3769		    prev = idv, idv = idv->sdev_next)
3770			;
3771		ASSERT(idv == dv);
3772		if (prev == NULL)
3773			ddv->sdev_dot = dv->sdev_next;
3774		else
3775			prev->sdev_next = dv->sdev_next;
3776		dv->sdev_next = NULL;
3777		dv->sdev_nlink--;
3778		--vp->v_count;
3779		mutex_exit(&vp->v_lock);
3780		sdev_nodedestroy(dv, 0);
3781	} else {
3782		--vp->v_count;
3783		mutex_exit(&vp->v_lock);
3784	}
3785	rw_exit(&ddv->sdev_contents);
3786}
3787