1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*
26 * utility routines for the /dev fs
27 */
28
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/t_lock.h>
32#include <sys/systm.h>
33#include <sys/sysmacros.h>
34#include <sys/user.h>
35#include <sys/time.h>
36#include <sys/vfs.h>
37#include <sys/vnode.h>
38#include <sys/file.h>
39#include <sys/fcntl.h>
40#include <sys/flock.h>
41#include <sys/kmem.h>
42#include <sys/uio.h>
43#include <sys/errno.h>
44#include <sys/stat.h>
45#include <sys/cred.h>
46#include <sys/dirent.h>
47#include <sys/pathname.h>
48#include <sys/cmn_err.h>
49#include <sys/debug.h>
50#include <sys/mode.h>
51#include <sys/policy.h>
52#include <fs/fs_subr.h>
53#include <sys/mount.h>
54#include <sys/fs/snode.h>
55#include <sys/fs/dv_node.h>
56#include <sys/fs/sdev_impl.h>
57#include <sys/sunndi.h>
58#include <sys/sunmdi.h>
59#include <sys/conf.h>
60#include <sys/proc.h>
61#include <sys/user.h>
62#include <sys/modctl.h>
63
64#ifdef DEBUG
65int sdev_debug = 0x00000001;
66int sdev_debug_cache_flags = 0;
67#endif
68
69/*
70 * globals
71 */
72/* prototype memory vattrs */
73vattr_t sdev_vattr_dir = {
74	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
75	VDIR,					/* va_type */
76	SDEV_DIRMODE_DEFAULT,			/* va_mode */
77	SDEV_UID_DEFAULT,			/* va_uid */
78	SDEV_GID_DEFAULT,			/* va_gid */
79	0,					/* va_fsid */
80	0,					/* va_nodeid */
81	0,					/* va_nlink */
82	0,					/* va_size */
83	0,					/* va_atime */
84	0,					/* va_mtime */
85	0,					/* va_ctime */
86	0,					/* va_rdev */
87	0,					/* va_blksize */
88	0,					/* va_nblocks */
89	0					/* va_vcode */
90};
91
92vattr_t sdev_vattr_lnk = {
93	AT_TYPE|AT_MODE,			/* va_mask */
94	VLNK,					/* va_type */
95	SDEV_LNKMODE_DEFAULT,			/* va_mode */
96	SDEV_UID_DEFAULT,			/* va_uid */
97	SDEV_GID_DEFAULT,			/* va_gid */
98	0,					/* va_fsid */
99	0,					/* va_nodeid */
100	0,					/* va_nlink */
101	0,					/* va_size */
102	0,					/* va_atime */
103	0,					/* va_mtime */
104	0,					/* va_ctime */
105	0,					/* va_rdev */
106	0,					/* va_blksize */
107	0,					/* va_nblocks */
108	0					/* va_vcode */
109};
110
111vattr_t sdev_vattr_blk = {
112	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
113	VBLK,					/* va_type */
114	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
115	SDEV_UID_DEFAULT,			/* va_uid */
116	SDEV_GID_DEFAULT,			/* va_gid */
117	0,					/* va_fsid */
118	0,					/* va_nodeid */
119	0,					/* va_nlink */
120	0,					/* va_size */
121	0,					/* va_atime */
122	0,					/* va_mtime */
123	0,					/* va_ctime */
124	0,					/* va_rdev */
125	0,					/* va_blksize */
126	0,					/* va_nblocks */
127	0					/* va_vcode */
128};
129
130vattr_t sdev_vattr_chr = {
131	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
132	VCHR,					/* va_type */
133	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
134	SDEV_UID_DEFAULT,			/* va_uid */
135	SDEV_GID_DEFAULT,			/* va_gid */
136	0,					/* va_fsid */
137	0,					/* va_nodeid */
138	0,					/* va_nlink */
139	0,					/* va_size */
140	0,					/* va_atime */
141	0,					/* va_mtime */
142	0,					/* va_ctime */
143	0,					/* va_rdev */
144	0,					/* va_blksize */
145	0,					/* va_nblocks */
146	0					/* va_vcode */
147};
148
149kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
150int		devtype;		/* fstype */
151
152/* static */
153static struct vnodeops *sdev_get_vop(struct sdev_node *);
154static void sdev_set_no_negcache(struct sdev_node *);
155static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
156static void sdev_free_vtab(fs_operation_def_t *);
157
158static void
159sdev_prof_free(struct sdev_node *dv)
160{
161	ASSERT(!SDEV_IS_GLOBAL(dv));
162	if (dv->sdev_prof.dev_name)
163		nvlist_free(dv->sdev_prof.dev_name);
164	if (dv->sdev_prof.dev_map)
165		nvlist_free(dv->sdev_prof.dev_map);
166	if (dv->sdev_prof.dev_symlink)
167		nvlist_free(dv->sdev_prof.dev_symlink);
168	if (dv->sdev_prof.dev_glob_incdir)
169		nvlist_free(dv->sdev_prof.dev_glob_incdir);
170	if (dv->sdev_prof.dev_glob_excdir)
171		nvlist_free(dv->sdev_prof.dev_glob_excdir);
172	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
173}
174
175/* sdev_node cache constructor */
176/*ARGSUSED1*/
177static int
178i_sdev_node_ctor(void *buf, void *cfarg, int flag)
179{
180	struct sdev_node *dv = (struct sdev_node *)buf;
181	struct vnode *vp;
182
183	bzero(buf, sizeof (struct sdev_node));
184	vp = dv->sdev_vnode = vn_alloc(flag);
185	if (vp == NULL) {
186		return (-1);
187	}
188	vp->v_data = dv;
189	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
190	return (0);
191}
192
193/* sdev_node cache destructor */
194/*ARGSUSED1*/
195static void
196i_sdev_node_dtor(void *buf, void *arg)
197{
198	struct sdev_node *dv = (struct sdev_node *)buf;
199	struct vnode *vp = SDEVTOV(dv);
200
201	rw_destroy(&dv->sdev_contents);
202	vn_free(vp);
203}
204
205/* initialize sdev_node cache */
206void
207sdev_node_cache_init()
208{
209	int flags = 0;
210
211#ifdef	DEBUG
212	flags = sdev_debug_cache_flags;
213	if (flags)
214		sdcmn_err(("cache debug flags 0x%x\n", flags));
215#endif	/* DEBUG */
216
217	ASSERT(sdev_node_cache == NULL);
218	sdev_node_cache = kmem_cache_create("sdev_node_cache",
219	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
220	    NULL, NULL, NULL, flags);
221}
222
223/* destroy sdev_node cache */
224void
225sdev_node_cache_fini()
226{
227	ASSERT(sdev_node_cache != NULL);
228	kmem_cache_destroy(sdev_node_cache);
229	sdev_node_cache = NULL;
230}
231
232/*
233 * Compare two nodes lexographically to balance avl tree
234 */
235static int
236sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
237{
238	int rv;
239	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
240		return (0);
241	return ((rv < 0) ? -1 : 1);
242}
243
244void
245sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
246{
247	ASSERT(dv);
248	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
249	dv->sdev_state = state;
250}
251
252static void
253sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
254{
255	timestruc_t	now;
256	struct vattr	*attrp;
257	uint_t		mask;
258
259	ASSERT(dv->sdev_attr);
260	ASSERT(vap);
261
262	attrp = dv->sdev_attr;
263	mask = vap->va_mask;
264	if (mask & AT_TYPE)
265		attrp->va_type = vap->va_type;
266	if (mask & AT_MODE)
267		attrp->va_mode = vap->va_mode;
268	if (mask & AT_UID)
269		attrp->va_uid = vap->va_uid;
270	if (mask & AT_GID)
271		attrp->va_gid = vap->va_gid;
272	if (mask & AT_RDEV)
273		attrp->va_rdev = vap->va_rdev;
274
275	gethrestime(&now);
276	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
277	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
278	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
279}
280
281static void
282sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
283{
284	ASSERT(dv->sdev_attr == NULL);
285	ASSERT(vap->va_mask & AT_TYPE);
286	ASSERT(vap->va_mask & AT_MODE);
287
288	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
289	sdev_attr_update(dv, vap);
290}
291
292/* alloc and initialize a sdev_node */
293int
294sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
295    vattr_t *vap)
296{
297	struct sdev_node *dv = NULL;
298	struct vnode *vp;
299	size_t nmlen, len;
300	devname_handle_t  *dhl;
301
302	nmlen = strlen(nm) + 1;
303	if (nmlen > MAXNAMELEN) {
304		sdcmn_err9(("sdev_nodeinit: node name %s"
305		    " too long\n", nm));
306		*newdv = NULL;
307		return (ENAMETOOLONG);
308	}
309
310	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
311
312	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
313	bcopy(nm, dv->sdev_name, nmlen);
314	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
315	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
316	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
317	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
318	/* overwritten for VLNK nodes */
319	dv->sdev_symlink = NULL;
320
321	vp = SDEVTOV(dv);
322	vn_reinit(vp);
323	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
324	if (vap)
325		vp->v_type = vap->va_type;
326
327	/*
328	 * initialized to the parent's vnodeops.
329	 * maybe overwriten for a VDIR
330	 */
331	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
332	vn_exists(vp);
333
334	dv->sdev_dotdot = NULL;
335	dv->sdev_attrvp = NULL;
336	if (vap) {
337		sdev_attr_alloc(dv, vap);
338	} else {
339		dv->sdev_attr = NULL;
340	}
341
342	dv->sdev_ino = sdev_mkino(dv);
343	dv->sdev_nlink = 0;		/* updated on insert */
344	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
345	dv->sdev_flags |= SDEV_BUILD;
346	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
347	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
348	if (SDEV_IS_GLOBAL(ddv)) {
349		dv->sdev_flags |= SDEV_GLOBAL;
350		dhl = &(dv->sdev_handle);
351		dhl->dh_data = dv;
352		dhl->dh_args = NULL;
353		sdev_set_no_negcache(dv);
354		dv->sdev_gdir_gen = 0;
355	} else {
356		dv->sdev_flags &= ~SDEV_GLOBAL;
357		dv->sdev_origin = NULL; /* set later */
358		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
359		dv->sdev_ldir_gen = 0;
360		dv->sdev_devtree_gen = 0;
361	}
362
363	rw_enter(&dv->sdev_contents, RW_WRITER);
364	sdev_set_nodestate(dv, SDEV_INIT);
365	rw_exit(&dv->sdev_contents);
366	*newdv = dv;
367
368	return (0);
369}
370
371/*
372 * transition a sdev_node into SDEV_READY state
373 */
374int
375sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
376    void *args, struct cred *cred)
377{
378	int error = 0;
379	struct vnode *vp = SDEVTOV(dv);
380	vtype_t type;
381
382	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
383
384	type = vap->va_type;
385	vp->v_type = type;
386	vp->v_rdev = vap->va_rdev;
387	rw_enter(&dv->sdev_contents, RW_WRITER);
388	if (type == VDIR) {
389		dv->sdev_nlink = 2;
390		dv->sdev_flags &= ~SDEV_PERSIST;
391		dv->sdev_flags &= ~SDEV_DYNAMIC;
392		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
393		ASSERT(dv->sdev_dotdot);
394		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
395		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
396		avl_create(&dv->sdev_entries,
397		    (int (*)(const void *, const void *))sdev_compare_nodes,
398		    sizeof (struct sdev_node),
399		    offsetof(struct sdev_node, sdev_avllink));
400	} else if (type == VLNK) {
401		ASSERT(args);
402		dv->sdev_nlink = 1;
403		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
404	} else {
405		dv->sdev_nlink = 1;
406	}
407
408	if (!(SDEV_IS_GLOBAL(dv))) {
409		dv->sdev_origin = (struct sdev_node *)args;
410		dv->sdev_flags &= ~SDEV_PERSIST;
411	}
412
413	/*
414	 * shadow node is created here OR
415	 * if failed (indicated by dv->sdev_attrvp == NULL),
416	 * created later in sdev_setattr
417	 */
418	if (avp) {
419		dv->sdev_attrvp = avp;
420	} else {
421		if (dv->sdev_attr == NULL) {
422			sdev_attr_alloc(dv, vap);
423		} else {
424			sdev_attr_update(dv, vap);
425		}
426
427		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
428			error = sdev_shadow_node(dv, cred);
429	}
430
431	if (error == 0) {
432		/* transition to READY state */
433		sdev_set_nodestate(dv, SDEV_READY);
434		sdev_nc_node_exists(dv);
435	} else {
436		sdev_set_nodestate(dv, SDEV_ZOMBIE);
437	}
438	rw_exit(&dv->sdev_contents);
439	return (error);
440}
441
442/*
443 * setting ZOMBIE state
444 */
445static int
446sdev_nodezombied(struct sdev_node *dv)
447{
448	rw_enter(&dv->sdev_contents, RW_WRITER);
449	sdev_set_nodestate(dv, SDEV_ZOMBIE);
450	rw_exit(&dv->sdev_contents);
451	return (0);
452}
453
454/*
455 * Build the VROOT sdev_node.
456 */
457/*ARGSUSED*/
458struct sdev_node *
459sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
460    struct vnode *avp, struct cred *cred)
461{
462	struct sdev_node *dv;
463	struct vnode *vp;
464	char devdir[] = "/dev";
465
466	ASSERT(sdev_node_cache != NULL);
467	ASSERT(avp);
468	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
469	vp = SDEVTOV(dv);
470	vn_reinit(vp);
471	vp->v_flag |= VROOT;
472	vp->v_vfsp = vfsp;
473	vp->v_type = VDIR;
474	vp->v_rdev = devdev;
475	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
476	vn_exists(vp);
477
478	if (vfsp->vfs_mntpt)
479		dv->sdev_name = i_ddi_strdup(
480		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
481	else
482		/* vfs_mountdev1 set mount point later */
483		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
484	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
485	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
486	dv->sdev_ino = SDEV_ROOTINO;
487	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
488	dv->sdev_dotdot = dv;		/* .. == self */
489	dv->sdev_attrvp = avp;
490	dv->sdev_attr = NULL;
491	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
492	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
493	if (strcmp(dv->sdev_name, "/dev") == 0) {
494		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
495		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
496		dv->sdev_gdir_gen = 0;
497	} else {
498		dv->sdev_flags = SDEV_BUILD;
499		dv->sdev_flags &= ~SDEV_PERSIST;
500		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
501		dv->sdev_ldir_gen = 0;
502		dv->sdev_devtree_gen = 0;
503	}
504
505	avl_create(&dv->sdev_entries,
506	    (int (*)(const void *, const void *))sdev_compare_nodes,
507	    sizeof (struct sdev_node),
508	    offsetof(struct sdev_node, sdev_avllink));
509
510	rw_enter(&dv->sdev_contents, RW_WRITER);
511	sdev_set_nodestate(dv, SDEV_READY);
512	rw_exit(&dv->sdev_contents);
513	sdev_nc_node_exists(dv);
514	return (dv);
515}
516
517/* directory dependent vop table */
518struct sdev_vop_table {
519	char *vt_name;				/* subdirectory name */
520	const fs_operation_def_t *vt_service;	/* vnodeops table */
521	struct vnodeops *vt_vops;		/* constructed vop */
522	struct vnodeops **vt_global_vops;	/* global container for vop */
523	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
524	int vt_flags;
525};
526
527/*
528 * A nice improvement would be to provide a plug-in mechanism
529 * for this table instead of a const table.
530 */
531static struct sdev_vop_table vtab[] =
532{
533	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
534	SDEV_DYNAMIC | SDEV_VTOR },
535
536	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
537	SDEV_DYNAMIC | SDEV_VTOR },
538
539	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
540	devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
541
542	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
543
544	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
545	SDEV_DYNAMIC | SDEV_VTOR },
546
547	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
548	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
549
550	/*
551	 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
552	 * lofi driver controls child nodes.
553	 *
554	 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
555	 * stale nodes (e.g. from devfsadm -R).
556	 *
557	 * In addition, devfsadm knows not to attempt a rmdir: a zone
558	 * may hold a reference, which would zombify the node,
559	 * preventing a mkdir.
560	 */
561
562	{ "lofi", NULL, NULL, NULL, NULL,
563	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
564	{ "rlofi", NULL, NULL, NULL, NULL,
565	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
566
567	{ NULL, NULL, NULL, NULL, NULL, 0}
568};
569
570struct sdev_vop_table *
571sdev_match(struct sdev_node *dv)
572{
573	int vlen;
574	int i;
575
576	for (i = 0; vtab[i].vt_name; i++) {
577		if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0)
578			return (&vtab[i]);
579		if (vtab[i].vt_flags & SDEV_SUBDIR) {
580			char *ptr;
581
582			ASSERT(strlen(dv->sdev_path) > 5);
583			ptr = dv->sdev_path + 5;
584			vlen = strlen(vtab[i].vt_name);
585			if ((strncmp(vtab[i].vt_name, ptr,
586			    vlen - 1) == 0) && ptr[vlen] == '/')
587				return (&vtab[i]);
588		}
589
590	}
591	return (NULL);
592}
593
594/*
595 *  sets a directory's vnodeops if the directory is in the vtab;
596 */
597static struct vnodeops *
598sdev_get_vop(struct sdev_node *dv)
599{
600	struct sdev_vop_table *vtp;
601	char *path;
602
603	path = dv->sdev_path;
604	ASSERT(path);
605
606	/* gets the relative path to /dev/ */
607	path += 5;
608
609	/* gets the vtab entry it matches */
610	if ((vtp = sdev_match(dv)) != NULL) {
611		dv->sdev_flags |= vtp->vt_flags;
612
613		if (vtp->vt_vops) {
614			if (vtp->vt_global_vops)
615				*(vtp->vt_global_vops) = vtp->vt_vops;
616			return (vtp->vt_vops);
617		}
618
619		if (vtp->vt_service) {
620			fs_operation_def_t *templ;
621			templ = sdev_merge_vtab(vtp->vt_service);
622			if (vn_make_ops(vtp->vt_name,
623			    (const fs_operation_def_t *)templ,
624			    &vtp->vt_vops) != 0) {
625				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
626				    vtp->vt_name);
627				/*NOTREACHED*/
628			}
629			if (vtp->vt_global_vops) {
630				*(vtp->vt_global_vops) = vtp->vt_vops;
631			}
632			sdev_free_vtab(templ);
633			return (vtp->vt_vops);
634		}
635		return (sdev_vnodeops);
636	}
637
638	/* child inherits the persistence of the parent */
639	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
640		dv->sdev_flags |= SDEV_PERSIST;
641
642	return (sdev_vnodeops);
643}
644
645static void
646sdev_set_no_negcache(struct sdev_node *dv)
647{
648	int i;
649	char *path;
650
651	ASSERT(dv->sdev_path);
652	path = dv->sdev_path + strlen("/dev/");
653
654	for (i = 0; vtab[i].vt_name; i++) {
655		if (strcmp(vtab[i].vt_name, path) == 0) {
656			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
657				dv->sdev_flags |= SDEV_NO_NCACHE;
658			break;
659		}
660	}
661}
662
663void *
664sdev_get_vtor(struct sdev_node *dv)
665{
666	struct sdev_vop_table *vtp;
667
668	vtp = sdev_match(dv);
669	if (vtp)
670		return ((void *)vtp->vt_vtor);
671	else
672		return (NULL);
673}
674
675/*
676 * Build the base root inode
677 */
678ino_t
679sdev_mkino(struct sdev_node *dv)
680{
681	ino_t	ino;
682
683	/*
684	 * for now, follow the lead of tmpfs here
685	 * need to someday understand the requirements here
686	 */
687	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
688	ino += SDEV_ROOTINO + 1;
689
690	return (ino);
691}
692
693int
694sdev_getlink(struct vnode *linkvp, char **link)
695{
696	int err;
697	char *buf;
698	struct uio uio = {0};
699	struct iovec iov = {0};
700
701	if (linkvp == NULL)
702		return (ENOENT);
703	ASSERT(linkvp->v_type == VLNK);
704
705	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
706	iov.iov_base = buf;
707	iov.iov_len = MAXPATHLEN;
708	uio.uio_iov = &iov;
709	uio.uio_iovcnt = 1;
710	uio.uio_resid = MAXPATHLEN;
711	uio.uio_segflg = UIO_SYSSPACE;
712	uio.uio_llimit = MAXOFFSET_T;
713
714	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
715	if (err) {
716		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
717		kmem_free(buf, MAXPATHLEN);
718		return (ENOENT);
719	}
720
721	/* mission complete */
722	*link = i_ddi_strdup(buf, KM_SLEEP);
723	kmem_free(buf, MAXPATHLEN);
724	return (0);
725}
726
727/*
728 * A convenient wrapper to get the devfs node vnode for a device
729 * minor functionality: readlink() of a /dev symlink
730 * Place the link into dv->sdev_symlink
731 */
732static int
733sdev_follow_link(struct sdev_node *dv)
734{
735	int err;
736	struct vnode *linkvp;
737	char *link = NULL;
738
739	linkvp = SDEVTOV(dv);
740	if (linkvp == NULL)
741		return (ENOENT);
742	ASSERT(linkvp->v_type == VLNK);
743	err = sdev_getlink(linkvp, &link);
744	if (err) {
745		(void) sdev_nodezombied(dv);
746		dv->sdev_symlink = NULL;
747		return (ENOENT);
748	}
749
750	ASSERT(link != NULL);
751	dv->sdev_symlink = link;
752	return (0);
753}
754
755static int
756sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
757{
758	vtype_t otype = SDEVTOV(dv)->v_type;
759
760	/*
761	 * existing sdev_node has a different type.
762	 */
763	if (otype != nvap->va_type) {
764		sdcmn_err9(("sdev_node_check: existing node "
765		    "  %s type %d does not match new node type %d\n",
766		    dv->sdev_name, otype, nvap->va_type));
767		return (EEXIST);
768	}
769
770	/*
771	 * For a symlink, the target should be the same.
772	 */
773	if (otype == VLNK) {
774		ASSERT(nargs != NULL);
775		ASSERT(dv->sdev_symlink != NULL);
776		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
777			sdcmn_err9(("sdev_node_check: existing node "
778			    " %s has different symlink %s as new node "
779			    " %s\n", dv->sdev_name, dv->sdev_symlink,
780			    (char *)nargs));
781			return (EEXIST);
782		}
783	}
784
785	return (0);
786}
787
788/*
789 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
790 *
791 * arguments:
792 *	- ddv (parent)
793 *	- nm (child name)
794 *	- newdv (sdev_node for nm is returned here)
795 *	- vap (vattr for the node to be created, va_type should be set.
796 *	- avp (attribute vnode)
797 *	  the defaults should be used if unknown)
798 *	- cred
799 *	- args
800 *	    . tnm (for VLNK)
801 *	    . global sdev_node (for !SDEV_GLOBAL)
802 * 	- state: SDEV_INIT, SDEV_READY
803 *
804 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
805 *
806 * NOTE:  directory contents writers lock needs to be held before
807 *	  calling this routine.
808 */
809int
810sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
811    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
812    sdev_node_state_t state)
813{
814	int error = 0;
815	sdev_node_state_t node_state;
816	struct sdev_node *dv = NULL;
817
818	ASSERT(state != SDEV_ZOMBIE);
819	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
820
821	if (*newdv) {
822		dv = *newdv;
823	} else {
824		/* allocate and initialize a sdev_node */
825		if (ddv->sdev_state == SDEV_ZOMBIE) {
826			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
827			    ddv->sdev_path));
828			return (ENOENT);
829		}
830
831		error = sdev_nodeinit(ddv, nm, &dv, vap);
832		if (error != 0) {
833			sdcmn_err9(("sdev_mknode: error %d,"
834			    " name %s can not be initialized\n",
835			    error, nm));
836			return (error);
837		}
838		ASSERT(dv);
839
840		/* insert into the directory cache */
841		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
842		if (error) {
843			sdcmn_err9(("sdev_mknode: node %s can not"
844			    " be added into directory cache\n", nm));
845			return (ENOENT);
846		}
847	}
848
849	ASSERT(dv);
850	node_state = dv->sdev_state;
851	ASSERT(node_state != SDEV_ZOMBIE);
852
853	if (state == SDEV_READY) {
854		switch (node_state) {
855		case SDEV_INIT:
856			error = sdev_nodeready(dv, vap, avp, args, cred);
857			if (error) {
858				sdcmn_err9(("sdev_mknode: node %s can NOT"
859				    " be transitioned into READY state, "
860				    "error %d\n", nm, error));
861			}
862			break;
863		case SDEV_READY:
864			/*
865			 * Do some sanity checking to make sure
866			 * the existing sdev_node is what has been
867			 * asked for.
868			 */
869			error = sdev_node_check(dv, vap, args);
870			break;
871		default:
872			break;
873		}
874	}
875
876	if (!error) {
877		*newdv = dv;
878		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
879	} else {
880		SDEV_SIMPLE_RELE(dv);
881		*newdv = NULL;
882	}
883
884	return (error);
885}
886
887/*
888 * convenient wrapper to change vp's ATIME, CTIME and MTIME
889 */
890void
891sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
892{
893	struct vattr attr;
894	timestruc_t now;
895	int err;
896
897	ASSERT(vp);
898	gethrestime(&now);
899	if (mask & AT_CTIME)
900		attr.va_ctime = now;
901	if (mask & AT_MTIME)
902		attr.va_mtime = now;
903	if (mask & AT_ATIME)
904		attr.va_atime = now;
905
906	attr.va_mask = (mask & AT_TIMES);
907	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
908	if (err && (err != EROFS)) {
909		sdcmn_err(("update timestamps error %d\n", err));
910	}
911}
912
913/*
914 * the backing store vnode is released here
915 */
916/*ARGSUSED1*/
917void
918sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
919{
920	/* no references */
921	ASSERT(dv->sdev_nlink == 0);
922
923	if (dv->sdev_attrvp != NULLVP) {
924		VN_RELE(dv->sdev_attrvp);
925		/*
926		 * reset the attrvp so that no more
927		 * references can be made on this already
928		 * vn_rele() vnode
929		 */
930		dv->sdev_attrvp = NULLVP;
931	}
932
933	if (dv->sdev_attr != NULL) {
934		kmem_free(dv->sdev_attr, sizeof (struct vattr));
935		dv->sdev_attr = NULL;
936	}
937
938	if (dv->sdev_name != NULL) {
939		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
940		dv->sdev_name = NULL;
941	}
942
943	if (dv->sdev_symlink != NULL) {
944		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
945		dv->sdev_symlink = NULL;
946	}
947
948	if (dv->sdev_path) {
949		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
950		dv->sdev_path = NULL;
951	}
952
953	if (!SDEV_IS_GLOBAL(dv))
954		sdev_prof_free(dv);
955
956	if (SDEVTOV(dv)->v_type == VDIR) {
957		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
958		avl_destroy(&dv->sdev_entries);
959	}
960
961	mutex_destroy(&dv->sdev_lookup_lock);
962	cv_destroy(&dv->sdev_lookup_cv);
963
964	/* return node to initial state as per constructor */
965	(void) memset((void *)&dv->sdev_instance_data, 0,
966	    sizeof (dv->sdev_instance_data));
967	vn_invalid(SDEVTOV(dv));
968	kmem_cache_free(sdev_node_cache, dv);
969}
970
971/*
972 * DIRECTORY CACHE lookup
973 */
974struct sdev_node *
975sdev_findbyname(struct sdev_node *ddv, char *nm)
976{
977	struct sdev_node *dv;
978	struct sdev_node dvtmp;
979	avl_index_t	where;
980
981	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
982
983	dvtmp.sdev_name = nm;
984	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
985	if (dv) {
986		ASSERT(dv->sdev_dotdot == ddv);
987		ASSERT(strcmp(dv->sdev_name, nm) == 0);
988		SDEV_HOLD(dv);
989		return (dv);
990	}
991	return (NULL);
992}
993
994/*
995 * Inserts a new sdev_node in a parent directory
996 */
997void
998sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
999{
1000	avl_index_t where;
1001
1002	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1003	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1004	ASSERT(ddv->sdev_nlink >= 2);
1005	ASSERT(dv->sdev_nlink == 0);
1006
1007	dv->sdev_dotdot = ddv;
1008	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1009	avl_insert(&ddv->sdev_entries, dv, where);
1010	ddv->sdev_nlink++;
1011}
1012
1013/*
1014 * The following check is needed because while sdev_nodes are linked
1015 * in SDEV_INIT state, they have their link counts incremented only
1016 * in SDEV_READY state.
1017 */
1018static void
1019decr_link(struct sdev_node *dv)
1020{
1021	if (dv->sdev_state != SDEV_INIT)
1022		dv->sdev_nlink--;
1023	else
1024		ASSERT(dv->sdev_nlink == 0);
1025}
1026
1027/*
1028 * Delete an existing dv from directory cache
1029 *
1030 * In the case of a node is still held by non-zero reference count,
1031 *     the node is put into ZOMBIE state. Once the reference count
1032 *     reaches "0", the node is unlinked and destroyed,
1033 *     in sdev_inactive().
1034 */
1035static int
1036sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1037{
1038	struct vnode *vp;
1039
1040	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1041
1042	vp = SDEVTOV(dv);
1043	mutex_enter(&vp->v_lock);
1044
1045	/* dv is held still */
1046	if (vp->v_count > 1) {
1047		rw_enter(&dv->sdev_contents, RW_WRITER);
1048		if (dv->sdev_state == SDEV_READY) {
1049			sdcmn_err9((
1050			    "sdev_dirdelete: node %s busy with count %d\n",
1051			    dv->sdev_name, vp->v_count));
1052			dv->sdev_state = SDEV_ZOMBIE;
1053		}
1054		rw_exit(&dv->sdev_contents);
1055		--vp->v_count;
1056		mutex_exit(&vp->v_lock);
1057		return (EBUSY);
1058	}
1059	ASSERT(vp->v_count == 1);
1060
1061	/* unlink from the memory cache */
1062	ddv->sdev_nlink--;	/* .. to above */
1063	if (vp->v_type == VDIR) {
1064		decr_link(dv);		/* . to self */
1065	}
1066
1067	avl_remove(&ddv->sdev_entries, dv);
1068	decr_link(dv);	/* name, back to zero */
1069	vp->v_count--;
1070	mutex_exit(&vp->v_lock);
1071
1072	/* destroy the node */
1073	sdev_nodedestroy(dv, 0);
1074	return (0);
1075}
1076
1077/*
1078 * check if the source is in the path of the target
1079 *
1080 * source and target are different
1081 */
1082/*ARGSUSED2*/
1083static int
1084sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1085{
1086	int error = 0;
1087	struct sdev_node *dotdot, *dir;
1088
1089	dotdot = tdv->sdev_dotdot;
1090	ASSERT(dotdot);
1091
1092	/* fs root */
1093	if (dotdot == tdv) {
1094		return (0);
1095	}
1096
1097	for (;;) {
1098		/*
1099		 * avoid error cases like
1100		 *	mv a a/b
1101		 *	mv a a/b/c
1102		 *	etc.
1103		 */
1104		if (dotdot == sdv) {
1105			error = EINVAL;
1106			break;
1107		}
1108
1109		dir = dotdot;
1110		dotdot = dir->sdev_dotdot;
1111
1112		/* done checking because root is reached */
1113		if (dir == dotdot) {
1114			break;
1115		}
1116	}
1117	return (error);
1118}
1119
1120int
1121sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1122    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1123    struct cred *cred)
1124{
1125	int error = 0;
1126	struct vnode *ovp = SDEVTOV(odv);
1127	struct vnode *nvp;
1128	struct vattr vattr;
1129	int doingdir = (ovp->v_type == VDIR);
1130	char *link = NULL;
1131	int samedir = (oddv == nddv) ? 1 : 0;
1132	int bkstore = 0;
1133	struct sdev_node *idv = NULL;
1134	struct sdev_node *ndv = NULL;
1135	timestruc_t now;
1136
1137	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1138	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1139	if (error)
1140		return (error);
1141
1142	if (!samedir)
1143		rw_enter(&oddv->sdev_contents, RW_WRITER);
1144	rw_enter(&nddv->sdev_contents, RW_WRITER);
1145
1146	/*
1147	 * the source may have been deleted by another thread before
1148	 * we gets here.
1149	 */
1150	if (odv->sdev_state != SDEV_READY) {
1151		error = ENOENT;
1152		goto err_out;
1153	}
1154
1155	if (doingdir && (odv == nddv)) {
1156		error = EINVAL;
1157		goto err_out;
1158	}
1159
1160	/*
1161	 * If renaming a directory, and the parents are different (".." must be
1162	 * changed) then the source dir must not be in the dir hierarchy above
1163	 * the target since it would orphan everything below the source dir.
1164	 */
1165	if (doingdir && (oddv != nddv)) {
1166		error = sdev_checkpath(odv, nddv, cred);
1167		if (error)
1168			goto err_out;
1169	}
1170
1171	/* destination existing */
1172	if (*ndvp) {
1173		nvp = SDEVTOV(*ndvp);
1174		ASSERT(nvp);
1175
1176		/* handling renaming to itself */
1177		if (odv == *ndvp) {
1178			error = 0;
1179			goto err_out;
1180		}
1181
1182		if (nvp->v_type == VDIR) {
1183			if (!doingdir) {
1184				error = EISDIR;
1185				goto err_out;
1186			}
1187
1188			if (vn_vfswlock(nvp)) {
1189				error = EBUSY;
1190				goto err_out;
1191			}
1192
1193			if (vn_mountedvfs(nvp) != NULL) {
1194				vn_vfsunlock(nvp);
1195				error = EBUSY;
1196				goto err_out;
1197			}
1198
1199			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
1200			if ((*ndvp)->sdev_nlink > 2) {
1201				vn_vfsunlock(nvp);
1202				error = EEXIST;
1203				goto err_out;
1204			}
1205			vn_vfsunlock(nvp);
1206
1207			(void) sdev_dirdelete(nddv, *ndvp);
1208			*ndvp = NULL;
1209			ASSERT(nddv->sdev_attrvp);
1210			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1211			    nddv->sdev_attrvp, cred, NULL, 0);
1212			if (error)
1213				goto err_out;
1214		} else {
1215			if (doingdir) {
1216				error = ENOTDIR;
1217				goto err_out;
1218			}
1219
1220			if (SDEV_IS_PERSIST((*ndvp))) {
1221				bkstore = 1;
1222			}
1223
1224			/*
1225			 * get rid of the node from the directory cache
1226			 * note, in case EBUSY is returned, the ZOMBIE
1227			 * node is taken care in sdev_mknode.
1228			 */
1229			(void) sdev_dirdelete(nddv, *ndvp);
1230			*ndvp = NULL;
1231			if (bkstore) {
1232				ASSERT(nddv->sdev_attrvp);
1233				error = VOP_REMOVE(nddv->sdev_attrvp,
1234				    nnm, cred, NULL, 0);
1235				if (error)
1236					goto err_out;
1237			}
1238		}
1239	}
1240
1241	/* fix the source for a symlink */
1242	if (vattr.va_type == VLNK) {
1243		if (odv->sdev_symlink == NULL) {
1244			error = sdev_follow_link(odv);
1245			if (error) {
1246				error = ENOENT;
1247				goto err_out;
1248			}
1249		}
1250		ASSERT(odv->sdev_symlink);
1251		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1252	}
1253
1254	/*
1255	 * make a fresh node from the source attrs
1256	 */
1257	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1258	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1259	    NULL, (void *)link, cred, SDEV_READY);
1260
1261	if (link)
1262		kmem_free(link, strlen(link) + 1);
1263
1264	if (error)
1265		goto err_out;
1266	ASSERT(*ndvp);
1267	ASSERT((*ndvp)->sdev_state == SDEV_READY);
1268
1269	/* move dir contents */
1270	if (doingdir) {
1271		for (idv = SDEV_FIRST_ENTRY(odv); idv;
1272		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
1273			error = sdev_rnmnode(odv, idv,
1274			    (struct sdev_node *)(*ndvp), &ndv,
1275			    idv->sdev_name, cred);
1276			if (error)
1277				goto err_out;
1278			ndv = NULL;
1279		}
1280	}
1281
1282	if ((*ndvp)->sdev_attrvp) {
1283		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1284		    AT_CTIME|AT_ATIME);
1285	} else {
1286		ASSERT((*ndvp)->sdev_attr);
1287		gethrestime(&now);
1288		(*ndvp)->sdev_attr->va_ctime = now;
1289		(*ndvp)->sdev_attr->va_atime = now;
1290	}
1291
1292	if (nddv->sdev_attrvp) {
1293		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1294		    AT_MTIME|AT_ATIME);
1295	} else {
1296		ASSERT(nddv->sdev_attr);
1297		gethrestime(&now);
1298		nddv->sdev_attr->va_mtime = now;
1299		nddv->sdev_attr->va_atime = now;
1300	}
1301	rw_exit(&nddv->sdev_contents);
1302	if (!samedir)
1303		rw_exit(&oddv->sdev_contents);
1304
1305	SDEV_RELE(*ndvp);
1306	return (error);
1307
1308err_out:
1309	rw_exit(&nddv->sdev_contents);
1310	if (!samedir)
1311		rw_exit(&oddv->sdev_contents);
1312	return (error);
1313}
1314
1315/*
1316 * Merge sdev_node specific information into an attribute structure.
1317 *
1318 * note: sdev_node is not locked here
1319 */
1320void
1321sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1322{
1323	struct vnode *vp = SDEVTOV(dv);
1324
1325	vap->va_nlink = dv->sdev_nlink;
1326	vap->va_nodeid = dv->sdev_ino;
1327	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1328	vap->va_type = vp->v_type;
1329
1330	if (vp->v_type == VDIR) {
1331		vap->va_rdev = 0;
1332		vap->va_fsid = vp->v_rdev;
1333	} else if (vp->v_type == VLNK) {
1334		vap->va_rdev = 0;
1335		vap->va_mode  &= ~S_IFMT;
1336		vap->va_mode |= S_IFLNK;
1337	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1338		vap->va_rdev = vp->v_rdev;
1339		vap->va_mode &= ~S_IFMT;
1340		if (vap->va_type == VCHR)
1341			vap->va_mode |= S_IFCHR;
1342		else
1343			vap->va_mode |= S_IFBLK;
1344	} else {
1345		vap->va_rdev = 0;
1346	}
1347}
1348
1349struct vattr *
1350sdev_getdefault_attr(enum vtype type)
1351{
1352	if (type == VDIR)
1353		return (&sdev_vattr_dir);
1354	else if (type == VCHR)
1355		return (&sdev_vattr_chr);
1356	else if (type == VBLK)
1357		return (&sdev_vattr_blk);
1358	else if (type == VLNK)
1359		return (&sdev_vattr_lnk);
1360	else
1361		return (NULL);
1362}
1363int
1364sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1365{
1366	int rv = 0;
1367	struct vnode *vp = SDEVTOV(dv);
1368
1369	switch (vp->v_type) {
1370	case VCHR:
1371	case VBLK:
1372		/*
1373		 * If vnode is a device, return special vnode instead
1374		 * (though it knows all about -us- via sp->s_realvp)
1375		 */
1376		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1377		VN_RELE(vp);
1378		if (*vpp == NULLVP)
1379			rv = ENOSYS;
1380		break;
1381	default:	/* most types are returned as is */
1382		*vpp = vp;
1383		break;
1384	}
1385	return (rv);
1386}
1387
1388/*
1389 * junction between devname and root file system, e.g. ufs
1390 */
1391int
1392devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1393{
1394	struct vnode *rdvp = ddv->sdev_attrvp;
1395	int rval = 0;
1396
1397	ASSERT(rdvp);
1398
1399	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1400	    NULL);
1401	return (rval);
1402}
1403
1404static int
1405sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1406{
1407	struct sdev_node *dv = NULL;
1408	char	*nm;
1409	struct vnode *dirvp;
1410	int	error;
1411	vnode_t	*vp;
1412	int eof;
1413	struct iovec iov;
1414	struct uio uio;
1415	struct dirent64 *dp;
1416	dirent64_t *dbuf;
1417	size_t dbuflen;
1418	struct vattr vattr;
1419	char *link = NULL;
1420
1421	if (ddv->sdev_attrvp == NULL)
1422		return (0);
1423	if (!(ddv->sdev_flags & SDEV_BUILD))
1424		return (0);
1425
1426	dirvp = ddv->sdev_attrvp;
1427	VN_HOLD(dirvp);
1428	dbuf = kmem_zalloc(dlen, KM_SLEEP);
1429
1430	uio.uio_iov = &iov;
1431	uio.uio_iovcnt = 1;
1432	uio.uio_segflg = UIO_SYSSPACE;
1433	uio.uio_fmode = 0;
1434	uio.uio_extflg = UIO_COPY_CACHED;
1435	uio.uio_loffset = 0;
1436	uio.uio_llimit = MAXOFFSET_T;
1437
1438	eof = 0;
1439	error = 0;
1440	while (!error && !eof) {
1441		uio.uio_resid = dlen;
1442		iov.iov_base = (char *)dbuf;
1443		iov.iov_len = dlen;
1444		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1445		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1446		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1447
1448		dbuflen = dlen - uio.uio_resid;
1449		if (error || dbuflen == 0)
1450			break;
1451
1452		if (!(ddv->sdev_flags & SDEV_BUILD))
1453			break;
1454
1455		for (dp = dbuf; ((intptr_t)dp <
1456		    (intptr_t)dbuf + dbuflen);
1457		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1458			nm = dp->d_name;
1459
1460			if (strcmp(nm, ".") == 0 ||
1461			    strcmp(nm, "..") == 0)
1462				continue;
1463
1464			vp = NULLVP;
1465			dv = sdev_cache_lookup(ddv, nm);
1466			if (dv) {
1467				if (dv->sdev_state != SDEV_ZOMBIE) {
1468					SDEV_SIMPLE_RELE(dv);
1469				} else {
1470					/*
1471					 * A ZOMBIE node may not have been
1472					 * cleaned up from the backing store,
1473					 * bypass this entry in this case,
1474					 * and clean it up from the directory
1475					 * cache if this is the last call.
1476					 */
1477					(void) sdev_dirdelete(ddv, dv);
1478				}
1479				continue;
1480			}
1481
1482			/* refill the cache if not already */
1483			error = devname_backstore_lookup(ddv, nm, &vp);
1484			if (error)
1485				continue;
1486
1487			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1488			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1489			if (error)
1490				continue;
1491
1492			if (vattr.va_type == VLNK) {
1493				error = sdev_getlink(vp, &link);
1494				if (error) {
1495					continue;
1496				}
1497				ASSERT(link != NULL);
1498			}
1499
1500			if (!rw_tryupgrade(&ddv->sdev_contents)) {
1501				rw_exit(&ddv->sdev_contents);
1502				rw_enter(&ddv->sdev_contents, RW_WRITER);
1503			}
1504			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1505			    cred, SDEV_READY);
1506			rw_downgrade(&ddv->sdev_contents);
1507
1508			if (link != NULL) {
1509				kmem_free(link, strlen(link) + 1);
1510				link = NULL;
1511			}
1512
1513			if (!error) {
1514				ASSERT(dv);
1515				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1516				SDEV_SIMPLE_RELE(dv);
1517			}
1518			vp = NULL;
1519			dv = NULL;
1520		}
1521	}
1522
1523done:
1524	VN_RELE(dirvp);
1525	kmem_free(dbuf, dlen);
1526
1527	return (error);
1528}
1529
1530void
1531sdev_filldir_dynamic(struct sdev_node *ddv)
1532{
1533	int error;
1534	int i;
1535	struct vattr vattr;
1536	struct vattr *vap = &vattr;
1537	char *nm = NULL;
1538	struct sdev_node *dv = NULL;
1539
1540	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1541	ASSERT((ddv->sdev_flags & SDEV_BUILD));
1542
1543	*vap = *sdev_getdefault_attr(VDIR);	/* note structure copy here */
1544	gethrestime(&vap->va_atime);
1545	vap->va_mtime = vap->va_atime;
1546	vap->va_ctime = vap->va_atime;
1547	for (i = 0; vtab[i].vt_name != NULL; i++) {
1548		/*
1549		 * This early, we may be in a read-only /dev
1550		 * environment: leave the creation of any nodes we'd
1551		 * attempt to persist to devfsadm.
1552		 */
1553		if (vtab[i].vt_flags & SDEV_PERSIST)
1554			continue;
1555		nm = vtab[i].vt_name;
1556		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1557		dv = NULL;
1558		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1559		    NULL, kcred, SDEV_READY);
1560		if (error) {
1561			cmn_err(CE_WARN, "%s/%s: error %d\n",
1562			    ddv->sdev_name, nm, error);
1563		} else {
1564			ASSERT(dv);
1565			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1566			SDEV_SIMPLE_RELE(dv);
1567		}
1568	}
1569}
1570
1571/*
1572 * Creating a backing store entry based on sdev_attr.
1573 * This is called either as part of node creation in a persistent directory
1574 * or from setattr/setsecattr to persist access attributes across reboot.
1575 */
1576int
1577sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1578{
1579	int error = 0;
1580	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1581	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1582	struct vattr *vap = dv->sdev_attr;
1583	char *nm = dv->sdev_name;
1584	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1585
1586	ASSERT(dv && dv->sdev_name && rdvp);
1587	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1588
1589lookup:
1590	/* try to find it in the backing store */
1591	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1592	    NULL);
1593	if (error == 0) {
1594		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1595			VN_HOLD(rrvp);
1596			VN_RELE(*rvp);
1597			*rvp = rrvp;
1598		}
1599
1600		kmem_free(dv->sdev_attr, sizeof (vattr_t));
1601		dv->sdev_attr = NULL;
1602		dv->sdev_attrvp = *rvp;
1603		return (0);
1604	}
1605
1606	/* let's try to persist the node */
1607	gethrestime(&vap->va_atime);
1608	vap->va_mtime = vap->va_atime;
1609	vap->va_ctime = vap->va_atime;
1610	vap->va_mask |= AT_TYPE|AT_MODE;
1611	switch (vap->va_type) {
1612	case VDIR:
1613		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1614		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1615		    (void *)(*rvp), error));
1616		break;
1617	case VCHR:
1618	case VBLK:
1619	case VREG:
1620	case VDOOR:
1621		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1622		    rvp, cred, 0, NULL, NULL);
1623		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1624		    (void *)(*rvp), error));
1625		if (!error)
1626			VN_RELE(*rvp);
1627		break;
1628	case VLNK:
1629		ASSERT(dv->sdev_symlink);
1630		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1631		    NULL, 0);
1632		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1633		    error));
1634		break;
1635	default:
1636		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1637		    "create\n", nm);
1638		/*NOTREACHED*/
1639	}
1640
1641	/* go back to lookup to factor out spec node and set attrvp */
1642	if (error == 0)
1643		goto lookup;
1644
1645	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1646	return (error);
1647}
1648
1649static int
1650sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1651{
1652	int error = 0;
1653	struct sdev_node *dup = NULL;
1654
1655	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1656	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1657		sdev_direnter(ddv, *dv);
1658	} else {
1659		if (dup->sdev_state == SDEV_ZOMBIE) {
1660			error = sdev_dirdelete(ddv, dup);
1661			/*
1662			 * The ZOMBIE node is still hanging
1663			 * around with more than one reference counts.
1664			 * Fail the new node creation so that
1665			 * the directory cache won't have
1666			 * duplicate entries for the same named node
1667			 */
1668			if (error == EBUSY) {
1669				SDEV_SIMPLE_RELE(*dv);
1670				sdev_nodedestroy(*dv, 0);
1671				*dv = NULL;
1672				return (error);
1673			}
1674			sdev_direnter(ddv, *dv);
1675		} else {
1676			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
1677			SDEV_SIMPLE_RELE(*dv);
1678			sdev_nodedestroy(*dv, 0);
1679			*dv = dup;
1680		}
1681	}
1682
1683	return (0);
1684}
1685
1686static int
1687sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1688{
1689	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1690	return (sdev_dirdelete(ddv, *dv));
1691}
1692
1693/*
1694 * update the in-core directory cache
1695 */
1696int
1697sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1698    sdev_cache_ops_t ops)
1699{
1700	int error = 0;
1701
1702	ASSERT((SDEV_HELD(*dv)));
1703
1704	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1705	switch (ops) {
1706	case SDEV_CACHE_ADD:
1707		error = sdev_cache_add(ddv, dv, nm);
1708		break;
1709	case SDEV_CACHE_DELETE:
1710		error = sdev_cache_delete(ddv, dv);
1711		break;
1712	default:
1713		break;
1714	}
1715
1716	return (error);
1717}
1718
1719/*
1720 * retrieve the named entry from the directory cache
1721 */
1722struct sdev_node *
1723sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1724{
1725	struct sdev_node *dv = NULL;
1726
1727	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1728	dv = sdev_findbyname(ddv, nm);
1729
1730	return (dv);
1731}
1732
1733/*
1734 * Implicit reconfig for nodes constructed by a link generator
1735 * Start devfsadm if needed, or if devfsadm is in progress,
1736 * prepare to block on devfsadm either completing or
1737 * constructing the desired node.  As devfsadmd is global
1738 * in scope, constructing all necessary nodes, we only
1739 * need to initiate it once.
1740 */
1741static int
1742sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1743{
1744	int error = 0;
1745
1746	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1747		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1748		    ddv->sdev_name, nm, devfsadm_state));
1749		mutex_enter(&dv->sdev_lookup_lock);
1750		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1751		mutex_exit(&dv->sdev_lookup_lock);
1752		error = 0;
1753	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1754		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1755		    ddv->sdev_name, nm, devfsadm_state));
1756
1757		sdev_devfsadmd_thread(ddv, dv, kcred);
1758		mutex_enter(&dv->sdev_lookup_lock);
1759		SDEV_BLOCK_OTHERS(dv,
1760		    (SDEV_LOOKUP | SDEV_LGWAITING));
1761		mutex_exit(&dv->sdev_lookup_lock);
1762		error = 0;
1763	} else {
1764		error = -1;
1765	}
1766
1767	return (error);
1768}
1769
1770/*
1771 *  Support for specialized device naming construction mechanisms
1772 */
1773static int
1774sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1775    int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1776    void *, char *), int flags, struct cred *cred)
1777{
1778	int rv = 0;
1779	char *physpath = NULL;
1780	struct vattr vattr;
1781	struct vattr *vap = &vattr;
1782	struct sdev_node *dv = NULL;
1783
1784	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1785	if (flags & SDEV_VLINK) {
1786		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1787		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1788		    NULL);
1789		if (rv) {
1790			kmem_free(physpath, MAXPATHLEN);
1791			return (-1);
1792		}
1793
1794		*vap = *sdev_getdefault_attr(VLNK);	/* structure copy */
1795		vap->va_size = strlen(physpath);
1796		gethrestime(&vap->va_atime);
1797		vap->va_mtime = vap->va_atime;
1798		vap->va_ctime = vap->va_atime;
1799
1800		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1801		    (void *)physpath, cred, SDEV_READY);
1802		kmem_free(physpath, MAXPATHLEN);
1803		if (rv)
1804			return (rv);
1805	} else if (flags & SDEV_VATTR) {
1806		/*
1807		 * /dev/pts
1808		 *
1809		 * callback is responsible to set the basic attributes,
1810		 * e.g. va_type/va_uid/va_gid/
1811		 *    dev_t if VCHR or VBLK/
1812		 */
1813		ASSERT(callback);
1814		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1815		if (rv) {
1816			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1817			    "callback failed \n"));
1818			return (-1);
1819		}
1820
1821		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1822		    cred, SDEV_READY);
1823
1824		if (rv)
1825			return (rv);
1826
1827	} else {
1828		impossible(("lookup: %s/%s by %s not supported (%d)\n",
1829		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1830		    __LINE__));
1831		rv = -1;
1832	}
1833
1834	*dvp = dv;
1835	return (rv);
1836}
1837
1838static int
1839is_devfsadm_thread(char *exec_name)
1840{
1841	/*
1842	 * note: because devfsadmd -> /usr/sbin/devfsadm
1843	 * it is safe to use "devfsadm" to capture the lookups
1844	 * from devfsadm and its daemon version.
1845	 */
1846	if (strcmp(exec_name, "devfsadm") == 0)
1847		return (1);
1848	return (0);
1849}
1850
1851/*
1852 * Lookup Order:
1853 *	sdev_node cache;
1854 *	backing store (SDEV_PERSIST);
1855 *	DBNR: a. dir_ops implemented in the loadable modules;
1856 *	      b. vnode ops in vtab.
1857 */
1858int
1859devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1860    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1861    struct cred *, void *, char *), int flags)
1862{
1863	int rv = 0, nmlen;
1864	struct vnode *rvp = NULL;
1865	struct sdev_node *dv = NULL;
1866	int	retried = 0;
1867	int	error = 0;
1868	struct vattr vattr;
1869	char *lookup_thread = curproc->p_user.u_comm;
1870	int failed_flags = 0;
1871	int (*vtor)(struct sdev_node *) = NULL;
1872	int state;
1873	int parent_state;
1874	char *link = NULL;
1875
1876	if (SDEVTOV(ddv)->v_type != VDIR)
1877		return (ENOTDIR);
1878
1879	/*
1880	 * Empty name or ., return node itself.
1881	 */
1882	nmlen = strlen(nm);
1883	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1884		*vpp = SDEVTOV(ddv);
1885		VN_HOLD(*vpp);
1886		return (0);
1887	}
1888
1889	/*
1890	 * .., return the parent directory
1891	 */
1892	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1893		*vpp = SDEVTOV(ddv->sdev_dotdot);
1894		VN_HOLD(*vpp);
1895		return (0);
1896	}
1897
1898	rw_enter(&ddv->sdev_contents, RW_READER);
1899	if (ddv->sdev_flags & SDEV_VTOR) {
1900		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1901		ASSERT(vtor);
1902	}
1903
1904tryagain:
1905	/*
1906	 * (a) directory cache lookup:
1907	 */
1908	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1909	parent_state = ddv->sdev_state;
1910	dv = sdev_cache_lookup(ddv, nm);
1911	if (dv) {
1912		state = dv->sdev_state;
1913		switch (state) {
1914		case SDEV_INIT:
1915			if (is_devfsadm_thread(lookup_thread))
1916				break;
1917
1918			/* ZOMBIED parent won't allow node creation */
1919			if (parent_state == SDEV_ZOMBIE) {
1920				SD_TRACE_FAILED_LOOKUP(ddv, nm,
1921				    retried);
1922				goto nolock_notfound;
1923			}
1924
1925			mutex_enter(&dv->sdev_lookup_lock);
1926			/* compensate the threads started after devfsadm */
1927			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1928			    !(SDEV_IS_LOOKUP(dv)))
1929				SDEV_BLOCK_OTHERS(dv,
1930				    (SDEV_LOOKUP | SDEV_LGWAITING));
1931
1932			if (SDEV_IS_LOOKUP(dv)) {
1933				failed_flags |= SLF_REBUILT;
1934				rw_exit(&ddv->sdev_contents);
1935				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1936				mutex_exit(&dv->sdev_lookup_lock);
1937				rw_enter(&ddv->sdev_contents, RW_READER);
1938
1939				if (error != 0) {
1940					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1941					    retried);
1942					goto nolock_notfound;
1943				}
1944
1945				state = dv->sdev_state;
1946				if (state == SDEV_INIT) {
1947					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1948					    retried);
1949					goto nolock_notfound;
1950				} else if (state == SDEV_READY) {
1951					goto found;
1952				} else if (state == SDEV_ZOMBIE) {
1953					rw_exit(&ddv->sdev_contents);
1954					SD_TRACE_FAILED_LOOKUP(ddv, nm,
1955					    retried);
1956					SDEV_RELE(dv);
1957					goto lookup_failed;
1958				}
1959			} else {
1960				mutex_exit(&dv->sdev_lookup_lock);
1961			}
1962			break;
1963		case SDEV_READY:
1964			goto found;
1965		case SDEV_ZOMBIE:
1966			rw_exit(&ddv->sdev_contents);
1967			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1968			SDEV_RELE(dv);
1969			goto lookup_failed;
1970		default:
1971			rw_exit(&ddv->sdev_contents);
1972			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1973			sdev_lookup_failed(ddv, nm, failed_flags);
1974			*vpp = NULLVP;
1975			return (ENOENT);
1976		}
1977	}
1978	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1979
1980	/*
1981	 * ZOMBIED parent does not allow new node creation.
1982	 * bail out early
1983	 */
1984	if (parent_state == SDEV_ZOMBIE) {
1985		rw_exit(&ddv->sdev_contents);
1986		*vpp = NULLVP;
1987		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1988		return (ENOENT);
1989	}
1990
1991	/*
1992	 * (b0): backing store lookup
1993	 *	SDEV_PERSIST is default except:
1994	 *		1) pts nodes
1995	 *		2) non-chmod'ed local nodes
1996	 *		3) zvol nodes
1997	 */
1998	if (SDEV_IS_PERSIST(ddv)) {
1999		error = devname_backstore_lookup(ddv, nm, &rvp);
2000
2001		if (!error) {
2002
2003			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
2004			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2005			if (error) {
2006				rw_exit(&ddv->sdev_contents);
2007				if (dv)
2008					SDEV_RELE(dv);
2009				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2010				sdev_lookup_failed(ddv, nm, failed_flags);
2011				*vpp = NULLVP;
2012				return (ENOENT);
2013			}
2014
2015			if (vattr.va_type == VLNK) {
2016				error = sdev_getlink(rvp, &link);
2017				if (error) {
2018					rw_exit(&ddv->sdev_contents);
2019					if (dv)
2020						SDEV_RELE(dv);
2021					SD_TRACE_FAILED_LOOKUP(ddv, nm,
2022					    retried);
2023					sdev_lookup_failed(ddv, nm,
2024					    failed_flags);
2025					*vpp = NULLVP;
2026					return (ENOENT);
2027				}
2028				ASSERT(link != NULL);
2029			}
2030
2031			if (!rw_tryupgrade(&ddv->sdev_contents)) {
2032				rw_exit(&ddv->sdev_contents);
2033				rw_enter(&ddv->sdev_contents, RW_WRITER);
2034			}
2035			error = sdev_mknode(ddv, nm, &dv, &vattr,
2036			    rvp, link, cred, SDEV_READY);
2037			rw_downgrade(&ddv->sdev_contents);
2038
2039			if (link != NULL) {
2040				kmem_free(link, strlen(link) + 1);
2041				link = NULL;
2042			}
2043
2044			if (error) {
2045				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2046				rw_exit(&ddv->sdev_contents);
2047				if (dv)
2048					SDEV_RELE(dv);
2049				goto lookup_failed;
2050			} else {
2051				goto found;
2052			}
2053		} else if (retried) {
2054			rw_exit(&ddv->sdev_contents);
2055			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2056			    ddv->sdev_name, nm));
2057			if (dv)
2058				SDEV_RELE(dv);
2059			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2060			sdev_lookup_failed(ddv, nm, failed_flags);
2061			*vpp = NULLVP;
2062			return (ENOENT);
2063		}
2064	}
2065
2066lookup_create_node:
2067	/* first thread that is doing the lookup on this node */
2068	if (callback) {
2069		ASSERT(dv == NULL);
2070		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2071			rw_exit(&ddv->sdev_contents);
2072			rw_enter(&ddv->sdev_contents, RW_WRITER);
2073		}
2074		error = sdev_call_dircallback(ddv, &dv, nm, callback,
2075		    flags, cred);
2076		rw_downgrade(&ddv->sdev_contents);
2077		if (error == 0) {
2078			goto found;
2079		} else {
2080			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2081			rw_exit(&ddv->sdev_contents);
2082			goto lookup_failed;
2083		}
2084	}
2085	if (!dv) {
2086		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2087			rw_exit(&ddv->sdev_contents);
2088			rw_enter(&ddv->sdev_contents, RW_WRITER);
2089		}
2090		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2091		    cred, SDEV_INIT);
2092		if (!dv) {
2093			rw_exit(&ddv->sdev_contents);
2094			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2095			sdev_lookup_failed(ddv, nm, failed_flags);
2096			*vpp = NULLVP;
2097			return (ENOENT);
2098		}
2099		rw_downgrade(&ddv->sdev_contents);
2100	}
2101
2102	/*
2103	 * (b1) invoking devfsadm once per life time for devfsadm nodes
2104	 */
2105	ASSERT(SDEV_HELD(dv));
2106
2107	if (SDEV_IS_NO_NCACHE(dv))
2108		failed_flags |= SLF_NO_NCACHE;
2109	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2110	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2111	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2112		ASSERT(SDEV_HELD(dv));
2113		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2114		goto nolock_notfound;
2115	}
2116
2117	/*
2118	 * Filter out known non-existent devices recorded
2119	 * during the initial reconfiguration boot; reconfig
2120	 * should not be done for these, and the lookup can
2121	 * be short-circuited now.
2122	 */
2123	if (sdev_lookup_filter(ddv, nm)) {
2124		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2125		goto nolock_notfound;
2126	}
2127
2128	/* bypassing devfsadm internal nodes */
2129	if (is_devfsadm_thread(lookup_thread)) {
2130		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2131		goto nolock_notfound;
2132	}
2133
2134	if (sdev_reconfig_disable) {
2135		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2136		goto nolock_notfound;
2137	}
2138
2139	error = sdev_call_devfsadmd(ddv, dv, nm);
2140	if (error == 0) {
2141		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2142		    ddv->sdev_name, nm, curproc->p_user.u_comm));
2143		if (sdev_reconfig_verbose) {
2144			cmn_err(CE_CONT,
2145			    "?lookup of %s/%s by %s: reconfig\n",
2146			    ddv->sdev_name, nm, curproc->p_user.u_comm);
2147		}
2148		retried = 1;
2149		failed_flags |= SLF_REBUILT;
2150		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2151		SDEV_SIMPLE_RELE(dv);
2152		goto tryagain;
2153	} else {
2154		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2155		goto nolock_notfound;
2156	}
2157
2158found:
2159	ASSERT(!(dv->sdev_flags & SDEV_STALE));
2160	ASSERT(dv->sdev_state == SDEV_READY);
2161	if (vtor) {
2162		/*
2163		 * Check validity of returned node
2164		 */
2165		switch (vtor(dv)) {
2166		case SDEV_VTOR_VALID:
2167			break;
2168		case SDEV_VTOR_STALE:
2169			/*
2170			 * The name exists, but the cache entry is
2171			 * stale and needs to be re-created.
2172			 */
2173			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2174			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2175				rw_exit(&ddv->sdev_contents);
2176				rw_enter(&ddv->sdev_contents, RW_WRITER);
2177			}
2178			error = sdev_cache_update(ddv, &dv, nm,
2179			    SDEV_CACHE_DELETE);
2180			rw_downgrade(&ddv->sdev_contents);
2181			if (error == 0) {
2182				dv = NULL;
2183				goto lookup_create_node;
2184			}
2185			/* FALLTHRU */
2186		case SDEV_VTOR_INVALID:
2187			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2188			sdcmn_err7(("lookup: destroy invalid "
2189			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2190			goto nolock_notfound;
2191		case SDEV_VTOR_SKIP:
2192			sdcmn_err7(("lookup: node not applicable - "
2193			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2194			rw_exit(&ddv->sdev_contents);
2195			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2196			SDEV_RELE(dv);
2197			goto lookup_failed;
2198		default:
2199			cmn_err(CE_PANIC,
2200			    "dev fs: validator failed: %s(%p)\n",
2201			    dv->sdev_name, (void *)dv);
2202			break;
2203		}
2204	}
2205
2206	rw_exit(&ddv->sdev_contents);
2207	rv = sdev_to_vp(dv, vpp);
2208	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2209	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2210	    dv->sdev_state, nm, rv));
2211	return (rv);
2212
2213nolock_notfound:
2214	/*
2215	 * Destroy the node that was created for synchronization purposes.
2216	 */
2217	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2218	    nm, dv->sdev_state));
2219	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2220	if (dv->sdev_state == SDEV_INIT) {
2221		if (!rw_tryupgrade(&ddv->sdev_contents)) {
2222			rw_exit(&ddv->sdev_contents);
2223			rw_enter(&ddv->sdev_contents, RW_WRITER);
2224		}
2225
2226		/*
2227		 * The node state may have changed while the lock was
2228		 * dropped and reacquired. Re-check.
2229		 */
2230		if (dv->sdev_state == SDEV_INIT) {
2231			(void) sdev_dirdelete(ddv, dv);
2232			rw_exit(&ddv->sdev_contents);
2233			sdev_lookup_failed(ddv, nm, failed_flags);
2234			*vpp = NULL;
2235			return (ENOENT);
2236		}
2237	}
2238
2239	rw_exit(&ddv->sdev_contents);
2240	SDEV_RELE(dv);
2241
2242lookup_failed:
2243	sdev_lookup_failed(ddv, nm, failed_flags);
2244	*vpp = NULL;
2245	return (ENOENT);
2246}
2247
2248/*
2249 * Given a directory node, mark all nodes beneath as
2250 * STALE, i.e. nodes that don't exist as far as new
2251 * consumers are concerned.  Remove them from the
2252 * list of directory entries so that no lookup or
2253 * directory traversal will find them.  The nodes are
2254 * not deallocated, so existing holds are not affected.
2255 */
2256void
2257sdev_stale(struct sdev_node *ddv)
2258{
2259	struct sdev_node *dv;
2260	struct vnode *vp;
2261
2262	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2263
2264	rw_enter(&ddv->sdev_contents, RW_WRITER);
2265	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
2266		vp = SDEVTOV(dv);
2267		if (vp->v_type == VDIR)
2268			sdev_stale(dv);
2269
2270		sdcmn_err9(("sdev_stale: setting stale %s\n",
2271		    dv->sdev_path));
2272		dv->sdev_flags |= SDEV_STALE;
2273		avl_remove(&ddv->sdev_entries, dv);
2274	}
2275	ddv->sdev_flags |= SDEV_BUILD;
2276	rw_exit(&ddv->sdev_contents);
2277}
2278
2279/*
2280 * Given a directory node, clean out all the nodes beneath.
2281 * If expr is specified, clean nodes with names matching expr.
2282 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2283 *	so they are excluded from future lookups.
2284 */
2285int
2286sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2287{
2288	int error = 0;
2289	int busy = 0;
2290	struct vnode *vp;
2291	struct sdev_node *dv, *next = NULL;
2292	int bkstore = 0;
2293	int len = 0;
2294	char *bks_name = NULL;
2295
2296	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2297
2298	/*
2299	 * We try our best to destroy all unused sdev_nodes
2300	 */
2301	rw_enter(&ddv->sdev_contents, RW_WRITER);
2302	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
2303		next = SDEV_NEXT_ENTRY(ddv, dv);
2304		vp = SDEVTOV(dv);
2305
2306		if (expr && gmatch(dv->sdev_name, expr) == 0)
2307			continue;
2308
2309		if (vp->v_type == VDIR &&
2310		    sdev_cleandir(dv, NULL, flags) != 0) {
2311			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2312			    dv->sdev_name));
2313			busy++;
2314			continue;
2315		}
2316
2317		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2318			sdcmn_err9(("sdev_cleandir: node %s busy\n",
2319			    dv->sdev_name));
2320			busy++;
2321			continue;
2322		}
2323
2324		/*
2325		 * at this point, either dv is not held or SDEV_ENFORCE
2326		 * is specified. In either case, dv needs to be deleted
2327		 */
2328		SDEV_HOLD(dv);
2329
2330		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2331		if (bkstore && (vp->v_type == VDIR))
2332			bkstore += 1;
2333
2334		if (bkstore) {
2335			len = strlen(dv->sdev_name) + 1;
2336			bks_name = kmem_alloc(len, KM_SLEEP);
2337			bcopy(dv->sdev_name, bks_name, len);
2338		}
2339
2340		error = sdev_dirdelete(ddv, dv);
2341
2342		if (error == EBUSY) {
2343			sdcmn_err9(("sdev_cleandir: dir busy\n"));
2344			busy++;
2345		}
2346
2347		/* take care of the backing store cleanup */
2348		if (bkstore && (error == 0)) {
2349			ASSERT(bks_name);
2350			ASSERT(ddv->sdev_attrvp);
2351
2352			if (bkstore == 1) {
2353				error = VOP_REMOVE(ddv->sdev_attrvp,
2354				    bks_name, kcred, NULL, 0);
2355			} else if (bkstore == 2) {
2356				error = VOP_RMDIR(ddv->sdev_attrvp,
2357				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2358			}
2359
2360			/* do not propagate the backing store errors */
2361			if (error) {
2362				sdcmn_err9(("sdev_cleandir: backing store "
2363				    "not cleaned\n"));
2364				error = 0;
2365			}
2366
2367			bkstore = 0;
2368			kmem_free(bks_name, len);
2369			bks_name = NULL;
2370			len = 0;
2371		}
2372	}
2373
2374	ddv->sdev_flags |= SDEV_BUILD;
2375	rw_exit(&ddv->sdev_contents);
2376
2377	if (busy) {
2378		error = EBUSY;
2379	}
2380
2381	return (error);
2382}
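
/*
 * Usage sketch (illustrative only; the directory node and the name
 * pattern below are hypothetical): attempt a polite cleanup first and
 * fall back to SDEV_ENFORCE so that busy entries are at least staled.
 *
 *	if (sdev_cleandir(ddv, "md*", 0) == EBUSY)
 *		(void) sdev_cleandir(ddv, "md*", SDEV_ENFORCE);
 */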
2383
2384/*
2385 * a convenience wrapper for readdir() functions
2386 */
2387size_t
2388add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2389{
2390	size_t reclen = DIRENT64_RECLEN(strlen(nm));
2391	if (reclen > size)
2392		return (0);
2393
2394	de->d_ino = (ino64_t)ino;
2395	de->d_off = (off64_t)off + 1;
2396	de->d_reclen = (ushort_t)reclen;
2397	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2398	return (reclen);
2399}
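
/*
 * Usage sketch (illustrative only; the buffer, names and inode numbers
 * are hypothetical): pack successive entries until add_dir_entry()
 * returns 0, which means the remaining space is too small.
 *
 *	size_t used = 0, n;
 *	offset_t off = 0;
 *
 *	n = add_dir_entry((dirent64_t *)(buf + used), "a0",
 *	    bufsz - used, 2, off);
 *	if (n != 0) {
 *		used += n;
 *		off++;
 *	}
 *	(repeat for the next name, advancing "used" and "off")
 */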
2400
2401/*
2402 * sdev_mount service routines
2403 */
2404int
2405sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2406{
2407	int	error;
2408
2409	if (uap->datalen != sizeof (*args))
2410		return (EINVAL);
2411
2412	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2413		cmn_err(CE_WARN, "sdev_copyin_mountargs: cannot "
2414		    "get user data. error %d\n", error);
2415		return (EFAULT);
2416	}
2417
2418	return (0);
2419}
2420
2421#ifdef nextdp
2422#undef nextdp
2423#endif
2424#define	nextdp(dp)	((struct dirent64 *) \
2425			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
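
/*
 * Illustrative sketch of walking a dirent64 buffer with nextdp()
 * (the buffer and its length are hypothetical):
 *
 *	dirent64_t *dp;
 *
 *	for (dp = (dirent64_t *)buf;
 *	    (intptr_t)dp < (intptr_t)buf + buflen;
 *	    dp = nextdp(dp)) {
 *		(dp->d_name, dp->d_ino and dp->d_reclen are valid here)
 *	}
 */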
2426
2427/*
2428 * readdir helper func
2429 */
2430int
2431devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2432    int flags)
2433{
2434	struct sdev_node *ddv = VTOSDEV(vp);
2435	struct sdev_node *dv;
2436	dirent64_t	*dp;
2437	ulong_t		outcount = 0;
2438	size_t		namelen;
2439	ulong_t		alloc_count;
2440	void		*outbuf;
2441	struct iovec	*iovp;
2442	int		error = 0;
2443	size_t		reclen;
2444	offset_t	diroff;
2445	offset_t	soff;
2446	int		this_reclen;
2447	int (*vtor)(struct sdev_node *) = NULL;
2448	struct vattr attr;
2449	timestruc_t now;
2450
2451	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2452	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2453
2454	if (uiop->uio_loffset >= MAXOFF_T) {
2455		if (eofp)
2456			*eofp = 1;
2457		return (0);
2458	}
2459
2460	if (uiop->uio_iovcnt != 1)
2461		return (EINVAL);
2462
2463	if (vp->v_type != VDIR)
2464		return (ENOTDIR);
2465
2466	if (ddv->sdev_flags & SDEV_VTOR) {
2467		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2468		ASSERT(vtor);
2469	}
2470
2471	if (eofp != NULL)
2472		*eofp = 0;
2473
2474	soff = uiop->uio_loffset;
2475	iovp = uiop->uio_iov;
2476	alloc_count = iovp->iov_len;
2477	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2478	outcount = 0;
2479
2480	if (ddv->sdev_state == SDEV_ZOMBIE)
2481		goto get_cache;
2482
2483	if (SDEV_IS_GLOBAL(ddv)) {
2484
2485		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2486		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2487		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2488		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2489		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2490		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2491		    !sdev_reconfig_disable) {
2492			/*
2493			 * invoking "devfsadm" to do system device reconfig
2494			 */
2495			mutex_enter(&ddv->sdev_lookup_lock);
2496			SDEV_BLOCK_OTHERS(ddv,
2497			    (SDEV_READDIR|SDEV_LGWAITING));
2498			mutex_exit(&ddv->sdev_lookup_lock);
2499
2500			sdcmn_err8(("readdir of %s by %s: reconfig\n",
2501			    ddv->sdev_path, curproc->p_user.u_comm));
2502			if (sdev_reconfig_verbose) {
2503				cmn_err(CE_CONT,
2504				    "?readdir of %s by %s: reconfig\n",
2505				    ddv->sdev_path, curproc->p_user.u_comm);
2506			}
2507
2508			sdev_devfsadmd_thread(ddv, NULL, kcred);
2509		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2510			/*
2511			 * compensate for an "ls" started after "devfsadm"
2512			 */
2513			mutex_enter(&ddv->sdev_lookup_lock);
2514			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2515			mutex_exit(&ddv->sdev_lookup_lock);
2516		}
2517
2518		/*
2519		 * release the contents lock so that
2520		 * the cache may be updated by devfsadmd
2521		 */
2522		rw_exit(&ddv->sdev_contents);
2523		mutex_enter(&ddv->sdev_lookup_lock);
2524		if (SDEV_IS_READDIR(ddv))
2525			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
2526		mutex_exit(&ddv->sdev_lookup_lock);
2527		rw_enter(&ddv->sdev_contents, RW_READER);
2528
2529		sdcmn_err4(("readdir of directory %s by %s\n",
2530		    ddv->sdev_name, curproc->p_user.u_comm));
2531		if (ddv->sdev_flags & SDEV_BUILD) {
2532			if (SDEV_IS_PERSIST(ddv)) {
2533				error = sdev_filldir_from_store(ddv,
2534				    alloc_count, cred);
2535			}
2536			ddv->sdev_flags &= ~SDEV_BUILD;
2537		}
2538	}
2539
2540get_cache:
2541	/* handle "." and ".." */
2542	diroff = 0;
2543	if (soff == 0) {
2544		/* first time */
2545		this_reclen = DIRENT64_RECLEN(1);
2546		if (alloc_count < this_reclen) {
2547			error = EINVAL;
2548			goto done;
2549		}
2550
2551		dp->d_ino = (ino64_t)ddv->sdev_ino;
2552		dp->d_off = (off64_t)1;
2553		dp->d_reclen = (ushort_t)this_reclen;
2554
2555		(void) strncpy(dp->d_name, ".",
2556		    DIRENT64_NAMELEN(this_reclen));
2557		outcount += dp->d_reclen;
2558		dp = nextdp(dp);
2559	}
2560
2561	diroff++;
2562	if (soff <= 1) {
2563		this_reclen = DIRENT64_RECLEN(2);
2564		if (alloc_count < outcount + this_reclen) {
2565			error = EINVAL;
2566			goto done;
2567		}
2568
2569		dp->d_reclen = (ushort_t)this_reclen;
2570		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2571		dp->d_off = (off64_t)2;
2572
2573		(void) strncpy(dp->d_name, "..",
2574		    DIRENT64_NAMELEN(this_reclen));
2575		outcount += dp->d_reclen;
2576
2577		dp = nextdp(dp);
2578	}
2579
2581	/* gets the cache */
2582	diroff++;
2583	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2584	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2585		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2586		    diroff, soff, dv->sdev_name));
2587
2588		/* bypass premature nodes */
2589		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2590			sdcmn_err3(("sdev_readdir: premature node "
2591			    "%s %d\n", dv->sdev_name, dv->sdev_state));
2592			continue;
2593		}
2594
2595		/*
2596		 * Check the validity of the node.
2597		 * Drop invalid nodes and nodes to be skipped.
2598		 * A node the validator indicates as stale needs
2599		 * to be returned as presumably the node name itself
2600		 * is valid and the node data itself will be refreshed
2601		 * on lookup.  An application performing a readdir then
2602		 * stat on each entry should thus always see consistent
2603		 * data.  In any case, it is not possible to synchronize
2604		 * with dynamic kernel state, and any view we return can
2605		 * never be anything more than a snapshot at a point in time.
2606		 */
2607		if (vtor) {
2608			switch (vtor(dv)) {
2609			case SDEV_VTOR_VALID:
2610				break;
2611			case SDEV_VTOR_INVALID:
2612			case SDEV_VTOR_SKIP:
2613				continue;
2614			case SDEV_VTOR_STALE:
2615				sdcmn_err3(("sdev_readdir: %s stale\n",
2616				    dv->sdev_name));
2617				break;
2618			default:
2619				cmn_err(CE_PANIC,
2620				    "dev fs: validator failed: %s(%p)\n",
2621				    dv->sdev_name, (void *)dv);
2622				break;
2623			/*NOTREACHED*/
2624			}
2625		}
2626
2627		namelen = strlen(dv->sdev_name);
2628		reclen = DIRENT64_RECLEN(namelen);
2629		if (outcount + reclen > alloc_count) {
2630			goto full;
2631		}
2632		dp->d_reclen = (ushort_t)reclen;
2633		dp->d_ino = (ino64_t)dv->sdev_ino;
2634		dp->d_off = (off64_t)diroff + 1;
2635		(void) strncpy(dp->d_name, dv->sdev_name,
2636		    DIRENT64_NAMELEN(reclen));
2637		outcount += reclen;
2638		dp = nextdp(dp);
2639	}
2640
2641full:
2642	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2643	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2644	    (void *)dv));
2645
2646	if (outcount)
2647		error = uiomove(outbuf, outcount, UIO_READ, uiop);
2648
2649	if (!error) {
2650		uiop->uio_loffset = diroff;
2651		if (eofp)
2652			*eofp = dv ? 0 : 1;
2653	}
2654
2656	if (ddv->sdev_attrvp) {
2657		gethrestime(&now);
2658		attr.va_ctime = now;
2659		attr.va_atime = now;
2660		attr.va_mask = AT_CTIME|AT_ATIME;
2661
2662		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2663	}
2664done:
2665	kmem_free(outbuf, alloc_count);
2666	return (error);
2667}
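
/*
 * Illustrative sketch of a directory validator of the kind consulted
 * by the lookup and readdir paths above.  The backend checks are
 * hypothetical; real validators live with their subdirectory
 * implementations.
 *
 *	static int
 *	example_vtor(struct sdev_node *dv)
 *	{
 *		if (!example_backend_exists(dv->sdev_name))
 *			return (SDEV_VTOR_INVALID);
 *		if (example_backend_changed(dv->sdev_name))
 *			return (SDEV_VTOR_STALE);
 *		return (SDEV_VTOR_VALID);
 *	}
 */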
2668
2669static int
2670sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2671{
2672	vnode_t *vp;
2673	vnode_t *cvp;
2674	struct sdev_node *svp;
2675	char *nm;
2676	struct pathname pn;
2677	int error;
2678	int persisted = 0;
2679
2680	ASSERT(INGLOBALZONE(curproc));
2681
2682	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2683		return (error);
2684	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2685
2686	vp = rootdir;
2687	VN_HOLD(vp);
2688
2689	while (pn_pathleft(&pn)) {
2690		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2691		(void) pn_getcomponent(&pn, nm);
2692
2693		/*
2694		 * Deal with the .. special case where we may be
2695		 * traversing up across a mount point, to the
2696		 * root of this filesystem or global root.
2697		 */
2698		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2699checkforroot:
2700			if (VN_CMP(vp, rootdir)) {
2701				nm[1] = 0;
2702			} else if (vp->v_flag & VROOT) {
2703				vfs_t *vfsp;
2704				cvp = vp;
2705				vfsp = cvp->v_vfsp;
2706				vfs_rlock_wait(vfsp);
2707				vp = cvp->v_vfsp->vfs_vnodecovered;
2708				if (vp == NULL ||
2709				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2710					vfs_unlock(vfsp);
2711					VN_RELE(cvp);
2712					error = EIO;
2713					break;
2714				}
2715				VN_HOLD(vp);
2716				vfs_unlock(vfsp);
2717				VN_RELE(cvp);
2718				cvp = NULL;
2719				goto checkforroot;
2720			}
2721		}
2722
2723		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2724		    NULL, NULL);
2725		if (error) {
2726			VN_RELE(vp);
2727			break;
2728		}
2729
2730		/* traverse mount points encountered on our journey */
2731		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2732			VN_RELE(vp);
2733			VN_RELE(cvp);
2734			break;
2735		}
2736
2737		/*
2738		 * a symbolic link can be either relative or absolute
2739		 */
2740		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2741			struct pathname linkpath;
2742			pn_alloc(&linkpath);
2743			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2744				pn_free(&linkpath);
2745				break;
2746			}
2747			if (pn_pathleft(&linkpath) == 0)
2748				(void) pn_set(&linkpath, ".");
2749			error = pn_insert(&pn, &linkpath, strlen(nm));
2750			pn_free(&linkpath);
2751			if (pn.pn_pathlen == 0) {
2752				VN_RELE(vp);
2753				return (ENOENT);
2754			}
2755			if (pn.pn_path[0] == '/') {
2756				pn_skipslash(&pn);
2757				VN_RELE(vp);
2758				VN_RELE(cvp);
2759				vp = rootdir;
2760				VN_HOLD(vp);
2761			} else {
2762				VN_RELE(cvp);
2763			}
2764			continue;
2765		}
2766
2767		VN_RELE(vp);
2768
2769		/*
2770		 * Direct the operation to the persisting filesystem
2771		 * underlying /dev.  Bail if we encounter a
2772		 * non-persistent dev entity here.
2773		 */
2774		if (cvp->v_vfsp->vfs_fstype == devtype) {
2775
2776			if (VTOSDEV(cvp) == NULL) {
2777				error = ENOENT;
2778				VN_RELE(cvp);
2779				break;
2780			}
2781
2782			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2783				error = ENOENT;
2784				VN_RELE(cvp);
2785				break;
2786			}
2787			svp = VTOSDEV(cvp);
2788			if ((vp = svp->sdev_attrvp) == NULL) {
2789				error = ENOENT;
2790				VN_RELE(cvp);
2791				break;
2792			}
2793			persisted = 1;
2794			VN_HOLD(vp);
2795			VN_RELE(cvp);
2796			cvp = vp;
2797		}
2798
2799		vp = cvp;
2800		pn_skipslash(&pn);
2801	}
2802
2803	kmem_free(nm, MAXNAMELEN);
2804	pn_free(&pn);
2805
2806	if (error)
2807		return (error);
2808
2809	/*
2810	 * Only return persisted nodes in the filesystem underlying /dev.
2811	 */
2812	if (!persisted) {
2813		VN_RELE(vp);
2814		return (ENOENT);
2815	}
2816
2817	*r_vp = vp;
2818	return (0);
2819}
2820
2821int
2822sdev_modctl_readdir(const char *dir, char ***dirlistp,
2823	int *npathsp, int *npathsp_alloc, int checking_empty)
2824{
2825	char	**pathlist = NULL;
2826	char	**newlist = NULL;
2827	int	npaths = 0;
2828	int	npaths_alloc = 0;
2829	dirent64_t *dbuf = NULL;
2830	int	n;
2831	char	*s;
2832	int error;
2833	vnode_t *vp;
2834	int eof;
2835	struct iovec iov;
2836	struct uio uio;
2837	struct dirent64 *dp;
2838	size_t dlen;
2839	size_t dbuflen;
2840	int ndirents = 64;
2841	char *nm;
2842
2843	error = sdev_modctl_lookup(dir, &vp);
2844	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2845	    dir, curproc->p_user.u_comm,
2846	    (error == 0) ? "ok" : "failed"));
2847	if (error)
2848		return (error);
2849
2850	dlen = ndirents * (sizeof (*dbuf));
2851	dbuf = kmem_alloc(dlen, KM_SLEEP);
2852
2853	uio.uio_iov = &iov;
2854	uio.uio_iovcnt = 1;
2855	uio.uio_segflg = UIO_SYSSPACE;
2856	uio.uio_fmode = 0;
2857	uio.uio_extflg = UIO_COPY_CACHED;
2858	uio.uio_loffset = 0;
2859	uio.uio_llimit = MAXOFFSET_T;
2860
2861	eof = 0;
2862	error = 0;
2863	while (!error && !eof) {
2864		uio.uio_resid = dlen;
2865		iov.iov_base = (char *)dbuf;
2866		iov.iov_len = dlen;
2867
2868		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2869		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2870		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2871
2872		dbuflen = dlen - uio.uio_resid;
2873
2874		if (error || dbuflen == 0)
2875			break;
2876
2877		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2878		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2879
2880			nm = dp->d_name;
2881
2882			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2883				continue;
2884			if (npaths == npaths_alloc) {
2885				npaths_alloc += 64;
2886				newlist = (char **)
2887				    kmem_zalloc((npaths_alloc + 1) *
2888				    sizeof (char *), KM_SLEEP);
2889				if (pathlist) {
2890					bcopy(pathlist, newlist,
2891					    npaths * sizeof (char *));
2892					kmem_free(pathlist,
2893					    (npaths + 1) * sizeof (char *));
2894				}
2895				pathlist = newlist;
2896			}
2897			n = strlen(nm) + 1;
2898			s = kmem_alloc(n, KM_SLEEP);
2899			bcopy(nm, s, n);
2900			pathlist[npaths++] = s;
2901			sdcmn_err11(("  %s/%s\n", dir, s));
2902
2903			/* if checking empty, one entry is as good as many */
2904			if (checking_empty) {
2905				eof = 1;
2906				break;
2907			}
2908		}
2909	}
2910
2912	VN_RELE(vp);
2913
2914	if (dbuf)
2915		kmem_free(dbuf, dlen);
2916
2917	if (error)
2918		return (error);
2919
2920	*dirlistp = pathlist;
2921	*npathsp = npaths;
2922	*npathsp_alloc = npaths_alloc;
2923
2924	return (0);
2925}
2926
2927void
2928sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
2929{
2930	int	i, n;
2931
2932	for (i = 0; i < npaths; i++) {
2933		n = strlen(pathlist[i]) + 1;
2934		kmem_free(pathlist[i], n);
2935	}
2936
2937	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
2938}
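
/*
 * Usage sketch (illustrative only; the path is hypothetical): list a
 * persisted /dev subdirectory and release the result with
 * sdev_modctl_readdir_free().
 *
 *	char **list;
 *	int npaths, nalloc, i;
 *
 *	if (sdev_modctl_readdir("/dev/dsk", &list, &npaths, &nalloc, 0) == 0) {
 *		for (i = 0; i < npaths; i++)
 *			cmn_err(CE_CONT, "%s\n", list[i]);
 *		sdev_modctl_readdir_free(list, npaths, nalloc);
 *	}
 */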
2939
2940int
2941sdev_modctl_devexists(const char *path)
2942{
2943	vnode_t *vp;
2944	int error;
2945
2946	error = sdev_modctl_lookup(path, &vp);
2947	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2948	    path, curproc->p_user.u_comm,
2949	    (error == 0) ? "ok" : "failed"));
2950	if (error == 0)
2951		VN_RELE(vp);
2952
2953	return (error);
2954}
2955
2956extern int sdev_vnodeops_tbl_size;
2957
2958/*
2959 * construct a new template with overrides from vtab
2960 */
2961static fs_operation_def_t *
2962sdev_merge_vtab(const fs_operation_def_t tab[])
2963{
2964	fs_operation_def_t *new;
2965	const fs_operation_def_t *tab_entry;
2966
2967	/* make a copy of standard vnode ops table */
2968	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2969	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2970
2971	/* replace the overrides from tab */
2972	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2973		fs_operation_def_t *std_entry = new;
2974		while (std_entry->name) {
2975			if (strcmp(tab_entry->name, std_entry->name) == 0) {
2976				std_entry->func = tab_entry->func;
2977				break;
2978			}
2979			std_entry++;
2980		}
2981		if (std_entry->name == NULL)
2982			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2983			    tab_entry->name);
2984	}
2985
2986	return (new);
2987}
2988
2989/* free memory allocated by sdev_merge_vtab */
2990static void
2991sdev_free_vtab(fs_operation_def_t *new)
2992{
2993	kmem_free(new, sdev_vnodeops_tbl_size);
2994}
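
/*
 * Usage sketch (illustrative only; the override table name is
 * hypothetical): merge a subdirectory's overrides into the standard
 * sdev vnode ops table and free the merged copy when done with it.
 *
 *	fs_operation_def_t *ops_tbl;
 *
 *	ops_tbl = sdev_merge_vtab(example_vnodeops_tbl);
 *	(build the subdirectory's vnodeops from ops_tbl)
 *	sdev_free_vtab(ops_tbl);
 */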
2995
2996/*
2997 * a generic setattr() function
2998 *
2999 * note: flags only supports AT_UID and AT_GID.
3000 *	 Future enhancements can be done for other types, e.g. AT_MODE
3001 */
3002int
3003devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
3004    struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3005    int), int protocol)
3006{
3007	struct sdev_node	*dv = VTOSDEV(vp);
3008	struct sdev_node	*parent = dv->sdev_dotdot;
3009	struct vattr		*get;
3010	uint_t			mask = vap->va_mask;
3011	int 			error;
3012
3013	/* some sanity checks */
3014	if (vap->va_mask & AT_NOSET)
3015		return (EINVAL);
3016
3017	if (vap->va_mask & AT_SIZE) {
3018		if (vp->v_type == VDIR) {
3019			return (EISDIR);
3020		}
3021	}
3022
3023	/* no need to set attribute, but do not fail either */
3024	ASSERT(parent);
3025	rw_enter(&parent->sdev_contents, RW_READER);
3026	if (dv->sdev_state == SDEV_ZOMBIE) {
3027		rw_exit(&parent->sdev_contents);
3028		return (0);
3029	}
3030
3031	/* If backing store exists, just set it. */
3032	if (dv->sdev_attrvp) {
3033		rw_exit(&parent->sdev_contents);
3034		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3035	}
3036
3037	/*
3038	 * Otherwise, for nodes with the persistence attribute, create it.
3039	 */
3040	ASSERT(dv->sdev_attr);
3041	if (SDEV_IS_PERSIST(dv) ||
3042	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3043		sdev_vattr_merge(dv, vap);
3044		rw_enter(&dv->sdev_contents, RW_WRITER);
3045		error = sdev_shadow_node(dv, cred);
3046		rw_exit(&dv->sdev_contents);
3047		rw_exit(&parent->sdev_contents);
3048
3049		if (error)
3050			return (error);
3051		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3052	}
3053
3055	/*
3056	 * sdev_attr was allocated in sdev_mknode
3057	 */
3058	rw_enter(&dv->sdev_contents, RW_WRITER);
3059	error = secpolicy_vnode_setattr(cred, vp, vap,
3060	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
3061	if (error) {
3062		rw_exit(&dv->sdev_contents);
3063		rw_exit(&parent->sdev_contents);
3064		return (error);
3065	}
3066
3067	get = dv->sdev_attr;
3068	if (mask & AT_MODE) {
3069		get->va_mode &= S_IFMT;
3070		get->va_mode |= vap->va_mode & ~S_IFMT;
3071	}
3072
3073	if ((mask & AT_UID) || (mask & AT_GID)) {
3074		if (mask & AT_UID)
3075			get->va_uid = vap->va_uid;
3076		if (mask & AT_GID)
3077			get->va_gid = vap->va_gid;
3078		/*
3079		 * a callback must be provided if the protocol is set
3080		 */
3081		if ((protocol & AT_UID) || (protocol & AT_GID)) {
3082			ASSERT(callback);
3083			error = callback(dv, get, protocol);
3084			if (error) {
3085				rw_exit(&dv->sdev_contents);
3086				rw_exit(&parent->sdev_contents);
3087				return (error);
3088			}
3089		}
3090	}
3091
3092	if (mask & AT_ATIME)
3093		get->va_atime = vap->va_atime;
3094	if (mask & AT_MTIME)
3095		get->va_mtime = vap->va_mtime;
3096	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3097		gethrestime(&get->va_ctime);
3098	}
3099
3100	sdev_vattr_merge(dv, get);
3101	rw_exit(&dv->sdev_contents);
3102	rw_exit(&parent->sdev_contents);
3103	return (0);
3104}
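
/*
 * Illustrative sketch of a caller of devname_setattr_func() that
 * supplies an ownership callback (all names here are hypothetical):
 *
 *	static int
 *	example_set_owner(struct sdev_node *dv, struct vattr *vap, int prot)
 *	{
 *		(push vap->va_uid / vap->va_gid to the backing subsystem)
 *		return (0);
 *	}
 *
 *	static int
 *	example_setattr(struct vnode *vp, struct vattr *vap, int flags,
 *	    struct cred *cr)
 *	{
 *		return (devname_setattr_func(vp, vap, flags, cr,
 *		    example_set_owner, AT_UID | AT_GID));
 *	}
 */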
3105
3106/*
3107 * a generic inactive() function
3108 */
3109/*ARGSUSED*/
3110void
3111devname_inactive_func(struct vnode *vp, struct cred *cred,
3112    void (*callback)(struct vnode *))
3113{
3114	int clean;
3115	struct sdev_node *dv = VTOSDEV(vp);
3116	struct sdev_node *ddv = dv->sdev_dotdot;
3117	int state;
3118
3119	rw_enter(&ddv->sdev_contents, RW_WRITER);
3120	state = dv->sdev_state;
3121
3122	mutex_enter(&vp->v_lock);
3123	ASSERT(vp->v_count >= 1);
3124
3125	if (vp->v_count == 1 && callback != NULL)
3126		callback(vp);
3127
3128	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3129
3130	/*
3131	 * The last reference on the ZOMBIE node has been released.
3132	 * Clean up the sdev_node and release the hold on the
3133	 * backing store node so that the ZOMBIE backing store
3134	 * is also cleaned out.
3135	 */
3136	if (clean) {
3137		ASSERT(ddv);
3138
3139		ddv->sdev_nlink--;
3140		if (vp->v_type == VDIR) {
3141			dv->sdev_nlink--;
3142		}
3143		if ((dv->sdev_flags & SDEV_STALE) == 0)
3144			avl_remove(&ddv->sdev_entries, dv);
3145		dv->sdev_nlink--;
3146		--vp->v_count;
3147		mutex_exit(&vp->v_lock);
3148		sdev_nodedestroy(dv, 0);
3149	} else {
3150		--vp->v_count;
3151		mutex_exit(&vp->v_lock);
3152	}
3153	rw_exit(&ddv->sdev_contents);
3154}
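
/*
 * Illustrative sketch of a vnode inactive entry point built on
 * devname_inactive_func() (the wrapper name is hypothetical):
 *
 *	static void
 *	example_inactive(struct vnode *vp, struct cred *cr,
 *	    caller_context_t *ct)
 *	{
 *		devname_inactive_func(vp, cr, NULL);
 *	}
 */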
3155