rc_node.c revision 4321:a8930ec16e52
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * rc_node.c - object management primitives
30 *
31 * This layer manages entities, their data structures and locking, iterators,
32 * transactions, and change notification requests.  Entities (scopes,
33 * services, instances, snapshots, snaplevels, property groups, "composed"
34 * property groups (see composition below), and properties) are represented by
35 * rc_node_t's and are kept in the cache_hash hash table.  (Property values
36 * are kept in the rn_values member of the respective property -- not as
37 * separate objects.)  Iterators are represented by rc_node_iter_t's.
38 * Transactions are represented by rc_node_tx_t's and are only allocated as
39 * part of repcache_tx_t's in the client layer (client.c).  Change
40 * notification requests are represented by rc_notify_t structures and are
41 * described below.
42 *
43 * The entity tree is rooted at rc_scope, which rc_node_init() initializes to
44 * the "localhost" scope.  The tree is filled in from the database on-demand
45 * by rc_node_fill_children(), usually from rc_iter_create() since iterators
46 * are the only way to find the children of an entity.
47 *
48 * Each rc_node_t is protected by its rn_lock member.  Operations which can
49 * take too long, however, should serialize on an RC_NODE_WAITING_FLAGS bit in
50 * rn_flags with the rc_node_{hold,rele}_flag() functions.  And since pointers
51 * to rc_node_t's are allowed, rn_refs is a reference count maintained by
52 * rc_node_{hold,rele}().  See configd.h for locking order information.
53 *
54 * When a node (property group or snapshot) is updated, a new node takes the
55 * place of the old node in the global hash, and the old node is hung off of
56 * the rn_former list of the new node.  At the same time, all of its children
57 * have their rn_parent_ref pointer set, and any holds they have are reflected
58 * in the old node's rn_other_refs count.  This is automatically kept up
59 * to date, until the final reference to the subgraph is dropped, at which
60 * point the node is unrefed and destroyed, along with all of its children.
61 *
62 * Locking rules: To dereference an rc_node_t * (usually to lock it), you must
63 * have a hold (rc_node_hold()) on it or otherwise be sure that it hasn't been
64 * rc_node_destroy()ed (hold a lock on its parent or child, hold a flag,
65 * etc.).  Once you have locked an rc_node_t you must check its rn_flags for
66 * RC_NODE_DEAD before you can use it.  This is usually done with the
67 * rc_node_{wait,hold}_flag() functions (often via the rc_node_check_*()
68 * functions & RC_NODE_*() macros), which fail if the object has died.
69 *
70 * An ITER_START for a non-ENTITY_VALUE induces an rc_node_fill_children()
71 * call via rc_node_setup_iter() to populate the rn_children uu_list of the
72 * rc_node_t * in question and a call to uu_list_walk_start() on that list.  For
73 * ITER_READ, rc_iter_next() uses uu_list_walk_next() to find the next
74 * appropriate child.
75 *
76 * An ITER_START for an ENTITY_VALUE makes sure the node has its values
77 * filled, and sets up the iterator.  An ITER_READ_VALUE just copies out
78 * the proper values and updates the offset information.
79 *
80 * When a property group gets changed by a transaction, it sticks around as
81 * a child of its replacement property group, but is removed from the parent.
82 *
83 * To allow aliases, snapshots are implemented with a level of indirection.
84 * A snapshot rc_node_t has a snapid which refers to an rc_snapshot_t in
85 * snapshot.c which contains the authoritative snaplevel information.  The
86 * snapid is "assigned" by rc_attach_snapshot().
87 *
88 * We provide the client layer with rc_node_ptr_t's to reference objects.
89 * Objects referred to by them are automatically held & released by
90 * rc_node_assign() & rc_node_clear().  The RC_NODE_PTR_*() macros are used at
91 * client.c entry points to read the pointers.  They fetch the pointer to the
92 * object, return (from the function) if it is dead, and lock, hold, or hold
93 * a flag of the object.
94 */
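/*
 * The locking rules above boil down to a common access pattern.  As an
 * illustrative sketch only (not code from this file; 'np' is a hypothetical
 * local), a caller with a safe pointer typically does:
 *
 *	rc_node_hold(np);
 *	(void) pthread_mutex_lock(&np->rn_lock);
 *	if (np->rn_flags & RC_NODE_DEAD) {
 *		(void) pthread_mutex_unlock(&np->rn_lock);
 *		rc_node_rele(np);
 *		return (REP_PROTOCOL_FAIL_DELETED);
 *	}
 *	... operate on np ...
 *	(void) pthread_mutex_unlock(&np->rn_lock);
 *	rc_node_rele(np);
 *
 * The RC_NODE_PTR_*() macros used at the client.c entry points perform a
 * similar fetch-check-hold sequence on behalf of their callers.
 */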
95
96/*
97 * Permission checking is authorization-based: some operations may only
98 * proceed if the user has been assigned at least one of a set of
99 * authorization strings.  The set of enabling authorizations depends on the
100 * operation and the target object.  The set of authorizations assigned to
101 * a user is determined by reading /etc/security/policy.conf, querying the
102 * user_attr database, and possibly querying the prof_attr database, as per
103 * chkauthattr() in libsecdb.
104 *
105 * The fastest way to decide whether the two sets intersect is by entering the
106 * strings into a hash table and detecting collisions, which takes linear time
107 * in the total size of the sets.  The exception is authorization patterns,
108 * which may be assigned to users; without advanced pattern-matching
109 * algorithms, each pattern costs O(n) in the number of enabling
110 * authorizations.
111 *
112 * We can achieve some practical speed-ups by noting that if we enter all of
113 * the authorizations from one of the sets into the hash table we can merely
114 * check the elements of the second set for existence without adding them.
115 * This reduces memory requirements and hash table clutter.  The enabling set
116 * is well suited for this because it is internal to configd (for now, at
117 * least).  Combine this with short-circuiting and we can even minimize the
118 * number of queries to the security databases (user_attr & prof_attr).
119 *
120 * To force this usage onto clients we provide functions for adding
121 * authorizations to the enabling set of a permission context structure
122 * (perm_add_*()) and one to decide whether the user associated with the
123 * current door call client possesses any of them (perm_granted()).
124 *
125 * At some point, a generic version of this should move to libsecdb.
126 */
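/*
 * A rough usage sketch (illustration only; the locals below are
 * hypothetical): a caller checking modify permission builds the enabling
 * set and then tests the door client against it:
 *
 *	permcheck_t *pcp = pc_create();
 *	if (pcp == NULL)
 *		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
 *	if (perm_add_enabling(pcp, AUTH_MODIFY) == REP_PROTOCOL_SUCCESS)
 *		granted = perm_granted(pcp);	(1 = yes, 0 = no, -1 = error)
 *	pc_free(pcp);
 */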
127
128/*
129 * Composition is the combination of sets of properties.  The sets are ordered
130 * and properties in higher sets obscure properties of the same name in lower
131 * sets.  Here we present a composed view of an instance's properties as the
132 * union of its properties and its service's properties.  Similarly the
133 * properties of snaplevels are combined to form a composed view of the
134 * properties of a snapshot (which should match the composed view of the
135 * properties of the instance when the snapshot was taken).
136 *
137 * In terms of the client interface, the client may request that a property
138 * group iterator for an instance or snapshot be composed.  Property groups
139 * traversed by such an iterator need not have the target entity as a parent.
140 * Similarly, the properties traversed by a property iterator for those
141 * property groups need not have the iterated property groups as parents.
142 *
143 * Implementation requires that iterators for instances and snapshots be
144 * composition-savvy, and that we have a "composed property group" entity
145 * which represents the composition of a number of property groups.  Iteration
146 * over "composed property groups" yields properties which may have different
147 * parents, but for all other operations a composed property group behaves
148 * like the top-most property group it represents.
149 *
150 * The implementation is based on the rn_cchain[] array of rc_node_t pointers
151 * in rc_node_t.  For instances, the pointers point to the instance and its
152 * parent service.  For snapshots they point to the child snaplevels, and for
153 * composed property groups they point to property groups.  A composed
154 * iterator carries an index into rn_cchain[].  Thus most of the magic ends up
155 * in the rc_iter_*() code.
156 */
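/*
 * For example (illustration only), with COMPOSITION_DEPTH == 2 an instance's
 * composition chain is
 *
 *	rn_cchain[0] = the instance itself	(its properties win)
 *	rn_cchain[1] = its parent service	(the fallback)
 *
 * so a composed iterator positioned at index 1 has exhausted the instance's
 * own property groups and is walking the service's.
 */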
157
158#include <assert.h>
159#include <atomic.h>
160#include <errno.h>
161#include <libuutil.h>
162#include <libscf.h>
163#include <libscf_priv.h>
164#include <prof_attr.h>
165#include <pthread.h>
166#include <stdio.h>
167#include <stdlib.h>
168#include <strings.h>
169#include <sys/types.h>
170#include <unistd.h>
171#include <user_attr.h>
172
173#include "configd.h"
174
175#define	AUTH_PREFIX		"solaris.smf."
176#define	AUTH_MANAGE		AUTH_PREFIX "manage"
177#define	AUTH_MODIFY		AUTH_PREFIX "modify"
178#define	AUTH_MODIFY_PREFIX	AUTH_MODIFY "."
179#define	AUTH_PG_ACTIONS		SCF_PG_RESTARTER_ACTIONS
180#define	AUTH_PG_ACTIONS_TYPE	SCF_PG_RESTARTER_ACTIONS_TYPE
181#define	AUTH_PG_GENERAL		SCF_PG_GENERAL
182#define	AUTH_PG_GENERAL_TYPE	SCF_PG_GENERAL_TYPE
183#define	AUTH_PG_GENERAL_OVR	SCF_PG_GENERAL_OVR
184#define	AUTH_PG_GENERAL_OVR_TYPE  SCF_PG_GENERAL_OVR_TYPE
185#define	AUTH_PROP_ACTION	"action_authorization"
186#define	AUTH_PROP_ENABLED	"enabled"
187#define	AUTH_PROP_MODIFY	"modify_authorization"
188#define	AUTH_PROP_VALUE		"value_authorization"
189/* libsecdb should take care of this. */
190#define	RBAC_AUTH_SEP		","
191
192#define	MAX_VALID_CHILDREN 3
193
194typedef struct rc_type_info {
195	uint32_t	rt_type;		/* matches array index */
196	uint32_t	rt_num_ids;
197	uint32_t	rt_name_flags;
198	uint32_t	rt_valid_children[MAX_VALID_CHILDREN];
199} rc_type_info_t;
200
201#define	RT_NO_NAME	-1U
202
203static rc_type_info_t rc_types[] = {
204	{REP_PROTOCOL_ENTITY_NONE, 0, RT_NO_NAME},
205	{REP_PROTOCOL_ENTITY_SCOPE, 0, 0,
206	    {REP_PROTOCOL_ENTITY_SERVICE, REP_PROTOCOL_ENTITY_SCOPE}},
207	{REP_PROTOCOL_ENTITY_SERVICE, 0, UU_NAME_DOMAIN | UU_NAME_PATH,
208	    {REP_PROTOCOL_ENTITY_INSTANCE, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
209	{REP_PROTOCOL_ENTITY_INSTANCE, 1, UU_NAME_DOMAIN,
210	    {REP_PROTOCOL_ENTITY_SNAPSHOT, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
211	{REP_PROTOCOL_ENTITY_SNAPSHOT, 2, UU_NAME_DOMAIN,
212	    {REP_PROTOCOL_ENTITY_SNAPLEVEL, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
213	{REP_PROTOCOL_ENTITY_SNAPLEVEL, 4, RT_NO_NAME,
214	    {REP_PROTOCOL_ENTITY_PROPERTYGRP}},
215	{REP_PROTOCOL_ENTITY_PROPERTYGRP, 5, UU_NAME_DOMAIN,
216	    {REP_PROTOCOL_ENTITY_PROPERTY}},
217	{REP_PROTOCOL_ENTITY_CPROPERTYGRP, 0, UU_NAME_DOMAIN,
218	    {REP_PROTOCOL_ENTITY_PROPERTY}},
219	{REP_PROTOCOL_ENTITY_PROPERTY, 7, UU_NAME_DOMAIN},
220	{-1UL}
221};
222#define	NUM_TYPES	((sizeof (rc_types) / sizeof (*rc_types)))
223
224/* Element of a permcheck_t hash table. */
225struct pc_elt {
226	struct pc_elt	*pce_next;
227	char		pce_auth[1];
228};
229
230/* An authorization set hash table. */
231typedef struct {
232	struct pc_elt	**pc_buckets;
233	uint_t		pc_bnum;		/* number of buckets */
234	uint_t		pc_enum;		/* number of elements */
235} permcheck_t;
236
237static uu_list_pool_t *rc_children_pool;
238static uu_list_pool_t *rc_pg_notify_pool;
239static uu_list_pool_t *rc_notify_pool;
240static uu_list_pool_t *rc_notify_info_pool;
241
242static rc_node_t *rc_scope;
243
244static pthread_mutex_t	rc_pg_notify_lock = PTHREAD_MUTEX_INITIALIZER;
245static pthread_cond_t	rc_pg_notify_cv = PTHREAD_COND_INITIALIZER;
246static uint_t		rc_notify_in_use;	/* blocks removals */
247
248static pthread_mutex_t	perm_lock = PTHREAD_MUTEX_INITIALIZER;
249
250static void rc_node_unrefed(rc_node_t *np);
251
252/*
253 * We support an arbitrary number of clients interested in events for certain
254 * types of changes.  Each client is represented by an rc_notify_info_t, and
255 * all clients are chained onto the rc_notify_info_list.
256 *
257 * The rc_notify_list is the global notification list.  Each entry is of
258 * type rc_notify_t, which is embedded in one of three other structures:
259 *
260 *	rc_node_t		property group update notification
261 *	rc_notify_delete_t	object deletion notification
262 *	rc_notify_info_t	notification clients
263 *
264 * Which type of object is determined by which pointer in the rc_notify_t is
265 * non-NULL.
266 *
267 * New notifications and clients are added to the end of the list.
268 * Notifications no one is interested in are never added to the list.
269 *
270 * Clients use their position in the list to track which notifications they
271 * have not yet reported.  As they process notifications, they move forward
272 * in the list past them.  There is always a client at the beginning of the
273 * list -- as it moves past notifications, it removes them from the list and
274 * cleans them up.
275 *
276 * The rc_pg_notify_lock protects all notification state.  The rc_pg_notify_cv
277 * is used for global signalling, and each client has a cv on which it waits
278 * for events of interest.
279 */
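/*
 * Illustration only: given an rc_notify_t *np taken from rc_notify_list, the
 * embedding object is identified as described above:
 *
 *	np->rcn_node != NULL		a property group update (rc_node_t)
 *	np->rcn_delete != NULL		an object deletion (rc_notify_delete_t)
 *	np->rcn_info != NULL		a client marker (rc_notify_info_t)
 */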
280static uu_list_t	*rc_notify_info_list;
281static uu_list_t	*rc_notify_list;
282
283#define	HASH_SIZE	512
284#define	HASH_MASK	(HASH_SIZE - 1)
285
286#pragma align 64(cache_hash)
287static cache_bucket_t cache_hash[HASH_SIZE];
288
289#define	CACHE_BUCKET(h)		(&cache_hash[(h) & HASH_MASK])
290
291static uint32_t
292rc_node_hash(rc_node_lookup_t *lp)
293{
294	uint32_t type = lp->rl_type;
295	uint32_t backend = lp->rl_backend;
296	uint32_t mainid = lp->rl_main_id;
297	uint32_t *ids = lp->rl_ids;
298
299	rc_type_info_t *tp = &rc_types[type];
300	uint32_t num_ids;
301	uint32_t left;
302	uint32_t hash;
303
304	assert(backend == BACKEND_TYPE_NORMAL ||
305	    backend == BACKEND_TYPE_NONPERSIST);
306
307	assert(type > 0 && type < NUM_TYPES);
308	num_ids = tp->rt_num_ids;
309
310	left = MAX_IDS - num_ids;
311	assert(num_ids <= MAX_IDS);
312
313	hash = type * 7 + mainid * 5 + backend;
314
315	while (num_ids-- > 0)
316		hash = hash * 11 + *ids++ * 7;
317
318	/*
319	 * the rest should be zeroed
320	 */
321	while (left-- > 0)
322		assert(*ids++ == 0);
323
324	return (hash);
325}
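/*
 * Worked example (illustration only): for a lookup with rl_type T,
 * rl_backend B, rl_main_id M, and two ids i1 and i2, the function above
 * computes
 *
 *	hash = ((T * 7 + M * 5 + B) * 11 + i1 * 7) * 11 + i2 * 7
 *
 * and the node is chained in cache_hash[hash & HASH_MASK], i.e.
 * CACHE_BUCKET(hash).
 */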
326
327static int
328rc_node_match(rc_node_t *np, rc_node_lookup_t *l)
329{
330	rc_node_lookup_t *r = &np->rn_id;
331	rc_type_info_t *tp;
332	uint32_t type;
333	uint32_t num_ids;
334
335	if (r->rl_main_id != l->rl_main_id)
336		return (0);
337
338	type = r->rl_type;
339	if (type != l->rl_type)
340		return (0);
341
342	assert(type > 0 && type < NUM_TYPES);
343
344	tp = &rc_types[r->rl_type];
345	num_ids = tp->rt_num_ids;
346
347	assert(num_ids <= MAX_IDS);
348	while (num_ids-- > 0)
349		if (r->rl_ids[num_ids] != l->rl_ids[num_ids])
350			return (0);
351
352	return (1);
353}
354
355/*
356 * the "other" references on a node are maintained in an atomically
357 * updated refcount, rn_other_refs.  This can be bumped from arbitrary
358 * context, and tracks references to a possibly out-of-date node's children.
359 *
360 * To prevent the node from disappearing between the final drop of
361 * rn_other_refs and the unref handling, rn_other_refs_held is bumped on
362 * 0->1 transitions and decremented (with the node lock held) on 1->0
363 * transitions.
364 */
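/*
 * Sketch (illustration only): a child which is still referenced when its
 * parent is marked RC_NODE_OLD effectively does
 *
 *	rc_node_hold_other(parent);	(child's rn_refs goes 0 -> 1)
 *	...
 *	rc_node_rele_other(parent);	(child's rn_refs goes 1 -> 0)
 *
 * so the old subgraph survives exactly as long as it remains reachable; see
 * rc_node_hold_locked() and rc_node_rele_locked() below.
 */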
365static void
366rc_node_hold_other(rc_node_t *np)
367{
368	if (atomic_add_32_nv(&np->rn_other_refs, 1) == 1) {
369		atomic_add_32(&np->rn_other_refs_held, 1);
370		assert(np->rn_other_refs_held > 0);
371	}
372	assert(np->rn_other_refs > 0);
373}
374
375/*
376 * No node locks may be held
377 */
378static void
379rc_node_rele_other(rc_node_t *np)
380{
381	assert(np->rn_other_refs > 0);
382	if (atomic_add_32_nv(&np->rn_other_refs, -1) == 0) {
383		(void) pthread_mutex_lock(&np->rn_lock);
384		assert(np->rn_other_refs_held > 0);
385		if (atomic_add_32_nv(&np->rn_other_refs_held, -1) == 0 &&
386		    np->rn_refs == 0 && (np->rn_flags & RC_NODE_OLD))
387			rc_node_unrefed(np);
388		else
389			(void) pthread_mutex_unlock(&np->rn_lock);
390	}
391}
392
393static void
394rc_node_hold_locked(rc_node_t *np)
395{
396	assert(MUTEX_HELD(&np->rn_lock));
397
398	if (np->rn_refs == 0 && (np->rn_flags & RC_NODE_PARENT_REF))
399		rc_node_hold_other(np->rn_parent_ref);
400	np->rn_refs++;
401	assert(np->rn_refs > 0);
402}
403
404static void
405rc_node_hold(rc_node_t *np)
406{
407	(void) pthread_mutex_lock(&np->rn_lock);
408	rc_node_hold_locked(np);
409	(void) pthread_mutex_unlock(&np->rn_lock);
410}
411
412static void
413rc_node_rele_locked(rc_node_t *np)
414{
415	int unref = 0;
416	rc_node_t *par_ref = NULL;
417
418	assert(MUTEX_HELD(&np->rn_lock));
419	assert(np->rn_refs > 0);
420
421	if (--np->rn_refs == 0) {
422		if (np->rn_flags & RC_NODE_PARENT_REF)
423			par_ref = np->rn_parent_ref;
424
425		/*
426		 * Composed property groups are only as good as their
427		 * references.
428		 */
429		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
430			np->rn_flags |= RC_NODE_DEAD;
431
432		if ((np->rn_flags & (RC_NODE_DEAD|RC_NODE_OLD)) &&
433		    np->rn_other_refs == 0 && np->rn_other_refs_held == 0)
434			unref = 1;
435	}
436
437	if (unref)
438		rc_node_unrefed(np);
439	else
440		(void) pthread_mutex_unlock(&np->rn_lock);
441
442	if (par_ref != NULL)
443		rc_node_rele_other(par_ref);
444}
445
446void
447rc_node_rele(rc_node_t *np)
448{
449	(void) pthread_mutex_lock(&np->rn_lock);
450	rc_node_rele_locked(np);
451}
452
453static cache_bucket_t *
454cache_hold(uint32_t h)
455{
456	cache_bucket_t *bp = CACHE_BUCKET(h);
457	(void) pthread_mutex_lock(&bp->cb_lock);
458	return (bp);
459}
460
461static void
462cache_release(cache_bucket_t *bp)
463{
464	(void) pthread_mutex_unlock(&bp->cb_lock);
465}
466
467static rc_node_t *
468cache_lookup_unlocked(cache_bucket_t *bp, rc_node_lookup_t *lp)
469{
470	uint32_t h = rc_node_hash(lp);
471	rc_node_t *np;
472
473	assert(MUTEX_HELD(&bp->cb_lock));
474	assert(bp == CACHE_BUCKET(h));
475
476	for (np = bp->cb_head; np != NULL; np = np->rn_hash_next) {
477		if (np->rn_hash == h && rc_node_match(np, lp)) {
478			rc_node_hold(np);
479			return (np);
480		}
481	}
482
483	return (NULL);
484}
485
486static rc_node_t *
487cache_lookup(rc_node_lookup_t *lp)
488{
489	uint32_t h;
490	cache_bucket_t *bp;
491	rc_node_t *np;
492
493	h = rc_node_hash(lp);
494	bp = cache_hold(h);
495
496	np = cache_lookup_unlocked(bp, lp);
497
498	cache_release(bp);
499
500	return (np);
501}
502
503static void
504cache_insert_unlocked(cache_bucket_t *bp, rc_node_t *np)
505{
506	assert(MUTEX_HELD(&bp->cb_lock));
507	assert(np->rn_hash == rc_node_hash(&np->rn_id));
508	assert(bp == CACHE_BUCKET(np->rn_hash));
509
510	assert(np->rn_hash_next == NULL);
511
512	np->rn_hash_next = bp->cb_head;
513	bp->cb_head = np;
514}
515
516static void
517cache_remove_unlocked(cache_bucket_t *bp, rc_node_t *np)
518{
519	rc_node_t **npp;
520
521	assert(MUTEX_HELD(&bp->cb_lock));
522	assert(np->rn_hash == rc_node_hash(&np->rn_id));
523	assert(bp == CACHE_BUCKET(np->rn_hash));
524
525	for (npp = &bp->cb_head; *npp != NULL; npp = &(*npp)->rn_hash_next)
526		if (*npp == np)
527			break;
528
529	assert(*npp == np);
530	*npp = np->rn_hash_next;
531	np->rn_hash_next = NULL;
532}
533
534/*
535 * verify that the 'parent' type can have a child typed 'child'
536 * Fails with
537 *   _INVALID_TYPE - argument is invalid
538 *   _TYPE_MISMATCH - parent type cannot have children of type child
539 */
540static int
541rc_check_parent_child(uint32_t parent, uint32_t child)
542{
543	int idx;
544	uint32_t type;
545
546	if (parent == 0 || parent >= NUM_TYPES ||
547	    child == 0 || child >= NUM_TYPES)
548		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
549
550	for (idx = 0; idx < MAX_VALID_CHILDREN; idx++) {
551		type = rc_types[parent].rt_valid_children[idx];
552		if (type == child)
553			return (REP_PROTOCOL_SUCCESS);
554	}
555
556	return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
557}
558
559/*
560 * Fails with
561 *   _INVALID_TYPE - type is invalid
562 *   _BAD_REQUEST - name is an invalid name for a node of type type
563 */
564int
565rc_check_type_name(uint32_t type, const char *name)
566{
567	if (type == 0 || type >= NUM_TYPES)
568		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
569
570	if (uu_check_name(name, rc_types[type].rt_name_flags) == -1)
571		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
572
573	return (REP_PROTOCOL_SUCCESS);
574}
575
576static int
577rc_check_pgtype_name(const char *name)
578{
579	if (uu_check_name(name, UU_NAME_DOMAIN) == -1)
580		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
581
582	return (REP_PROTOCOL_SUCCESS);
583}
584
585static int
586rc_notify_info_interested(rc_notify_info_t *rnip, rc_notify_t *np)
587{
588	rc_node_t *nnp = np->rcn_node;
589	int i;
590
591	assert(MUTEX_HELD(&rc_pg_notify_lock));
592
593	if (np->rcn_delete != NULL) {
594		assert(np->rcn_info == NULL && np->rcn_node == NULL);
595		return (1);		/* everyone likes deletes */
596	}
597	if (np->rcn_node == NULL) {
598		assert(np->rcn_info != NULL || np->rcn_delete != NULL);
599		return (0);
600	}
601	assert(np->rcn_info == NULL);
602
603	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
604		if (rnip->rni_namelist[i] != NULL) {
605			if (strcmp(nnp->rn_name, rnip->rni_namelist[i]) == 0)
606				return (1);
607		}
608		if (rnip->rni_typelist[i] != NULL) {
609			if (strcmp(nnp->rn_type, rnip->rni_typelist[i]) == 0)
610				return (1);
611		}
612	}
613	return (0);
614}
615
616static void
617rc_notify_insert_node(rc_node_t *nnp)
618{
619	rc_notify_t *np = &nnp->rn_notify;
620	rc_notify_info_t *nip;
621	int found = 0;
622
623	assert(np->rcn_info == NULL);
624
625	if (nnp->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
626		return;
627
628	(void) pthread_mutex_lock(&rc_pg_notify_lock);
629	np->rcn_node = nnp;
630	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
631	    nip = uu_list_next(rc_notify_info_list, nip)) {
632		if (rc_notify_info_interested(nip, np)) {
633			(void) pthread_cond_broadcast(&nip->rni_cv);
634			found++;
635		}
636	}
637	if (found)
638		(void) uu_list_insert_before(rc_notify_list, NULL, np);
639	else
640		np->rcn_node = NULL;
641
642	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
643}
644
645static void
646rc_notify_deletion(rc_notify_delete_t *ndp, const char *service,
647    const char *instance, const char *pg)
648{
649	rc_notify_info_t *nip;
650
651	uu_list_node_init(&ndp->rnd_notify, &ndp->rnd_notify.rcn_list_node,
652	    rc_notify_pool);
653	ndp->rnd_notify.rcn_delete = ndp;
654
655	(void) snprintf(ndp->rnd_fmri, sizeof (ndp->rnd_fmri),
656	    "svc:/%s%s%s%s%s", service,
657	    (instance != NULL)? ":" : "", (instance != NULL)? instance : "",
658	    (pg != NULL)? "/:properties/" : "", (pg != NULL)? pg : "");
659
660	/*
661	 * add to notification list, notify watchers
662	 */
663	(void) pthread_mutex_lock(&rc_pg_notify_lock);
664	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
665	    nip = uu_list_next(rc_notify_info_list, nip))
666		(void) pthread_cond_broadcast(&nip->rni_cv);
667	(void) uu_list_insert_before(rc_notify_list, NULL, ndp);
668	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
669}
670
671static void
672rc_notify_remove_node(rc_node_t *nnp)
673{
674	rc_notify_t *np = &nnp->rn_notify;
675
676	assert(np->rcn_info == NULL);
677	assert(!MUTEX_HELD(&nnp->rn_lock));
678
679	(void) pthread_mutex_lock(&rc_pg_notify_lock);
680	while (np->rcn_node != NULL) {
681		if (rc_notify_in_use) {
682			(void) pthread_cond_wait(&rc_pg_notify_cv,
683			    &rc_pg_notify_lock);
684			continue;
685		}
686		(void) uu_list_remove(rc_notify_list, np);
687		np->rcn_node = NULL;
688		break;
689	}
690	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
691}
692
693static void
694rc_notify_remove_locked(rc_notify_t *np)
695{
696	assert(MUTEX_HELD(&rc_pg_notify_lock));
697	assert(rc_notify_in_use == 0);
698
699	(void) uu_list_remove(rc_notify_list, np);
700	if (np->rcn_node) {
701		np->rcn_node = NULL;
702	} else if (np->rcn_delete) {
703		uu_free(np->rcn_delete);
704	} else {
705		assert(0);	/* CAN'T HAPPEN */
706	}
707}
708
709/*
710 * Permission checking functions.  See comment atop this file.
711 */
712#ifndef NATIVE_BUILD
713static permcheck_t *
714pc_create()
715{
716	permcheck_t *p;
717
718	p = uu_zalloc(sizeof (*p));
719	if (p == NULL)
720		return (NULL);
721	p->pc_bnum = 8;			/* Normal case will only have 2 elts. */
722	p->pc_buckets = uu_zalloc(sizeof (*p->pc_buckets) * p->pc_bnum);
723	if (p->pc_buckets == NULL) {
724		uu_free(p);
725		return (NULL);
726	}
727
728	p->pc_enum = 0;
729	return (p);
730}
731
732static void
733pc_free(permcheck_t *pcp)
734{
735	uint_t i;
736	struct pc_elt *ep, *next;
737
738	for (i = 0; i < pcp->pc_bnum; ++i) {
739		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
740			next = ep->pce_next;
741			free(ep);
742		}
743	}
744
745	free(pcp->pc_buckets);
746	free(pcp);
747}
748
749static uint32_t
750pc_hash(const char *auth)
751{
752	uint32_t h = 0, g;
753	const char *p;
754
755	/*
756	 * Generic hash function from uts/common/os/modhash.c.
757	 */
758	for (p = auth; *p != '\0'; ++p) {
759		h = (h << 4) + *p;
760		g = (h & 0xf0000000);
761		if (g != 0) {
762			h ^= (g >> 24);
763			h ^= g;
764		}
765	}
766
767	return (h);
768}
769
770static int
771pc_exists(const permcheck_t *pcp, const char *auth)
772{
773	uint32_t h;
774	struct pc_elt *ep;
775
776	h = pc_hash(auth);
777	for (ep = pcp->pc_buckets[h & (pcp->pc_bnum - 1)];
778	    ep != NULL;
779	    ep = ep->pce_next) {
780		if (strcmp(auth, ep->pce_auth) == 0)
781			return (1);
782	}
783
784	return (0);
785}
786
787static int
788pc_match(const permcheck_t *pcp, const char *pattern)
789{
790	uint_t i;
791	struct pc_elt *ep;
792
793	for (i = 0; i < pcp->pc_bnum; ++i) {
794		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = ep->pce_next) {
795			if (_auth_match(pattern, ep->pce_auth))
796				return (1);
797		}
798	}
799
800	return (0);
801}
802
803static int
804pc_grow(permcheck_t *pcp)
805{
806	uint_t new_bnum, i, j;
807	struct pc_elt **new_buckets;
808	struct pc_elt *ep, *next;
809
810	new_bnum = pcp->pc_bnum * 2;
811	if (new_bnum < pcp->pc_bnum)
812		/* Homey don't play that. */
813		return (-1);
814
815	new_buckets = uu_zalloc(sizeof (*new_buckets) * new_bnum);
816	if (new_buckets == NULL)
817		return (-1);
818
819	for (i = 0; i < pcp->pc_bnum; ++i) {
820		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
821			next = ep->pce_next;
822			j = pc_hash(ep->pce_auth) & (new_bnum - 1);
823			ep->pce_next = new_buckets[j];
824			new_buckets[j] = ep;
825		}
826	}
827
828	uu_free(pcp->pc_buckets);
829	pcp->pc_buckets = new_buckets;
830	pcp->pc_bnum = new_bnum;
831
832	return (0);
833}
834
835static int
836pc_add(permcheck_t *pcp, const char *auth)
837{
838	struct pc_elt *ep;
839	uint_t i;
840
841	ep = uu_zalloc(offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1);
842	if (ep == NULL)
843		return (-1);
844
845	/* Grow if pc_enum / pc_bnum > 3/4. */
846	if (pcp->pc_enum * 4 > 3 * pcp->pc_bnum)
847		/* Failure is not a stopper; we'll try again next time. */
848		(void) pc_grow(pcp);
849
850	(void) strcpy(ep->pce_auth, auth);
851
852	i = pc_hash(auth) & (pcp->pc_bnum - 1);
853	ep->pce_next = pcp->pc_buckets[i];
854	pcp->pc_buckets[i] = ep;
855
856	++pcp->pc_enum;
857
858	return (0);
859}
860
861/*
862 * For the type of a property group, return the authorization which may be
863 * used to modify it.
864 */
865static const char *
866perm_auth_for_pgtype(const char *pgtype)
867{
868	if (strcmp(pgtype, SCF_GROUP_METHOD) == 0)
869		return (AUTH_MODIFY_PREFIX "method");
870	else if (strcmp(pgtype, SCF_GROUP_DEPENDENCY) == 0)
871		return (AUTH_MODIFY_PREFIX "dependency");
872	else if (strcmp(pgtype, SCF_GROUP_APPLICATION) == 0)
873		return (AUTH_MODIFY_PREFIX "application");
874	else if (strcmp(pgtype, SCF_GROUP_FRAMEWORK) == 0)
875		return (AUTH_MODIFY_PREFIX "framework");
876	else
877		return (NULL);
878}
879
880/*
881 * Fails with
882 *   _NO_RESOURCES - out of memory
883 */
884static int
885perm_add_enabling(permcheck_t *pcp, const char *auth)
886{
887	return (pc_add(pcp, auth) == 0 ? REP_PROTOCOL_SUCCESS :
888	    REP_PROTOCOL_FAIL_NO_RESOURCES);
889}
890
891/* Note that perm_add_enabling_values() is defined below. */
892
893/*
894 * perm_granted() returns 1 if the current door caller has one of the enabling
895 * authorizations in pcp, 0 if it doesn't, and -1 if an error (usually lack of
896 * memory) occurs.  check_auth_list() checks an RBAC_AUTH_SEP-separated list
897 * of authorizations for existence in pcp, and check_prof_list() checks the
898 * authorizations granted to an RBAC_AUTH_SEP-separated list of profiles.
899 */
900static int
901check_auth_list(const permcheck_t *pcp, char *authlist)
902{
903	char *auth, *lasts;
904	int ret;
905
906	for (auth = (char *)strtok_r(authlist, RBAC_AUTH_SEP, &lasts);
907	    auth != NULL;
908	    auth = (char *)strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
909		if (strchr(auth, KV_WILDCHAR) == NULL)
910			ret = pc_exists(pcp, auth);
911		else
912			ret = pc_match(pcp, auth);
913
914		if (ret)
915			return (ret);
916	}
917
918	return (0);
919}
920
921static int
922check_prof_list(const permcheck_t *pcp, char *proflist)
923{
924	char *prof, *lasts, *authlist, *subproflist;
925	profattr_t *pap;
926	int ret = 0;
927
928	for (prof = strtok_r(proflist, RBAC_AUTH_SEP, &lasts);
929	    prof != NULL;
930	    prof = strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
931		pap = getprofnam(prof);
932		if (pap == NULL)
933			continue;
934
935		authlist = kva_match(pap->attr, PROFATTR_AUTHS_KW);
936		if (authlist != NULL)
937			ret = check_auth_list(pcp, authlist);
938
939		if (!ret) {
940			subproflist = kva_match(pap->attr, PROFATTR_PROFS_KW);
941			if (subproflist != NULL)
942				/* depth check to avoid infinite recursion? */
943				ret = check_prof_list(pcp, subproflist);
944		}
945
946		free_profattr(pap);
947		if (ret)
948			return (ret);
949	}
950
951	return (ret);
952}
953
954static int
955perm_granted(const permcheck_t *pcp)
956{
957	ucred_t *uc;
958
959	int ret = 0;
960	uid_t uid;
961	userattr_t *uap;
962	char *authlist, *userattr_authlist, *proflist, *def_prof = NULL;
963
964	/*
965	 * Get generic authorizations from policy.conf
966	 *
967	 * Note that _get_auth_policy is not threadsafe, so we single-thread
968	 * access to it.
969	 */
970	(void) pthread_mutex_lock(&perm_lock);
971	ret = _get_auth_policy(&authlist, &def_prof);
972	(void) pthread_mutex_unlock(&perm_lock);
973
974	if (ret != 0)
975		return (-1);
976
977	if (authlist != NULL) {
978		ret = check_auth_list(pcp, authlist);
979
980		if (ret) {
981			_free_auth_policy(authlist, def_prof);
982			return (ret);
983		}
984	}
985
986	/*
987	 * Put off checking def_prof until later in an attempt to consolidate
988	 * prof_attr accesses.
989	 */
990
991	/* Get the uid */
992	if ((uc = get_ucred()) == NULL) {
993		_free_auth_policy(authlist, def_prof);
994
995		if (errno == EINVAL) {
996			/*
997			 * Client is no longer waiting for our response (e.g.,
998			 * it received a signal & resumed with EINTR).
999			 * Punting with door_return() would be nice but we
1000			 * need to release all of the locks & references we
1001			 * hold.  And we must report failure to the client
1002			 * layer to keep it from ignoring retries as
1003			 * already-done (idempotency & all that).  None of the
1004			 * error codes fit very well, so we might as well
1005			 * force the return of _PERMISSION_DENIED since we
1006			 * couldn't determine the user.
1007			 */
1008			return (0);
1009		}
1010		assert(0);
1011		abort();
1012	}
1013
1014	uid = ucred_geteuid(uc);
1015	assert(uid != (uid_t)-1);
1016
1017	uap = getuseruid(uid);
1018	if (uap != NULL) {
1019		/* Get the authorizations from user_attr. */
1020		userattr_authlist = kva_match(uap->attr, USERATTR_AUTHS_KW);
1021		if (userattr_authlist != NULL)
1022			ret = check_auth_list(pcp, userattr_authlist);
1023	}
1024
1025	if (!ret && def_prof != NULL) {
1026		/* Check generic profiles. */
1027		ret = check_prof_list(pcp, def_prof);
1028	}
1029
1030	if (!ret && uap != NULL) {
1031		proflist = kva_match(uap->attr, USERATTR_PROFILES_KW);
1032		if (proflist != NULL)
1033			ret = check_prof_list(pcp, proflist);
1034	}
1035
1036	_free_auth_policy(authlist, def_prof);
1037	if (uap != NULL)
1038		free_userattr(uap);
1039
1040	return (ret);
1041}
1042#endif /* NATIVE_BUILD */
1043
1044/*
1045 * flags in RC_NODE_WAITING_FLAGS are broadcast when unset, and are used to
1046 * serialize certain actions and to wait for certain operations to complete.
1047 *
1048 * The waiting flags are:
1049 *	RC_NODE_CHILDREN_CHANGING
1050 *		The child list is being built or changed (due to creation
1051 *		or deletion).  All iterators pause.
1052 *
1053 *	RC_NODE_USING_PARENT
1054 *		Someone is actively using the parent pointer, so we can't
1055 *		be removed from the parent list.
1056 *
1057 *	RC_NODE_CREATING_CHILD
1058 *		A child is being created -- locks out other creations, to
1059 *		prevent insert-insert races.
1060 *
1061 *	RC_NODE_IN_TX
1062 *		This object is running a transaction.
1063 *
1064 *	RC_NODE_DYING
1065 *		This node might be dying.  Always set as a set, using
1066 *		RC_NODE_DYING_FLAGS (which is everything but
1067 *		RC_NODE_USING_PARENT)
1068 */
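/*
 * Illustrative sketch only (this is the pattern used by, e.g.,
 * rc_node_fill_children()): serializing on one of these flags looks like
 *
 *	(void) pthread_mutex_lock(&np->rn_lock);
 *	if (!rc_node_hold_flag(np, RC_NODE_CHILDREN_CHANGING)) {
 *		(void) pthread_mutex_unlock(&np->rn_lock);
 *		return (REP_PROTOCOL_FAIL_DELETED);	(node died while waiting)
 *	}
 *	... potentially slow work; rn_lock may be dropped and retaken ...
 *	rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
 *	(void) pthread_mutex_unlock(&np->rn_lock);
 */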
1069static int
1070rc_node_hold_flag(rc_node_t *np, uint32_t flag)
1071{
1072	assert(MUTEX_HELD(&np->rn_lock));
1073	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1074
1075	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag)) {
1076		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1077	}
1078	if (np->rn_flags & RC_NODE_DEAD)
1079		return (0);
1080
1081	np->rn_flags |= flag;
1082	return (1);
1083}
1084
1085static void
1086rc_node_rele_flag(rc_node_t *np, uint32_t flag)
1087{
1088	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1089	assert(MUTEX_HELD(&np->rn_lock));
1090	assert((np->rn_flags & flag) == flag);
1091	np->rn_flags &= ~flag;
1092	(void) pthread_cond_broadcast(&np->rn_cv);
1093}
1094
1095/*
1096 * wait until a particular flag has cleared.  Fails if the object dies.
1097 */
1098static int
1099rc_node_wait_flag(rc_node_t *np, uint32_t flag)
1100{
1101	assert(MUTEX_HELD(&np->rn_lock));
1102	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag))
1103		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1104
1105	return (!(np->rn_flags & RC_NODE_DEAD));
1106}
1107
1108/*
1109 * On entry, np's lock must be held, and this thread must be holding
1110 * RC_NODE_USING_PARENT.  On return, both of them are released.
1111 *
1112 * If the return value is NULL, np either does not have a parent, or
1113 * the parent has been marked DEAD.
1114 *
1115 * If the return value is non-NULL, it is the parent of np, and both
1116 * its lock and the requested flags are held.
1117 */
1118static rc_node_t *
1119rc_node_hold_parent_flag(rc_node_t *np, uint32_t flag)
1120{
1121	rc_node_t *pp;
1122
1123	assert(MUTEX_HELD(&np->rn_lock));
1124	assert(np->rn_flags & RC_NODE_USING_PARENT);
1125
1126	if ((pp = np->rn_parent) == NULL) {
1127		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1128		(void) pthread_mutex_unlock(&np->rn_lock);
1129		return (NULL);
1130	}
1131	(void) pthread_mutex_unlock(&np->rn_lock);
1132
1133	(void) pthread_mutex_lock(&pp->rn_lock);
1134	(void) pthread_mutex_lock(&np->rn_lock);
1135	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1136	(void) pthread_mutex_unlock(&np->rn_lock);
1137
1138	if (!rc_node_hold_flag(pp, flag)) {
1139		(void) pthread_mutex_unlock(&pp->rn_lock);
1140		return (NULL);
1141	}
1142	return (pp);
1143}
1144
1145rc_node_t *
1146rc_node_alloc(void)
1147{
1148	rc_node_t *np = uu_zalloc(sizeof (*np));
1149
1150	if (np == NULL)
1151		return (NULL);
1152
1153	(void) pthread_mutex_init(&np->rn_lock, NULL);
1154	(void) pthread_cond_init(&np->rn_cv, NULL);
1155
1156	np->rn_children = uu_list_create(rc_children_pool, np, 0);
1157	np->rn_pg_notify_list = uu_list_create(rc_pg_notify_pool, np, 0);
1158
1159	uu_list_node_init(np, &np->rn_sibling_node, rc_children_pool);
1160
1161	uu_list_node_init(&np->rn_notify, &np->rn_notify.rcn_list_node,
1162	    rc_notify_pool);
1163
1164	return (np);
1165}
1166
1167void
1168rc_node_destroy(rc_node_t *np)
1169{
1170	int i;
1171
1172	if (np->rn_flags & RC_NODE_UNREFED)
1173		return;				/* being handled elsewhere */
1174
1175	assert(np->rn_refs == 0 && np->rn_other_refs == 0);
1176	assert(np->rn_former == NULL);
1177
1178	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
1179		/* Release the holds from rc_iter_next(). */
1180		for (i = 0; i < COMPOSITION_DEPTH; ++i) {
1181			/* rn_cchain[i] may be NULL for empty snapshots. */
1182			if (np->rn_cchain[i] != NULL)
1183				rc_node_rele(np->rn_cchain[i]);
1184		}
1185	}
1186
1187	if (np->rn_name != NULL)
1188		free((void *)np->rn_name);
1189	np->rn_name = NULL;
1190	if (np->rn_type != NULL)
1191		free((void *)np->rn_type);
1192	np->rn_type = NULL;
1193	if (np->rn_values != NULL)
1194		object_free_values(np->rn_values, np->rn_valtype,
1195		    np->rn_values_count, np->rn_values_size);
1196	np->rn_values = NULL;
1197
1198	if (np->rn_snaplevel != NULL)
1199		rc_snaplevel_rele(np->rn_snaplevel);
1200	np->rn_snaplevel = NULL;
1201
1202	uu_list_node_fini(np, &np->rn_sibling_node, rc_children_pool);
1203
1204	uu_list_node_fini(&np->rn_notify, &np->rn_notify.rcn_list_node,
1205	    rc_notify_pool);
1206
1207	assert(uu_list_first(np->rn_children) == NULL);
1208	uu_list_destroy(np->rn_children);
1209	uu_list_destroy(np->rn_pg_notify_list);
1210
1211	(void) pthread_mutex_destroy(&np->rn_lock);
1212	(void) pthread_cond_destroy(&np->rn_cv);
1213
1214	uu_free(np);
1215}
1216
1217/*
1218 * Link in a child node.
1219 *
1220 * Because of the lock ordering, cp has to already be in the hash table with
1221 * its lock dropped before we get it.  To prevent anyone from noticing that
1222 * it is parentless, the creation code sets the RC_NODE_USING_PARENT flag.
1223 * Once we've linked it in, we release the flag.
1224 */
1225static void
1226rc_node_link_child(rc_node_t *np, rc_node_t *cp)
1227{
1228	assert(!MUTEX_HELD(&np->rn_lock));
1229	assert(!MUTEX_HELD(&cp->rn_lock));
1230
1231	(void) pthread_mutex_lock(&np->rn_lock);
1232	(void) pthread_mutex_lock(&cp->rn_lock);
1233	assert(!(cp->rn_flags & RC_NODE_IN_PARENT) &&
1234	    (cp->rn_flags & RC_NODE_USING_PARENT));
1235
1236	assert(rc_check_parent_child(np->rn_id.rl_type, cp->rn_id.rl_type) ==
1237	    REP_PROTOCOL_SUCCESS);
1238
1239	cp->rn_parent = np;
1240	cp->rn_flags |= RC_NODE_IN_PARENT;
1241	(void) uu_list_insert_before(np->rn_children, NULL, cp);
1242
1243	(void) pthread_mutex_unlock(&np->rn_lock);
1244
1245	rc_node_rele_flag(cp, RC_NODE_USING_PARENT);
1246	(void) pthread_mutex_unlock(&cp->rn_lock);
1247}
1248
1249/*
1250 * Sets the rn_parent_ref field of all the children of np to pp -- always
1251 * initially invoked as rc_node_setup_parent_ref(np, np), we then recurse.
1252 *
1253 * This is used when we mark a node RC_NODE_OLD, so that when the object and
1254 * its children are no longer referenced, they will all be deleted as a unit.
1255 */
1256static void
1257rc_node_setup_parent_ref(rc_node_t *np, rc_node_t *pp)
1258{
1259	rc_node_t *cp;
1260
1261	assert(MUTEX_HELD(&np->rn_lock));
1262
1263	for (cp = uu_list_first(np->rn_children); cp != NULL;
1264	    cp = uu_list_next(np->rn_children, cp)) {
1265		(void) pthread_mutex_lock(&cp->rn_lock);
1266		if (cp->rn_flags & RC_NODE_PARENT_REF) {
1267			assert(cp->rn_parent_ref == pp);
1268		} else {
1269			assert(cp->rn_parent_ref == NULL);
1270
1271			cp->rn_flags |= RC_NODE_PARENT_REF;
1272			cp->rn_parent_ref = pp;
1273			if (cp->rn_refs != 0)
1274				rc_node_hold_other(pp);
1275		}
1276		rc_node_setup_parent_ref(cp, pp);		/* recurse */
1277		(void) pthread_mutex_unlock(&cp->rn_lock);
1278	}
1279}
1280
1281/*
1282 * Atomically replace 'np' with 'newp', with a parent of 'pp'.
1283 *
1284 * Requirements:
1285 *	*no* node locks may be held.
1286 *	pp must be held with RC_NODE_CHILDREN_CHANGING
1287 *	newp and np must be held with RC_NODE_IN_TX
1288 *	np must be marked RC_NODE_IN_PARENT, newp must not be
1289 *	np must be marked RC_NODE_OLD
1290 *
1291 * Afterwards:
1292 *	pp's RC_NODE_CHILDREN_CHANGING is dropped
1293 *	newp and np's RC_NODE_IN_TX is dropped
1294 *	newp->rn_former = np;
1295 *	newp is RC_NODE_IN_PARENT, np is not.
1296 *	interested notify subscribers have been notified of newp's new status.
1297 */
1298static void
1299rc_node_relink_child(rc_node_t *pp, rc_node_t *np, rc_node_t *newp)
1300{
1301	cache_bucket_t *bp;
1302	/*
1303	 * First, swap np and newp in the cache.  newp's RC_NODE_IN_TX flag
1304	 * keeps rc_node_update() from seeing it until we are done.
1305	 */
1306	bp = cache_hold(newp->rn_hash);
1307	cache_remove_unlocked(bp, np);
1308	cache_insert_unlocked(bp, newp);
1309	cache_release(bp);
1310
1311	/*
1312	 * replace np with newp in pp's list, and attach it to newp's rn_former
1313	 * link.
1314	 */
1315	(void) pthread_mutex_lock(&pp->rn_lock);
1316	assert(pp->rn_flags & RC_NODE_CHILDREN_CHANGING);
1317
1318	(void) pthread_mutex_lock(&newp->rn_lock);
1319	assert(!(newp->rn_flags & RC_NODE_IN_PARENT));
1320	assert(newp->rn_flags & RC_NODE_IN_TX);
1321
1322	(void) pthread_mutex_lock(&np->rn_lock);
1323	assert(np->rn_flags & RC_NODE_IN_PARENT);
1324	assert(np->rn_flags & RC_NODE_OLD);
1325	assert(np->rn_flags & RC_NODE_IN_TX);
1326
1327	newp->rn_parent = pp;
1328	newp->rn_flags |= RC_NODE_IN_PARENT;
1329
1330	/*
1331	 * Note that we carefully add newp before removing np -- this
1332	 * keeps iterators on the list from missing us.
1333	 */
1334	(void) uu_list_insert_after(pp->rn_children, np, newp);
1335	(void) uu_list_remove(pp->rn_children, np);
1336
1337	/*
1338	 * re-set np
1339	 */
1340	newp->rn_former = np;
1341	np->rn_parent = NULL;
1342	np->rn_flags &= ~RC_NODE_IN_PARENT;
1343	np->rn_flags |= RC_NODE_ON_FORMER;
1344
1345	rc_notify_insert_node(newp);
1346
1347	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
1348	(void) pthread_mutex_unlock(&pp->rn_lock);
1349	rc_node_rele_flag(newp, RC_NODE_USING_PARENT | RC_NODE_IN_TX);
1350	(void) pthread_mutex_unlock(&newp->rn_lock);
1351	rc_node_setup_parent_ref(np, np);
1352	rc_node_rele_flag(np, RC_NODE_IN_TX);
1353	(void) pthread_mutex_unlock(&np->rn_lock);
1354}
1355
1356/*
1357 * makes sure a node with lookup 'nip', name 'name', and parent 'pp' exists.
1358 * 'cp' is used (and returned) if the node does not yet exist.  If it does
1359 * exist, 'cp' is freed, and the existent node is returned instead.
1360 */
1361rc_node_t *
1362rc_node_setup(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1363    rc_node_t *pp)
1364{
1365	rc_node_t *np;
1366	cache_bucket_t *bp;
1367	uint32_t h = rc_node_hash(nip);
1368
1369	assert(cp->rn_refs == 0);
1370
1371	bp = cache_hold(h);
1372	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1373		cache_release(bp);
1374
1375		/*
1376		 * make sure it matches our expectations
1377		 */
1378		(void) pthread_mutex_lock(&np->rn_lock);
1379		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1380			assert(np->rn_parent == pp);
1381			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1382			assert(strcmp(np->rn_name, name) == 0);
1383			assert(np->rn_type == NULL);
1384			assert(np->rn_flags & RC_NODE_IN_PARENT);
1385			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1386		}
1387		(void) pthread_mutex_unlock(&np->rn_lock);
1388
1389		rc_node_destroy(cp);
1390		return (np);
1391	}
1392
1393	/*
1394	 * No one is there -- create a new node.
1395	 */
1396	np = cp;
1397	rc_node_hold(np);
1398	np->rn_id = *nip;
1399	np->rn_hash = h;
1400	np->rn_name = strdup(name);
1401
1402	np->rn_flags |= RC_NODE_USING_PARENT;
1403
1404	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE) {
1405#if COMPOSITION_DEPTH == 2
1406		np->rn_cchain[0] = np;
1407		np->rn_cchain[1] = pp;
1408#else
1409#error This code must be updated.
1410#endif
1411	}
1412
1413	cache_insert_unlocked(bp, np);
1414	cache_release(bp);		/* we are now visible */
1415
1416	rc_node_link_child(pp, np);
1417
1418	return (np);
1419}
1420
1421/*
1422 * makes sure a snapshot with lookup 'nip', name 'name', and parent 'pp' exists.
1423 * 'cp' is used (and returned) if the node does not yet exist.  If it does
1424 * exist, 'cp' is freed, and the existent node is returned instead.
1425 */
1426rc_node_t *
1427rc_node_setup_snapshot(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1428    uint32_t snap_id, rc_node_t *pp)
1429{
1430	rc_node_t *np;
1431	cache_bucket_t *bp;
1432	uint32_t h = rc_node_hash(nip);
1433
1434	assert(cp->rn_refs == 0);
1435
1436	bp = cache_hold(h);
1437	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1438		cache_release(bp);
1439
1440		/*
1441		 * make sure it matches our expectations
1442		 */
1443		(void) pthread_mutex_lock(&np->rn_lock);
1444		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1445			assert(np->rn_parent == pp);
1446			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1447			assert(strcmp(np->rn_name, name) == 0);
1448			assert(np->rn_type == NULL);
1449			assert(np->rn_flags & RC_NODE_IN_PARENT);
1450			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1451		}
1452		(void) pthread_mutex_unlock(&np->rn_lock);
1453
1454		rc_node_destroy(cp);
1455		return (np);
1456	}
1457
1458	/*
1459	 * No one is there -- create a new node.
1460	 */
1461	np = cp;
1462	rc_node_hold(np);
1463	np->rn_id = *nip;
1464	np->rn_hash = h;
1465	np->rn_name = strdup(name);
1466	np->rn_snapshot_id = snap_id;
1467
1468	np->rn_flags |= RC_NODE_USING_PARENT;
1469
1470	cache_insert_unlocked(bp, np);
1471	cache_release(bp);		/* we are now visible */
1472
1473	rc_node_link_child(pp, np);
1474
1475	return (np);
1476}
1477
1478/*
1479 * makes sure a snaplevel with lookup 'nip' and parent 'pp' exists.  'cp' is
1480 * used (and returned) if the node does not yet exist.  If it does exist, 'cp'
1481 * is freed, and the existent node is returned instead.
1482 */
1483rc_node_t *
1484rc_node_setup_snaplevel(rc_node_t *cp, rc_node_lookup_t *nip,
1485    rc_snaplevel_t *lvl, rc_node_t *pp)
1486{
1487	rc_node_t *np;
1488	cache_bucket_t *bp;
1489	uint32_t h = rc_node_hash(nip);
1490
1491	assert(cp->rn_refs == 0);
1492
1493	bp = cache_hold(h);
1494	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1495		cache_release(bp);
1496
1497		/*
1498		 * make sure it matches our expectations
1499		 */
1500		(void) pthread_mutex_lock(&np->rn_lock);
1501		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1502			assert(np->rn_parent == pp);
1503			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1504			assert(np->rn_name == NULL);
1505			assert(np->rn_type == NULL);
1506			assert(np->rn_flags & RC_NODE_IN_PARENT);
1507			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1508		}
1509		(void) pthread_mutex_unlock(&np->rn_lock);
1510
1511		rc_node_destroy(cp);
1512		return (np);
1513	}
1514
1515	/*
1516	 * No one is there -- create a new node.
1517	 */
1518	np = cp;
1519	rc_node_hold(np);	/* released in snapshot_fill_children() */
1520	np->rn_id = *nip;
1521	np->rn_hash = h;
1522
1523	rc_snaplevel_hold(lvl);
1524	np->rn_snaplevel = lvl;
1525
1526	np->rn_flags |= RC_NODE_USING_PARENT;
1527
1528	cache_insert_unlocked(bp, np);
1529	cache_release(bp);		/* we are now visible */
1530
1531	/* Add this snaplevel to the snapshot's composition chain. */
1532	assert(pp->rn_cchain[lvl->rsl_level_num - 1] == NULL);
1533	pp->rn_cchain[lvl->rsl_level_num - 1] = np;
1534
1535	rc_node_link_child(pp, np);
1536
1537	return (np);
1538}
1539
1540/*
1541 * Returns NULL if strdup() fails.
1542 */
1543rc_node_t *
1544rc_node_setup_pg(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1545    const char *type, uint32_t flags, uint32_t gen_id, rc_node_t *pp)
1546{
1547	rc_node_t *np;
1548	cache_bucket_t *bp;
1549
1550	uint32_t h = rc_node_hash(nip);
1551	bp = cache_hold(h);
1552	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1553		cache_release(bp);
1554
1555		/*
1556		 * make sure it matches our expectations (don't check
1557		 * the generation number or parent, since someone could
1558		 * have gotten a transaction through while we weren't
1559		 * looking)
1560		 */
1561		(void) pthread_mutex_lock(&np->rn_lock);
1562		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1563			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1564			assert(strcmp(np->rn_name, name) == 0);
1565			assert(strcmp(np->rn_type, type) == 0);
1566			assert(np->rn_pgflags == flags);
1567			assert(np->rn_flags & RC_NODE_IN_PARENT);
1568			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1569		}
1570		(void) pthread_mutex_unlock(&np->rn_lock);
1571
1572		rc_node_destroy(cp);
1573		return (np);
1574	}
1575
1576	np = cp;
1577	rc_node_hold(np);		/* released in fill_pg_callback() */
1578	np->rn_id = *nip;
1579	np->rn_hash = h;
1580	np->rn_name = strdup(name);
1581	if (np->rn_name == NULL) {
1582		rc_node_rele(np);
1583		return (NULL);
1584	}
1585	np->rn_type = strdup(type);
1586	if (np->rn_type == NULL) {
1587		free((void *)np->rn_name);
1588		rc_node_rele(np);
1589		return (NULL);
1590	}
1591	np->rn_pgflags = flags;
1592	np->rn_gen_id = gen_id;
1593
1594	np->rn_flags |= RC_NODE_USING_PARENT;
1595
1596	cache_insert_unlocked(bp, np);
1597	cache_release(bp);		/* we are now visible */
1598
1599	rc_node_link_child(pp, np);
1600
1601	return (np);
1602}
1603
1604#if COMPOSITION_DEPTH == 2
1605/*
1606 * Initialize a "composed property group" which represents the composition of
1607 * property groups pg1 & pg2.  It is ephemeral: once created & returned for an
1608 * ITER_READ request, keeping it out of cache_hash and any child lists
1609 * prevents it from being looked up.  Operations besides iteration are passed
1610 * through to pg1.
1611 *
1612 * pg1 & pg2 should be held before entering this function.  They will be
1613 * released in rc_node_destroy().
1614 */
1615static int
1616rc_node_setup_cpg(rc_node_t *cpg, rc_node_t *pg1, rc_node_t *pg2)
1617{
1618	if (strcmp(pg1->rn_type, pg2->rn_type) != 0)
1619		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
1620
1621	cpg->rn_id.rl_type = REP_PROTOCOL_ENTITY_CPROPERTYGRP;
1622	cpg->rn_name = strdup(pg1->rn_name);
1623	if (cpg->rn_name == NULL)
1624		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1625
1626	cpg->rn_cchain[0] = pg1;
1627	cpg->rn_cchain[1] = pg2;
1628
1629	return (REP_PROTOCOL_SUCCESS);
1630}
1631#else
1632#error This code must be updated.
1633#endif
1634
1635/*
1636 * Fails with _NO_RESOURCES.
1637 */
1638int
1639rc_node_create_property(rc_node_t *pp, rc_node_lookup_t *nip,
1640    const char *name, rep_protocol_value_type_t type,
1641    const char *vals, size_t count, size_t size)
1642{
1643	rc_node_t *np;
1644	cache_bucket_t *bp;
1645
1646	uint32_t h = rc_node_hash(nip);
1647	bp = cache_hold(h);
1648	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1649		cache_release(bp);
1650		/*
1651		 * make sure it matches our expectations
1652		 */
1653		(void) pthread_mutex_lock(&np->rn_lock);
1654		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1655			assert(np->rn_parent == pp);
1656			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1657			assert(strcmp(np->rn_name, name) == 0);
1658			assert(np->rn_valtype == type);
1659			assert(np->rn_values_count == count);
1660			assert(np->rn_values_size == size);
1661			assert(vals == NULL ||
1662			    memcmp(np->rn_values, vals, size) == 0);
1663			assert(np->rn_flags & RC_NODE_IN_PARENT);
1664			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1665		}
1666		rc_node_rele_locked(np);
1667		object_free_values(vals, type, count, size);
1668		return (REP_PROTOCOL_SUCCESS);
1669	}
1670
1671	/*
1672	 * No one is there -- create a new node.
1673	 */
1674	np = rc_node_alloc();
1675	if (np == NULL) {
1676		cache_release(bp);
1677		object_free_values(vals, type, count, size);
1678		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1679	}
1680	np->rn_id = *nip;
1681	np->rn_hash = h;
1682	np->rn_name = strdup(name);
1683	if (np->rn_name == NULL) {
1684		cache_release(bp);
1685		object_free_values(vals, type, count, size);
1686		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1687	}
1688
1689	np->rn_valtype = type;
1690	np->rn_values = vals;
1691	np->rn_values_count = count;
1692	np->rn_values_size = size;
1693
1694	np->rn_flags |= RC_NODE_USING_PARENT;
1695
1696	cache_insert_unlocked(bp, np);
1697	cache_release(bp);		/* we are now visible */
1698
1699	rc_node_link_child(pp, np);
1700
1701	return (REP_PROTOCOL_SUCCESS);
1702}
1703
1704int
1705rc_node_init(void)
1706{
1707	rc_node_t *np;
1708	cache_bucket_t *bp;
1709
1710	rc_children_pool = uu_list_pool_create("rc_children_pool",
1711	    sizeof (rc_node_t), offsetof(rc_node_t, rn_sibling_node),
1712	    NULL, UU_LIST_POOL_DEBUG);
1713
1714	rc_pg_notify_pool = uu_list_pool_create("rc_pg_notify_pool",
1715	    sizeof (rc_node_pg_notify_t),
1716	    offsetof(rc_node_pg_notify_t, rnpn_node),
1717	    NULL, UU_LIST_POOL_DEBUG);
1718
1719	rc_notify_pool = uu_list_pool_create("rc_notify_pool",
1720	    sizeof (rc_notify_t), offsetof(rc_notify_t, rcn_list_node),
1721	    NULL, UU_LIST_POOL_DEBUG);
1722
1723	rc_notify_info_pool = uu_list_pool_create("rc_notify_info_pool",
1724	    sizeof (rc_notify_info_t),
1725	    offsetof(rc_notify_info_t, rni_list_node),
1726	    NULL, UU_LIST_POOL_DEBUG);
1727
1728	if (rc_children_pool == NULL || rc_pg_notify_pool == NULL ||
1729	    rc_notify_pool == NULL || rc_notify_info_pool == NULL)
1730		uu_die("out of memory");
1731
1732	rc_notify_list = uu_list_create(rc_notify_pool,
1733	    &rc_notify_list, 0);
1734
1735	rc_notify_info_list = uu_list_create(rc_notify_info_pool,
1736	    &rc_notify_info_list, 0);
1737
1738	if (rc_notify_list == NULL || rc_notify_info_list == NULL)
1739		uu_die("out of memory");
1740
1741	if ((np = rc_node_alloc()) == NULL)
1742		uu_die("out of memory");
1743
1744	rc_node_hold(np);
1745	np->rn_id.rl_type = REP_PROTOCOL_ENTITY_SCOPE;
1746	np->rn_id.rl_backend = BACKEND_TYPE_NORMAL;
1747	np->rn_hash = rc_node_hash(&np->rn_id);
1748	np->rn_name = "localhost";
1749
1750	bp = cache_hold(np->rn_hash);
1751	cache_insert_unlocked(bp, np);
1752	cache_release(bp);
1753
1754	rc_scope = np;
1755	return (1);
1756}
1757
1758/*
1759 * Fails with
1760 *   _INVALID_TYPE - type is invalid
1761 *   _TYPE_MISMATCH - np doesn't carry children of type type
1762 *   _DELETED - np has been deleted
1763 *   _NO_RESOURCES
1764 */
1765static int
1766rc_node_fill_children(rc_node_t *np, uint32_t type)
1767{
1768	int rc;
1769
1770	assert(MUTEX_HELD(&np->rn_lock));
1771
1772	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
1773	    REP_PROTOCOL_SUCCESS)
1774		return (rc);
1775
1776	if (!rc_node_hold_flag(np, RC_NODE_CHILDREN_CHANGING))
1777		return (REP_PROTOCOL_FAIL_DELETED);
1778
1779	if (np->rn_flags & RC_NODE_HAS_CHILDREN) {
1780		rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1781		return (REP_PROTOCOL_SUCCESS);
1782	}
1783
1784	(void) pthread_mutex_unlock(&np->rn_lock);
1785	rc = object_fill_children(np);
1786	(void) pthread_mutex_lock(&np->rn_lock);
1787
1788	if (rc == REP_PROTOCOL_SUCCESS) {
1789		np->rn_flags |= RC_NODE_HAS_CHILDREN;
1790	}
1791	rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1792
1793	return (rc);
1794}
1795
1796/*
1797 * Returns
1798 *   _INVALID_TYPE - type is invalid
1799 *   _TYPE_MISMATCH - np doesn't carry children of type type
1800 *   _DELETED - np has been deleted
1801 *   _NO_RESOURCES
1802 *   _SUCCESS - if *cpp is not NULL, it is held
1803 */
1804static int
1805rc_node_find_named_child(rc_node_t *np, const char *name, uint32_t type,
1806    rc_node_t **cpp)
1807{
1808	int ret;
1809	rc_node_t *cp;
1810
1811	assert(MUTEX_HELD(&np->rn_lock));
1812	assert(np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP);
1813
1814	ret = rc_node_fill_children(np, type);
1815	if (ret != REP_PROTOCOL_SUCCESS)
1816		return (ret);
1817
1818	for (cp = uu_list_first(np->rn_children);
1819	    cp != NULL;
1820	    cp = uu_list_next(np->rn_children, cp)) {
1821		if (cp->rn_id.rl_type == type && strcmp(cp->rn_name, name) == 0)
1822			break;
1823	}
1824
1825	if (cp != NULL)
1826		rc_node_hold(cp);
1827	*cpp = cp;
1828
1829	return (REP_PROTOCOL_SUCCESS);
1830}
1831
1832#ifndef NATIVE_BUILD
1833static int rc_node_parent(rc_node_t *, rc_node_t **);
1834
1835/*
1836 * If the propname property exists in pg, and it is of type string, add its
1837 * values as authorizations to pcp.  pg must not be locked on entry, and it is
1838 * returned unlocked.  Returns
1839 *   _DELETED - pg was deleted
1840 *   _NO_RESOURCES
1841 *   _NOT_FOUND - pg has no property named propname
1842 *   _SUCCESS
1843 */
1844static int
1845perm_add_pg_prop_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1846{
1847	rc_node_t *prop;
1848	int result;
1849
1850	uint_t count;
1851	const char *cp;
1852
1853	assert(!MUTEX_HELD(&pg->rn_lock));
1854	assert(pg->rn_id.rl_type == REP_PROTOCOL_ENTITY_PROPERTYGRP);
1855	assert(pg->rn_id.rl_ids[ID_SNAPSHOT] == 0);
1856
1857	(void) pthread_mutex_lock(&pg->rn_lock);
1858	result = rc_node_find_named_child(pg, propname,
1859	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
1860	(void) pthread_mutex_unlock(&pg->rn_lock);
1861	if (result != REP_PROTOCOL_SUCCESS) {
1862		switch (result) {
1863		case REP_PROTOCOL_FAIL_DELETED:
1864		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1865			return (result);
1866
1867		case REP_PROTOCOL_FAIL_INVALID_TYPE:
1868		case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
1869		default:
1870			bad_error("rc_node_find_named_child", result);
1871		}
1872	}
1873
1874	if (prop == NULL)
1875		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1876
1877	/* rn_valtype is immutable, so no locking. */
1878	if (prop->rn_valtype != REP_PROTOCOL_TYPE_STRING) {
1879		rc_node_rele(prop);
1880		return (REP_PROTOCOL_SUCCESS);
1881	}
1882
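	/*
	 * rn_values stores the rn_values_count values as consecutive
	 * NUL-terminated strings (e.g. "value1\0value2\0"); the loop below
	 * steps to the next value by skipping past each terminating NUL.
	 */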
1883	(void) pthread_mutex_lock(&prop->rn_lock);
1884	for (count = prop->rn_values_count, cp = prop->rn_values;
1885	    count > 0;
1886	    --count) {
1887		result = perm_add_enabling(pcp, cp);
1888		if (result != REP_PROTOCOL_SUCCESS)
1889			break;
1890
1891		cp = strchr(cp, '\0') + 1;
1892	}
1893
1894	rc_node_rele_locked(prop);
1895
1896	return (result);
1897}
1898
1899/*
1900 * Assuming ent is a service or instance node: if its pgname property group
1901 * has type pgtype and contains a string-typed property named propname, add
1902 * its values as authorizations to pcp.  A NULL pgtype matches any type.
1903 * Returns
1904 *   _SUCCESS
1905 *   _DELETED - ent was deleted
1906 *   _NO_RESOURCES - no resources
1907 *   _NOT_FOUND - ent does not have pgname pg or propname property
1908 */
1909static int
1910perm_add_ent_prop_values(permcheck_t *pcp, rc_node_t *ent, const char *pgname,
1911    const char *pgtype, const char *propname)
1912{
1913	int r;
1914	rc_node_t *pg;
1915
1916	assert(!MUTEX_HELD(&ent->rn_lock));
1917
1918	(void) pthread_mutex_lock(&ent->rn_lock);
1919	r = rc_node_find_named_child(ent, pgname,
1920	    REP_PROTOCOL_ENTITY_PROPERTYGRP, &pg);
1921	(void) pthread_mutex_unlock(&ent->rn_lock);
1922
1923	switch (r) {
1924	case REP_PROTOCOL_SUCCESS:
1925		break;
1926
1927	case REP_PROTOCOL_FAIL_DELETED:
1928	case REP_PROTOCOL_FAIL_NO_RESOURCES:
1929		return (r);
1930
1931	default:
1932		bad_error("rc_node_find_named_child", r);
1933	}
1934
1935	if (pg == NULL)
1936		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1937
1938	if (pgtype == NULL || strcmp(pg->rn_type, pgtype) == 0) {
1939		r = perm_add_pg_prop_values(pcp, pg, propname);
1940		switch (r) {
1941		case REP_PROTOCOL_FAIL_DELETED:
1942			r = REP_PROTOCOL_FAIL_NOT_FOUND;
1943			break;
1944
1945		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1946		case REP_PROTOCOL_SUCCESS:
1947		case REP_PROTOCOL_FAIL_NOT_FOUND:
1948			break;
1949
1950		default:
1951			bad_error("perm_add_pg_prop_values", r);
1952		}
1953	}
1954
1955	rc_node_rele(pg);
1956
1957	return (r);
1958}
1959
1960/*
1961 * If pg has a property named propname and it is string typed, add its values as
1962 * authorizations to pcp.  If pg has no such property, and its parent is an
1963 * instance, walk up to the service and try doing the same with the property
1964 * of the same name from the property group of the same name.  Returns
1965 *   _SUCCESS
1966 *   _NO_RESOURCES
1967 *   _DELETED - pg (or an ancestor) was deleted
1968 */
1969static int
1970perm_add_enabling_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1971{
1972	int r;
1973
1974	r = perm_add_pg_prop_values(pcp, pg, propname);
1975
1976	if (r == REP_PROTOCOL_FAIL_NOT_FOUND) {
1977		char pgname[REP_PROTOCOL_NAME_LEN + 1];
1978		rc_node_t *inst, *svc;
1979		size_t sz;
1980
1981		assert(!MUTEX_HELD(&pg->rn_lock));
1982
1983		if (pg->rn_id.rl_ids[ID_INSTANCE] == 0) {
1984			/* not an instance pg */
1985			return (REP_PROTOCOL_SUCCESS);
1986		}
1987
1988		sz = strlcpy(pgname, pg->rn_name, sizeof (pgname));
1989		assert(sz < sizeof (pgname));
1990
1991		/* get pg's parent */
1992		r = rc_node_parent(pg, &inst);
1993		if (r != REP_PROTOCOL_SUCCESS) {
1994			assert(r == REP_PROTOCOL_FAIL_DELETED);
1995			return (r);
1996		}
1997
1998		assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
1999
2000		/* get instance's parent */
2001		r = rc_node_parent(inst, &svc);
2002		rc_node_rele(inst);
2003		if (r != REP_PROTOCOL_SUCCESS) {
2004			assert(r == REP_PROTOCOL_FAIL_DELETED);
2005			return (r);
2006		}
2007
2008		assert(svc->rn_id.rl_type == REP_PROTOCOL_ENTITY_SERVICE);
2009
2010		r = perm_add_ent_prop_values(pcp, svc, pgname, NULL, propname);
2011
2012		rc_node_rele(svc);
2013
2014		if (r == REP_PROTOCOL_FAIL_NOT_FOUND)
2015			r = REP_PROTOCOL_SUCCESS;
2016	}
2017
2018	return (r);
2019}
2020
2021/*
2022 * Call perm_add_enabling_values() for the "action_authorization" property of
2023 * the "general" property group of inst.  Returns
2024 *   _DELETED - inst (or an ancestor) was deleted
2025 *   _NO_RESOURCES
2026 *   _SUCCESS
2027 */
2028static int
2029perm_add_inst_action_auth(permcheck_t *pcp, rc_node_t *inst)
2030{
2031	int r;
2032	rc_node_t *svc;
2033
2034	assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
2035
2036	r = perm_add_ent_prop_values(pcp, inst, AUTH_PG_GENERAL,
2037	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2038
2039	if (r != REP_PROTOCOL_FAIL_NOT_FOUND)
2040		return (r);
2041
2042	r = rc_node_parent(inst, &svc);
2043	if (r != REP_PROTOCOL_SUCCESS) {
2044		assert(r == REP_PROTOCOL_FAIL_DELETED);
2045		return (r);
2046	}
2047
2048	r = perm_add_ent_prop_values(pcp, svc, AUTH_PG_GENERAL,
2049	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2050
	rc_node_rele(svc);

2051	return (r == REP_PROTOCOL_FAIL_NOT_FOUND ? REP_PROTOCOL_SUCCESS : r);
2052}
2053#endif /* NATIVE_BUILD */
2054
2055void
2056rc_node_ptr_init(rc_node_ptr_t *out)
2057{
2058	out->rnp_node = NULL;
2059	out->rnp_authorized = 0;
2060	out->rnp_deleted = 0;
2061}
2062
2063static void
2064rc_node_assign(rc_node_ptr_t *out, rc_node_t *val)
2065{
2066	rc_node_t *cur = out->rnp_node;
2067	if (val != NULL)
2068		rc_node_hold(val);
2069	out->rnp_node = val;
2070	if (cur != NULL)
2071		rc_node_rele(cur);
2072	out->rnp_authorized = 0;
2073	out->rnp_deleted = 0;
2074}
2075
2076void
2077rc_node_clear(rc_node_ptr_t *out, int deleted)
2078{
2079	rc_node_assign(out, NULL);
2080	out->rnp_deleted = deleted;
2081}
2082
2083void
2084rc_node_ptr_assign(rc_node_ptr_t *out, const rc_node_ptr_t *val)
2085{
2086	rc_node_assign(out, val->rnp_node);
2087}
2088
2089/*
2090 * rc_node_check()/RC_NODE_CHECK()
2091 *	generic "entry" checks, run before the use of an rc_node pointer.
2092 *
2093 * Fails with
2094 *   _NOT_SET
2095 *   _DELETED
2096 */
2097static int
2098rc_node_check_and_lock(rc_node_t *np)
2099{
2100	int result = REP_PROTOCOL_SUCCESS;
2101	if (np == NULL)
2102		return (REP_PROTOCOL_FAIL_NOT_SET);
2103
2104	(void) pthread_mutex_lock(&np->rn_lock);
2105	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2106		result = REP_PROTOCOL_FAIL_DELETED;
2107		(void) pthread_mutex_unlock(&np->rn_lock);
2108	}
2109
2110	return (result);
2111}
2112
2113/*
2114 * Fails with
2115 *   _NOT_SET - ptr is reset
2116 *   _DELETED - node has been deleted
2117 */
2118static rc_node_t *
2119rc_node_ptr_check_and_lock(rc_node_ptr_t *npp, int *res)
2120{
2121	rc_node_t *np = npp->rnp_node;
2122	if (np == NULL) {
2123		if (npp->rnp_deleted)
2124			*res = REP_PROTOCOL_FAIL_DELETED;
2125		else
2126			*res = REP_PROTOCOL_FAIL_NOT_SET;
2127		return (NULL);
2128	}
2129
2130	(void) pthread_mutex_lock(&np->rn_lock);
2131	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2132		(void) pthread_mutex_unlock(&np->rn_lock);
2133		rc_node_clear(npp, 1);
2134		*res = REP_PROTOCOL_FAIL_DELETED;
2135		return (NULL);
2136	}
2137	return (np);
2138}
2139
2140#define	RC_NODE_CHECK_AND_LOCK(n) {					\
2141	int rc__res;							\
2142	if ((rc__res = rc_node_check_and_lock(n)) != REP_PROTOCOL_SUCCESS) \
2143		return (rc__res);					\
2144}
2145
2146#define	RC_NODE_CHECK(n) {						\
2147	RC_NODE_CHECK_AND_LOCK(n);					\
2148	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
2149}
2150
2151#define	RC_NODE_CHECK_AND_HOLD(n) {					\
2152	RC_NODE_CHECK_AND_LOCK(n);					\
2153	rc_node_hold_locked(n);						\
2154	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
2155}
2156
2157#define	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp) {			\
2158	int rc__res;							\
2159	if (((np) = rc_node_ptr_check_and_lock(npp, &rc__res)) == NULL)	\
2160		return (rc__res);					\
2161}
2162
2163#define	RC_NODE_PTR_GET_CHECK(np, npp) {				\
2164	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
2165	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
2166}
2167
2168#define	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp) {			\
2169	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
2170	rc_node_hold_locked(np);					\
2171	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
2172}
2173
2174#define	HOLD_FLAG_OR_RETURN(np, flag) {					\
2175	assert(MUTEX_HELD(&(np)->rn_lock));				\
2176	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
2177	if (!rc_node_hold_flag((np), flag)) {				\
2178		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
2179		return (REP_PROTOCOL_FAIL_DELETED);			\
2180	}								\
2181}
2182
2183#define	HOLD_PTR_FLAG_OR_RETURN(np, npp, flag) {			\
2184	assert(MUTEX_HELD(&(np)->rn_lock));				\
2185	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
2186	if (!rc_node_hold_flag((np), flag)) {				\
2187		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
2188		assert((np) == (npp)->rnp_node);			\
2189		rc_node_clear(npp, 1);					\
2190		return (REP_PROTOCOL_FAIL_DELETED);			\
2191	}								\
2192}
2193
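/*
 * A minimal usage sketch (hypothetical function, not part of this file) of
 * how the entry-check macros above are typically combined by the operations
 * below:
 *
 *	int
 *	rc_node_example_op(rc_node_ptr_t *npp)
 *	{
 *		rc_node_t *np;
 *
 *		RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
 *		... np->rn_lock is held here and np is known not to be DEAD ...
 *		(void) pthread_mutex_unlock(&np->rn_lock);
 *		return (REP_PROTOCOL_SUCCESS);
 *	}
 */
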
2194int
2195rc_local_scope(uint32_t type, rc_node_ptr_t *out)
2196{
2197	if (type != REP_PROTOCOL_ENTITY_SCOPE) {
2198		rc_node_clear(out, 0);
2199		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2200	}
2201
2202	/*
2203	 * the main scope never gets destroyed
2204	 */
2205	rc_node_assign(out, rc_scope);
2206
2207	return (REP_PROTOCOL_SUCCESS);
2208}
2209
2210/*
2211 * Fails with
2212 *   _NOT_SET - npp is not set
2213 *   _DELETED - the node npp pointed at has been deleted
2214 *   _TYPE_MISMATCH - type is not _SCOPE
2215 *   _NOT_FOUND - scope has no parent
2216 */
2217static int
2218rc_scope_parent_scope(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2219{
2220	rc_node_t *np;
2221
2222	rc_node_clear(out, 0);
2223
2224	RC_NODE_PTR_GET_CHECK(np, npp);
2225
2226	if (type != REP_PROTOCOL_ENTITY_SCOPE)
2227		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2228
2229	return (REP_PROTOCOL_FAIL_NOT_FOUND);
2230}
2231
2232/*
2233 * Fails with
2234 *   _NOT_SET
2235 *   _DELETED
2236 *   _NOT_APPLICABLE
2237 *   _NOT_FOUND
2238 *   _BAD_REQUEST
2239 *   _TRUNCATED
2240 */
2241int
2242rc_node_name(rc_node_ptr_t *npp, char *buf, size_t sz, uint32_t answertype,
2243    size_t *sz_out)
2244{
2245	size_t actual;
2246	rc_node_t *np;
2247
2248	assert(sz == *sz_out);
2249
2250	RC_NODE_PTR_GET_CHECK(np, npp);
2251
2252	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2253		np = np->rn_cchain[0];
2254		RC_NODE_CHECK(np);
2255	}
2256
2257	switch (answertype) {
2258	case RP_ENTITY_NAME_NAME:
2259		if (np->rn_name == NULL)
2260			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2261		actual = strlcpy(buf, np->rn_name, sz);
2262		break;
2263	case RP_ENTITY_NAME_PGTYPE:
2264		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2265			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2266		actual = strlcpy(buf, np->rn_type, sz);
2267		break;
2268	case RP_ENTITY_NAME_PGFLAGS:
2269		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2270			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2271		actual = snprintf(buf, sz, "%d", np->rn_pgflags);
2272		break;
2273	case RP_ENTITY_NAME_SNAPLEVEL_SCOPE:
2274		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2275			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2276		actual = strlcpy(buf, np->rn_snaplevel->rsl_scope, sz);
2277		break;
2278	case RP_ENTITY_NAME_SNAPLEVEL_SERVICE:
2279		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2280			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2281		actual = strlcpy(buf, np->rn_snaplevel->rsl_service, sz);
2282		break;
2283	case RP_ENTITY_NAME_SNAPLEVEL_INSTANCE:
2284		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2285			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2286		if (np->rn_snaplevel->rsl_instance == NULL)
2287			return (REP_PROTOCOL_FAIL_NOT_FOUND);
2288		actual = strlcpy(buf, np->rn_snaplevel->rsl_instance, sz);
2289		break;
2290	default:
2291		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2292	}
2293	if (actual >= sz)
2294		return (REP_PROTOCOL_FAIL_TRUNCATED);
2295
2296	*sz_out = actual;
2297	return (REP_PROTOCOL_SUCCESS);
2298}
2299
2300int
2301rc_node_get_property_type(rc_node_ptr_t *npp, rep_protocol_value_type_t *out)
2302{
2303	rc_node_t *np;
2304
2305	RC_NODE_PTR_GET_CHECK(np, npp);
2306
2307	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
2308		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2309
2310	*out = np->rn_valtype;
2311
2312	return (REP_PROTOCOL_SUCCESS);
2313}
2314
2315/*
2316 * Get np's parent.  If np is deleted, returns _DELETED.  Otherwise puts a hold
2317 * on the parent, returns a pointer to it in *out, and returns _SUCCESS.
2318 */
2319static int
2320rc_node_parent(rc_node_t *np, rc_node_t **out)
2321{
2322	rc_node_t *pnp;
2323	rc_node_t *np_orig;
2324
2325	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2326		RC_NODE_CHECK_AND_LOCK(np);
2327	} else {
2328		np = np->rn_cchain[0];
2329		RC_NODE_CHECK_AND_LOCK(np);
2330	}
2331
2332	np_orig = np;
2333	rc_node_hold_locked(np);		/* simplifies the remainder */
2334
2335	for (;;) {
2336		if (!rc_node_wait_flag(np,
2337		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
2338			rc_node_rele_locked(np);
2339			return (REP_PROTOCOL_FAIL_DELETED);
2340		}
2341
2342		if (!(np->rn_flags & RC_NODE_OLD))
2343			break;
2344
2345		rc_node_rele_locked(np);
2346		np = cache_lookup(&np_orig->rn_id);
2347		assert(np != np_orig);
2348
2349		if (np == NULL)
2350			goto deleted;
2351		(void) pthread_mutex_lock(&np->rn_lock);
2352	}
2353
2354	/* guaranteed to succeed without dropping the lock */
2355	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2356		(void) pthread_mutex_unlock(&np->rn_lock);
2357		*out = NULL;
2358		rc_node_rele(np);
2359		return (REP_PROTOCOL_FAIL_DELETED);
2360	}
2361
2362	assert(np->rn_parent != NULL);
2363	pnp = np->rn_parent;
2364	(void) pthread_mutex_unlock(&np->rn_lock);
2365
2366	(void) pthread_mutex_lock(&pnp->rn_lock);
2367	(void) pthread_mutex_lock(&np->rn_lock);
2368	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2369	(void) pthread_mutex_unlock(&np->rn_lock);
2370
2371	rc_node_hold_locked(pnp);
2372
2373	(void) pthread_mutex_unlock(&pnp->rn_lock);
2374
2375	rc_node_rele(np);
2376	*out = pnp;
2377	return (REP_PROTOCOL_SUCCESS);
2378
2379deleted:
2380	rc_node_rele(np);
2381	return (REP_PROTOCOL_FAIL_DELETED);
2382}
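
/*
 * A caller sketch (hypothetical; modeled on perm_add_enabling_values()
 * above): walking from a property group up to its service with
 * rc_node_parent().  Each successful call returns a held node which the
 * caller must release:
 *
 *	rc_node_t *inst, *svc;
 *
 *	if (rc_node_parent(pg, &inst) == REP_PROTOCOL_SUCCESS) {
 *		if (rc_node_parent(inst, &svc) == REP_PROTOCOL_SUCCESS) {
 *			... use svc ...
 *			rc_node_rele(svc);
 *		}
 *		rc_node_rele(inst);
 *	}
 */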
2383
2384/*
2385 * Fails with
2386 *   _NOT_SET
2387 *   _DELETED
2388 */
2389static int
2390rc_node_ptr_parent(rc_node_ptr_t *npp, rc_node_t **out)
2391{
2392	rc_node_t *np;
2393
2394	RC_NODE_PTR_GET_CHECK(np, npp);
2395
2396	return (rc_node_parent(np, out));
2397}
2398
2399/*
2400 * Fails with
2401 *   _NOT_SET - npp is not set
2402 *   _DELETED - the node npp pointed at has been deleted
2403 *   _TYPE_MISMATCH - npp's node's parent is not of type type
2404 *
2405 * If npp points to a scope, can also fail with
2406 *   _NOT_FOUND - scope has no parent
2407 */
2408int
2409rc_node_get_parent(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2410{
2411	rc_node_t *pnp;
2412	int rc;
2413
2414	if (npp->rnp_node != NULL &&
2415	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE)
2416		return (rc_scope_parent_scope(npp, type, out));
2417
2418	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS) {
2419		rc_node_clear(out, 0);
2420		return (rc);
2421	}
2422
2423	if (type != pnp->rn_id.rl_type) {
2424		rc_node_rele(pnp);
2425		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2426	}
2427
2428	rc_node_assign(out, pnp);
2429	rc_node_rele(pnp);
2430
2431	return (REP_PROTOCOL_SUCCESS);
2432}
2433
2434int
2435rc_node_parent_type(rc_node_ptr_t *npp, uint32_t *type_out)
2436{
2437	rc_node_t *pnp;
2438	int rc;
2439
2440	if (npp->rnp_node != NULL &&
2441	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE) {
2442		*type_out = REP_PROTOCOL_ENTITY_SCOPE;
2443		return (REP_PROTOCOL_SUCCESS);
2444	}
2445
2446	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS)
2447		return (rc);
2448
2449	*type_out = pnp->rn_id.rl_type;
2450
2451	rc_node_rele(pnp);
2452
2453	return (REP_PROTOCOL_SUCCESS);
2454}
2455
2456/*
2457 * Fails with
2458 *   _INVALID_TYPE - type is invalid
2459 *   _TYPE_MISMATCH - np doesn't carry children of type type
2460 *   _DELETED - np has been deleted
2461 *   _NOT_FOUND - no child with that name/type combo found
2462 *   _NO_RESOURCES
2463 *   _BACKEND_ACCESS
2464 */
2465int
2466rc_node_get_child(rc_node_ptr_t *npp, const char *name, uint32_t type,
2467    rc_node_ptr_t *outp)
2468{
2469	rc_node_t *np, *cp;
2470	rc_node_t *child = NULL;
2471	int ret, idx;
2472
2473	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2474	if ((ret = rc_check_type_name(type, name)) == REP_PROTOCOL_SUCCESS) {
2475		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2476			ret = rc_node_find_named_child(np, name, type, &child);
2477		} else {
2478			(void) pthread_mutex_unlock(&np->rn_lock);
2479			ret = REP_PROTOCOL_SUCCESS;
2480			for (idx = 0; idx < COMPOSITION_DEPTH; idx++) {
2481				cp = np->rn_cchain[idx];
2482				if (cp == NULL)
2483					break;
2484				RC_NODE_CHECK_AND_LOCK(cp);
2485				ret = rc_node_find_named_child(cp, name, type,
2486				    &child);
2487				(void) pthread_mutex_unlock(&cp->rn_lock);
2488				/*
2489				 * loop only if we succeeded, but no child of
2490				 * the correct name was found.
2491				 */
2492				if (ret != REP_PROTOCOL_SUCCESS ||
2493				    child != NULL)
2494					break;
2495			}
2496			(void) pthread_mutex_lock(&np->rn_lock);
2497		}
2498	}
2499	(void) pthread_mutex_unlock(&np->rn_lock);
2500
2501	if (ret == REP_PROTOCOL_SUCCESS) {
2502		rc_node_assign(outp, child);
2503		if (child != NULL)
2504			rc_node_rele(child);
2505		else
2506			ret = REP_PROTOCOL_FAIL_NOT_FOUND;
2507	} else {
2508		rc_node_assign(outp, NULL);
2509	}
2510	return (ret);
2511}
2512
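/*
 * Update npp to refer to the current (latest) version of the property group
 * or snapshot it points at.  Returns _SUCCESS if it was already current,
 * _DONE if a newer version was found (npp is moved to it, except when npp
 * refers to a composed property group, which is left in place), _DELETED if
 * the object is gone, and _BAD_REQUEST if npp does not refer to a property
 * group or snapshot.
 */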
2513int
2514rc_node_update(rc_node_ptr_t *npp)
2515{
2516	cache_bucket_t *bp;
2517	rc_node_t *np = npp->rnp_node;
2518	rc_node_t *nnp;
2519	rc_node_t *cpg = NULL;
2520
2521	if (np != NULL &&
2522	    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2523		/*
2524		 * If we're updating a composed property group, actually
2525		 * update the top-level property group & return the
2526		 * appropriate value.  But leave *npp pointing at us.
2527		 */
2528		cpg = np;
2529		np = np->rn_cchain[0];
2530	}
2531
2532	RC_NODE_CHECK(np);
2533
2534	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP &&
2535	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT)
2536		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2537
2538	for (;;) {
2539		bp = cache_hold(np->rn_hash);
2540		nnp = cache_lookup_unlocked(bp, &np->rn_id);
2541		if (nnp == NULL) {
2542			cache_release(bp);
2543			rc_node_clear(npp, 1);
2544			return (REP_PROTOCOL_FAIL_DELETED);
2545		}
2546		/*
2547		 * grab the lock before dropping the cache bucket, so
2548		 * that no one else can sneak in
2549		 */
2550		(void) pthread_mutex_lock(&nnp->rn_lock);
2551		cache_release(bp);
2552
2553		if (!(nnp->rn_flags & RC_NODE_IN_TX) ||
2554		    !rc_node_wait_flag(nnp, RC_NODE_IN_TX))
2555			break;
2556
2557		rc_node_rele_locked(nnp);
2558	}
2559
2560	/*
2561	 * If the current version is dead, we still update npp to point at it
2562	 * so that subsequent operations continue to report _DELETED.
2563	 */
2564	if (nnp->rn_flags & RC_NODE_DEAD) {
2565		(void) pthread_mutex_unlock(&nnp->rn_lock);
2566		if (nnp != np && cpg == NULL)
2567			rc_node_assign(npp, nnp);	/* updated */
2568		rc_node_rele(nnp);
2569		return (REP_PROTOCOL_FAIL_DELETED);
2570	}
2571
2572	assert(!(nnp->rn_flags & RC_NODE_OLD));
2573	(void) pthread_mutex_unlock(&nnp->rn_lock);
2574
2575	if (nnp != np && cpg == NULL)
2576		rc_node_assign(npp, nnp);		/* updated */
2577
2578	rc_node_rele(nnp);
2579
2580	return ((nnp == np)? REP_PROTOCOL_SUCCESS : REP_PROTOCOL_DONE);
2581}
2582
2583/*
2584 * does a generic modification check for creation, deletion, and snapshot
2585 * management only.  Property group transactions have different checks.
2586 */
2587int
2588rc_node_modify_permission_check(void)
2589{
2590	int rc = REP_PROTOCOL_SUCCESS;
2591	permcheck_t *pcp;
2592	int granted;
2593
2594	if (!client_is_privileged()) {
2595#ifdef NATIVE_BUILD
2596		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2597#else
2598		pcp = pc_create();
2599		if (pcp != NULL) {
2600			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2601
2602			if (rc == REP_PROTOCOL_SUCCESS) {
2603				granted = perm_granted(pcp);
2604
2605				if (granted < 0)
2606					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2607			}
2608
2609			pc_free(pcp);
2610		} else {
2611			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2612		}
2613
2614		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2615			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2616#endif /* NATIVE_BUILD */
2617	}
2618	return (rc);
2619}
2620
2621/*
2622 * Fails with
2623 *   _DELETED - node has been deleted
2624 *   _NOT_SET - npp is reset
2625 *   _NOT_APPLICABLE - type is _PROPERTYGRP
2626 *   _INVALID_TYPE - node is corrupt or type is invalid
2627 *   _TYPE_MISMATCH - node cannot have children of type type
2628 *   _BAD_REQUEST - name is invalid
2629 *		    cannot create children for this type of node
2630 *   _NO_RESOURCES - out of memory, or could not allocate new id
2631 *   _PERMISSION_DENIED
2632 *   _BACKEND_ACCESS
2633 *   _BACKEND_READONLY
2634 *   _EXISTS - child already exists
2635 */
2636int
2637rc_node_create_child(rc_node_ptr_t *npp, uint32_t type, const char *name,
2638    rc_node_ptr_t *cpp)
2639{
2640	rc_node_t *np;
2641	rc_node_t *cp = NULL;
2642	int rc;
2643
2644	rc_node_clear(cpp, 0);
2645
2646	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2647
2648	/*
2649	 * there is a separate interface for creating property groups
2650	 */
2651	if (type == REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2652		(void) pthread_mutex_unlock(&np->rn_lock);
2653		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2654	}
2655
2656	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2657		(void) pthread_mutex_unlock(&np->rn_lock);
2658		np = np->rn_cchain[0];
2659		RC_NODE_CHECK_AND_LOCK(np);
2660	}
2661
2662	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2663	    REP_PROTOCOL_SUCCESS) {
2664		(void) pthread_mutex_unlock(&np->rn_lock);
2665		return (rc);
2666	}
2667	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS) {
2668		(void) pthread_mutex_unlock(&np->rn_lock);
2669		return (rc);
2670	}
2671
2672	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
2673		(void) pthread_mutex_unlock(&np->rn_lock);
2674		return (rc);
2675	}
2676
2677	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2678	(void) pthread_mutex_unlock(&np->rn_lock);
2679
2680	rc = object_create(np, type, name, &cp);
2681	assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2682
2683	if (rc == REP_PROTOCOL_SUCCESS) {
2684		rc_node_assign(cpp, cp);
2685		rc_node_rele(cp);
2686	}
2687
2688	(void) pthread_mutex_lock(&np->rn_lock);
2689	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2690	(void) pthread_mutex_unlock(&np->rn_lock);
2691
2692	return (rc);
2693}
2694
2695int
2696rc_node_create_child_pg(rc_node_ptr_t *npp, uint32_t type, const char *name,
2697    const char *pgtype, uint32_t flags, rc_node_ptr_t *cpp)
2698{
2699	rc_node_t *np;
2700	rc_node_t *cp;
2701	int rc;
2702	permcheck_t *pcp;
2703	int granted;
2704
2705	rc_node_clear(cpp, 0);
2706
2707	/* verify flags is valid */
2708	if (flags & ~SCF_PG_FLAG_NONPERSISTENT)
2709		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2710
2711	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
2712
2713	if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2714		rc_node_rele(np);
2715		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2716	}
2717
2718	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2719	    REP_PROTOCOL_SUCCESS) {
2720		rc_node_rele(np);
2721		return (rc);
2722	}
2723	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS ||
2724	    (rc = rc_check_pgtype_name(pgtype)) != REP_PROTOCOL_SUCCESS) {
2725		rc_node_rele(np);
2726		return (rc);
2727	}
2728
2729	if (!client_is_privileged()) {
2730#ifdef NATIVE_BUILD
2731		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2732#else
2733		/* Must have .smf.modify or smf.modify.<type> authorization */
2734		pcp = pc_create();
2735		if (pcp != NULL) {
2736			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2737
2738			if (rc == REP_PROTOCOL_SUCCESS) {
2739				const char * const auth =
2740				    perm_auth_for_pgtype(pgtype);
2741
2742				if (auth != NULL)
2743					rc = perm_add_enabling(pcp, auth);
2744			}
2745
2746			/*
2747			 * .manage or $action_authorization can be used to
2748			 * create the actions pg and the general_ovr pg.
2749			 */
2750			if (rc == REP_PROTOCOL_SUCCESS &&
2751			    (flags & SCF_PG_FLAG_NONPERSISTENT) != 0 &&
2752			    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE &&
2753			    ((strcmp(name, AUTH_PG_ACTIONS) == 0 &&
2754			    strcmp(pgtype, AUTH_PG_ACTIONS_TYPE) == 0) ||
2755			    (strcmp(name, AUTH_PG_GENERAL_OVR) == 0 &&
2756			    strcmp(pgtype, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
2757				rc = perm_add_enabling(pcp, AUTH_MANAGE);
2758
2759				if (rc == REP_PROTOCOL_SUCCESS)
2760					rc = perm_add_inst_action_auth(pcp, np);
2761			}
2762
2763			if (rc == REP_PROTOCOL_SUCCESS) {
2764				granted = perm_granted(pcp);
2765
2766				if (granted < 0)
2767					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2768			}
2769
2770			pc_free(pcp);
2771		} else {
2772			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2773		}
2774
2775		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2776			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2777#endif /* NATIVE_BUILD */
2778
2779		if (rc != REP_PROTOCOL_SUCCESS) {
2780			rc_node_rele(np);
2781			return (rc);
2782		}
2783	}
2784
2785	(void) pthread_mutex_lock(&np->rn_lock);
2786	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2787	(void) pthread_mutex_unlock(&np->rn_lock);
2788
2789	rc = object_create_pg(np, type, name, pgtype, flags, &cp);
2790
2791	if (rc == REP_PROTOCOL_SUCCESS) {
2792		rc_node_assign(cpp, cp);
2793		rc_node_rele(cp);
2794	}
2795
2796	(void) pthread_mutex_lock(&np->rn_lock);
2797	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2798	(void) pthread_mutex_unlock(&np->rn_lock);
2799
	rc_node_rele(np);	/* balance the hold taken above */

2800	return (rc);
2801}
2802
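/*
 * Detach pnp from the property group it is watching: remove it from the
 * pg's notification list, close the associated file descriptor, and reset
 * rnpn_pg and rnpn_fd.  The caller must hold rc_pg_notify_lock (asserted).
 */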
2803static void
2804rc_pg_notify_fire(rc_node_pg_notify_t *pnp)
2805{
2806	assert(MUTEX_HELD(&rc_pg_notify_lock));
2807
2808	if (pnp->rnpn_pg != NULL) {
2809		uu_list_remove(pnp->rnpn_pg->rn_pg_notify_list, pnp);
2810		(void) close(pnp->rnpn_fd);
2811
2812		pnp->rnpn_pg = NULL;
2813		pnp->rnpn_fd = -1;
2814	} else {
2815		assert(pnp->rnpn_fd == -1);
2816	}
2817}
2818
2819static void
2820rc_notify_node_delete(rc_notify_delete_t *ndp, rc_node_t *np_arg)
2821{
2822	rc_node_t *svc = NULL;
2823	rc_node_t *inst = NULL;
2824	rc_node_t *pg = NULL;
2825	rc_node_t *np = np_arg;
2826	rc_node_t *nnp;
2827
2828	while (svc == NULL) {
2829		(void) pthread_mutex_lock(&np->rn_lock);
2830		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2831			(void) pthread_mutex_unlock(&np->rn_lock);
2832			goto cleanup;
2833		}
2834		nnp = np->rn_parent;
2835		rc_node_hold_locked(np);	/* hold it in place */
2836
2837		switch (np->rn_id.rl_type) {
2838		case REP_PROTOCOL_ENTITY_PROPERTYGRP:
2839			assert(pg == NULL);
2840			pg = np;
2841			break;
2842		case REP_PROTOCOL_ENTITY_INSTANCE:
2843			assert(inst == NULL);
2844			inst = np;
2845			break;
2846		case REP_PROTOCOL_ENTITY_SERVICE:
2847			assert(svc == NULL);
2848			svc = np;
2849			break;
2850		default:
2851			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2852			rc_node_rele_locked(np);
2853			goto cleanup;
2854		}
2855
2856		(void) pthread_mutex_unlock(&np->rn_lock);
2857
2858		np = nnp;
2859		if (np == NULL)
2860			goto cleanup;
2861	}
2862
2863	rc_notify_deletion(ndp,
2864	    svc->rn_name,
2865	    inst != NULL ? inst->rn_name : NULL,
2866	    pg != NULL ? pg->rn_name : NULL);
2867
2868	ndp = NULL;
2869
2870cleanup:
2871	if (ndp != NULL)
2872		uu_free(ndp);
2873
2874	for (;;) {
2875		if (svc != NULL) {
2876			np = svc;
2877			svc = NULL;
2878		} else if (inst != NULL) {
2879			np = inst;
2880			inst = NULL;
2881		} else if (pg != NULL) {
2882			np = pg;
2883			pg = NULL;
2884		} else
2885			break;
2886
2887		(void) pthread_mutex_lock(&np->rn_lock);
2888		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2889		rc_node_rele_locked(np);
2890	}
2891}
2892
2893/*
2894 * N.B.:  this function drops np->rn_lock on the way out.
2895 */
2896static void
2897rc_node_delete_hold(rc_node_t *np, int andformer)
2898{
2899	rc_node_t *cp;
2900
2901again:
2902	assert(MUTEX_HELD(&np->rn_lock));
2903	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2904
2905	for (cp = uu_list_first(np->rn_children); cp != NULL;
2906	    cp = uu_list_next(np->rn_children, cp)) {
2907		(void) pthread_mutex_lock(&cp->rn_lock);
2908		(void) pthread_mutex_unlock(&np->rn_lock);
2909		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS)) {
2910			/*
2911			 * already marked as dead -- can't happen, since that
2912			 * would require setting RC_NODE_CHILDREN_CHANGING
2913			 * in np, and we're holding that...
2914			 */
2915			abort();
2916		}
2917		rc_node_delete_hold(cp, andformer);	/* recurse, drop lock */
2918
2919		(void) pthread_mutex_lock(&np->rn_lock);
2920	}
2921	if (andformer && (cp = np->rn_former) != NULL) {
2922		(void) pthread_mutex_lock(&cp->rn_lock);
2923		(void) pthread_mutex_unlock(&np->rn_lock);
2924		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS))
2925			abort();		/* can't happen, see above */
2926		np = cp;
2927		goto again;		/* tail-recurse down rn_former */
2928	}
2929	(void) pthread_mutex_unlock(&np->rn_lock);
2930}
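
/*
 * rc_node_delete_hold() above and rc_node_delete_rele() below are used as a
 * pair (see rc_node_delete() and rc_node_unrefed()):  the former takes
 * RC_NODE_DYING_FLAGS on np, on all of its children, and (if andformer is
 * set) down its rn_former chain; the latter backs those holds out again
 * when the deletion cannot proceed.
 */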
2931
2932/*
2933 * N.B.:  this function drops np->rn_lock on the way out.
2934 */
2935static void
2936rc_node_delete_rele(rc_node_t *np, int andformer)
2937{
2938	rc_node_t *cp;
2939
2940again:
2941	assert(MUTEX_HELD(&np->rn_lock));
2942	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2943
2944	for (cp = uu_list_first(np->rn_children); cp != NULL;
2945	    cp = uu_list_next(np->rn_children, cp)) {
2946		(void) pthread_mutex_lock(&cp->rn_lock);
2947		(void) pthread_mutex_unlock(&np->rn_lock);
2948		rc_node_delete_rele(cp, andformer);	/* recurse, drop lock */
2949		(void) pthread_mutex_lock(&np->rn_lock);
2950	}
2951	if (andformer && (cp = np->rn_former) != NULL) {
2952		(void) pthread_mutex_lock(&cp->rn_lock);
2953		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
2954		(void) pthread_mutex_unlock(&np->rn_lock);
2955
2956		np = cp;
2957		goto again;		/* tail-recurse down rn_former */
2958	}
2959	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
2960	(void) pthread_mutex_unlock(&np->rn_lock);
2961}
2962
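/*
 * Mark cp as dead.  If cp is the current (non-OLD) version, it is also
 * detached from its parent, its outstanding pg notifications are fired,
 * and it is removed from the notify list and the cache hash.  Entered with
 * cp->rn_lock held; the lock may be dropped and reacquired internally, but
 * is held again on return.
 */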
2963static void
2964rc_node_finish_delete(rc_node_t *cp)
2965{
2966	cache_bucket_t *bp;
2967	rc_node_pg_notify_t *pnp;
2968
2969	assert(MUTEX_HELD(&cp->rn_lock));
2970
2971	if (!(cp->rn_flags & RC_NODE_OLD)) {
2972		assert(cp->rn_flags & RC_NODE_IN_PARENT);
2973		if (!rc_node_wait_flag(cp, RC_NODE_USING_PARENT)) {
2974			abort();		/* can't happen, see above */
2975		}
2976		cp->rn_flags &= ~RC_NODE_IN_PARENT;
2977		cp->rn_parent = NULL;
2978	}
2979
2980	cp->rn_flags |= RC_NODE_DEAD;
2981
2982	/*
2983	 * If this node is not out-dated, we need to remove it from
2984	 * the notify list and cache hash table.
2985	 */
2986	if (!(cp->rn_flags & RC_NODE_OLD)) {
2987		assert(cp->rn_refs > 0);	/* can't go away yet */
2988		(void) pthread_mutex_unlock(&cp->rn_lock);
2989
2990		(void) pthread_mutex_lock(&rc_pg_notify_lock);
2991		while ((pnp = uu_list_first(cp->rn_pg_notify_list)) != NULL)
2992			rc_pg_notify_fire(pnp);
2993		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
2994		rc_notify_remove_node(cp);
2995
2996		bp = cache_hold(cp->rn_hash);
2997		(void) pthread_mutex_lock(&cp->rn_lock);
2998		cache_remove_unlocked(bp, cp);
2999		cache_release(bp);
3000	}
3001}
3002
3003/*
3004 * N.B.:  this function drops np->rn_lock and a reference on the way out.
3005 */
3006static void
3007rc_node_delete_children(rc_node_t *np, int andformer)
3008{
3009	rc_node_t *cp;
3010
3011again:
3012	assert(np->rn_refs > 0);
3013	assert(MUTEX_HELD(&np->rn_lock));
3014	assert(np->rn_flags & RC_NODE_DEAD);
3015
3016	while ((cp = uu_list_first(np->rn_children)) != NULL) {
3017		uu_list_remove(np->rn_children, cp);
3018		(void) pthread_mutex_lock(&cp->rn_lock);
3019		(void) pthread_mutex_unlock(&np->rn_lock);
3020		rc_node_hold_locked(cp);	/* hold while we recurse */
3021		rc_node_finish_delete(cp);
3022		rc_node_delete_children(cp, andformer);	/* drops lock + ref */
3023		(void) pthread_mutex_lock(&np->rn_lock);
3024	}
3025
3026	/*
3027	 * all of np's children are now gone, so we can release its
3028	 * DYING_FLAGS.
3029	 */
3030	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3031	if (andformer && (cp = np->rn_former) != NULL) {
3032		np->rn_former = NULL;		/* unlink */
3033		(void) pthread_mutex_lock(&cp->rn_lock);
3034		(void) pthread_mutex_unlock(&np->rn_lock);
3035		cp->rn_flags &= ~RC_NODE_ON_FORMER;
3036
3037		rc_node_hold_locked(cp);	/* hold while we loop */
3038
3039		rc_node_finish_delete(cp);
3040
3041		rc_node_rele(np);		/* drop the old reference */
3042
3043		np = cp;
3044		goto again;		/* tail-recurse down rn_former */
3045	}
3046	rc_node_rele_locked(np);
3047}
3048
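/*
 * Called with np->rn_lock held once the last reference to np has been
 * dropped (rn_refs, rn_other_refs, and rn_other_refs_held are all zero).
 * A DEAD node is destroyed immediately.  Otherwise np must be an OLD node;
 * unless it has picked up new references in the meantime, it is unlinked
 * from its successor's rn_former chain and destroyed along with its
 * children.  np->rn_lock is dropped in all cases.
 */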
3049static void
3050rc_node_unrefed(rc_node_t *np)
3051{
3052	int unrefed;
3053	rc_node_t *pp, *cur;
3054
3055	assert(MUTEX_HELD(&np->rn_lock));
3056	assert(np->rn_refs == 0);
3057	assert(np->rn_other_refs == 0);
3058	assert(np->rn_other_refs_held == 0);
3059
3060	if (np->rn_flags & RC_NODE_DEAD) {
3061		(void) pthread_mutex_unlock(&np->rn_lock);
3062		rc_node_destroy(np);
3063		return;
3064	}
3065
3066	assert(np->rn_flags & RC_NODE_OLD);
3067	if (np->rn_flags & RC_NODE_UNREFED) {
3068		(void) pthread_mutex_unlock(&np->rn_lock);
3069		return;
3070	}
3071	np->rn_flags |= RC_NODE_UNREFED;
3072
3073	(void) pthread_mutex_unlock(&np->rn_lock);
3074
3075	/*
3076	 * find the current in-hash object, and grab its RC_NODE_IN_TX
3077	 * flag.  That protects the entire rn_former chain.
3078	 */
3079	for (;;) {
3080		pp = cache_lookup(&np->rn_id);
3081		if (pp == NULL) {
3082			(void) pthread_mutex_lock(&np->rn_lock);
3083			if (np->rn_flags & RC_NODE_DEAD)
3084				goto died;
3085			/*
3086			 * We are trying to unreference this node, but the
3087			 * owner of the former list does not exist.  It must
3088			 * be the case that another thread is deleting this
3089			 * entire sub-branch, but has not yet reached us.
3090			 * We will in short order be deleted.
3091			 */
3092			np->rn_flags &= ~RC_NODE_UNREFED;
3093			(void) pthread_mutex_unlock(&np->rn_lock);
3094			return;
3095		}
3096		if (pp == np) {
3097			/*
3098			 * no longer unreferenced
3099			 */
3100			(void) pthread_mutex_lock(&np->rn_lock);
3101			np->rn_flags &= ~RC_NODE_UNREFED;
3102			rc_node_rele_locked(np);
3103			return;
3104		}
3105		(void) pthread_mutex_lock(&pp->rn_lock);
3106		if ((pp->rn_flags & RC_NODE_OLD) ||
3107		    !rc_node_hold_flag(pp, RC_NODE_IN_TX)) {
3108			rc_node_rele_locked(pp);
3109			continue;
3110		}
3111		if (!(pp->rn_flags & RC_NODE_OLD)) {
3112			(void) pthread_mutex_unlock(&pp->rn_lock);
3113			break;
3114		}
3115		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3116		rc_node_rele_locked(pp);
3117	}
3118
3119	(void) pthread_mutex_lock(&np->rn_lock);
3120	if (!(np->rn_flags & (RC_NODE_OLD | RC_NODE_DEAD)) ||
3121	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3122	    np->rn_other_refs_held != 0) {
3123		np->rn_flags &= ~RC_NODE_UNREFED;
		(void) pthread_mutex_unlock(&np->rn_lock);
3124		(void) pthread_mutex_lock(&pp->rn_lock);
3125
3126		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3127		rc_node_rele_locked(pp);
3128		return;
3129	}
3130
3131	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3132		(void) pthread_mutex_unlock(&np->rn_lock);
3133
3134		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3135		rc_node_rele_locked(pp);
3136
3137		(void) pthread_mutex_lock(&np->rn_lock);
3138		goto died;
3139	}
3140
3141	rc_node_delete_hold(np, 0);
3142
3143	(void) pthread_mutex_lock(&np->rn_lock);
3144	if (!(np->rn_flags & RC_NODE_OLD) ||
3145	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3146	    np->rn_other_refs_held != 0) {
3147		np->rn_flags &= ~RC_NODE_UNREFED;
3148		rc_node_delete_rele(np, 0);
3149
3150		(void) pthread_mutex_lock(&pp->rn_lock);
3151		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3152		rc_node_rele_locked(pp);
3153		return;
3154	}
3155
3156	np->rn_flags |= RC_NODE_DEAD;
3157	rc_node_hold_locked(np);
3158	rc_node_delete_children(np, 0);
3159
3160	/*
3161	 * It's gone -- remove it from the former chain and destroy it.
3162	 */
3163	(void) pthread_mutex_lock(&pp->rn_lock);
3164	for (cur = pp; cur != NULL && cur->rn_former != np;
3165	    cur = cur->rn_former)
3166		;
3167	assert(cur != NULL && cur != np);
3168
3169	cur->rn_former = np->rn_former;
3170	np->rn_former = NULL;
3171
3172	rc_node_rele_flag(pp, RC_NODE_IN_TX);
3173	rc_node_rele_locked(pp);
3174
3175	(void) pthread_mutex_lock(&np->rn_lock);
3176	assert(np->rn_flags & RC_NODE_ON_FORMER);
3177	np->rn_flags &= ~(RC_NODE_UNREFED | RC_NODE_ON_FORMER);
3178	(void) pthread_mutex_unlock(&np->rn_lock);
3179	rc_node_destroy(np);
3180	return;
3181
3182died:
3183	np->rn_flags &= ~RC_NODE_UNREFED;
3184	unrefed = (np->rn_refs == 0 && np->rn_other_refs == 0 &&
3185	    np->rn_other_refs_held == 0);
3186	(void) pthread_mutex_unlock(&np->rn_lock);
3187	if (unrefed)
3188		rc_node_destroy(np);
3189}
3190
3191/*
3192 * Fails with
3193 *   _NOT_SET
3194 *   _DELETED
3195 *   _BAD_REQUEST
3196 *   _PERMISSION_DENIED
3197 *   _NO_RESOURCES
3198 * and whatever object_delete() fails with.
3199 */
3200int
3201rc_node_delete(rc_node_ptr_t *npp)
3202{
3203	rc_node_t *np, *np_orig;
3204	rc_node_t *pp = NULL;
3205	int rc;
3206	rc_node_pg_notify_t *pnp;
3207	cache_bucket_t *bp;
3208	rc_notify_delete_t *ndp;
3209	permcheck_t *pcp;
3210	int granted;
3211
3212	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3213
3214	switch (np->rn_id.rl_type) {
3215	case REP_PROTOCOL_ENTITY_SERVICE:
3216	case REP_PROTOCOL_ENTITY_INSTANCE:
3217	case REP_PROTOCOL_ENTITY_SNAPSHOT:
3218		break;			/* deletable */
3219
3220	case REP_PROTOCOL_ENTITY_SCOPE:
3221	case REP_PROTOCOL_ENTITY_SNAPLEVEL:
3222		/* Scopes and snaplevels are indelible. */
3223		(void) pthread_mutex_unlock(&np->rn_lock);
3224		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3225
3226	case REP_PROTOCOL_ENTITY_CPROPERTYGRP:
3227		(void) pthread_mutex_unlock(&np->rn_lock);
3228		np = np->rn_cchain[0];
3229		RC_NODE_CHECK_AND_LOCK(np);
3230		break;
3231
3232	case REP_PROTOCOL_ENTITY_PROPERTYGRP:
3233		if (np->rn_id.rl_ids[ID_SNAPSHOT] == 0)
3234			break;
3235
3236		/* Snapshot property groups are indelible. */
3237		(void) pthread_mutex_unlock(&np->rn_lock);
3238		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
3239
3240	case REP_PROTOCOL_ENTITY_PROPERTY:
3241		(void) pthread_mutex_unlock(&np->rn_lock);
3242		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3243
3244	default:
3245		assert(0);
3246		abort();
3247		break;
3248	}
3249
3250	np_orig = np;
3251	rc_node_hold_locked(np);	/* simplifies rest of the code */
3252
3253again:
3254	/*
3255	 * The following loop is to deal with the fact that snapshots and
3256	 * property groups are moving targets -- changes to them result
3257	 * in a new "child" node.  Since we can only delete from the top node,
3258	 * we have to loop until we have a non-RC_NODE_OLD version.
3259	 */
3260	for (;;) {
3261		if (!rc_node_wait_flag(np,
3262		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
3263			rc_node_rele_locked(np);
3264			return (REP_PROTOCOL_FAIL_DELETED);
3265		}
3266
3267		if (np->rn_flags & RC_NODE_OLD) {
3268			rc_node_rele_locked(np);
3269			np = cache_lookup(&np_orig->rn_id);
3270			assert(np != np_orig);
3271
3272			if (np == NULL) {
3273				rc = REP_PROTOCOL_FAIL_DELETED;
3274				goto fail;
3275			}
3276			(void) pthread_mutex_lock(&np->rn_lock);
3277			continue;
3278		}
3279
3280		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3281			rc_node_rele_locked(np);
3282			rc_node_clear(npp, 1);
3283			return (REP_PROTOCOL_FAIL_DELETED);
3284		}
3285
3286		/*
3287		 * Mark our parent as children changing.  This call drops our
3288		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3289		 * pp's lock held
3290		 */
3291		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3292		if (pp == NULL) {
3293			/* our parent is gone, we're going next... */
3294			rc_node_rele(np);
3295
3296			rc_node_clear(npp, 1);
3297			return (REP_PROTOCOL_FAIL_DELETED);
3298		}
3299
3300		rc_node_hold_locked(pp);		/* hold for later */
3301		(void) pthread_mutex_unlock(&pp->rn_lock);
3302
3303		(void) pthread_mutex_lock(&np->rn_lock);
3304		if (!(np->rn_flags & RC_NODE_OLD))
3305			break;			/* not old -- we're done */
3306
3307		(void) pthread_mutex_unlock(&np->rn_lock);
3308		(void) pthread_mutex_lock(&pp->rn_lock);
3309		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3310		rc_node_rele_locked(pp);
3311		(void) pthread_mutex_lock(&np->rn_lock);
3312		continue;			/* loop around and try again */
3313	}
3314	/*
3315	 * Everyone out of the pool -- we grab everything but
3316	 * RC_NODE_USING_PARENT (including RC_NODE_DYING) to keep
3317	 * any changes from occurring while we are attempting to
3318	 * delete the node.
3319	 */
3320	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3321		(void) pthread_mutex_unlock(&np->rn_lock);
3322		rc = REP_PROTOCOL_FAIL_DELETED;
3323		goto fail;
3324	}
3325
3326	assert(!(np->rn_flags & RC_NODE_OLD));
3327
3328	if (!client_is_privileged()) {
3329		/* permission check */
3330		(void) pthread_mutex_unlock(&np->rn_lock);
3331
3332#ifdef NATIVE_BUILD
3333		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3334#else
3335		pcp = pc_create();
3336		if (pcp != NULL) {
3337			rc = perm_add_enabling(pcp, AUTH_MODIFY);
3338
3339			/* add .smf.modify.<type> for pgs. */
3340			if (rc == REP_PROTOCOL_SUCCESS && np->rn_id.rl_type ==
3341			    REP_PROTOCOL_ENTITY_PROPERTYGRP) {
3342				const char * const auth =
3343				    perm_auth_for_pgtype(np->rn_type);
3344
3345				if (auth != NULL)
3346					rc = perm_add_enabling(pcp, auth);
3347			}
3348
3349			if (rc == REP_PROTOCOL_SUCCESS) {
3350				granted = perm_granted(pcp);
3351
3352				if (granted < 0)
3353					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3354			}
3355
3356			pc_free(pcp);
3357		} else {
3358			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3359		}
3360
3361		if (rc == REP_PROTOCOL_SUCCESS && !granted)
3362			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3363#endif /* NATIVE_BUILD */
3364
3365		if (rc != REP_PROTOCOL_SUCCESS) {
3366			(void) pthread_mutex_lock(&np->rn_lock);
3367			rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3368			(void) pthread_mutex_unlock(&np->rn_lock);
3369			goto fail;
3370		}
3371
3372		(void) pthread_mutex_lock(&np->rn_lock);
3373	}
3374
3375	ndp = uu_zalloc(sizeof (*ndp));
3376	if (ndp == NULL) {
3377		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3378		(void) pthread_mutex_unlock(&np->rn_lock);
3379		rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3380		goto fail;
3381	}
3382
3383	rc_node_delete_hold(np, 1);	/* hold entire subgraph, drop lock */
3384
3385	rc = object_delete(np);
3386
3387	if (rc != REP_PROTOCOL_SUCCESS) {
3388		(void) pthread_mutex_lock(&np->rn_lock);
3389		rc_node_delete_rele(np, 1);		/* drops lock */
3390		uu_free(ndp);
3391		goto fail;
3392	}
3393
3394	/*
3395	 * Now, delicately unlink and delete the object.
3396	 *
3397	 * Create the delete notification, atomically remove
3398	 * from the hash table and set the NODE_DEAD flag, and
3399	 * remove from the parent's children list.
3400	 */
3401	rc_notify_node_delete(ndp, np); /* frees or uses ndp */
3402
3403	bp = cache_hold(np->rn_hash);
3404
3405	(void) pthread_mutex_lock(&np->rn_lock);
3406	cache_remove_unlocked(bp, np);
3407	cache_release(bp);
3408
3409	np->rn_flags |= RC_NODE_DEAD;
3410	if (pp != NULL) {
3411		(void) pthread_mutex_unlock(&np->rn_lock);
3412
3413		(void) pthread_mutex_lock(&pp->rn_lock);
3414		(void) pthread_mutex_lock(&np->rn_lock);
3415		uu_list_remove(pp->rn_children, np);
3416		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3417		(void) pthread_mutex_unlock(&pp->rn_lock);
3418		np->rn_flags &= ~RC_NODE_IN_PARENT;
3419	}
3420	/*
3421	 * finally, propagate death to our children, handle notifications,
3422	 * and release our hold.
3423	 */
3424	rc_node_hold_locked(np);	/* hold for delete */
3425	rc_node_delete_children(np, 1);	/* drops DYING_FLAGS, lock, ref */
3426
3427	rc_node_clear(npp, 1);
3428
3429	(void) pthread_mutex_lock(&rc_pg_notify_lock);
3430	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
3431		rc_pg_notify_fire(pnp);
3432	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
3433	rc_notify_remove_node(np);
3434
3435	rc_node_rele(np);
3436
3437	return (rc);
3438
3439fail:
3440	rc_node_rele(np);
3441	if (rc == REP_PROTOCOL_FAIL_DELETED)
3442		rc_node_clear(npp, 1);
3443	if (pp != NULL) {
3444		(void) pthread_mutex_lock(&pp->rn_lock);
3445		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3446		rc_node_rele_locked(pp);	/* drop ref and lock */
3447	}
3448	return (rc);
3449}
3450
3451int
3452rc_node_next_snaplevel(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3453{
3454	rc_node_t *np;
3455	rc_node_t *cp, *pp;
3456	int res;
3457
3458	rc_node_clear(cpp, 0);
3459
3460	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3461
3462	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT &&
3463	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL) {
3464		(void) pthread_mutex_unlock(&np->rn_lock);
3465		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
3466	}
3467
3468	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3469		if ((res = rc_node_fill_children(np,
3470		    REP_PROTOCOL_ENTITY_SNAPLEVEL)) != REP_PROTOCOL_SUCCESS) {
3471			(void) pthread_mutex_unlock(&np->rn_lock);
3472			return (res);
3473		}
3474
3475		for (cp = uu_list_first(np->rn_children);
3476		    cp != NULL;
3477		    cp = uu_list_next(np->rn_children, cp)) {
3478			if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3479				continue;
3480			rc_node_hold(cp);
3481			break;
3482		}
3483
3484		(void) pthread_mutex_unlock(&np->rn_lock);
3485	} else {
3486		HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_USING_PARENT);
3487		/*
3488		 * mark our parent as children changing.  This call drops our
3489		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3490		 * pp's lock held
3491		 */
3492		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3493		if (pp == NULL) {
3494			/* our parent is gone, we're going next... */
3495
3496			rc_node_clear(npp, 1);
3497			return (REP_PROTOCOL_FAIL_DELETED);
3498		}
3499
3500		/*
3501		 * find the next snaplevel
3502		 */
3503		cp = np;
3504		while ((cp = uu_list_next(pp->rn_children, cp)) != NULL &&
3505		    cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3506			;
3507
3508		/* it must match the snaplevel list */
3509		assert((cp == NULL && np->rn_snaplevel->rsl_next == NULL) ||
3510		    (cp != NULL && np->rn_snaplevel->rsl_next ==
3511		    cp->rn_snaplevel));
3512
3513		if (cp != NULL)
3514			rc_node_hold(cp);
3515
3516		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3517
3518		(void) pthread_mutex_unlock(&pp->rn_lock);
3519	}
3520
3521	rc_node_assign(cpp, cp);
3522	if (cp != NULL) {
3523		rc_node_rele(cp);
3524
3525		return (REP_PROTOCOL_SUCCESS);
3526	}
3527	return (REP_PROTOCOL_FAIL_NOT_FOUND);
3528}
3529
3530/*
3531 * This call takes a snapshot (np) and either:
3532 *	an existing snapid (to be associated with np), or
3533 *	a non-NULL parentp (from which a new snapshot is taken, and associated
3534 *	    with np)
3535 *
3536 * To do the association, np is duplicated, the duplicate is made to
3537 * represent the new snapid, and np is replaced with the new rc_node_t on
3538 * np's parent's child list. np is placed on the new node's rn_former list,
3539 * and replaces np in cache_hash (so rc_node_update() will find the new one).
3540 */
3541static int
3542rc_attach_snapshot(rc_node_t *np, uint32_t snapid, rc_node_t *parentp)
3543{
3544	rc_node_t *np_orig;
3545	rc_node_t *nnp, *prev;
3546	rc_node_t *pp;
3547	int rc;
3548
3549	if (parentp != NULL)
3550		assert(snapid == 0);
3551
3552	assert(MUTEX_HELD(&np->rn_lock));
3553
3554	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3555		(void) pthread_mutex_unlock(&np->rn_lock);
3556		return (rc);
3557	}
3558
3559	np_orig = np;
3560	rc_node_hold_locked(np);		/* simplifies the remainder */
3561
3562	/*
3563	 * get the latest node, holding RC_NODE_IN_TX to keep the rn_former
3564	 * list from changing.
3565	 */
3566	for (;;) {
3567		if (!(np->rn_flags & RC_NODE_OLD)) {
3568			if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3569				goto again;
3570			}
3571			pp = rc_node_hold_parent_flag(np,
3572			    RC_NODE_CHILDREN_CHANGING);
3573
3574			(void) pthread_mutex_lock(&np->rn_lock);
3575			if (pp == NULL) {
3576				goto again;
3577			}
3578			if (np->rn_flags & RC_NODE_OLD) {
3579				rc_node_rele_flag(pp,
3580				    RC_NODE_CHILDREN_CHANGING);
3581				(void) pthread_mutex_unlock(&pp->rn_lock);
3582				goto again;
3583			}
3584			(void) pthread_mutex_unlock(&pp->rn_lock);
3585
3586			if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
3587				/*
3588				 * Can't happen, since we're holding our
3589				 * parent's CHILDREN_CHANGING flag...
3590				 */
3591				abort();
3592			}
3593			break;			/* everything's ready */
3594		}
3595again:
3596		rc_node_rele_locked(np);
3597		np = cache_lookup(&np_orig->rn_id);
3598
3599		if (np == NULL)
3600			return (REP_PROTOCOL_FAIL_DELETED);
3601
3602		(void) pthread_mutex_lock(&np->rn_lock);
3603	}
3604
3605	if (parentp != NULL) {
3606		if (pp != parentp) {
3607			rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
3608			goto fail;
3609		}
3610		nnp = NULL;
3611	} else {
3612		/*
3613		 * look for a former node with the snapid we need.
3614		 */
3615		if (np->rn_snapshot_id == snapid) {
3616			rc_node_rele_flag(np, RC_NODE_IN_TX);
3617			rc_node_rele_locked(np);
3618
3619			(void) pthread_mutex_lock(&pp->rn_lock);
3620			rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3621			(void) pthread_mutex_unlock(&pp->rn_lock);
3622			return (REP_PROTOCOL_SUCCESS);	/* nothing to do */
3623		}
3624
3625		prev = np;
3626		while ((nnp = prev->rn_former) != NULL) {
3627			if (nnp->rn_snapshot_id == snapid) {
3628				rc_node_hold(nnp);
3629				break;		/* existing node with that id */
3630			}
3631			prev = nnp;
3632		}
3633	}
3634
3635	if (nnp == NULL) {
3636		prev = NULL;
3637		nnp = rc_node_alloc();
3638		if (nnp == NULL) {
3639			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3640			goto fail;
3641		}
3642
3643		nnp->rn_id = np->rn_id;		/* structure assignment */
3644		nnp->rn_hash = np->rn_hash;
3645		nnp->rn_name = strdup(np->rn_name);
3646		nnp->rn_snapshot_id = snapid;
3647		nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
3648
3649		if (nnp->rn_name == NULL) {
3650			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3651			goto fail;
3652		}
3653	}
3654
3655	(void) pthread_mutex_unlock(&np->rn_lock);
3656
3657	rc = object_snapshot_attach(&np->rn_id, &snapid, (parentp != NULL));
3658
3659	if (parentp != NULL)
3660		nnp->rn_snapshot_id = snapid;	/* fill in new snapid */
3661	else
3662		assert(nnp->rn_snapshot_id == snapid);
3663
3664	(void) pthread_mutex_lock(&np->rn_lock);
3665	if (rc != REP_PROTOCOL_SUCCESS)
3666		goto fail;
3667
3668	/*
3669	 * fix up the former chain
3670	 */
3671	if (prev != NULL) {
3672		prev->rn_former = nnp->rn_former;
3673		(void) pthread_mutex_lock(&nnp->rn_lock);
3674		nnp->rn_flags &= ~RC_NODE_ON_FORMER;
3675		nnp->rn_former = NULL;
3676		(void) pthread_mutex_unlock(&nnp->rn_lock);
3677	}
3678	np->rn_flags |= RC_NODE_OLD;
3679	(void) pthread_mutex_unlock(&np->rn_lock);
3680
3681	/*
3682	 * replace np with nnp
3683	 */
3684	rc_node_relink_child(pp, np, nnp);
3685
3686	rc_node_rele(np);
3687
3688	return (REP_PROTOCOL_SUCCESS);
3689
3690fail:
3691	rc_node_rele_flag(np, RC_NODE_IN_TX);
3692	rc_node_rele_locked(np);
3693	(void) pthread_mutex_lock(&pp->rn_lock);
3694	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3695	(void) pthread_mutex_unlock(&pp->rn_lock);
3696
3697	if (nnp != NULL) {
3698		if (prev == NULL)
3699			rc_node_destroy(nnp);
3700		else
3701			rc_node_rele(nnp);
3702	}
3703
3704	return (rc);
3705}
3706
3707int
3708rc_snapshot_take_new(rc_node_ptr_t *npp, const char *svcname,
3709    const char *instname, const char *name, rc_node_ptr_t *outpp)
3710{
3711	rc_node_t *np;
3712	rc_node_t *outp = NULL;
3713	int rc;
3714
3715	rc_node_clear(outpp, 0);
3716
3717	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3718	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3719		(void) pthread_mutex_unlock(&np->rn_lock);
3720		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3721	}
3722
3723	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_SNAPSHOT, name);
3724	if (rc != REP_PROTOCOL_SUCCESS) {
3725		(void) pthread_mutex_unlock(&np->rn_lock);
3726		return (rc);
3727	}
3728
3729	if (svcname != NULL && (rc =
3730	    rc_check_type_name(REP_PROTOCOL_ENTITY_SERVICE, svcname)) !=
3731	    REP_PROTOCOL_SUCCESS) {
3732		(void) pthread_mutex_unlock(&np->rn_lock);
3733		return (rc);
3734	}
3735
3736	if (instname != NULL && (rc =
3737	    rc_check_type_name(REP_PROTOCOL_ENTITY_INSTANCE, instname)) !=
3738	    REP_PROTOCOL_SUCCESS) {
3739		(void) pthread_mutex_unlock(&np->rn_lock);
3740		return (rc);
3741	}
3742
3743	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3744		(void) pthread_mutex_unlock(&np->rn_lock);
3745		return (rc);
3746	}
3747
3748	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
3749	(void) pthread_mutex_unlock(&np->rn_lock);
3750
3751	rc = object_snapshot_take_new(np, svcname, instname, name, &outp);
3752
3753	if (rc == REP_PROTOCOL_SUCCESS) {
3754		rc_node_assign(outpp, outp);
3755		rc_node_rele(outp);
3756	}
3757
3758	(void) pthread_mutex_lock(&np->rn_lock);
3759	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
3760	(void) pthread_mutex_unlock(&np->rn_lock);
3761
3762	return (rc);
3763}
3764
3765int
3766rc_snapshot_take_attach(rc_node_ptr_t *npp, rc_node_ptr_t *outpp)
3767{
3768	rc_node_t *np, *outp;
3769
3770	RC_NODE_PTR_GET_CHECK(np, npp);
3771	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3772		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3773	}
3774
3775	RC_NODE_PTR_GET_CHECK_AND_LOCK(outp, outpp);
3776	if (outp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3777		(void) pthread_mutex_unlock(&outp->rn_lock);
3778		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3779	}
3780
3781	return (rc_attach_snapshot(outp, 0, np));	/* drops outp's lock */
3782}
3783
3784int
3785rc_snapshot_attach(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3786{
3787	rc_node_t *np;
3788	rc_node_t *cp;
3789	uint32_t snapid;
3790
3791	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3792	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3793		(void) pthread_mutex_unlock(&np->rn_lock);
3794		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3795	}
3796	snapid = np->rn_snapshot_id;
3797	(void) pthread_mutex_unlock(&np->rn_lock);
3798
3799	RC_NODE_PTR_GET_CHECK_AND_LOCK(cp, cpp);
3800	if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3801		(void) pthread_mutex_unlock(&cp->rn_lock);
3802		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3803	}
3804
3805	return (rc_attach_snapshot(cp, snapid, NULL));	/* drops cp's lock */
3806}
3807
3808/*
3809 * Iteration
3810 */
3811static int
3812rc_iter_filter_name(rc_node_t *np, void *s)
3813{
3814	const char *name = s;
3815
3816	return (strcmp(np->rn_name, name) == 0);
3817}
3818
3819static int
3820rc_iter_filter_type(rc_node_t *np, void *s)
3821{
3822	const char *type = s;
3823
3824	return (np->rn_type != NULL && strcmp(np->rn_type, type) == 0);
3825}
3826
3827/*ARGSUSED*/
3828static int
3829rc_iter_null_filter(rc_node_t *np, void *s)
3830{
3831	return (1);
3832}
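
/*
 * Iterator filters follow a simple convention:  each is handed a candidate
 * child node and the opaque argument supplied at rc_iter_create() time, and
 * a nonzero return value selects the node (rc_iter_null_filter() therefore
 * accepts everything).
 */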
3833
3834/*
3835 * Allocate & initialize an rc_node_iter_t structure.  Essentially, ensure
3836 * np->rn_children is populated and call uu_list_walk_start(np->rn_children).
3837 * If successful, leaves a hold on np & increments np->rn_other_refs
3838 *
3839 * If composed is true, then set up for iteration across the top level of np's
3840 * composition chain.  If successful, leaves a hold on np and increments
3841 * rn_other_refs for the top level of np's composition chain.
3842 *
3843 * Fails with
3844 *   _NO_RESOURCES
3845 *   _INVALID_TYPE
3846 *   _TYPE_MISMATCH - np cannot carry type children
3847 *   _DELETED
3848 */
3849static int
3850rc_iter_create(rc_node_iter_t **resp, rc_node_t *np, uint32_t type,
3851    rc_iter_filter_func *filter, void *arg, boolean_t composed)
3852{
3853	rc_node_iter_t *nip;
3854	int res;
3855
3856	assert(*resp == NULL);
3857
3858	nip = uu_zalloc(sizeof (*nip));
3859	if (nip == NULL)
3860		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3861
3862	/* np is held by the client's rc_node_ptr_t */
3863	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
3864		composed = 1;
3865
3866	if (!composed) {
3867		(void) pthread_mutex_lock(&np->rn_lock);
3868
3869		if ((res = rc_node_fill_children(np, type)) !=
3870		    REP_PROTOCOL_SUCCESS) {
3871			(void) pthread_mutex_unlock(&np->rn_lock);
3872			uu_free(nip);
3873			return (res);
3874		}
3875
3876		nip->rni_clevel = -1;
3877
3878		nip->rni_iter = uu_list_walk_start(np->rn_children,
3879		    UU_WALK_ROBUST);
3880		if (nip->rni_iter != NULL) {
3881			nip->rni_iter_node = np;
3882			rc_node_hold_other(np);
3883		} else {
3884			(void) pthread_mutex_unlock(&np->rn_lock);
3885			uu_free(nip);
3886			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3887		}
3888		(void) pthread_mutex_unlock(&np->rn_lock);
3889	} else {
3890		rc_node_t *ent;
3891
3892		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3893			/* rn_cchain isn't valid until children are loaded. */
3894			(void) pthread_mutex_lock(&np->rn_lock);
3895			res = rc_node_fill_children(np,
3896			    REP_PROTOCOL_ENTITY_SNAPLEVEL);
3897			(void) pthread_mutex_unlock(&np->rn_lock);
3898			if (res != REP_PROTOCOL_SUCCESS) {
3899				uu_free(nip);
3900				return (res);
3901			}
3902
3903			/* Check for an empty snapshot. */
3904			if (np->rn_cchain[0] == NULL)
3905				goto empty;
3906		}
3907
3908		/* Start at the top of the composition chain. */
3909		for (nip->rni_clevel = 0; ; ++nip->rni_clevel) {
3910			if (nip->rni_clevel >= COMPOSITION_DEPTH) {
3911				/* Empty composition chain. */
3912empty:
3913				nip->rni_clevel = -1;
3914				nip->rni_iter = NULL;
3915				/* It's ok, iter_next() will return _DONE. */
3916				goto out;
3917			}
3918
3919			ent = np->rn_cchain[nip->rni_clevel];
3920			assert(ent != NULL);
3921
3922			if (rc_node_check_and_lock(ent) == REP_PROTOCOL_SUCCESS)
3923				break;
3924
3925			/* Someone deleted it, so try the next one. */
3926		}
3927
3928		res = rc_node_fill_children(ent, type);
3929
3930		if (res == REP_PROTOCOL_SUCCESS) {
3931			nip->rni_iter = uu_list_walk_start(ent->rn_children,
3932			    UU_WALK_ROBUST);
3933
3934			if (nip->rni_iter == NULL)
3935				res = REP_PROTOCOL_FAIL_NO_RESOURCES;
3936			else {
3937				nip->rni_iter_node = ent;
3938				rc_node_hold_other(ent);
3939			}
3940		}
3941
3942		if (res != REP_PROTOCOL_SUCCESS) {
3943			(void) pthread_mutex_unlock(&ent->rn_lock);
3944			uu_free(nip);
3945			return (res);
3946		}
3947
3948		(void) pthread_mutex_unlock(&ent->rn_lock);
3949	}
3950
3951out:
3952	rc_node_hold(np);		/* released by rc_iter_end() */
3953	nip->rni_parent = np;
3954	nip->rni_type = type;
3955	nip->rni_filter = (filter != NULL)? filter : rc_iter_null_filter;
3956	nip->rni_filter_arg = arg;
3957	*resp = nip;
3958	return (REP_PROTOCOL_SUCCESS);
3959}
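
/*
 * Note: with COMPOSITION_DEPTH == 2, rn_cchain[] holds two levels and
 * rni_clevel walks them from 0 to 1.  Level 0 overrides level 1 --
 * rc_iter_next() skips a level-1 child whenever a level-0 child with the
 * same name exists, so a composed iteration reports each name at most once.
 */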
3960
3961static void
3962rc_iter_end(rc_node_iter_t *iter)
3963{
3964	rc_node_t *np = iter->rni_parent;
3965
3966	if (iter->rni_clevel >= 0)
3967		np = np->rn_cchain[iter->rni_clevel];
3968
3969	assert(MUTEX_HELD(&np->rn_lock));
3970	if (iter->rni_iter != NULL)
3971		uu_list_walk_end(iter->rni_iter);
3972	iter->rni_iter = NULL;
3973
3974	(void) pthread_mutex_unlock(&np->rn_lock);
3975	rc_node_rele(iter->rni_parent);
3976	if (iter->rni_iter_node != NULL)
3977		rc_node_rele_other(iter->rni_iter_node);
3978}
3979
3980/*
3981 * Fails with
3982 *   _NOT_SET - npp is reset
3983 *   _DELETED - npp's node has been deleted
3984 *   _NOT_APPLICABLE - npp's node is not a property
3985 *   _NO_RESOURCES - out of memory
3986 */
3987static int
3988rc_node_setup_value_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp)
3989{
3990	rc_node_t *np;
3991
3992	rc_node_iter_t *nip;
3993
3994	assert(*iterp == NULL);
3995
3996	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3997
3998	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
3999		(void) pthread_mutex_unlock(&np->rn_lock);
4000		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4001	}
4002
4003	nip = uu_zalloc(sizeof (*nip));
4004	if (nip == NULL) {
4005		(void) pthread_mutex_unlock(&np->rn_lock);
4006		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4007	}
4008
4009	nip->rni_parent = np;
4010	nip->rni_iter = NULL;
4011	nip->rni_clevel = -1;
4012	nip->rni_type = REP_PROTOCOL_ENTITY_VALUE;
4013	nip->rni_offset = 0;
4014	nip->rni_last_offset = 0;
4015
4016	rc_node_hold_locked(np);
4017
4018	*iterp = nip;
4019	(void) pthread_mutex_unlock(&np->rn_lock);
4020
4021	return (REP_PROTOCOL_SUCCESS);
4022}
4023
4024/*
4025 * Returns:
4026 *   _NOT_SET - npp is reset
4027 *   _DELETED - npp's node has been deleted
4028 *   _TYPE_MISMATCH - npp's node is not a property
4029 *   _NOT_FOUND - property has no values
4030 *   _TRUNCATED - property has more than one value (first is written into out)
4031 *   _SUCCESS - property has 1 value (which is written into out)
4032 *
4033 * We shorten *sz_out to not include anything after the final '\0'.
4034 */
4035int
4036rc_node_get_property_value(rc_node_ptr_t *npp,
4037    struct rep_protocol_value_response *out, size_t *sz_out)
4038{
4039	rc_node_t *np;
4040	size_t w;
4041	int ret;
4042
4043	assert(*sz_out == sizeof (*out));
4044
4045	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4046
4047	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4048		(void) pthread_mutex_unlock(&np->rn_lock);
4049		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4050	}
4051
4052	if (np->rn_values_size == 0) {
4053		(void) pthread_mutex_unlock(&np->rn_lock);
4054		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4055	}
4056	out->rpr_type = np->rn_valtype;
4057	w = strlcpy(out->rpr_value, &np->rn_values[0],
4058	    sizeof (out->rpr_value));
4059
4060	if (w >= sizeof (out->rpr_value))
4061		backend_panic("value too large");
4062
4063	*sz_out = offsetof(struct rep_protocol_value_response,
4064	    rpr_value[w + 1]);
4065
4066	ret = (np->rn_values_count != 1)? REP_PROTOCOL_FAIL_TRUNCATED :
4067	    REP_PROTOCOL_SUCCESS;
4068	(void) pthread_mutex_unlock(&np->rn_lock);
4069	return (ret);
4070}
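
/*
 * Example (hypothetical value): if the property's first value is "true",
 * strlcpy() yields w == 4 and *sz_out becomes
 * offsetof(struct rep_protocol_value_response, rpr_value[5]), i.e. the
 * response is trimmed to the header plus "true" and its '\0'.  Additional
 * values only affect whether _TRUNCATED is returned instead of _SUCCESS.
 */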
4071
4072int
4073rc_iter_next_value(rc_node_iter_t *iter,
4074    struct rep_protocol_value_response *out, size_t *sz_out, int repeat)
4075{
4076	rc_node_t *np = iter->rni_parent;
4077	const char *vals;
4078	size_t len;
4079
4080	size_t start;
4081	size_t w;
4082
4083	rep_protocol_responseid_t result;
4084
4085	assert(*sz_out == sizeof (*out));
4086
4087	(void) memset(out, '\0', *sz_out);
4088
4089	if (iter->rni_type != REP_PROTOCOL_ENTITY_VALUE)
4090		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4091
4092	RC_NODE_CHECK_AND_LOCK(np);
4093
4094	vals = np->rn_values;
4095	len = np->rn_values_size;
4096
4097	out->rpr_type = np->rn_valtype;
4098
4099	start = (repeat)? iter->rni_last_offset : iter->rni_offset;
4100
4101	if (len == 0 || start >= len) {
4102		result = REP_PROTOCOL_DONE;
4103		*sz_out -= sizeof (out->rpr_value);
4104	} else {
4105		w = strlcpy(out->rpr_value, &vals[start],
4106		    sizeof (out->rpr_value));
4107
4108		if (w >= sizeof (out->rpr_value))
4109			backend_panic("value too large");
4110
4111		*sz_out = offsetof(struct rep_protocol_value_response,
4112		    rpr_value[w + 1]);
4113
4114		/*
4115		 * update the offsets if we're not repeating
4116		 */
4117		if (!repeat) {
4118			iter->rni_last_offset = iter->rni_offset;
4119			iter->rni_offset += (w + 1);
4120		}
4121
4122		result = REP_PROTOCOL_SUCCESS;
4123	}
4124
4125	(void) pthread_mutex_unlock(&np->rn_lock);
4126	return (result);
4127}
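
/*
 * Sketch of the value layout assumed by the walk above: rn_values is one
 * buffer of '\0'-separated strings, rn_values_size bytes long.  For the
 * (hypothetical) values "a" and "bb" the buffer is
 *
 *	'a' '\0' 'b' 'b' '\0'
 *
 * and successive non-repeat calls advance rni_offset by w + 1 (to 2, then
 * to 5), after which start >= len and _DONE is returned.
 */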
4128
4129/*
4130 * Entry point for ITER_START from client.c.  Validate the arguments & call
4131 * rc_iter_create().
4132 *
4133 * Fails with
4134 *   _NOT_SET
4135 *   _DELETED
4136 *   _TYPE_MISMATCH - np is not a property (when iterating values)
4137 *   _BAD_REQUEST - flags is invalid
4138 *		    pattern is invalid
4139 *   _NO_RESOURCES
4140 *   _INVALID_TYPE
4141 *   _TYPE_MISMATCH - *npp cannot have children of type
4142 *   _BACKEND_ACCESS
4143 */
4144int
4145rc_node_setup_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp,
4146    uint32_t type, uint32_t flags, const char *pattern)
4147{
4148	rc_node_t *np;
4149	rc_iter_filter_func *f = NULL;
4150	int rc;
4151
4152	RC_NODE_PTR_GET_CHECK(np, npp);
4153
4154	if (pattern != NULL && pattern[0] == '\0')
4155		pattern = NULL;
4156
4157	if (type == REP_PROTOCOL_ENTITY_VALUE) {
4158		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
4159			return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4160		if (flags != RP_ITER_START_ALL || pattern != NULL)
4161			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4162
4163		rc = rc_node_setup_value_iter(npp, iterp);
4164		assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4165		return (rc);
4166	}
4167
4168	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
4169	    REP_PROTOCOL_SUCCESS)
4170		return (rc);
4171
4172	if (((flags & RP_ITER_START_FILT_MASK) == RP_ITER_START_ALL) ^
4173	    (pattern == NULL))
4174		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4175
4176	/* Composition only works for instances & snapshots. */
4177	if ((flags & RP_ITER_START_COMPOSED) &&
4178	    (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE &&
4179	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT))
4180		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4181
4182	if (pattern != NULL) {
4183		if ((rc = rc_check_type_name(type, pattern)) !=
4184		    REP_PROTOCOL_SUCCESS)
4185			return (rc);
4186		pattern = strdup(pattern);
4187		if (pattern == NULL)
4188			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4189	}
4190
4191	switch (flags & RP_ITER_START_FILT_MASK) {
4192	case RP_ITER_START_ALL:
4193		f = NULL;
4194		break;
4195	case RP_ITER_START_EXACT:
4196		f = rc_iter_filter_name;
4197		break;
4198	case RP_ITER_START_PGTYPE:
4199		if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4200			free((void *)pattern);
4201			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4202		}
4203		f = rc_iter_filter_type;
4204		break;
4205	default:
4206		free((void *)pattern);
4207		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4208	}
4209
4210	rc = rc_iter_create(iterp, np, type, f, (void *)pattern,
4211	    flags & RP_ITER_START_COMPOSED);
4212	if (rc != REP_PROTOCOL_SUCCESS && pattern != NULL)
4213		free((void *)pattern);
4214
4215	return (rc);
4216}
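
/*
 * Usage sketch (illustrative only; the actual callers are in client.c, and
 * rc_node_ptr_t initialization is elided):
 *
 *	rc_node_iter_t *iter = NULL;
 *	rc_node_ptr_t child;
 *	int r;
 *
 *	r = rc_node_setup_iter(npp, &iter, type, RP_ITER_START_ALL, NULL);
 *	if (r == REP_PROTOCOL_SUCCESS) {
 *		while ((r = rc_iter_next(iter, &child, type)) ==
 *		    REP_PROTOCOL_SUCCESS) {
 *			... use child ...
 *		}
 *		rc_iter_destroy(&iter);
 *	}
 *
 * rc_iter_next() returns REP_PROTOCOL_DONE once the walk is exhausted.
 */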
4217
4218/*
4219 * Do uu_list_walk_next(iter->rni_iter) until we find a child which matches
4220 * the filter.
4221 * For composed iterators, also check whether there is an overlapping entity
4222 * (see embedded comments).  If we reach the end of the list, start over at
4223 * the next level.
4224 *
4225 * Returns
4226 *   _BAD_REQUEST - iter walks values
4227 *   _TYPE_MISMATCH - iter does not walk type entities
4228 *   _DELETED - parent was deleted
4229 *   _NO_RESOURCES
4230 *   _INVALID_TYPE - type is invalid
4231 *   _DONE
4232 *   _SUCCESS
4233 *
4234 * For composed property group iterators, can also return
4235 *   _TYPE_MISMATCH - parent cannot have type children
4236 */
4237int
4238rc_iter_next(rc_node_iter_t *iter, rc_node_ptr_t *out, uint32_t type)
4239{
4240	rc_node_t *np = iter->rni_parent;
4241	rc_node_t *res;
4242	int rc;
4243
4244	if (iter->rni_type == REP_PROTOCOL_ENTITY_VALUE)
4245		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4246
4247	if (iter->rni_iter == NULL) {
4248		rc_node_clear(out, 0);
4249		return (REP_PROTOCOL_DONE);
4250	}
4251
4252	if (iter->rni_type != type) {
4253		rc_node_clear(out, 0);
4254		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4255	}
4256
4257	(void) pthread_mutex_lock(&np->rn_lock);  /* held by _iter_create() */
4258
4259	if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
4260		(void) pthread_mutex_unlock(&np->rn_lock);
4261		rc_node_clear(out, 1);
4262		return (REP_PROTOCOL_FAIL_DELETED);
4263	}
4264
4265	if (iter->rni_clevel >= 0) {
4266		/* Composed iterator.  Iterate over appropriate level. */
4267		(void) pthread_mutex_unlock(&np->rn_lock);
4268		np = np->rn_cchain[iter->rni_clevel];
4269		/*
4270		 * If iter->rni_parent is an instance or a snapshot, np must
4271		 * be valid since iter holds iter->rni_parent, and the possible
4272		 * levels (service, instance, snaplevel) cannot be destroyed
4273		 * while rni_parent is held.  If iter->rni_parent is
4274		 * a composed property group then rc_node_setup_cpg() put
4275		 * a hold on np.
4276		 */
4277
4278		(void) pthread_mutex_lock(&np->rn_lock);
4279
4280		if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
4281			(void) pthread_mutex_unlock(&np->rn_lock);
4282			rc_node_clear(out, 1);
4283			return (REP_PROTOCOL_FAIL_DELETED);
4284		}
4285	}
4286
4287	assert(np->rn_flags & RC_NODE_HAS_CHILDREN);
4288
4289	for (;;) {
4290		res = uu_list_walk_next(iter->rni_iter);
4291		if (res == NULL) {
4292			rc_node_t *parent = iter->rni_parent;
4293
4294#if COMPOSITION_DEPTH == 2
4295			if (iter->rni_clevel < 0 || iter->rni_clevel == 1) {
4296				/* release walker and lock */
4297				rc_iter_end(iter);
4298				break;
4299			}
4300
4301			/* Stop walking current level. */
4302			uu_list_walk_end(iter->rni_iter);
4303			iter->rni_iter = NULL;
4304			(void) pthread_mutex_unlock(&np->rn_lock);
4305			rc_node_rele_other(iter->rni_iter_node);
4306			iter->rni_iter_node = NULL;
4307
4308			/* Start walking next level. */
4309			++iter->rni_clevel;
4310			np = parent->rn_cchain[iter->rni_clevel];
4311			assert(np != NULL);
4312#else
4313#error This code must be updated.
4314#endif
4315
4316			(void) pthread_mutex_lock(&np->rn_lock);
4317
4318			rc = rc_node_fill_children(np, iter->rni_type);
4319
4320			if (rc == REP_PROTOCOL_SUCCESS) {
4321				iter->rni_iter =
4322				    uu_list_walk_start(np->rn_children,
4323					UU_WALK_ROBUST);
4324
4325				if (iter->rni_iter == NULL)
4326					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
4327				else {
4328					iter->rni_iter_node = np;
4329					rc_node_hold_other(np);
4330				}
4331			}
4332
4333			if (rc != REP_PROTOCOL_SUCCESS) {
4334				(void) pthread_mutex_unlock(&np->rn_lock);
4335				rc_node_clear(out, 0);
4336				return (rc);
4337			}
4338
4339			continue;
4340		}
4341
4342		if (res->rn_id.rl_type != type ||
4343		    !iter->rni_filter(res, iter->rni_filter_arg))
4344			continue;
4345
4346		/*
4347		 * If we're composed and not at the top level, check to see if
4348		 * there's an entity at a higher level with the same name.  If
4349		 * so, skip this one.
4350		 */
4351		if (iter->rni_clevel > 0) {
4352			rc_node_t *ent = iter->rni_parent->rn_cchain[0];
4353			rc_node_t *pg;
4354
4355#if COMPOSITION_DEPTH == 2
4356			assert(iter->rni_clevel == 1);
4357
4358			(void) pthread_mutex_unlock(&np->rn_lock);
4359			(void) pthread_mutex_lock(&ent->rn_lock);
4360			rc = rc_node_find_named_child(ent, res->rn_name, type,
4361			    &pg);
4362			if (rc == REP_PROTOCOL_SUCCESS && pg != NULL)
4363				rc_node_rele(pg);
4364			(void) pthread_mutex_unlock(&ent->rn_lock);
4365			if (rc != REP_PROTOCOL_SUCCESS) {
4366				rc_node_clear(out, 0);
4367				return (rc);
4368			}
4369			(void) pthread_mutex_lock(&np->rn_lock);
4370
4371			/* Make sure np isn't being deleted all of a sudden. */
4372			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
4373				(void) pthread_mutex_unlock(&np->rn_lock);
4374				rc_node_clear(out, 1);
4375				return (REP_PROTOCOL_FAIL_DELETED);
4376			}
4377
4378			if (pg != NULL)
4379				/* Keep going. */
4380				continue;
4381#else
4382#error This code must be updated.
4383#endif
4384		}
4385
4386		/*
4387		 * If we're composed, iterating over property groups, and not
4388		 * at the bottom level, check to see if there's a pg at lower
4389		 * level with the same name.  If so, return a cpg.
4390		 */
4391		if (iter->rni_clevel >= 0 &&
4392		    type == REP_PROTOCOL_ENTITY_PROPERTYGRP &&
4393		    iter->rni_clevel < COMPOSITION_DEPTH - 1) {
4394#if COMPOSITION_DEPTH == 2
4395			rc_node_t *pg;
4396			rc_node_t *ent = iter->rni_parent->rn_cchain[1];
4397
4398			rc_node_hold(res);	/* While we drop np->rn_lock */
4399
4400			(void) pthread_mutex_unlock(&np->rn_lock);
4401			(void) pthread_mutex_lock(&ent->rn_lock);
4402			rc = rc_node_find_named_child(ent, res->rn_name, type,
4403			    &pg);
4404			/* holds pg if not NULL */
4405			(void) pthread_mutex_unlock(&ent->rn_lock);
4406			if (rc != REP_PROTOCOL_SUCCESS) {
4407				rc_node_rele(res);
4408				rc_node_clear(out, 0);
4409				return (rc);
4410			}
4411
4412			(void) pthread_mutex_lock(&np->rn_lock);
4413			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
4414				(void) pthread_mutex_unlock(&np->rn_lock);
4415				rc_node_rele(res);
4416				if (pg != NULL)
4417					rc_node_rele(pg);
4418				rc_node_clear(out, 1);
4419				return (REP_PROTOCOL_FAIL_DELETED);
4420			}
4421
4422			if (pg == NULL) {
4423				rc_node_rele(res);
4424			} else {
4425				rc_node_t *cpg;
4426
4427				/* Keep res held for rc_node_setup_cpg(). */
4428
4429				cpg = rc_node_alloc();
4430				if (cpg == NULL) {
4431					(void) pthread_mutex_unlock(
4432					    &np->rn_lock);
4433					rc_node_rele(res);
4434					rc_node_rele(pg);
4435					rc_node_clear(out, 0);
4436					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4437				}
4438
4439				switch (rc_node_setup_cpg(cpg, res, pg)) {
4440				case REP_PROTOCOL_SUCCESS:
4441					res = cpg;
4442					break;
4443
4444				case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
4445					/* Never mind. */
4446					rc_node_destroy(cpg);
4447					rc_node_rele(pg);
4448					rc_node_rele(res);
4449					break;
4450
4451				case REP_PROTOCOL_FAIL_NO_RESOURCES:
4452					rc_node_destroy(cpg);
4453					(void) pthread_mutex_unlock(
4454					    &np->rn_lock);
4455					rc_node_rele(res);
4456					rc_node_rele(pg);
4457					rc_node_clear(out, 0);
4458					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4459
4460				default:
4461					assert(0);
4462					abort();
4463				}
4464			}
4465#else
4466#error This code must be updated.
4467#endif
4468		}
4469
4470		rc_node_hold(res);
4471		(void) pthread_mutex_unlock(&np->rn_lock);
4472		break;
4473	}
4474	rc_node_assign(out, res);
4475
4476	if (res == NULL)
4477		return (REP_PROTOCOL_DONE);
4478	rc_node_rele(res);
4479	return (REP_PROTOCOL_SUCCESS);
4480}
4481
4482void
4483rc_iter_destroy(rc_node_iter_t **nipp)
4484{
4485	rc_node_iter_t *nip = *nipp;
4486	rc_node_t *np;
4487
4488	if (nip == NULL)
4489		return;				/* already freed */
4490
4491	np = nip->rni_parent;
4492
4493	if (nip->rni_filter_arg != NULL)
4494		free(nip->rni_filter_arg);
4495	nip->rni_filter_arg = NULL;
4496
4497	if (nip->rni_type == REP_PROTOCOL_ENTITY_VALUE ||
4498	    nip->rni_iter != NULL) {
4499		if (nip->rni_clevel < 0)
4500			(void) pthread_mutex_lock(&np->rn_lock);
4501		else
4502			(void) pthread_mutex_lock(
4503			    &np->rn_cchain[nip->rni_clevel]->rn_lock);
4504		rc_iter_end(nip);		/* release walker and lock */
4505	}
4506	nip->rni_parent = NULL;
4507
4508	uu_free(nip);
4509	*nipp = NULL;
4510}
4511
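/*
 * Set up a transaction on the property group *npp.  Verifies that the node
 * is a (non-snapshot) property group (for a composed property group, its
 * top-level real pg is used) and, unless the client is privileged, builds
 * and evaluates the authorizations needed to modify it.  On success the
 * property group is assigned to txp, and rnp_authorized records whether the
 * commit-time checks in rc_tx_commit() may be skipped.
 *
 * Fails with
 *   _NOT_SET - npp is reset
 *   _DELETED
 *   _TYPE_MISMATCH - *npp is not a property group
 *   _PERMISSION_DENIED
 *   _NO_RESOURCES
 * and passes through errors from the permission helpers.
 */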
4512int
4513rc_node_setup_tx(rc_node_ptr_t *npp, rc_node_ptr_t *txp)
4514{
4515	rc_node_t *np;
4516	permcheck_t *pcp;
4517	int ret;
4518	int authorized = 0;
4519
4520	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
4521
4522	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
4523		rc_node_rele(np);
4524		np = np->rn_cchain[0];
4525		RC_NODE_CHECK_AND_HOLD(np);
4526	}
4527
4528	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4529		rc_node_rele(np);
4530		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4531	}
4532
4533	if (np->rn_id.rl_ids[ID_SNAPSHOT] != 0) {
4534		rc_node_rele(np);
4535		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4536	}
4537
4538	if (client_is_privileged())
4539		goto skip_checks;
4540
4541#ifdef NATIVE_BUILD
4542	rc_node_rele(np);
4543	return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4544#else
4545	/* permission check */
4546	pcp = pc_create();
4547	if (pcp == NULL) {
4548		rc_node_rele(np);
4549		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4550	}
4551
4552	if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&	/* instance pg */
4553	    ((strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0 &&
4554	    strcmp(np->rn_type, AUTH_PG_ACTIONS_TYPE) == 0) ||
4555	    (strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
4556	    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
4557		rc_node_t *instn;
4558
4559		/* solaris.smf.manage can be used. */
4560		ret = perm_add_enabling(pcp, AUTH_MANAGE);
4561
4562		if (ret != REP_PROTOCOL_SUCCESS) {
4563			pc_free(pcp);
4564			rc_node_rele(np);
4565			return (ret);
4566		}
4567
4568		/* general/action_authorization values can be used. */
4569		ret = rc_node_parent(np, &instn);
4570		if (ret != REP_PROTOCOL_SUCCESS) {
4571			assert(ret == REP_PROTOCOL_FAIL_DELETED);
4572			rc_node_rele(np);
4573			pc_free(pcp);
4574			return (REP_PROTOCOL_FAIL_DELETED);
4575		}
4576
4577		assert(instn->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
4578
4579		ret = perm_add_inst_action_auth(pcp, instn);
4580		rc_node_rele(instn);
4581		switch (ret) {
4582		case REP_PROTOCOL_SUCCESS:
4583			break;
4584
4585		case REP_PROTOCOL_FAIL_DELETED:
4586		case REP_PROTOCOL_FAIL_NO_RESOURCES:
4587			rc_node_rele(np);
4588			pc_free(pcp);
4589			return (ret);
4590
4591		default:
4592			bad_error("perm_add_inst_action_auth", ret);
4593		}
4594
4595		if (strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0)
4596			authorized = 1;		/* Don't check on commit. */
4597	} else {
4598		ret = perm_add_enabling(pcp, AUTH_MODIFY);
4599
4600		if (ret == REP_PROTOCOL_SUCCESS) {
4601			/* propertygroup-type-specific authorization */
4602			/* no locking because rn_type won't change anyway */
4603			const char * const auth =
4604			    perm_auth_for_pgtype(np->rn_type);
4605
4606			if (auth != NULL)
4607				ret = perm_add_enabling(pcp, auth);
4608		}
4609
4610		if (ret == REP_PROTOCOL_SUCCESS)
4611			/* propertygroup/transaction-type-specific auths */
4612			ret =
4613			    perm_add_enabling_values(pcp, np, AUTH_PROP_VALUE);
4614
4615		if (ret == REP_PROTOCOL_SUCCESS)
4616			ret =
4617			    perm_add_enabling_values(pcp, np, AUTH_PROP_MODIFY);
4618
4619		/* AUTH_MANAGE can manipulate general/AUTH_PROP_ACTION */
4620		if (ret == REP_PROTOCOL_SUCCESS &&
4621		    strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
4622		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0)
4623			ret = perm_add_enabling(pcp, AUTH_MANAGE);
4624
4625		if (ret != REP_PROTOCOL_SUCCESS) {
4626			pc_free(pcp);
4627			rc_node_rele(np);
4628			return (ret);
4629		}
4630	}
4631
4632	ret = perm_granted(pcp);
4633	if (ret != 1) {
4634		pc_free(pcp);
4635		rc_node_rele(np);
4636		return (ret == 0 ? REP_PROTOCOL_FAIL_PERMISSION_DENIED :
4637		    REP_PROTOCOL_FAIL_NO_RESOURCES);
4638	}
4639
4640	pc_free(pcp);
4641#endif /* NATIVE_BUILD */
4642
4643skip_checks:
4644	rc_node_assign(txp, np);
4645	txp->rnp_authorized = authorized;
4646
4647	rc_node_rele(np);
4648	return (REP_PROTOCOL_SUCCESS);
4649}
4650
4651/*
4652 * Return 1 if the given transaction commands only modify the values of
4653 * properties other than "modify_authorization".  Return -1 if any of the
4654 * commands are invalid, and 0 otherwise.
4655 */
4656static int
4657tx_allow_value(const void *cmds_arg, size_t cmds_sz, rc_node_t *pg)
4658{
4659	const struct rep_protocol_transaction_cmd *cmds;
4660	uintptr_t loc;
4661	uint32_t sz;
4662	rc_node_t *prop;
4663	boolean_t ok;
4664
4665	assert(!MUTEX_HELD(&pg->rn_lock));
4666
4667	loc = (uintptr_t)cmds_arg;
4668
4669	while (cmds_sz > 0) {
4670		cmds = (struct rep_protocol_transaction_cmd *)loc;
4671
4672		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4673			return (-1);
4674
4675		sz = cmds->rptc_size;
4676		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4677			return (-1);
4678
4679		sz = TX_SIZE(sz);
4680		if (sz > cmds_sz)
4681			return (-1);
4682
4683		switch (cmds[0].rptc_action) {
4684		case REP_PROTOCOL_TX_ENTRY_CLEAR:
4685			break;
4686
4687		case REP_PROTOCOL_TX_ENTRY_REPLACE:
4688			/* Check type */
4689			(void) pthread_mutex_lock(&pg->rn_lock);
4690			if (rc_node_find_named_child(pg,
4691			    (const char *)cmds[0].rptc_data,
4692			    REP_PROTOCOL_ENTITY_PROPERTY, &prop) ==
4693			    REP_PROTOCOL_SUCCESS) {
4694				ok = (prop != NULL &&
4695				    prop->rn_valtype == cmds[0].rptc_type);
4696			} else {
4697				/* Return more particular error? */
4698				ok = B_FALSE;
4699			}
4700			(void) pthread_mutex_unlock(&pg->rn_lock);
4701			if (ok)
4702				break;
4703			return (0);
4704
4705		default:
4706			return (0);
4707		}
4708
4709		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_MODIFY)
4710		    == 0)
4711			return (0);
4712
4713		loc += sz;
4714		cmds_sz -= sz;
4715	}
4716
4717	return (1);
4718}
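
/*
 * Note on the command-stream walk used here and by tx_modifies_action() and
 * tx_only_enabled() below: cmds_arg is a packed sequence of
 * struct rep_protocol_transaction_cmd entries.  Each entry's rptc_size is
 * its unpadded length; TX_SIZE() (defined elsewhere) gives the number of
 * bytes to step to the next entry.  A size at or below
 * REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE, or one larger than the remaining
 * cmds_sz, marks the stream as malformed and the walkers return -1.
 */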
4719
4720/*
4721 * Return 1 if any of the given transaction commands affect
4722 * "action_authorization".  Return -1 if any of the commands are invalid and
4723 * 0 in all other cases.
4724 */
4725static int
4726tx_modifies_action(const void *cmds_arg, size_t cmds_sz)
4727{
4728	const struct rep_protocol_transaction_cmd *cmds;
4729	uintptr_t loc;
4730	uint32_t sz;
4731
4732	loc = (uintptr_t)cmds_arg;
4733
4734	while (cmds_sz > 0) {
4735		cmds = (struct rep_protocol_transaction_cmd *)loc;
4736
4737		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4738			return (-1);
4739
4740		sz = cmds->rptc_size;
4741		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4742			return (-1);
4743
4744		sz = TX_SIZE(sz);
4745		if (sz > cmds_sz)
4746			return (-1);
4747
4748		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_ACTION)
4749		    == 0)
4750			return (1);
4751
4752		loc += sz;
4753		cmds_sz -= sz;
4754	}
4755
4756	return (0);
4757}
4758
4759/*
4760 * Returns 1 if the transaction commands only modify properties named
4761 * 'enabled'.
4762 */
4763static int
4764tx_only_enabled(const void *cmds_arg, size_t cmds_sz)
4765{
4766	const struct rep_protocol_transaction_cmd *cmd;
4767	uintptr_t loc;
4768	uint32_t sz;
4769
4770	loc = (uintptr_t)cmds_arg;
4771
4772	while (cmds_sz > 0) {
4773		cmd = (struct rep_protocol_transaction_cmd *)loc;
4774
4775		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4776			return (-1);
4777
4778		sz = cmd->rptc_size;
4779		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4780			return (-1);
4781
4782		sz = TX_SIZE(sz);
4783		if (sz > cmds_sz)
4784			return (-1);
4785
4786		if (strcmp((const char *)cmd->rptc_data, AUTH_PROP_ENABLED)
4787		    != 0)
4788			return (0);
4789
4790		loc += sz;
4791		cmds_sz -= sz;
4792	}
4793
4794	return (1);
4795}
4796
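/*
 * Commit the transaction commands in cmds/cmds_sz against the property
 * group held by txp.  Unless the client is privileged or the transaction
 * was pre-authorized by rc_node_setup_tx(), the required authorizations are
 * rebuilt from the contents of the transaction and re-checked.  On success
 * a new node carrying the updated generation replaces the old property
 * group (which is marked RC_NODE_OLD), pg notification waiters are fired,
 * and txp is cleared.  An empty transaction (object_tx_commit() returns
 * _DONE) succeeds without replacing the node.
 *
 * Fails with
 *   _PERMISSION_DENIED
 *   _BAD_REQUEST - the command stream is malformed
 *   _NO_RESOURCES
 *   _DELETED
 *   _NOT_LATEST - the property group has already been superseded
 * and passes through errors from object_tx_commit().
 */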
4797int
4798rc_tx_commit(rc_node_ptr_t *txp, const void *cmds, size_t cmds_sz)
4799{
4800	rc_node_t *np = txp->rnp_node;
4801	rc_node_t *pp;
4802	rc_node_t *nnp;
4803	rc_node_pg_notify_t *pnp;
4804	int rc;
4805	permcheck_t *pcp;
4806	int granted, normal;
4807
4808	RC_NODE_CHECK(np);
4809
4810	if (!client_is_privileged() && !txp->rnp_authorized) {
4811#ifdef NATIVE_BUILD
4812		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4813#else
4814		/* permission check: depends on contents of transaction */
4815		pcp = pc_create();
4816		if (pcp == NULL)
4817			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4818
4819		/* If normal is cleared, we won't do the normal checks. */
4820		normal = 1;
4821		rc = REP_PROTOCOL_SUCCESS;
4822
4823		if (strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
4824		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0) {
4825			/* Touching general[framework]/action_authorization? */
4826			rc = tx_modifies_action(cmds, cmds_sz);
4827			if (rc == -1) {
4828				pc_free(pcp);
4829				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4830			}
4831
4832			if (rc) {
4833				/* Yes: only AUTH_MANAGE can be used. */
4834				rc = perm_add_enabling(pcp, AUTH_MANAGE);
4835				normal = 0;
4836			} else {
4837				rc = REP_PROTOCOL_SUCCESS;
4838			}
4839		} else if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&
4840		    strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
4841		    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0) {
4842			rc_node_t *instn;
4843
4844			rc = tx_only_enabled(cmds, cmds_sz);
4845			if (rc == -1) {
4846				pc_free(pcp);
4847				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4848			}
4849
4850			if (rc) {
4851				rc = rc_node_parent(np, &instn);
4852				if (rc != REP_PROTOCOL_SUCCESS) {
4853					assert(rc == REP_PROTOCOL_FAIL_DELETED);
4854					pc_free(pcp);
4855					return (rc);
4856				}
4857
4858				assert(instn->rn_id.rl_type ==
4859				    REP_PROTOCOL_ENTITY_INSTANCE);
4860
4861				rc = perm_add_inst_action_auth(pcp, instn);
4862				rc_node_rele(instn);
4863				switch (rc) {
4864				case REP_PROTOCOL_SUCCESS:
4865					break;
4866
4867				case REP_PROTOCOL_FAIL_DELETED:
4868				case REP_PROTOCOL_FAIL_NO_RESOURCES:
4869					pc_free(pcp);
4870					return (rc);
4871
4872				default:
4873					bad_error("perm_add_inst_action_auth",
4874					    rc);
4875				}
4876			} else {
4877				rc = REP_PROTOCOL_SUCCESS;
4878			}
4879		}
4880
4881		if (rc == REP_PROTOCOL_SUCCESS && normal) {
4882			rc = perm_add_enabling(pcp, AUTH_MODIFY);
4883
4884			if (rc == REP_PROTOCOL_SUCCESS) {
4885				/* Add pgtype-specific authorization. */
4886				const char * const auth =
4887				    perm_auth_for_pgtype(np->rn_type);
4888
4889				if (auth != NULL)
4890					rc = perm_add_enabling(pcp, auth);
4891			}
4892
4893			/* Add pg-specific modify_authorization auths. */
4894			if (rc == REP_PROTOCOL_SUCCESS)
4895				rc = perm_add_enabling_values(pcp, np,
4896				    AUTH_PROP_MODIFY);
4897
4898			/* If value_authorization values are ok, add them. */
4899			if (rc == REP_PROTOCOL_SUCCESS) {
4900				rc = tx_allow_value(cmds, cmds_sz, np);
4901				if (rc == -1)
4902					rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
4903				else if (rc)
4904					rc = perm_add_enabling_values(pcp, np,
4905					    AUTH_PROP_VALUE);
4906			}
4907		}
4908
4909		if (rc == REP_PROTOCOL_SUCCESS) {
4910			granted = perm_granted(pcp);
4911			if (granted < 0)
4912				rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
4913		}
4914
4915		pc_free(pcp);
4916
4917		if (rc != REP_PROTOCOL_SUCCESS)
4918			return (rc);
4919
4920		if (!granted)
4921			return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4922#endif /* NATIVE_BUILD */
4923	}
4924
4925	nnp = rc_node_alloc();
4926	if (nnp == NULL)
4927		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4928
4929	nnp->rn_id = np->rn_id;			/* structure assignment */
4930	nnp->rn_hash = np->rn_hash;
4931	nnp->rn_name = strdup(np->rn_name);
4932	nnp->rn_type = strdup(np->rn_type);
4933	nnp->rn_pgflags = np->rn_pgflags;
4934
4935	nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
4936
4937	if (nnp->rn_name == NULL || nnp->rn_type == NULL) {
4938		rc_node_destroy(nnp);
4939		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4940	}
4941
4942	(void) pthread_mutex_lock(&np->rn_lock);
4943	/*
4944	 * We must have all of the old properties in the cache, or the
4945	 * database deletions could cause inconsistencies.
4946	 */
4947	if ((rc = rc_node_fill_children(np, REP_PROTOCOL_ENTITY_PROPERTY)) !=
4948	    REP_PROTOCOL_SUCCESS) {
4949		(void) pthread_mutex_unlock(&np->rn_lock);
4950		rc_node_destroy(nnp);
4951		return (rc);
4952	}
4953
4954	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
4955		(void) pthread_mutex_unlock(&np->rn_lock);
4956		rc_node_destroy(nnp);
4957		return (REP_PROTOCOL_FAIL_DELETED);
4958	}
4959
4960	if (np->rn_flags & RC_NODE_OLD) {
4961		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
4962		(void) pthread_mutex_unlock(&np->rn_lock);
4963		rc_node_destroy(nnp);
4964		return (REP_PROTOCOL_FAIL_NOT_LATEST);
4965	}
4966
4967	pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
4968	if (pp == NULL) {
4969		/* our parent is gone, we're going next... */
4970		rc_node_destroy(nnp);
4971		(void) pthread_mutex_lock(&np->rn_lock);
4972		if (np->rn_flags & RC_NODE_OLD) {
4973			(void) pthread_mutex_unlock(&np->rn_lock);
4974			return (REP_PROTOCOL_FAIL_NOT_LATEST);
4975		}
4976		(void) pthread_mutex_unlock(&np->rn_lock);
4977		return (REP_PROTOCOL_FAIL_DELETED);
4978	}
4979	(void) pthread_mutex_unlock(&pp->rn_lock);
4980
4981	/*
4982	 * prepare for the transaction
4983	 */
4984	(void) pthread_mutex_lock(&np->rn_lock);
4985	if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
4986		(void) pthread_mutex_unlock(&np->rn_lock);
4987		(void) pthread_mutex_lock(&pp->rn_lock);
4988		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
4989		(void) pthread_mutex_unlock(&pp->rn_lock);
4990		rc_node_destroy(nnp);
4991		return (REP_PROTOCOL_FAIL_DELETED);
4992	}
4993	nnp->rn_gen_id = np->rn_gen_id;
4994	(void) pthread_mutex_unlock(&np->rn_lock);
4995
4996	/* Sets nnp->rn_gen_id on success. */
4997	rc = object_tx_commit(&np->rn_id, cmds, cmds_sz, &nnp->rn_gen_id);
4998
4999	(void) pthread_mutex_lock(&np->rn_lock);
5000	if (rc != REP_PROTOCOL_SUCCESS) {
5001		rc_node_rele_flag(np, RC_NODE_IN_TX);
5002		(void) pthread_mutex_unlock(&np->rn_lock);
5003		(void) pthread_mutex_lock(&pp->rn_lock);
5004		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
5005		(void) pthread_mutex_unlock(&pp->rn_lock);
5006		rc_node_destroy(nnp);
5007		rc_node_clear(txp, 0);
5008		if (rc == REP_PROTOCOL_DONE)
5009			rc = REP_PROTOCOL_SUCCESS; /* successful empty tx */
5010		return (rc);
5011	}
5012
5013	/*
5014	 * Notify waiters
5015	 */
5016	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5017	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
5018		rc_pg_notify_fire(pnp);
5019	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5020
5021	np->rn_flags |= RC_NODE_OLD;
5022	(void) pthread_mutex_unlock(&np->rn_lock);
5023
5024	rc_notify_remove_node(np);
5025
5026	/*
5027	 * replace np with nnp
5028	 */
5029	rc_node_relink_child(pp, np, nnp);
5030
5031	/*
5032	 * all done -- clear the transaction.
5033	 */
5034	rc_node_clear(txp, 0);
5035
5036	return (REP_PROTOCOL_SUCCESS);
5037}
5038
5039void
5040rc_pg_notify_init(rc_node_pg_notify_t *pnp)
5041{
5042	uu_list_node_init(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5043	pnp->rnpn_pg = NULL;
5044	pnp->rnpn_fd = -1;
5045}
5046
5047int
5048rc_pg_notify_setup(rc_node_pg_notify_t *pnp, rc_node_ptr_t *npp, int fd)
5049{
5050	rc_node_t *np;
5051
5052	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
5053
5054	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
5055		(void) pthread_mutex_unlock(&np->rn_lock);
5056		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5057	}
5058
5059	/*
5060	 * wait for any transaction in progress to complete
5061	 */
5062	if (!rc_node_wait_flag(np, RC_NODE_IN_TX)) {
5063		(void) pthread_mutex_unlock(&np->rn_lock);
5064		return (REP_PROTOCOL_FAIL_DELETED);
5065	}
5066
5067	if (np->rn_flags & RC_NODE_OLD) {
5068		(void) pthread_mutex_unlock(&np->rn_lock);
5069		return (REP_PROTOCOL_FAIL_NOT_LATEST);
5070	}
5071
5072	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5073	rc_pg_notify_fire(pnp);
5074	pnp->rnpn_pg = np;
5075	pnp->rnpn_fd = fd;
5076	(void) uu_list_insert_after(np->rn_pg_notify_list, NULL, pnp);
5077	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5078
5079	(void) pthread_mutex_unlock(&np->rn_lock);
5080	return (REP_PROTOCOL_SUCCESS);
5081}
5082
5083void
5084rc_pg_notify_fini(rc_node_pg_notify_t *pnp)
5085{
5086	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5087	rc_pg_notify_fire(pnp);
5088	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5089
5090	uu_list_node_fini(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5091}
5092
5093void
5094rc_notify_info_init(rc_notify_info_t *rnip)
5095{
5096	int i;
5097
5098	uu_list_node_init(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5099	uu_list_node_init(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5100	    rc_notify_pool);
5101
5102	rnip->rni_notify.rcn_node = NULL;
5103	rnip->rni_notify.rcn_info = rnip;
5104
5105	bzero(rnip->rni_namelist, sizeof (rnip->rni_namelist));
5106	bzero(rnip->rni_typelist, sizeof (rnip->rni_typelist));
5107
5108	(void) pthread_cond_init(&rnip->rni_cv, NULL);
5109
5110	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5111		rnip->rni_namelist[i] = NULL;
5112		rnip->rni_typelist[i] = NULL;
5113	}
5114}
5115
5116static void
5117rc_notify_info_insert_locked(rc_notify_info_t *rnip)
5118{
5119	assert(MUTEX_HELD(&rc_pg_notify_lock));
5120
5121	assert(!(rnip->rni_flags & RC_NOTIFY_ACTIVE));
5122
5123	rnip->rni_flags |= RC_NOTIFY_ACTIVE;
5124	(void) uu_list_insert_after(rc_notify_info_list, NULL, rnip);
5125	(void) uu_list_insert_before(rc_notify_list, NULL, &rnip->rni_notify);
5126}
5127
5128static void
5129rc_notify_info_remove_locked(rc_notify_info_t *rnip)
5130{
5131	rc_notify_t *me = &rnip->rni_notify;
5132	rc_notify_t *np;
5133
5134	assert(MUTEX_HELD(&rc_pg_notify_lock));
5135
5136	assert(rnip->rni_flags & RC_NOTIFY_ACTIVE);
5137
5138	assert(!(rnip->rni_flags & RC_NOTIFY_DRAIN));
5139	rnip->rni_flags |= RC_NOTIFY_DRAIN;
5140	(void) pthread_cond_broadcast(&rnip->rni_cv);
5141
5142	(void) uu_list_remove(rc_notify_info_list, rnip);
5143
5144	/*
5145	 * clean up any notifications at the beginning of the list
5146	 */
5147	if (uu_list_first(rc_notify_list) == me) {
5148		while ((np = uu_list_next(rc_notify_list, me)) != NULL &&
5149		    np->rcn_info == NULL)
5150			rc_notify_remove_locked(np);
5151	}
5152	(void) uu_list_remove(rc_notify_list, me);
5153
5154	while (rnip->rni_waiters) {
5155		(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5156		(void) pthread_cond_broadcast(&rnip->rni_cv);
5157		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5158	}
5159
5160	rnip->rni_flags &= ~(RC_NOTIFY_DRAIN | RC_NOTIFY_ACTIVE);
5161}
5162
5163static int
5164rc_notify_info_add_watch(rc_notify_info_t *rnip, const char **arr,
5165    const char *name)
5166{
5167	int i;
5168	int rc;
5169	char *f;
5170
5171	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_PROPERTYGRP, name);
5172	if (rc != REP_PROTOCOL_SUCCESS)
5173		return (rc);
5174
5175	f = strdup(name);
5176	if (f == NULL)
5177		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5178
5179	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5180
5181	while (rnip->rni_flags & RC_NOTIFY_EMPTYING)
5182		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5183
5184	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++)
5185		if (arr[i] == NULL)
5186			break;
5187
5188	if (i == RC_NOTIFY_MAX_NAMES) {
5189		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5190		free(f);
5191		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5192	}
5193
5194	arr[i] = f;
5195	if (!(rnip->rni_flags & RC_NOTIFY_ACTIVE))
5196		rc_notify_info_insert_locked(rnip);
5197
5198	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5199	return (REP_PROTOCOL_SUCCESS);
5200}
5201
5202int
5203rc_notify_info_add_name(rc_notify_info_t *rnip, const char *name)
5204{
5205	return (rc_notify_info_add_watch(rnip, rnip->rni_namelist, name));
5206}
5207
5208int
5209rc_notify_info_add_type(rc_notify_info_t *rnip, const char *type)
5210{
5211	return (rc_notify_info_add_watch(rnip, rnip->rni_typelist, type));
5212}
5213
5214/*
5215 * Wait for and report an event of interest to rnip, a notification client
5216 */
5217int
5218rc_notify_info_wait(rc_notify_info_t *rnip, rc_node_ptr_t *out,
5219    char *outp, size_t sz)
5220{
5221	rc_notify_t *np;
5222	rc_notify_t *me = &rnip->rni_notify;
5223	rc_node_t *nnp;
5224	rc_notify_delete_t *ndp;
5225
5226	int am_first_info;
5227
5228	if (sz > 0)
5229		outp[0] = 0;
5230
5231	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5232
5233	while ((rnip->rni_flags & (RC_NOTIFY_ACTIVE | RC_NOTIFY_DRAIN)) ==
5234	    RC_NOTIFY_ACTIVE) {
5235		/*
5236		 * If I'm first on the notify list, it is my job to
5237		 * clean up any notifications I pass by.  I can't do that
5238		 * if someone is blocking the list from removals, so I
5239		 * have to wait until they have all drained.
5240		 */
5241		am_first_info = (uu_list_first(rc_notify_list) == me);
5242		if (am_first_info && rc_notify_in_use) {
5243			rnip->rni_waiters++;
5244			(void) pthread_cond_wait(&rc_pg_notify_cv,
5245			    &rc_pg_notify_lock);
5246			rnip->rni_waiters--;
5247			continue;
5248		}
5249
5250		/*
5251		 * Search the list for a node of interest.
5252		 */
5253		np = uu_list_next(rc_notify_list, me);
5254		while (np != NULL && !rc_notify_info_interested(rnip, np)) {
5255			rc_notify_t *next = uu_list_next(rc_notify_list, np);
5256
5257			if (am_first_info) {
5258				if (np->rcn_info) {
5259					/*
5260					 * Passing another client -- stop
5261					 * cleaning up notifications
5262					 */
5263					am_first_info = 0;
5264				} else {
5265					rc_notify_remove_locked(np);
5266				}
5267			}
5268			np = next;
5269		}
5270
5271		/*
5272		 * Nothing of interest -- wait for notification
5273		 */
5274		if (np == NULL) {
5275			rnip->rni_waiters++;
5276			(void) pthread_cond_wait(&rnip->rni_cv,
5277			    &rc_pg_notify_lock);
5278			rnip->rni_waiters--;
5279			continue;
5280		}
5281
5282		/*
5283		 * found something to report -- move myself after the
5284		 * notification and process it.
5285		 */
5286		(void) uu_list_remove(rc_notify_list, me);
5287		(void) uu_list_insert_after(rc_notify_list, np, me);
5288
5289		if ((ndp = np->rcn_delete) != NULL) {
5290			(void) strlcpy(outp, ndp->rnd_fmri, sz);
5291			if (am_first_info)
5292				rc_notify_remove_locked(np);
5293			(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5294			rc_node_clear(out, 0);
5295			return (REP_PROTOCOL_SUCCESS);
5296		}
5297
5298		nnp = np->rcn_node;
5299		assert(nnp != NULL);
5300
5301		/*
5302		 * We can't bump nnp's reference count without grabbing its
5303		 * lock, and rc_pg_notify_lock is a leaf lock.  So we
5304		 * temporarily block all removals to keep nnp from
5305		 * disappearing.
5306		 */
5307		rc_notify_in_use++;
5308		assert(rc_notify_in_use > 0);
5309		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5310
5311		rc_node_assign(out, nnp);
5312
5313		(void) pthread_mutex_lock(&rc_pg_notify_lock);
5314		assert(rc_notify_in_use > 0);
5315		rc_notify_in_use--;
5316		if (am_first_info)
5317			rc_notify_remove_locked(np);
5318		if (rc_notify_in_use == 0)
5319			(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5320		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5321
5322		return (REP_PROTOCOL_SUCCESS);
5323	}
5324	/*
5325	 * If we're the last one out, let people know it's clear.
5326	 */
5327	if (rnip->rni_waiters == 0)
5328		(void) pthread_cond_broadcast(&rnip->rni_cv);
5329	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5330	return (REP_PROTOCOL_DONE);
5331}
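
/*
 * Usage sketch (illustrative only; real notification clients are driven
 * from client.c, rc_node_ptr_t initialization is elided, and the buffer
 * size and "general" name are arbitrary):
 *
 *	rc_notify_info_t info;
 *	rc_node_ptr_t pg;
 *	char fmri[1024];
 *
 *	rc_notify_info_init(&info);
 *	(void) rc_notify_info_add_name(&info, "general");
 *	while (rc_notify_info_wait(&info, &pg, fmri, sizeof (fmri)) ==
 *	    REP_PROTOCOL_SUCCESS) {
 *		... pg identifies a changed property group, or fmri names
 *		    a deleted entity ...
 *	}
 *	rc_notify_info_fini(&info);
 *
 * rc_notify_info_wait() returns REP_PROTOCOL_DONE once the info structure
 * is no longer active (e.g. it is being reset or torn down).
 */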
5332
5333static void
5334rc_notify_info_reset(rc_notify_info_t *rnip)
5335{
5336	int i;
5337
5338	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5339	if (rnip->rni_flags & RC_NOTIFY_ACTIVE)
5340		rc_notify_info_remove_locked(rnip);
5341	assert(!(rnip->rni_flags & (RC_NOTIFY_DRAIN | RC_NOTIFY_EMPTYING)));
5342	rnip->rni_flags |= RC_NOTIFY_EMPTYING;
5343	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5344
5345	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5346		if (rnip->rni_namelist[i] != NULL) {
5347			free((void *)rnip->rni_namelist[i]);
5348			rnip->rni_namelist[i] = NULL;
5349		}
5350		if (rnip->rni_typelist[i] != NULL) {
5351			free((void *)rnip->rni_typelist[i]);
5352			rnip->rni_typelist[i] = NULL;
5353		}
5354	}
5355
5356	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5357	rnip->rni_flags &= ~RC_NOTIFY_EMPTYING;
5358	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5359}
5360
5361void
5362rc_notify_info_fini(rc_notify_info_t *rnip)
5363{
5364	rc_notify_info_reset(rnip);
5365
5366	uu_list_node_fini(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5367	uu_list_node_fini(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5368	    rc_notify_pool);
5369}
5370