idn_smr.c revision 11066:cebb50cbe4f9
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Inter-Domain Network
 *
 * Shared Memory Region (SMR) supporting code.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/debug.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/machlock.h>
#include <sys/membar.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <sys/x_call.h>

#include <sys/idn.h>

#ifdef DEBUG
#define	DIOCHECK(domid) \
{ \
	int	_dio; \
	if ((_dio = idn_domain[domid].dio) < 0) { \
		cmn_err(CE_WARN, \
			">>>>> file %s, line %d: domain %d, dio = %d", \
			__FILE__, __LINE__, (domid), _dio); \
	} \
}
#else
#define	DIOCHECK(domid)
#endif /* DEBUG */

static int	smr_slab_alloc_local(int domid, smr_slab_t **spp);
static int	smr_slab_alloc_remote(int domid, smr_slab_t **spp);
static void	smr_slab_free_local(int domid, smr_slab_t *sp);
static void	smr_slab_free_remote(int domid, smr_slab_t *sp);
static int 	smr_slabwaiter_register(int domid);
static int 	smr_slabwaiter_unregister(int domid, smr_slab_t **spp);
static int 	smr_slaballoc_wait(int domid, smr_slab_t **spp);
static smr_slab_t 	*smr_slab_reserve(int domid);
static void 	smr_slab_unreserve(int domid, smr_slab_t *sp);
static void	smr_slab_reap_global();

/*
 * Can only be called by the master.  Allocate a slab from the
 * local pool representing the SMR, on behalf of the given
 * domain.  Slab is either being requested for use by the
 * local domain (i.e. domid == idn.localid), or it's being
 * allocated to give to a remote domain which requested one.
 * In the case of allocating on behalf of a remote domain, the
 * smr_slab_t structure is used simply to manage ownership.
 *
 * Returns:	smr_slaballoc_wait
 * 		(EINVAL, ETIMEDOUT)
 *		smr_slabwaiter_unregister
 *		(0, EINVAL, EBUSY, ENOMEM)
 *		ENOLCK
 */
static int
smr_slab_alloc_local(int domid, smr_slab_t **spp)
{
	int		serrno = 0;
	int		nwait;
	smr_slab_t	*sp;
	idn_domain_t	*dp;


	/*
	 * Only the master can make local allocations.
	 */
	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
	ASSERT(idn.localid == IDN_GET_MASTERID());

	*spp = NULL;

	dp = &idn_domain[domid];
	ASSERT(DSLAB_READ_HELD(domid));
	ASSERT(dp->dslab_state == DSLAB_STATE_LOCAL);

	/*
	 * Register myself with the waiting list.
	 */
	nwait = smr_slabwaiter_register(domid);

	if (nwait > 1) {
		/*
		 * XXX - old comment?
		 * Need to drop the read lock _after_ registering
		 * ourselves with the potential wait list for this allocation.
		 * Although this allocation is not a remote one, we could
		 * still have multiple threads on the master trying to
		 * satisfy (allocate) request on behalf of a remote domain.
		 */
		/*
		 * Somebody is already in the process of satisfying
		 * the allocation request for this respective
		 * domain.  All we need to do is wait and let
		 * it happen.
		 */
		serrno = smr_slaballoc_wait(domid, spp);
		return (serrno);
	}
	/*
	 * I'm the original slab requester for this domain.  It's local
	 * so go ahead and do the job.
	 */

	if ((sp = smr_slab_reserve(domid)) == NULL)
		serrno = ENOMEM;

	/*
	 * Allocation may have failed.  In either case we've
	 * got to do the put to at least wake potential waiters up.
	 */
	if (!serrno) {
		if (DSLAB_LOCK_TRYUPGRADE(domid) == 0) {
			DSLAB_UNLOCK(domid);
			DSLAB_LOCK_EXCL(domid);
		}
	}

	(void) smr_slaballoc_put(domid, sp, 0, serrno);

	/*
	 * If serrno is ENOLCK here, then we must have failed
	 * on the upgrade above, so lock already dropped.
	 */
	if (serrno != ENOLCK) {
		/*
		 * Need to drop since reaping may be recursive?
		 */
		DSLAB_UNLOCK(domid);
	}

	/*
	 * Since we were the original requester but never went
	 * to sleep, we need to directly unregister ourselves
	 * from the waiting list.
	 */
	serrno = smr_slabwaiter_unregister(domid, spp);

	/*
	 * Now that we've satisfied the request, let's check if any
	 * reaping is necessary.  Only the master does this and only
	 * when allocating slabs, an infrequent event :-o
	 */
	smr_slab_reap_global();

	ASSERT((serrno == 0) ? (*spp != NULL) : (*spp == NULL));

	DSLAB_LOCK_SHARED(domid);

	return (serrno);
}

/*
 * Can only be called by a slave on behalf of himself.  Need to
 * make a request to the master to allocate a slab of SMR buffers
 * for the local domain.
 *
 * Returns:	smr_slaballoc_wait
 *		(0, EINVAL, EBUSY, ENOMEM)
 *		ENOLCK
 *		ECANCELED
 */
static int
smr_slab_alloc_remote(int domid, smr_slab_t **spp)
{
	int		nwait;
	int		serrno = 0;
	int		bailout = 0;
	int		masterid;
	idn_domain_t	*dp, *mdp = NULL;
	procname_t	proc = "smr_slab_alloc_remote";

	/*
	 * Only slaves make remote allocations.
	 */
	ASSERT(idn.localid != IDN_GET_MASTERID());
	ASSERT(domid == idn.localid);
	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);

	*spp = NULL;

	dp = &idn_domain[domid];
	ASSERT(DSLAB_READ_HELD(domid));
	ASSERT(dp->dslab_state == DSLAB_STATE_REMOTE);

	/*
	 * Register myself with the slaballoc waiting list.
	 * Note that we only allow one outstanding allocation
	 * request for the given domain.  Other callers which
	 * detect a slab is needed simply get stuck on the
	 * waiting list waiting for the original caller to
	 * get the job done.
	 * The waiter_register routine will allocate the necessary
	 * slab structure which will ultimately be inserted in
	 * the domain's slab list via smr_slaballoc_put().
	 */
	nwait = smr_slabwaiter_register(domid);

	/*
	 * Make sure we have a connection with the master
	 * before we wait around for nothing and send a
	 * command off to nowhere.
	 * First do a quick (no lock) check for global okayness.
	 */
	if ((idn.state != IDNGS_ONLINE) ||
	    ((masterid = IDN_GET_MASTERID()) == IDN_NIL_DOMID)) {
		bailout = 1;
		serrno = ECANCELED;
	}
	/*
	 * We need to drop our read lock _before_ acquiring the
	 * slaballoc waiter lock.  This is necessary because the
	 * thread that receives the slab alloc response and fills
	 * in the slab structure will need to grab the domain write
	 * lock while holding onto the slaballoc waiter lock.
	 * Potentially could deadlock if we didn't drop our domain
	 * lock before.  Plus, we've registered.
	 *
	 * 4093209 - Note also that we do this _after_ the check for
	 *	idn.masterid where we grab the READER global
	 *	lock.  This is to prevent somebody from
	 *	changing our state after we drop the drwlock.
	 *	A deadlock can occur when shutting down a
	 *	domain we're holding the
	 */

	if (!bailout) {
		mdp = &idn_domain[masterid];
		/*
		 * Global state is okay.  Let's double check the
		 * state of our actual target domain.
		 */
		if (mdp->dstate != IDNDS_CONNECTED) {
			bailout = 1;
			serrno = ECANCELED;
		} else if (IDN_DLOCK_TRY_SHARED(masterid)) {
			if (mdp->dstate != IDNDS_CONNECTED) {
				bailout = 1;
				serrno = ECANCELED;
				IDN_DUNLOCK(masterid);
			} else if (nwait != 1) {
				IDN_DUNLOCK(masterid);
			}
			/*
			 * Note that we keep the drwlock(read) for
			 * the target (master) domain if it appears
			 * we're the lucky one to send the command.
			 * We hold onto the lock until we've actually
			 * sent the command out.
			 * We don't reach this place unless it
			 * appears everything is kosher with
			 * the target (master) domain.
			 */
		} else {
			bailout = 1;
			serrno = ENOLCK;
		}
	}

	if (bailout) {
		ASSERT(serrno);
		/*
		 * Gotta bail.  Abort operation.  Error result
		 * will be picked up when we attempt to wait.
		 */
		PR_SMR("%s: BAILING OUT on behalf of domain %d "
		    "(err=%d, gs=%s, ms=%s)\n",
		    proc, domid, serrno, idngs_str[idn.state],
		    (masterid == IDN_NIL_DOMID)
		    ? "unknown" : idnds_str[idn_domain[masterid].dstate]);
		(void) smr_slabwaiter_abort(domid, serrno);

	} else if (nwait == 1) {
		/*
		 * We are the original requester.  Initiate the
		 * actual request to the master.
		 */
		idn_send_cmd(masterid, IDNCMD_SLABALLOC, IDN_SLAB_SIZE, 0, 0);
		ASSERT(mdp);
		IDN_DUNLOCK(masterid);
	}

	/*
	 * Wait here for response.  Once awakened func returns
	 * with slab structure possibly filled with gifts!
	 */
	serrno = smr_slaballoc_wait(domid, spp);

	return (serrno);
}

/*
 * Allocate a slab from the Master on behalf
 * of the given domain.  Note that master uses
 * this function to allocate slabs on behalf of
 * remote domains also.
 * Entered with drwlock held.
 * Leaves with drwlock dropped.
 * Returns:	EDQUOT
 *		EINVAL
 *		ENOLCK
 *		smr_slab_alloc_local
 *		smr_slab_alloc_remote
 *		(0, EINVAL, EBUSY, ENOMEM)
 */
int
smr_slab_alloc(int domid, smr_slab_t **spp)
{
	int		serrno = 0;
	idn_domain_t	*dp;
	procname_t	proc = "smr_slab_alloc";


	dp = &idn_domain[domid];

	ASSERT(DSLAB_READ_HELD(domid));
	ASSERT(dp->dslab_state != DSLAB_STATE_UNKNOWN);

	*spp = NULL;

	switch (dp->dslab_state) {
	case DSLAB_STATE_UNKNOWN:
		cmn_err(CE_WARN,
		    "IDN: 300: no slab allocations without a master");
		serrno = EINVAL;
		break;

	case DSLAB_STATE_LOCAL:
		/*
		 * If I'm the master, then get a slab
		 * from the local SMR pool, but only
		 * if the number of allocated slabs has
		 * not been exceeded.
		 */
		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
		    !IDN_SLAB_MAXPERDOMAIN)
			serrno = smr_slab_alloc_local(domid, spp);
		else
			serrno = EDQUOT;
		break;

	case DSLAB_STATE_REMOTE:
		/*
		 * Have to make a remote request.
		 * In order to prevent overwhelming the master
		 * with a bunch of requests that he won't be able
		 * to handle we do a check to see if we're still
		 * under quota.  Note that the limit is known
		 * apriori based on the SMR/NWR size and
		 * IDN_SLAB_MINTOTAL.  Domains must have the same
		 * size SMR/NWR, however they can have different
		 * IDN_SLAB_MINTOTAL.  Thus a domain could throttle
		 * itself however it wishes.
		 */
		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
		    !IDN_SLAB_MAXPERDOMAIN)
			serrno = smr_slab_alloc_remote(domid, spp);
		else
			serrno = EDQUOT;
		break;

	default:
		cmn_err(CE_WARN,
		    "IDN: 301: (ALLOC) unknown slab state (%d) "
		    "for domain %d", dp->dslab_state, domid);
		serrno = EINVAL;
		break;
	}

	if (*spp == NULL) {
		PR_SMR("%s: failed to allocate %s slab [serrno = %d]\n",
		    proc, (idn.localid == IDN_GET_MASTERID()) ?
		    "local" : "remote", serrno);
	}

	if (serrno) {
		IDN_GKSTAT_GLOBAL_EVENT(gk_slabfail, gk_slabfail_last);
	}

	return (serrno);
}
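
/*
 * Usage sketch (illustrative only, not part of the driver): a caller
 * requesting a slab for domain "domid" under the dslab reader lock,
 * mirroring how smr_buf_alloc() below drives this routine.  Note that
 * smr_slab_alloc() may drop and reacquire that lock internally, so the
 * caller should revalidate domain state afterwards.
 *
 *	smr_slab_t	*sp = NULL;
 *	int		serrno;
 *
 *	DSLAB_LOCK_SHARED(domid);
 *	serrno = smr_slab_alloc(domid, &sp);
 *	if (serrno == EDQUOT) {
 *		... domain already holds IDN_SLAB_MAXPERDOMAIN slabs,
 *		    back off rather than retry immediately ...
 *	}
 *	DSLAB_UNLOCK(domid);
 */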

static void
smr_slab_free_local(int domid, smr_slab_t *sp)
{
	int	rv;

	/*
	 * Do a slaballoc_put just in case there may have
	 * been waiters for slabs for this respective domain
	 * before we unreserve this slab.
	 */
	rv = smr_slaballoc_put(domid, sp, 0, 0);

	if (rv == -1) {
		/*
		 * Put failed.  Must not have been any waiters.
		 * Go ahead and unreserve the space.
		 */
		smr_slab_unreserve(domid, sp);
	}
}

static void
smr_slab_free_remote(int domid, smr_slab_t *sp)
{
	smr_offset_t	slab_offset;
	int		slab_size;
	int		rv;
	int		masterid;

	ASSERT(domid == idn.localid);
	ASSERT(idn.localid != IDN_GET_MASTERID());
	ASSERT(DSLAB_WRITE_HELD(domid));
	ASSERT(idn_domain[domid].dslab_state == DSLAB_STATE_REMOTE);

	masterid = IDN_GET_MASTERID();

	ASSERT(masterid != IDN_NIL_DOMID);

	slab_offset = IDN_ADDR2OFFSET(sp->sl_start);
	slab_size   = (int)(sp->sl_end - sp->sl_start);

	/*
	 * Do a slaballoc_put just in case there may have
	 * been waiters for slabs for this domain before
	 * returning back to the master.
	 */
	rv = smr_slaballoc_put(domid, sp, 0, 0);

	if ((rv == -1) && (masterid != IDN_NIL_DOMID)) {
		/*
		 * Put failed.  No waiters so free the local data
		 * structure and ship the SMR range off to the master.
		 */
		smr_free_buflist(sp);
		FREESTRUCT(sp, smr_slab_t, 1);

		IDN_DLOCK_SHARED(masterid);
		idn_send_cmd(masterid, IDNCMD_SLABFREE, slab_offset, slab_size,
		    0);
		IDN_DUNLOCK(masterid);
	}
}

/*
 * Free up the list of slabs passed
 */
void
smr_slab_free(int domid, smr_slab_t *sp)
{
	smr_slab_t	*nsp = NULL;

	ASSERT(DSLAB_WRITE_HELD(domid));

	if (sp == NULL)
		return;

	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);

	switch (idn_domain[domid].dslab_state) {
	case DSLAB_STATE_UNKNOWN:
		cmn_err(CE_WARN, "IDN: 302: no slab free without a master");
		break;

	case DSLAB_STATE_LOCAL:
		/*
		 * If I'm the master then put the slabs
		 * back to the local SMR pool.
		 */
		for (; sp; sp = nsp) {
			nsp = sp->sl_next;
			smr_slab_free_local(domid, sp);
		}
		break;

	case DSLAB_STATE_REMOTE:
		/*
		 * If the domid is my own then I'm freeing
		 * a slab back to the Master.
		 */
		for (; sp; sp = nsp) {
			nsp = sp->sl_next;
			smr_slab_free_remote(domid, sp);
		}
		break;

	default:
		cmn_err(CE_WARN,
		    "IDN: 301: (FREE) unknown slab state (%d) for domain %d",
		    idn_domain[domid].dslab_state, domid);
		break;
	}
}

/*
 * Free up the list of slab data structures ONLY.
 * This is called during a fatal shutdown of the master
 * where we need to garbage collect the locally allocated
 * data structures used to manage slabs allocated to the
 * local domain.  Should never be called by a master since
 * the master can do a regular smr_slab_free.
 */
void
smr_slab_garbage_collection(smr_slab_t *sp)
{
	smr_slab_t	*nsp;

	ASSERT(idn_domain[idn.localid].dvote.v.master == 0);

	if (sp == NULL)
		return;
	/*
	 * Since this is only ever called by a slave,
	 * the slab structure always contains a buflist.
	 */
	for (; sp; sp = nsp) {
		nsp = sp->sl_next;
		smr_free_buflist(sp);
		FREESTRUCT(sp, smr_slab_t, 1);
	}
}

/*
 * Allocate a SMR buffer on behalf of the local domain
 * which is ultimately targeted for the given domain.
 *
 * IMPORTANT: This routine is going to drop the domain rwlock (drwlock)
 *	      for the domain on whose behalf the request is being
 *	      made.  This routine canNOT block on trying to
 *	      reacquire the drwlock.  If he does block then somebody
 *	      must have the write lock on the domain which most likely
 *	      means the domain is going south anyway, so just bail on
 *	      this buffer.  Higher levels will retry if needed.
 *
 * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
 *
 * Returns:	A negative return value indicates lock lost on domid.
 *		EINVAL, ENOLINK, ENOLCK(internal)
 *		smr_slaballoc_wait
 * 		(EINVAL, ETIMEDOUT)
 *		smr_slabwaiter_unregister
 *		(0, EINVAL, EBUSY, ENOMEM)
 */
int
smr_buf_alloc(int domid, uint_t len, caddr_t *bufpp)
{
	register idn_domain_t	*dp, *ldp;
	smr_slab_t	*sp;
	caddr_t		bufp = NULL;
	int		serrno;
	procname_t	proc = "smr_buf_alloc";

	dp = &idn_domain[domid];
	/*
	 * Local domain can only allocate on behalf of
	 * itself if this is a privileged call and the
	 * caller is the master.
	 */
	ASSERT((domid != idn.localid) && (domid != IDN_NIL_DOMID));

	*bufpp = NULL;

	if (len > IDN_DATA_SIZE) {
		cmn_err(CE_WARN,
		    "IDN: 303: buffer len %d > IDN_DATA_SIZE (%lu)",
		    len, IDN_DATA_SIZE);
		IDN_GKSTAT_GLOBAL_EVENT(gk_buffail, gk_buffail_last);
		return (EINVAL);
	}

	/*
	 * Need to go to my local slab list to find
	 * a buffer.
	 */
	ldp = &idn_domain[idn.localid];
	/*
	 * Now we loop trying to locate a buffer out of our
	 * slabs.  We continue this until either we find a
	 * buffer or we're unable to allocate a slab.  Note
	 * that new slabs are allocated to the front.
	 */
	DSLAB_LOCK_SHARED(idn.localid);
	sp = ldp->dslab;
	do {
		int	spl, all_empty;

		if (sp == NULL) {
			if ((serrno = smr_slab_alloc(idn.localid, &sp)) != 0) {
				PR_SMR("%s:%d: failed to allocate "
				    "slab [serrno = %d]",
				    proc, domid, serrno);
				DSLAB_UNLOCK(idn.localid);
				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
				    gk_buffail_last);
				return (serrno);
			}
			/*
			 * Of course, the world may have changed while
			 * we dropped the lock.  Better make sure we're
			 * still established.
			 */
			if (dp->dstate != IDNDS_CONNECTED) {
				PR_SMR("%s:%d: state changed during slab "
				    "alloc (dstate = %s)\n",
				    proc, domid, idnds_str[dp->dstate]);
				DSLAB_UNLOCK(idn.localid);
				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
				    gk_buffail_last);
				return (ENOLINK);
			}
			/*
			 * We were able to allocate a slab.  Should
			 * be at the front of the list, spin again.
			 */
			sp = ldp->dslab;
		}
		/*
		 * If we have reached here then we have a slab!
		 * Hopefully there are free bufs there :-o
		 */
		spl = splhi();
		all_empty = 1;
		for (; sp && !bufp; sp = sp->sl_next) {
			smr_slabbuf_t	*bp;

			if (sp->sl_free == NULL)
				continue;

			if (!lock_try(&sp->sl_lock)) {
				all_empty = 0;
				continue;
			}

			if ((bp = sp->sl_free) == NULL) {
				lock_clear(&sp->sl_lock);
				continue;
			}

			sp->sl_free = bp->sb_next;
			bp->sb_next = sp->sl_inuse;
			sp->sl_inuse = bp;
			/*
			 * Found a free buffer.
			 */
			bp->sb_domid = domid;
			bufp = bp->sb_bufp;
			lock_clear(&sp->sl_lock);
		}
		splx(spl);

		if (!all_empty && !bufp) {
			/*
			 * If we still haven't found a buffer, but
			 * there's still possibly a buffer available,
			 * then try again.  Only if we're absolutely
			 * sure all slabs are empty do we attempt
			 * to allocate a new one.
			 */
			sp = ldp->dslab;
		}
	} while (bufp == NULL);

	*bufpp = bufp;

	ATOMIC_INC(dp->dio);

	DSLAB_UNLOCK(idn.localid);

	return (0);
}
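
/*
 * Usage sketch (illustrative only): the transmit-side pairing of
 * smr_buf_alloc()/smr_buf_free().  A buffer is carved out of the local
 * slab list on behalf of target domain "domid", filled with at most
 * IDN_DATA_SIZE bytes, and returned to its slab if the send cannot be
 * completed.  The "send" step here merely stands in for the real
 * transmit path.
 *
 *	caddr_t	bufp;
 *
 *	if (smr_buf_alloc(domid, len, &bufp) != 0)
 *		return;				... no buffer, bail ...
 *	bcopy(data, bufp, len);
 *	if (... send to domid fails ...)
 *		(void) smr_buf_free(domid, bufp, len);
 */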

/*
 * Free a buffer allocated to the local domain back to
 * its respective slab.  Slabs are freed via the slab-reap command.
 * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
 */
int
smr_buf_free(int domid, caddr_t bufp, uint_t len)
{
	register smr_slab_t	*sp;
	smr_slabbuf_t		*bp, **bpp;
	idn_domain_t		*ldp;
	int		buffreed;
	int		lockheld = (len == (uint_t)-1);

	/*
	 * We should never be free'ing a buffer on
	 * behalf of ourselves as we are never the
	 * target for allocated SMR buffers.
	 */
	ASSERT(domid != idn.localid);

	sp = NULL;
	buffreed = 0;
	ldp = &idn_domain[idn.localid];

	DSLAB_LOCK_SHARED(idn.localid);

	if (((uintptr_t)bufp & (IDN_SMR_BUFSIZE-1)) &&
	    (IDN_ADDR2OFFSET(bufp) % IDN_SMR_BUFSIZE)) {
		cmn_err(CE_WARN,
		    "IDN: 304: buffer (0x%p) from domain %d not on a "
		    "%d boundary", bufp, domid, IDN_SMR_BUFSIZE);
		goto bfdone;
	}
	if (!lockheld && (len > IDN_DATA_SIZE)) {
		cmn_err(CE_WARN,
		    "IDN: 305: buffer length (%d) from domain %d greater "
		    "than IDN_DATA_SIZE (%lu)",
		    len, domid, IDN_DATA_SIZE);
		goto bfdone;
	}

	for (sp = ldp->dslab; sp; sp = sp->sl_next)
		if ((bufp >= sp->sl_start) && (bufp < sp->sl_end))
			break;

	if (sp) {
		int spl;

		spl = splhi();
		while (!lock_try(&sp->sl_lock))
			;
		bpp = &sp->sl_inuse;
		for (bp = *bpp; bp; bp = *bpp) {
			if (bp->sb_bufp == bufp)
				break;
			bpp = &bp->sb_next;
		}
		if (bp) {
			ASSERT(bp->sb_domid == domid);
			buffreed++;
			bp->sb_domid = IDN_NIL_DOMID;
			*bpp = bp->sb_next;
			bp->sb_next = sp->sl_free;
			sp->sl_free = bp;
		}
		lock_clear(&sp->sl_lock);
		splx(spl);
	}
bfdone:
	if (buffreed) {
		ATOMIC_DEC(idn_domain[domid].dio);
		DIOCHECK(domid);
	} else {
		cmn_err(CE_WARN,
		    "IDN: 306: unknown buffer (0x%p) from domain %d",
		    bufp, domid);
		ATOMIC_INC(idn_domain[domid].dioerr);
	}

	DSLAB_UNLOCK(idn.localid);

	return (sp ? 0 : -1);
}

/*
 * Alternative interface to smr_buf_free, but with local drwlock
 * held.
 */
/* ARGSUSED2 */
int
smr_buf_free_locked(int domid, caddr_t bufp, uint_t len)
{
	return (smr_buf_free(domid, bufp, (uint_t)-1));
}

/*
 * Free any and all buffers associated with the given domain.
 * Assumption is that domain is dead and buffers are not in use.
 * Returns:	Number of buffers freed.
 *		-1 if error.
 */
int
smr_buf_free_all(int domid)
{
	register smr_slab_t	*sp;
	register smr_slabbuf_t	*bp, **bpp;
	idn_domain_t		*ldp;
	int			nbufsfreed = 0;
	procname_t	proc = "smr_buf_free_all";

	/*
	 * We should never be free'ing buffers on
	 * behalf of ourself
	 */
	ASSERT(domid != idn.localid);

	if (!VALID_DOMAINID(domid)) {
		cmn_err(CE_WARN, "IDN: 307: domain ID (%d) invalid", domid);
		return (-1);
	}

	ldp = &idn_domain[idn.localid];

	/*
	 * We grab the writer lock so that we don't have any
	 * competition during a "free-all" call.
	 * No need to grab individual slab locks when holding
	 * dslab(writer).
	 */
	DSLAB_LOCK_EXCL(idn.localid);

	for (sp = ldp->dslab; sp; sp = sp->sl_next) {
		bpp = &sp->sl_inuse;
		for (bp = *bpp; bp; bp = *bpp) {
			if (bp->sb_domid == domid) {
				bp->sb_domid = IDN_NIL_DOMID;
				*bpp = bp->sb_next;
				bp->sb_next = sp->sl_free;
				sp->sl_free = bp;
				nbufsfreed++;
			} else {
				bpp = &bp->sb_next;
			}
		}
	}

	if (nbufsfreed > 0) {
		ATOMIC_SUB(idn_domain[domid].dio, nbufsfreed);
		idn_domain[domid].dioerr = 0;
		DIOCHECK(domid);
	}

	DSLAB_UNLOCK(idn.localid);

	PR_SMR("%s: freed %d buffers for domain %d\n", proc, nbufsfreed, domid);

	return (nbufsfreed);
}

int
smr_buf_reclaim(int domid, int nbufs)
{
	int		num_reclaimed = 0;
	idn_domain_t	*ldp, *dp;
	procname_t	proc = "smr_buf_reclaim";

	ldp = &idn_domain[idn.localid];
	dp  = &idn_domain[domid];

	ASSERT(domid != idn.localid);

	if (ATOMIC_CAS(&dp->dreclaim_inprogress, 0, 1)) {
		/*
		 * Reclaim is already in progress, don't
		 * bother.
		 */
		PR_DATA("%s: reclaim already in progress\n", proc);
		return (0);
	}

	PR_SMR("%s: requested %d buffers from domain %d\n", proc, nbufs, domid);

	if (dp->dio && nbufs) {
		register smr_slab_t	*sp;
		int spl;

		DSLAB_LOCK_SHARED(idn.localid);
		spl = splhi();
		for (sp = ldp->dslab; sp && nbufs; sp = sp->sl_next) {
			register smr_slabbuf_t	*bp, **bpp;

			if (sp->sl_inuse == NULL)
				continue;

			if (!lock_try(&sp->sl_lock))
				continue;

			if (sp->sl_inuse == NULL) {
				lock_clear(&sp->sl_lock);
				continue;
			}

			bpp = &sp->sl_inuse;
			for (bp = *bpp; bp && nbufs; bp = *bpp) {
				if (bp->sb_domid == domid) {
					/*
					 * Buffer no longer in use,
					 * reclaim it.
					 */
					bp->sb_domid = IDN_NIL_DOMID;
					*bpp = bp->sb_next;
					bp->sb_next = sp->sl_free;
					sp->sl_free = bp;
					num_reclaimed++;
					nbufs--;
				} else {
					bpp = &bp->sb_next;
				}
			}
			lock_clear(&sp->sl_lock);
		}
		splx(spl);

		if (num_reclaimed > 0) {
			ATOMIC_SUB(dp->dio, num_reclaimed);
			DIOCHECK(domid);
		}
		DSLAB_UNLOCK(idn.localid);
	}

	PR_SMR("%s: reclaimed %d buffers from domain %d\n",
	    proc, num_reclaimed, domid);

	return (num_reclaimed);
}

/*
 * Returns 1	If any buffers are locked for the given slab.
 *	   0	If all buffers are free for the given slab.
 *
 * The caller is assumed to have the slab protected so that no
 * new allocations are attempted from it.  Also, this is only
 * valid to be called with respect to slabs that were allocated
 * on behalf of the local domain, i.e. the master is not expected
 * to call this function with (slave) slab "representatives".
 */
int
smr_slab_busy(smr_slab_t *sp)
{
	return ((sp && sp->sl_inuse) ? 1 : 0);
}

int
smr_slabwaiter_init()
{
	register int		i;
	struct slabwaiter	*wp;

	if (idn.slabwaiter != NULL)
		return (0);

	/*
	 * Initialize the slab waiting area for MAX_DOMAINS.
	 */
	idn.slabwaiter = GETSTRUCT(struct slabwaiter, MAX_DOMAINS);
	wp = idn.slabwaiter;
	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
		wp->w_closed = 0;
		mutex_init(&wp->w_mutex, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&wp->w_cv, NULL, CV_DEFAULT, NULL);
	}

	return (0);
}

void
smr_slabwaiter_deinit()
{
	register int		i;
	struct slabwaiter	*wp;

	if ((wp = idn.slabwaiter) == NULL)
		return;

	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
		ASSERT(wp->w_nwaiters == 0);
		ASSERT(wp->w_sp == NULL);
		cv_destroy(&wp->w_cv);
		mutex_destroy(&wp->w_mutex);
	}

	FREESTRUCT(idn.slabwaiter, struct slabwaiter, MAX_DOMAINS);
	idn.slabwaiter = NULL;
}

void
smr_slabwaiter_open(domainset_t domset)
{
	int			d;
	struct slabwaiter	*wp;

	if ((domset == 0) || !idn.slabwaiter)
		return;

	wp = idn.slabwaiter;

	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
		if (!DOMAIN_IN_SET(domset, d))
			continue;
		mutex_enter(&wp->w_mutex);
		wp->w_closed = 0;
		mutex_exit(&wp->w_mutex);
	}
}

void
smr_slabwaiter_close(domainset_t domset)
{
	int			d;
	struct slabwaiter	*wp;

	if ((domset == 0) || !idn.slabwaiter)
		return;

	wp = idn.slabwaiter;

	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
		if (!DOMAIN_IN_SET(domset, d))
			continue;
		mutex_enter(&wp->w_mutex);
		wp->w_closed = 1;
		cv_broadcast(&wp->w_cv);
		mutex_exit(&wp->w_mutex);
	}
}

/*
 * Register the caller with the waiting list for the
 * given domain.
 *
 * Protocol:
 *	1st Local requester:	register -> alloc ->
 *						put(wakeup|xdc) -> unregister
 *	Nth Local requester:	register -> wait
 *	1st Remote requester:	register -> xdc -> wait
 *	Nth Remote requester:	register -> wait
 *
 *	Remote Responder:	local alloc -> put(xdc)
 *	Local Handler:		xdc -> put(wakeup)
 *
 * E.g. A standard slave allocation request:
 *	slave			master
 *	-----			------
 *	idn_slab_alloc(remote)
 *	- register
 *	- xdc		->	idn_handler
 *	- wait			...
 *				idn_slab_alloc(local)
 *				- register
 *				- alloc
 *				- put
 *				  . wakeup [local]
 *				- unregister
 *	idn_handler    	<-	- xdc
 *	- put       		DONE
 *	  . wakeup [local]
 *	    |
 *	    V
 *      - wait
 *	  . unregister
 *	DONE
 */
static int
smr_slabwaiter_register(int domid)
{
	struct slabwaiter	*wp;
	int		nwait;
	procname_t	proc = "smr_slabwaiter_register";


	ASSERT(domid != IDN_NIL_DOMID);

	ASSERT(DSLAB_READ_HELD(domid));

	wp = &idn.slabwaiter[domid];

	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));

	mutex_enter(&wp->w_mutex);

	nwait = ++(wp->w_nwaiters);
	ASSERT(nwait > 0);

	PR_SMR("%s: domain = %d, (new)nwaiters = %d\n", proc, domid, nwait);

	if (nwait > 1) {
		/*
		 * There are already waiters for slab allocations
		 * with respect to this domain.
		 */
		PR_SMR("%s: existing waiters for slabs for domain %d\n",
		    proc, domid);
		mutex_exit(&wp->w_mutex);

		return (nwait);
	}
	PR_SMR("%s: initial waiter for slabs for domain %d\n", proc, domid);
	/*
	 * We are the first requester of a slab allocation for this
	 * respective domain.  Need to prep waiting area for
	 * subsequent arrival of a slab.
	 */
	wp->w_sp = NULL;
	wp->w_done = 0;
	wp->w_serrno = 0;

	mutex_exit(&wp->w_mutex);

	return (nwait);
}
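
/*
 * Sketch of the waiter lifecycle described above (illustrative only),
 * mirroring what smr_slab_alloc_local/remote() actually do: the first
 * registered waiter (nwait == 1) performs or initiates the allocation
 * and everybody else just sleeps on the result.
 *
 *	nwait = smr_slabwaiter_register(domid);
 *	if (nwait == 1) {
 *		local:	sp = smr_slab_reserve(domid);
 *			(void) smr_slaballoc_put(domid, sp, 0, 0);
 *			serrno = smr_slabwaiter_unregister(domid, &sp);
 *		remote:	idn_send_cmd(masterid, IDNCMD_SLABALLOC, ...);
 *			serrno = smr_slaballoc_wait(domid, &sp);
 *	} else {
 *		... somebody else is already doing the work ...
 *		serrno = smr_slaballoc_wait(domid, &sp);
 *	}
 */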

/*
 * It is assumed that the caller had previously registered,
 * but wakeup did not occur due to caller never waiting.
 * Thus, slaballoc mutex is still held by caller.
 *
 * Returns:	0
 *		EINVAL
 *		EBUSY
 *		w_serrno (smr_slaballoc_put)
 *		(0, ENOLCK, ENOMEM, EDQUOT, EBUSY, ECANCELED)
 */
static int
smr_slabwaiter_unregister(int domid, smr_slab_t **spp)
{
	struct slabwaiter	*wp;
	int		serrno = 0;
	procname_t	proc = "smr_slabwaiter_unregister";


	ASSERT(domid != IDN_NIL_DOMID);

	wp = &idn.slabwaiter[domid];

	mutex_enter(&wp->w_mutex);

	PR_SMR("%s: domain = %d, nwaiters = %d\n", proc, domid, wp->w_nwaiters);

	if (wp->w_nwaiters <= 0) {
		/*
		 * Hmmm...nobody is registered!
		 */
		PR_SMR("%s: NO WAITERS (domid = %d)\n", proc, domid);
		mutex_exit(&wp->w_mutex);
		return (EINVAL);
	}
	(wp->w_nwaiters)--;
	/*
	 * Is our present under the tree?
	 */
	if (!wp->w_done) {
		/*
		 * Bummer...no presents.  Let the caller know
		 * via a null slab pointer.
		 * Note that we don't clean up immediately since
		 * message might still come in for other waiters.
		 * Thus, late sleepers may still get a chance.
		 */
		PR_SMR("%s: bummer no slab allocated for domain %d\n",
		    proc, domid);
		ASSERT(wp->w_sp == NULL);
		(*spp) = NULL;
		serrno = wp->w_closed ? ECANCELED : EBUSY;

	} else {
		(*spp) = wp->w_sp;
		serrno = wp->w_serrno;

#ifdef DEBUG
		if (serrno == 0) {
			register smr_slab_t	*sp;

			ASSERT(wp->w_sp);
			PR_SMR("%s: allocation succeeded (domain %d)\n",
			    proc, domid);

			DSLAB_LOCK_SHARED(domid);
			for (sp = idn_domain[domid].dslab; sp; sp = sp->sl_next)
				if (sp == wp->w_sp)
					break;
			if (sp == NULL)
				cmn_err(CE_WARN,
				    "%s:%d: slab ptr = NULL",
				    proc, domid);
			DSLAB_UNLOCK(domid);
		} else {
			PR_SMR("%s: allocation failed (domain %d) "
			    "[serrno = %d]\n", proc, domid, serrno);
		}
#endif /* DEBUG */
	}
	if (wp->w_nwaiters == 0) {
		/*
		 * Last one turns out the lights.
		 */
		PR_SMR("%s: domain %d last waiter, turning out lights\n",
		    proc, domid);
		wp->w_sp = NULL;
		wp->w_done = 0;
		wp->w_serrno = 0;
	}
	mutex_exit(&wp->w_mutex);

	return (serrno);
}

/*
 * Called to abort any slaballoc requests on behalf of the
 * given domain.
 */
int
smr_slabwaiter_abort(int domid, int serrno)
{
	ASSERT(serrno != 0);

	return (smr_slaballoc_put(domid, NULL, 0, serrno));
}

/*
 * Put ourselves into a timedwait waiting for slab to be
 * allocated.
 * Returns with slaballoc mutex dropped.
 *
 * Returns:	EINVAL
 *		ETIMEDOUT
 *		smr_slabwaiter_unregister
 *		(0, EINVAL, EBUSY, ENOMEM)
 */
static int
smr_slaballoc_wait(int domid, smr_slab_t **spp)
{
	struct slabwaiter	*wp;
	int			serrno = 0, serrno_unreg;
	procname_t		proc = "smr_slaballoc_wait";


	wp = &idn.slabwaiter[domid];

	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));

	mutex_enter(&wp->w_mutex);

	PR_SMR("%s: domain = %d, nwaiters = %d, wsp = 0x%p\n",
	    proc, domid, wp->w_nwaiters, wp->w_sp);

	if (wp->w_nwaiters <= 0) {
		/*
		 * Hmmm...no waiters registered.
		 */
		PR_SMR("%s: domain %d, no waiters!\n", proc, domid);
		mutex_exit(&wp->w_mutex);
		return (EINVAL);
	}
	ASSERT(DSLAB_READ_HELD(domid));
	DSLAB_UNLOCK(domid);

	if (!wp->w_done && !wp->w_closed) {
		int	rv;

		/*
		 * Only wait if data hasn't arrived yet.
		 */
		PR_SMR("%s: domain %d, going to sleep...\n", proc, domid);

		rv = cv_reltimedwait_sig(&wp->w_cv, &wp->w_mutex,
		    IDN_SLABALLOC_WAITTIME, TR_CLOCK_TICK);
		if (rv == -1)
			serrno = ETIMEDOUT;

		PR_SMR("%s: domain %d, awakened (reason = %s)\n",
		    proc, domid, (rv == -1) ? "TIMEOUT" : "SIGNALED");
	}
	/*
	 * We've awakened or request already filled!
	 * Unregister ourselves.
	 */
	mutex_exit(&wp->w_mutex);

	/*
	 * Any gifts will be entered into spp.
	 */
	serrno_unreg = smr_slabwaiter_unregister(domid, spp);

	/*
	 * Leave with reader lock on dslab_lock.
	 */
	DSLAB_LOCK_SHARED(domid);

	if ((serrno_unreg == EBUSY) && (serrno == ETIMEDOUT))
		return (serrno);
	else
		return (serrno_unreg);
}

/*
 * A SMR slab was allocated on behalf of the given domain.
 * Wakeup anybody that may have been waiting for the allocation.
 * Note that if the domain is a remote one, i.e. master is allocating
 * on behalf of a slave, it's up to the caller to transmit the
 * allocation response to that domain.
 * The force flag indicates that we want to install the slab for
 * the given user regardless of whether there are waiters or not.
 * This is used primarily in situations where a slave may have timed
 * out before the response actually arrived.  In this situation we
 * don't want to send slab back to the master after we went through
 * the trouble of allocating one.  Master is _not_ allowed to do this
 * for remote domains.
 *
 * Returns:	-1	Non-registered waiter or waiting area garbaged.
 *		0	Successfully performed operation.
 */
int
smr_slaballoc_put(int domid, smr_slab_t *sp, int forceflag, int serrno)
{
	idn_domain_t		*dp;
	struct slabwaiter	*wp;
	procname_t		proc = "smr_slaballoc_put";


	dp = &idn_domain[domid];

	ASSERT(!serrno ? DSLAB_WRITE_HELD(domid) : 1);

	if (domid == IDN_NIL_DOMID)
		return (-1);

	ASSERT(serrno ? (sp == NULL) : (sp != NULL));

	wp = &idn.slabwaiter[domid];

	mutex_enter(&wp->w_mutex);

	PR_SMR("%s: domain = %d, bufp = 0x%p, ebufp = 0x%p, "
	    "(f = %d, se = %d)\n", proc, domid,
	    (sp ? sp->sl_start : 0),
	    (sp ? sp->sl_end : 0), forceflag, serrno);

	if (wp->w_nwaiters <= 0) {
		/*
		 * There are no waiters!!  Must have timed out
		 * and left.  Oh well...
		 */
		PR_SMR("%s: no slaballoc waiters found for domain %d\n",
		    proc, domid);
		if (!forceflag || serrno || !sp) {
			/*
			 * No waiters and caller doesn't want to force it.
			 */
			mutex_exit(&wp->w_mutex);
			return (-1);
		}
		PR_SMR("%s: forcing slab onto domain %d\n", proc, domid);
		ASSERT(domid == idn.localid);
		ASSERT(wp->w_sp == NULL);
		wp->w_done = 0;
		/*
		 * Now we fall through and let it be added in the
		 * regular manner.
		 */
	}
	if (wp->w_done) {
		/*
		 * There's at least one waiter so there has
		 * to be a slab structure waiting for us.
		 * If everything is going smoothly, there should only
		 * be one guy coming through the path of inserting
		 * an error or good slab.  However, if a disconnect was
		 * detected, you may get several guys coming through
		 * trying to let everybody know.
		 */
		ASSERT(wp->w_serrno ?
		    (wp->w_sp == NULL) : (wp->w_sp != NULL));

		cv_broadcast(&wp->w_cv);
		mutex_exit(&wp->w_mutex);

		return (-1);
	}
	if (serrno != 0) {
		/*
		 * Bummer...allocation failed.  This call is simply
		 * to wake up the sleepers and let them know.
		 */
		PR_SMR("%s: slaballoc failed for domain %d\n", proc, domid);
		wp->w_serrno = serrno;
		wp->w_done = 1;
		cv_broadcast(&wp->w_cv);
		mutex_exit(&wp->w_mutex);

		return (0);
	}
	PR_SMR("%s: putting slab into struct (domid=%d, localid=%d)\n",
	    proc, domid, idn.localid);
	/*
	 * Prep the slab structure.
	 */

	if (domid == idn.localid) {
		/*
		 * Allocation was indeed for me.
		 * Slab may or may not be locked when
		 * we get here.  Normally they will be locked
		 * if we're being called on behalf of a
		 * free, and not locked if on behalf of
		 * a new allocation request.
		 */
		lock_clear(&sp->sl_lock);
		smr_alloc_buflist(sp);
#ifdef DEBUG
	} else {
		uint_t	rv;
		/*
		 * Slab was not allocated on my behalf.  Must be
		 * a master request on behalf of some other domain.
		 * Prep appropriately.  Slab should have been locked
		 * by smr_slab_reserve.
		 */
		rv = lock_try(&sp->sl_lock);
		ASSERT(!rv);
		ASSERT(sp->sl_domid == (short)domid);
#endif /* DEBUG */
	}

	/*
	 * Slab is ready to go.  Insert it into the domain's
	 * slab list so once we wake everybody up they'll find it.
	 * You better have write lock if you're putting treasures
	 * there.
	 */
	ASSERT(DSLAB_WRITE_HELD(domid));

	sp->sl_next = dp->dslab;
	dp->dslab  = sp;
	dp->dnslabs++;

	/*
	 * It's possible to fall through here without waiters.
	 * This is a case where forceflag was set.
	 */
	if (wp->w_nwaiters > 0) {
		wp->w_sp = sp;
		wp->w_serrno = serrno;
		wp->w_done = 1;
		cv_broadcast(&wp->w_cv);
	} else {
		ASSERT(forceflag);
		wp->w_sp = NULL;
		wp->w_serrno = 0;
		wp->w_done = 0;
	}
	mutex_exit(&wp->w_mutex);

	return (0);
}

/*
 * Get the slab representing [bufp,ebufp] from the respective
 * domain's pool if all the buffers are free.  Remove them from
 * the domain's list and return it.
 * If bufp == NULL, then return however many free ones you
 * can find.
 * List of slabs are returned locked (sl_lock).
 * XXX - Need minimum limit to make sure we don't free up _all_
 *	 of our slabs!  However, during a shutdown we will need
 *	 method to free them all up regardless of locking.
 */
smr_slab_t *
smr_slaballoc_get(int domid, caddr_t bufp, caddr_t ebufp)
{
	idn_domain_t	*dp;
	smr_slab_t	*retsp, *sp, **psp;
	int		foundit, islocal = 0;
	int		nslabs;
	procname_t	proc = "smr_slaballoc_get";

	PR_SMR("%s: getting slab for domain %d [bufp=0x%p, ebufp=0x%p]\n",
	    proc, domid, bufp, ebufp);

	dp = &idn_domain[domid];

	ASSERT(DSLAB_WRITE_HELD(domid));

	if ((sp = dp->dslab) == NULL) {
		PR_SMR("%s: oops, no slabs for domain %d\n", proc, domid);
		return (NULL);
	}
	/*
	 * If domid is myself then I'm trying to get a slab out
	 * of my local pool.  Otherwise, I'm the master and
	 * I'm trying to get the slab representative from the
	 * global pool.
	 */
	if (domid == idn.localid)
		islocal = 1;

	if (bufp != NULL) {
		nslabs = -1;
	} else {
		nslabs = *(int *)ebufp;
		if (nslabs == 0) {
			PR_SMR("%s: requested nslabs (%d) <= 0\n",
			    proc, nslabs);
			return (NULL);
		} else if (nslabs < 0) {
			/*
			 * Caller wants them all!
			 */
			nslabs = (int)dp->dnslabs;
		}
	}

	retsp = NULL;
	foundit = 0;
	for (psp = &dp->dslab; sp; sp = *psp) {
		int	isbusy;

		if (bufp && (sp->sl_start != bufp)) {
			psp = &sp->sl_next;
			continue;
		}

		if (bufp && (ebufp > sp->sl_end)) {
			PR_SMR("%s: bufp/ebufp (0x%p/0x%p) "
			    "expected (0x%p/0x%p)\n", proc, bufp, ebufp,
			    sp->sl_start, sp->sl_end);
			ASSERT(0);
		}
		/*
		 * We found the desired slab.  Make sure
		 * it's free.
		 */
		foundit++;
		isbusy = 0;
		if (islocal) {
			int spl;

			/*
			 * Some of the buffers in the slab
			 * are still in use.  Unlock the
			 * buffers we locked and bail out.
			 */
			spl = splhi();
			if (!lock_try(&sp->sl_lock)) {
				isbusy = 1;
				foundit--;
			} else if (sp->sl_inuse) {
				lock_clear(&sp->sl_lock);
				isbusy = 1;
				foundit--;
			}
			splx(spl);
		} else {
			/*
			 * If not local, then I'm the master getting
			 * a slab from one of the slaves.  In this case,
			 * their slab structs will always be locked.
			 */
			ASSERT(!lock_try(&sp->sl_lock));
		}
		if (!isbusy) {
			/*
			 * Delete the entry from the list and slap
			 * it onto our return list.
			 */
			*psp = sp->sl_next;
			sp->sl_next = retsp;
			retsp = sp;
		} else {
			psp = &sp->sl_next;
		}
		/*
		 * If bufp == NULL (alternate interface) and we haven't
		 * found the desired number of slabs yet, keep looking.
		 */
		if (bufp || (foundit == nslabs))
			break;
	}
	dp->dnslabs -= (short)foundit;

	if (foundit) {
		PR_SMR("%s: found %d free slabs (domid = %d)\n", proc, foundit,
		    domid);
	} else {
		PR_SMR("%s: no free slabs found (domid = %d)\n", proc, domid);
	}

	/*
	 * If this is the alternate interface, need to return
	 * the number of slabs found in the ebufp parameter.
	 */
	if (bufp == NULL)
		*(int *)ebufp = foundit;

	return (retsp);
}

/*
 * Wrapper to hide alternate interface to smr_slaballoc_get()
 */
smr_slab_t *
smr_slaballoc_get_n(int domid, int *nslabs)
{
	smr_slab_t	*sp;

	ASSERT(DSLAB_WRITE_HELD(domid));

	sp = smr_slaballoc_get(domid, NULL, (caddr_t)nslabs);

	return (sp);
}
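
/*
 * Sketch (illustrative only): the alternate interface to
 * smr_slaballoc_get() passes the requested count in through the ebufp
 * parameter and the count actually found back out the same way.  For
 * example, to claim up to three free slabs from "domid":
 *
 *	int		nslabs = 3;
 *	smr_slab_t	*sp;
 *
 *	DSLAB_LOCK_EXCL(domid);
 *	sp = smr_slaballoc_get_n(domid, &nslabs);
 *	DSLAB_UNLOCK(domid);
 *	... nslabs now holds how many were actually unlinked; a
 *	    negative initial value would have requested them all ...
 */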

/*
 * Only called by master.  Initialize slab pool based on local SMR.
 * Returns 0 on success, -1 on failure.
 * reserved_size = Length of area at the front of the NWR portion
 *		   of the SMR to reserve and not make available for
 *		   slab allocations.  Must be an IDN_SMR_BUFSIZE multiple.
 * reserved_area = Pointer to reserved area, if any.
 */
int
smr_slabpool_init(size_t reserved_size, caddr_t *reserved_area)
{
	size_t			nwr_available;
	int			minperpool, ntotslabs, nxslabs, nslabs;
	register int		p, pp;
	register caddr_t	bufp;
	register smr_slab_t	*sp;

	ASSERT(IDN_GLOCK_IS_EXCL());
	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);

	*reserved_area = NULL;

	nwr_available = MB2B(IDN_NWR_SIZE) - reserved_size;

	if ((idn.localid != IDN_GET_MASTERID()) ||
	    (nwr_available < IDN_SLAB_SIZE) ||
	    (idn.slabpool != NULL) ||
	    ((reserved_size != 0) && (reserved_size & (IDN_SMR_BUFSIZE-1)))) {
		return (-1);
	}

	idn.slabpool = GETSTRUCT(struct slabpool, 1);
	idn.slabpool->ntotslabs = ntotslabs = nwr_available / IDN_SLAB_SIZE;
	ASSERT(ntotslabs > 0);
	minperpool = (ntotslabs < IDN_SLAB_MINPERPOOL) ?
	    1 : IDN_SLAB_MINPERPOOL;
	idn.slabpool->npools = (ntotslabs + (minperpool - 1)) / minperpool;

	if ((idn.slabpool->npools & 1) == 0) {
		/*
		 * npools needs to be odd for the hashing algorithm.
		 */
		idn.slabpool->npools++;
	}
	ASSERT(idn.slabpool->npools > 0);
	minperpool = (ntotslabs < idn.slabpool->npools) ?
	    1 : (ntotslabs / idn.slabpool->npools);

	/*
	 * Calculate the number of extra slabs that will need to
	 * be allotted to the pools.  This number will be less than
	 * npools.  Only one extra slab is allocated to each pool
	 * until we have assigned all the extra slabs.
	 */
	if (ntotslabs > (idn.slabpool->npools * minperpool))
		nxslabs = ntotslabs - (idn.slabpool->npools * minperpool);
	else
		nxslabs = 0;
	ASSERT((nxslabs >= 0) && (nxslabs < idn.slabpool->npools));

	idn.slabpool->pool = GETSTRUCT(struct smr_slabtbl,
	    idn.slabpool->npools);
	sp = GETSTRUCT(smr_slab_t, idn.slabpool->ntotslabs);

	idn.slabpool->savep = sp;
	bufp = idn.smr.vaddr + reserved_size;

	for (p = nslabs = 0;
	    (p < idn.slabpool->npools) && (ntotslabs > 0);
	    p++, ntotslabs -= nslabs) {

		nslabs = (ntotslabs < minperpool) ? ntotslabs : minperpool;
		if (nxslabs > 0) {
			nslabs++;
			nxslabs--;
		}
		idn.slabpool->pool[p].sarray = sp;
		for (pp = 0; pp < nslabs; pp++) {

			sp->sl_next  = NULL;
			sp->sl_start = bufp;
			sp->sl_end   = bufp = sp->sl_start + IDN_SLAB_SIZE;
			sp->sl_lock  = 0;
			sp->sl_domid = (short)IDN_NIL_DOMID;

			sp++;
		}
		idn.slabpool->pool[p].nfree   = nslabs;
		idn.slabpool->pool[p].nslabs  = nslabs;
	}
	ASSERT((ntotslabs == 0) && (nxslabs == 0));
	/*
	 * We should be at the end of the SMR at this point.
	 */
	ASSERT(bufp == (idn.smr.vaddr + reserved_size
	    + (idn.slabpool->ntotslabs * IDN_SLAB_SIZE)));

	if (reserved_size != 0)
		*reserved_area = idn.smr.vaddr;

	return (0);
}
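
/*
 * Worked example of the sizing arithmetic above (numbers hypothetical,
 * chosen only to illustrate): with ntotslabs = 32 and
 * IDN_SLAB_MINPERPOOL = 8, npools = (32 + 7) / 8 = 4, which is even
 * and so is bumped to 5 for the hashing algorithm.  minperpool is then
 * recomputed as 32 / 5 = 6, leaving nxslabs = 32 - (5 * 6) = 2 extra
 * slabs; the first two pools receive 7 slabs each and the remaining
 * three receive 6 (7 + 7 + 6 + 6 + 6 = 32).
 */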

void
smr_slabpool_deinit()
{
	if (idn.slabpool == NULL)
		return;

	FREESTRUCT(idn.slabpool->savep, smr_slab_t, idn.slabpool->ntotslabs);
	FREESTRUCT(idn.slabpool->pool, struct smr_slabtbl,
	    idn.slabpool->npools);
	FREESTRUCT(idn.slabpool, struct slabpool, 1);

	idn.slabpool = NULL;
}

void
smr_alloc_buflist(smr_slab_t *sp)
{
	int		n, nbufs;
	caddr_t		sbufp;
	smr_slabbuf_t	*hp, *bp;

	if (sp->sl_head)
		return;

	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
	ASSERT(nbufs > 0);
	if (nbufs <= 0) {
		sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
		return;
	}

	hp = GETSTRUCT(smr_slabbuf_t, nbufs);

	sbufp = sp->sl_start;
	for (n = 0, bp = hp; n < nbufs; bp++, n++) {
		bp->sb_bufp = sbufp;
		bp->sb_domid = IDN_NIL_DOMID;
		bp->sb_next = bp + 1;
		sbufp += IDN_SMR_BUFSIZE;
	}
	(--bp)->sb_next = NULL;

	sp->sl_head = sp->sl_free = hp;
	sp->sl_inuse = NULL;
}

void
smr_free_buflist(smr_slab_t *sp)
{
	int	nbufs;

	if (sp->sl_head == NULL)
		return;

	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;

	FREESTRUCT(sp->sl_head, smr_slabbuf_t, nbufs);

	sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
}

/*
 * Returns:	Pointer to the reserved slab on success.
 *		NULL on failure.
 */
static smr_slab_t *
smr_slab_reserve(int domid)
{
	register int		p, nextp, s, nexts;
	register smr_slab_t	*spa;
	int			startp, starts;
	int			foundone = 0;
	int			spl;
	procname_t		proc = "smr_slab_reserve";

	p = startp = SMR_SLABPOOL_HASH(domid);
	nextp = -1;

	spl = splhi();
	while ((nextp != startp) && !foundone) {

		s = starts = SMR_SLAB_HASH(p, domid);
		nexts = -1;
		spa = &(idn.slabpool->pool[p].sarray[0]);

		while ((nexts != starts) && !foundone) {
			if (lock_try(&spa[s].sl_lock)) {
				foundone = 1;
				break;
			}
			nexts = SMR_SLAB_HASHSTEP(p, s);
			s = nexts;
		}
		if (foundone)
			break;
		nextp = SMR_SLABPOOL_HASHSTEP(p);
		p = nextp;
	}
	splx(spl);

	if (foundone) {
		ASSERT((&spa[s] >= idn.slabpool->savep) &&
		    (&spa[s] < (idn.slabpool->savep +
		    idn.slabpool->ntotslabs)));

		spa[s].sl_domid = (short)domid;

		ATOMIC_DEC(idn.slabpool->pool[p].nfree);

		if (domid == idn.localid) {
			smr_slab_t	*nsp;
			/*
			 * Caller is actually reserving a slab for
			 * themselves which means they'll need the full
			 * slab structure to represent all of the I/O
			 * buffers.  The "spa" is just a representative
			 * and doesn't contain the space to manage the
			 * individual buffers.  Need to alloc a full-size
			 * struct.
			 * Note that this results in the returning
			 * smr_slab_t structure being unlocked.
			 */
			ASSERT(idn.localid == IDN_GET_MASTERID());
			nsp = GETSTRUCT(smr_slab_t, 1);
			nsp->sl_start = spa[s].sl_start;
			nsp->sl_end   = spa[s].sl_end;
			smr_alloc_buflist(nsp);
			spa = nsp;
			PR_SMR("%s: allocated full slab struct for domain %d\n",
			    proc, domid);
		} else {
			/*
			 * Slab structure gets returned locked.
			 */
			spa += s;
		}

		PR_SMR("%s: allocated slab 0x%p (start=0x%p, size=%lu) for "
		    "domain %d\n", proc, spa, spa->sl_start,
		    spa->sl_end - spa->sl_start, domid);
	} else {
		PR_SMR("%s: FAILED to allocate for domain %d\n",
		    proc, domid);
		spa = NULL;
	}

	return (spa);
}

static void
smr_slab_unreserve(int domid, smr_slab_t *sp)
{
	register int		p, nextp, s, nexts;
	register smr_slab_t	*spa;
	int			foundit = 0;
	int			startp, starts;
	caddr_t			bufp;
	procname_t		proc = "smr_slab_unreserve";

	bufp = sp->sl_start;
	p = startp = SMR_SLABPOOL_HASH(domid);
	nextp = -1;

	while ((nextp != startp) && !foundit) {

		s = starts = SMR_SLAB_HASH(p, domid);
		nexts = -1;
		spa = &(idn.slabpool->pool[p].sarray[0]);

		while ((nexts != starts) && !foundit) {
			if (spa[s].sl_start == bufp) {
				foundit = 1;
				break;
			}
			nexts = SMR_SLAB_HASHSTEP(p, s);
			s = nexts;
		}
		if (foundit)
			break;
		nextp = SMR_SLABPOOL_HASHSTEP(p);
		p = nextp;
	}
	if (foundit) {
		ASSERT((&spa[s] >= idn.slabpool->savep) &&
		    (&spa[s] < (idn.slabpool->savep +
		    idn.slabpool->ntotslabs)));
		ASSERT(!lock_try(&spa[s].sl_lock));
		ASSERT(spa[s].sl_domid == (short)domid);

		spa[s].sl_next = NULL;
		spa[s].sl_domid = (short)IDN_NIL_DOMID;
		lock_clear(&spa[s].sl_lock);

		ATOMIC_INC(idn.slabpool->pool[p].nfree);

		PR_SMR("%s: freed (bufp=0x%p) for domain %d\n",
		    proc, bufp, domid);

		if (domid == idn.localid) {
			/*
			 * Caller is actually unreserving a slab of their
			 * own.  Note that only the master calls this
			 * routine.  Since the master's local slab
			 * structures do not get entered into the global
			 * "representative" pool, we need to free up the
			 * data structure that was passed in.
			 */
			ASSERT(idn.localid == IDN_GET_MASTERID());
			ASSERT(sp != &spa[s]);

			smr_free_buflist(sp);
			FREESTRUCT(sp, smr_slab_t, 1);
		} else {
			ASSERT(sp == &spa[s]);
		}
	} else {
		/*
		 * Couldn't find slab entry for given buf!
		 */
		PR_SMR("%s: FAILED to free (bufp=0x%p) for domain %d\n",
		    proc, bufp, domid);
	}
}

/*
 * The Reap Protocol:
 *	master				   slave
 *	------				   -----
 *	smr_slab_reap_global
 *	- idn_broadcast_cmd(SLABREAP) ->   idn_recv_cmd(SLABREAP)
 *	  . idn_local_cmd(SLABREAP)        - idn_recv_slabreap_req
 *	    - smr_slab_reap	             . smr_slab_reap
 *	      . smr_slaballoc_get_n            - smr_slaballoc_get_n
 *	      . smr_slab_free		       - smr_slab_free
 *		- smr_slab_free_local		 . smr_slab_free_remote
 *		  . smr_slab_unreserve
 *				      <-	   - idn_send_cmd(SLABFREE)
 *	idn_recv_cmd(SLABFREE)
 *	- idn_recv_slabfree_req
 *	  . smr_slaballoc_get
 *	  . smr_slab_free
 *	    - smr_slab_free_local
 *	      . smr_slab_unreserve
 *        . idn_send_slabfree_resp    ->   idn_recv_cmd(SLABFREE | ack)
 *					   - idn_recv_slabfree_resp
 *
 *	idn_recv_cmd(SLABREAP | ack)  <-     . idn_send_slabreap_resp
 *	- idn_recv_slabreap_resp	   DONE
 *	DONE
 *
 * Check available slabs and if we're below the threshold, kick
 * off reaping to all remote domains.  There is no guarantee remote
 * domains will be able to free up any.
 */
static void
smr_slab_reap_global()
{
	register int	p, npools;
	register int	total_free = 0;
	register struct smr_slabtbl	*tblp;
	static clock_t	reap_last = 0;
	procname_t	proc = "smr_slab_reap_global";
	clock_t		now;

	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);

	DSLAB_LOCK_SHARED(idn.localid);
	if (idn_domain[idn.localid].dslab_state != DSLAB_STATE_LOCAL) {
		PR_SMR("%s: only allowed by master (%d)\n",
		    proc, IDN_GET_MASTERID());
		DSLAB_UNLOCK(idn.localid);
		return;
	}
	DSLAB_UNLOCK(idn.localid);

	now = ddi_get_lbolt();
	if ((now > 0) && (now > reap_last) &&
	    ((now - reap_last) < IDN_REAP_INTERVAL))
		return;

	reap_last = now;

	ASSERT(idn.slabpool);

	npools = idn.slabpool->npools;
	tblp   = idn.slabpool->pool;

	for (p = 0; p < npools; tblp++, p++)
		total_free += tblp->nfree;

	if (total_free <= IDN_SLAB_THRESHOLD) {
		int	diff, reap_per_domain;

		PR_SMR("%s: kicking off reaping "
		    "(total_free = %d, min = %d)\n",
		    proc, total_free, IDN_SLAB_THRESHOLD);

		diff = IDN_SLAB_THRESHOLD - total_free;
		reap_per_domain = (diff < idn.ndomains) ?
		    1 : (diff / idn.ndomains);

		idn_broadcast_cmd(IDNCMD_SLABREAP, reap_per_domain, 0, 0);
	}
}
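
/*
 * Worked example (hypothetical numbers): with IDN_SLAB_THRESHOLD = 16,
 * total_free = 10 and idn.ndomains = 4, diff = 16 - 10 = 6 and each
 * domain is asked to reap 6 / 4 = 1 slab.  Were diff smaller than
 * idn.ndomains, each domain would still be asked for exactly one.
 */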

void
smr_slab_reap(int domid, int *nslabs)
{
	register int	d;
	int		nreclaimed;
	smr_slab_t	*sp;
	domainset_t	reapset;
	procname_t	proc = "smr_slab_reap";

	/*
	 * Should only be called on behalf of local
	 * domain.
	 */
	if (domid != idn.localid) {
		PR_SMR("%s: called by domain %d, should only be local (%d)\n",
		    proc, domid, idn.localid);
		ASSERT(0);
		return;
	}
	/*
	 * Try and reclaim some buffers so we can possibly
	 * free up some slabs.
	 */
	reapset = idn.domset.ds_connected;

	IDN_GKSTAT_GLOBAL_EVENT(gk_reaps, gk_reap_last);

	nreclaimed = 0;
	for (d = 0; d < MAX_DOMAINS; d++) {
		int		nr;
		idn_domain_t	*dp;

		if (!DOMAIN_IN_SET(reapset, d))
			continue;

		IDN_DLOCK_SHARED(d);

		dp = &idn_domain[d];
		if ((d == idn.localid) || (dp->dcpu < 0)) {
			IDN_DUNLOCK(d);
			continue;
		}
		/*
		 * Clean up any dead I/O errors if possible.
		 */
		if (dp->dioerr > 0) {
			idn_domain_t	*ldp;
			register int	cnt;
			register smr_slabbuf_t	*bp;
			/*
			 * We need to grab the writer lock to prevent
			 * anybody from allocating buffers while we
			 * traverse the slabs outstanding.
			 */
			cnt = 0;
			ldp = &idn_domain[idn.localid];
			IDN_DLOCK_EXCL(idn.localid);
			DSLAB_LOCK_EXCL(idn.localid);
			for (sp = ldp->dslab; sp; sp = sp->sl_next)
				for (bp = sp->sl_inuse; bp; bp = bp->sb_next)
					if (bp->sb_domid == d)
						cnt++;
			DSLAB_UNLOCK(idn.localid);
			ASSERT((dp->dio + dp->dioerr) >= cnt);
			dp->dio = cnt;
			dp->dioerr = 0;
			IDN_DUNLOCK(idn.localid);
		}
		if ((dp->dstate == IDNDS_CONNECTED) &&
		    ((nr = idn_reclaim_mboxdata(d, 0, -1)) > 0))
			nreclaimed += nr;

		IDN_DUNLOCK(d);
	}

	DSLAB_LOCK_EXCL(domid);
	sp = smr_slaballoc_get_n(domid, nslabs);
	if (sp) {
		IDN_GKSTAT_ADD(gk_reap_count, (ulong_t)(*nslabs));
		smr_slab_free(domid, sp);
	}
	DSLAB_UNLOCK(domid);
}

/*
 * ---------------------------------------------------------------------
 * Remap the (IDN) shared memory region to a new physical address.
 * Caller is expected to have performed an ecache flush if needed.
 * ---------------------------------------------------------------------
 */
void
smr_remap(struct as *as, register caddr_t vaddr,
		register pfn_t new_pfn, uint_t mblen)
{
	tte_t		tte;
	size_t		blen;
	pgcnt_t		p, npgs;
	procname_t	proc = "smr_remap";

	if (va_to_pfn(vaddr) == new_pfn) {
		PR_REMAP("%s: vaddr (0x%p) already mapped to pfn (0x%lx)\n",
		    proc, vaddr, new_pfn);
		return;
	}

	blen = MB2B(mblen);
	npgs = btopr(blen);
	ASSERT(npgs != 0);

	PR_REMAP("%s: va = 0x%p, pfn = 0x%lx, npgs = %ld, mb = %d MB (%ld)\n",
	    proc, vaddr, new_pfn, npgs, mblen, blen);

	/*
	 * Unmap the SMR virtual address from its current
	 * mapping.
	 */
	hat_unload(as->a_hat, vaddr, blen, HAT_UNLOAD_UNLOCK);

	if (new_pfn == PFN_INVALID)
		return;

	/*
	 * Map the SMR to the new physical address space,
	 * presumably a remote pfn.  Cannot use hat_devload
	 * because it will think the pfn represents non-memory
	 * space, since it may be beyond its physmax.
	 */
	for (p = 0; p < npgs; p++) {
		sfmmu_memtte(&tte, new_pfn, PROT_READ | PROT_WRITE | HAT_NOSYNC,
		    TTE8K);
		sfmmu_tteload(as->a_hat, &tte, vaddr, NULL, HAT_LOAD_LOCK);

		vaddr += MMU_PAGESIZE;
		new_pfn++;
	}

	PR_REMAP("%s: remapped %ld pages (expected %ld)\n",
	    proc, npgs, btopr(MB2B(mblen)));
}
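
/*
 * Worked example (hypothetical numbers): remapping a 64 MB SMR gives
 * blen = MB2B(64) bytes and, with 8K pages, npgs = btopr(blen) = 8192,
 * so the loop above loads 8192 locked TTEs, stepping vaddr by
 * MMU_PAGESIZE and new_pfn by one page per iteration.
 */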