/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/note.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>	/* include prototypes */

#if defined(__i386) || defined(__amd64)
/*
 * MSI-X allocation limit.
 */
extern uint_t		ddi_msix_alloc_limit;
#endif

/*
 * Interrupt Resource Management (IRM).
 */

#define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */

#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

#define	DDI_IRM_IS_REDUCIBLE(r)	((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
				((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
				((r)->ireq_flags & DDI_IRM_FLAG_NEW))
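
/*
 * A request is reducible if its owner participates in IRM (an MSI-X
 * request with a registered callback), or if the request is still
 * marked NEW and therefore has no consumer depending upon its current
 * allocation.
 */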

extern pri_t	minclsyspri;

/* Global policies */
int		irm_enable = 1;
boolean_t	irm_active = B_FALSE;
int		irm_default_policy = DDI_IRM_POLICY_LARGE;
uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;
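
/*
 * NOTE: the policy globals above act as tunables.  Like other kernel
 * variables they may be overridden at boot (e.g. in /etc/system) to
 * disable IRM entirely or to change the default balancing policy and
 * balance delay.
 */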

/* Global list of interrupt pools */
kmutex_t	irm_pools_lock;
list_t		irm_pools_list;

/* Global debug tunables */
#ifdef	DEBUG
int		irm_debug_policy = 0;
uint_t		irm_debug_size = 0;
#endif	/* DEBUG */

static void	irm_balance_thread(ddi_irm_pool_t *);
static void	i_ddi_irm_balance(ddi_irm_pool_t *);
static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
static int	i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *, int, int);
static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
static int	i_ddi_irm_modify_increase(ddi_irm_req_t *, int);

/*
 * OS Initialization Routines
 */

/*
 * irm_init()
 *
 *	Initialize IRM subsystem before any drivers are attached.
 */
void
irm_init(void)
{
	/* Do nothing if IRM is disabled */
	if (!irm_enable)
		return;

	/* Verify that the default balancing policy is valid */
	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
		irm_default_policy = DDI_IRM_POLICY_LARGE;

	/* Initialize the global list of interrupt pools */
	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
	    offsetof(ddi_irm_pool_t, ipool_link));
}

/*
 * i_ddi_irm_poststartup()
 *
 *	IRM is not activated until after the IO subsystem is initialized.
 *	When activated, per-pool balancing threads are spawned and a flag
 *	is set so that all future pools will be activated when created.
 *
 *	NOTE: the global variable 'irm_enable' disables IRM if zero.
 */
void
i_ddi_irm_poststartup(void)
{
	ddi_irm_pool_t	*pool_p;

	/* Do nothing if IRM is disabled */
	if (!irm_enable)
		return;

	/* Lock the global list */
	mutex_enter(&irm_pools_lock);

	/* Activate all defined pools */
	for (pool_p = list_head(&irm_pools_list); pool_p;
	    pool_p = list_next(&irm_pools_list, pool_p))
		pool_p->ipool_thread = thread_create(NULL, 0,
		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);

	/* Set future pools to be active */
	irm_active = B_TRUE;

	/* Unlock the global list */
	mutex_exit(&irm_pools_lock);
}

/*
 * NDI interfaces for creating/destroying IRM pools.
 */

/*
 * ndi_irm_create()
 *
 *	Nexus interface to create an IRM pool.  Create the new
 *	pool and add it to the global list of interrupt pools.
 */
int
ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
    ddi_irm_pool_t **pool_retp)
{
	ddi_irm_pool_t	*pool_p;

	ASSERT(dip != NULL);
	ASSERT(paramsp != NULL);
	ASSERT(pool_retp != NULL);
	ASSERT(paramsp->iparams_total >= 1);
	ASSERT(paramsp->iparams_types != 0);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));

	/* Check if IRM is enabled */
	if (!irm_enable)
		return (NDI_FAILURE);

	/* Validate parameters */
	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
		return (NDI_FAILURE);

	/* Allocate and initialize the pool */
	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
	pool_p->ipool_owner = dip;
	pool_p->ipool_policy = irm_default_policy;
	pool_p->ipool_types = paramsp->iparams_types;
	pool_p->ipool_totsz = paramsp->iparams_total;
	pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
	    paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
	    offsetof(ddi_irm_req_t, ireq_link));
	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
	    offsetof(ddi_irm_req_t, ireq_scratch_link));
	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);

	/* Add to global list of pools */
	mutex_enter(&irm_pools_lock);
	list_insert_tail(&irm_pools_list, pool_p);
	mutex_exit(&irm_pools_lock);

	/* If IRM is active, then activate the pool */
	if (irm_active)
		pool_p->ipool_thread = thread_create(NULL, 0,
		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);

	*pool_retp = pool_p;
	return (NDI_SUCCESS);
}
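
/*
 * Example (a minimal sketch, not taken from any particular nexus
 * driver): a PCIe root nexus that owns 256 MSI-X vectors might create
 * its pool during attach roughly as follows; 'px_irm_pool' is a
 * hypothetical variable.
 *
 *	ddi_irm_params_t params;
 *	ddi_irm_pool_t *px_irm_pool;
 *
 *	params.iparams_types = DDI_INTR_TYPE_MSIX;
 *	params.iparams_total = 256;
 *	if (ndi_irm_create(dip, &params, &px_irm_pool) != NDI_SUCCESS)
 *		return (DDI_FAILURE);
 */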

/*
 * ndi_irm_resize_pool()
 *
 *	Nexus interface to resize an IRM pool.  If the new pool size
 *	drops below the number of reserved vectors, then initiate a
 *	rebalance operation before resizing the pool.  If the rebalance
 *	operation fails, then return NDI_FAILURE.
 */
int
ndi_irm_resize_pool(ddi_irm_pool_t *pool_p, uint_t new_size)
{
	uint_t prev_size;

	ASSERT(pool_p != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
	    " current-size 0x%x new-size 0x%x\n",
	    (void *)pool_p, pool_p->ipool_totsz, new_size));

	if (pool_p == NULL)
		return (NDI_EINVAL);

	/* Check if IRM is enabled */
	if (!irm_enable)
		return (NDI_FAILURE);

	mutex_enter(&pool_p->ipool_lock);

	/*
	 * If we are increasing the pool size, or if the reserved
	 * number of vectors is <= the new pool size, then simply
	 * update the pool size and enqueue a rebalance operation
	 * if necessary to use the new vectors.
	 */
	if ((pool_p->ipool_totsz < new_size) ||
	    (pool_p->ipool_resno <= new_size)) {
		/* set new pool size */
		pool_p->ipool_totsz = new_size;
		/* adjust the default allocation limit */
		pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC,
		    MAX(DDI_MIN_MSIX_ALLOC, new_size / DDI_MSIX_ALLOC_DIVIDER));
		/* queue a rebalance operation to use the new vectors */
		if (pool_p->ipool_reqno > pool_p->ipool_resno)
			i_ddi_irm_enqueue(pool_p, B_FALSE);
		mutex_exit(&pool_p->ipool_lock);
		return (NDI_SUCCESS);
	}

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
	    " needs a rebalance operation\n", (void *)pool_p));

	/*
	 * Shrinking the pool requires a rebalance operation.
	 */
	/* save the current pool size */
	prev_size = pool_p->ipool_totsz;
	/* set the pool size to the desired new value */
	pool_p->ipool_totsz = new_size;
	/* perform the rebalance operation */
	i_ddi_irm_enqueue(pool_p, B_TRUE);

	/*
	 * If the rebalance operation couldn't free up enough
	 * vectors, then fail the resize operation.
	 */
	if (pool_p->ipool_resno > new_size) { /* rebalance failed */
		/* restore the pool size to the previous value */
		pool_p->ipool_totsz = prev_size;
		/* enqueue a rebalance operation for the original pool size */
		i_ddi_irm_enqueue(pool_p, B_FALSE);
		mutex_exit(&pool_p->ipool_lock);
		return (NDI_FAILURE);
	} else { /* rebalance worked */
		/* adjust the default allocation limit */
		pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC,
		    MAX(DDI_MIN_MSIX_ALLOC, new_size / DDI_MSIX_ALLOC_DIVIDER));
		mutex_exit(&pool_p->ipool_lock);
		DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
		    " resized from %x to %x\n",
		    (void *)pool_p, prev_size, pool_p->ipool_totsz));
		return (NDI_SUCCESS);
	}
}

/*
 * ndi_irm_destroy()
 *
 *	Nexus interface to destroy an IRM pool.  Destroy the pool
 *	and remove it from the global list of interrupt pools.
 */
int
ndi_irm_destroy(ddi_irm_pool_t *pool_p)
{
	ASSERT(pool_p != NULL);
	ASSERT(pool_p->ipool_resno == 0);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
	    (void *)pool_p));

	/* Validate parameters */
	if (pool_p == NULL)
		return (NDI_FAILURE);

	/* Validate that pool is empty */
	if (pool_p->ipool_resno != 0)
		return (NDI_BUSY);

	/* Remove the pool from the global list */
	mutex_enter(&irm_pools_lock);
	list_remove(&irm_pools_list, pool_p);
	mutex_exit(&irm_pools_lock);

	/* Terminate the balancing thread */
	mutex_enter(&pool_p->ipool_lock);
	if (pool_p->ipool_thread &&
	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
		cv_signal(&pool_p->ipool_cv);
		mutex_exit(&pool_p->ipool_lock);
		thread_join(pool_p->ipool_thread->t_did);
	} else
		mutex_exit(&pool_p->ipool_lock);

	/* Destroy the pool */
	cv_destroy(&pool_p->ipool_cv);
	mutex_destroy(&pool_p->ipool_lock);
	mutex_destroy(&pool_p->ipool_navail_lock);
	list_destroy(&pool_p->ipool_req_list);
	list_destroy(&pool_p->ipool_scratch_list);
	kmem_free(pool_p, sizeof (ddi_irm_pool_t));

	return (NDI_SUCCESS);
}

/*
 * Insert/Modify/Remove Interrupt Requests
 */

/*
 * i_ddi_irm_insert()
 *
 *	Insert a new request into an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_insert(dev_info_t *dip, int type, int count)
{
	ddi_irm_req_t	*req_p;
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	uint_t		nreq, nmin, npartial;
	boolean_t	irm_flag = B_FALSE;

	ASSERT(dip != NULL);
	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
	ASSERT(count > 0);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
	    (void *)dip, type, count));

	/* Validate parameters */
	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check for an existing request */
	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
	    (intr_p->devi_irm_req_p != NULL))
		return (DDI_SUCCESS);

	/* Check for IRM support from the system */
	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
		return (DDI_ENOTSUP);
	}

	/* Check for IRM support from the driver */
	if (i_ddi_irm_supported(dip, type) == DDI_SUCCESS)
		irm_flag = B_TRUE;

	/* Determine request size */
	nreq = (irm_flag) ? count :
	    MIN(count, i_ddi_intr_get_limit(dip, type, pool_p));
	nmin = (irm_flag) ? 1 : nreq;
	npartial = MIN(nreq, pool_p->ipool_defsz);

	/* Allocate and initialize the request */
	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
	req_p->ireq_type = type;
	req_p->ireq_dip = dip;
	req_p->ireq_pool_p = pool_p;
	req_p->ireq_nreq = nreq;
	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
	if (irm_flag)
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Check for minimal fit before inserting */
	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		mutex_exit(&pool_p->ipool_lock);
		kmem_free(req_p, sizeof (ddi_irm_req_t));
		return (DDI_EAGAIN);
	}

	/* Insert the request into the pool */
	pool_p->ipool_reqno += nreq;
	pool_p->ipool_minno += nmin;
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

	/*
	 * Try to fulfill the request.
	 *
	 * If all the interrupts are available, and either the request
	 * is static or the pool is active, then just take them directly.
	 *
	 * If only some of the interrupts are available, and the request
	 * can receive future callbacks, then take some now but queue the
	 * pool to be rebalanced later.
	 *
	 * Otherwise, immediately rebalance the pool and wait.
	 */
	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request completely fulfilled.\n"));
		pool_p->ipool_resno += nreq;
		req_p->ireq_navail = nreq;
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);

	} else if (irm_flag &&
	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request partially fulfilled.\n"));
		pool_p->ipool_resno += npartial;
		req_p->ireq_navail = npartial;
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request needs immediate rebalance.\n"));
		i_ddi_irm_enqueue(pool_p, B_TRUE);
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
	}

	/* Fail if the request cannot be fulfilled at all */
	if (req_p->ireq_navail == 0) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		pool_p->ipool_reqno -= nreq;
		pool_p->ipool_minno -= nmin;
		list_remove(&pool_p->ipool_req_list, req_p);
		mutex_exit(&pool_p->ipool_lock);
		kmem_free(req_p, sizeof (ddi_irm_req_t));
		return (DDI_EAGAIN);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	intr_p->devi_irm_req_p = req_p;
	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_modify()
 *
 *	Modify an existing request in an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_modify(dev_info_t *dip, int nreq)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_req_t	*req_p;
	ddi_irm_pool_t	*pool_p;
	int		type;
	int		retval = DDI_SUCCESS;

	ASSERT(dip != NULL);
	ASSERT(nreq > 0);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
	    (void *)dip, nreq));

	/* Validate parameters */
	if ((dip == NULL) || (nreq < 1)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Do nothing if not mapped to an IRM pool */
	if (((intr_p = DEVI(dip)->devi_intr_p) == NULL) ||
	    ((req_p = intr_p->devi_irm_req_p) == NULL))
		return (DDI_SUCCESS);

	/* Do nothing if new size is the same */
	if (nreq == req_p->ireq_nreq)
		return (DDI_SUCCESS);

	/* Do not allow MSI requests to be resized */
	if ((type = req_p->ireq_type) == DDI_INTR_TYPE_MSI) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid type\n"));
		return (DDI_ENOTSUP);
	}

	/* Select the pool */
	if ((pool_p = req_p->ireq_pool_p) == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: missing pool\n"));
		return (DDI_FAILURE);
	}

	/* Validate request size is not too large */
	if (nreq > i_ddi_intr_get_limit(dip, type, pool_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/*
	 * Process the modification.
	 *
	 *	- To increase a non-IRM request, call the implementation in
	 *	  i_ddi_irm_modify_increase().
	 *
	 *	- To decrease a non-IRM request, directly update the pool and
	 *	  request, then queue the pool for later rebalancing.
	 *
	 *	- To modify an IRM request, always queue the pool for later
	 *	  rebalancing.  IRM consumers rely upon callbacks for changes.
	 */
	if ((nreq > req_p->ireq_nreq) &&
	    (i_ddi_irm_supported(dip, type) != DDI_SUCCESS)) {

		retval = i_ddi_irm_modify_increase(req_p, nreq);

	} else {

		/* Update pool and request */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		if (i_ddi_irm_supported(dip, type) != DDI_SUCCESS) {
			pool_p->ipool_minno -= req_p->ireq_navail;
			pool_p->ipool_resno -= req_p->ireq_navail;
			pool_p->ipool_minno += nreq;
			pool_p->ipool_resno += nreq;
			req_p->ireq_navail = nreq;
		}
		req_p->ireq_nreq = nreq;

		/* Re-sort request into the pool */
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Queue pool for asynchronous rebalance */
		i_ddi_irm_enqueue(pool_p, B_FALSE);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	return (retval);
}

/*
 * i_ddi_irm_modify_increase()
 *
 *	Increase a non-IRM request.  The additional interrupts are
 *	directly taken from the pool when possible.  Otherwise, an
 *	immediate, synchronous rebalance is performed.  A temporary
 *	proxy request is used for any rebalance operation to ensure
 *	the request is not reduced below its current allocation.
 *
 *	NOTE: pool must already be locked.
 */
static int
i_ddi_irm_modify_increase(ddi_irm_req_t *req_p, int nreq)
{
	dev_info_t	*dip = req_p->ireq_dip;
	ddi_irm_pool_t	*pool_p = req_p->ireq_pool_p;
	ddi_irm_req_t	new_req;
	int		count, delta;

	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	/* Compute number of additional vectors */
	count = nreq - req_p->ireq_nreq;

	/* Check for minimal fit */
	if ((pool_p->ipool_minno + count) > pool_p->ipool_totsz) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return (DDI_EAGAIN);
	}

	/* Update the pool */
	pool_p->ipool_reqno += count;
	pool_p->ipool_minno += count;

	/* Attempt direct implementation */
	if ((pool_p->ipool_resno + count) <= pool_p->ipool_totsz) {
		req_p->ireq_nreq += count;
		req_p->ireq_navail += count;
		pool_p->ipool_resno += count;
		return (DDI_SUCCESS);
	}

	/* Rebalance required: fail if pool is not active */
	if ((pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE) == 0) {
		pool_p->ipool_reqno -= count;
		pool_p->ipool_minno -= count;
		return (DDI_EAGAIN);
	}

	/* Insert temporary proxy request */
	bzero(&new_req, sizeof (ddi_irm_req_t));
	new_req.ireq_dip = dip;
	new_req.ireq_nreq = count;
	new_req.ireq_pool_p = pool_p;
	new_req.ireq_type = req_p->ireq_type;
	new_req.ireq_flags = DDI_IRM_FLAG_NEW;
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, &new_req);

	/* Synchronously rebalance */
	i_ddi_irm_enqueue(pool_p, B_TRUE);

	/* Remove proxy request, and merge into original request */
	req_p->ireq_nreq += count;
	if ((delta = (count - new_req.ireq_navail)) > 0) {
		req_p->ireq_nreq -= delta;
		pool_p->ipool_reqno -= delta;
		pool_p->ipool_minno -= delta;
	}
	req_p->ireq_navail += new_req.ireq_navail;
	list_remove(&pool_p->ipool_req_list, req_p);
	list_remove(&pool_p->ipool_req_list, &new_req);
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_remove()
 *
 *	Remove a request from an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_remove(dev_info_t *dip)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nmin;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));

	/* Validate parameters */
	if (dip == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check if the device has a request */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: not found\n"));
		return (DDI_EINVAL);
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/* Remove request */
	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
	pool_p->ipool_minno -= nmin;
	pool_p->ipool_reqno -= req_p->ireq_nreq;
	pool_p->ipool_resno -= req_p->ireq_navail;
	list_remove(&pool_p->ipool_req_list, req_p);

	/* Queue pool to be rebalanced */
	i_ddi_irm_enqueue(pool_p, B_FALSE);

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	/* Destroy the request */
	intr_p->devi_irm_req_p = NULL;
	kmem_free(req_p, sizeof (ddi_irm_req_t));

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_set_cb()
 *
 *	Change the callback flag for a request, in response to
 *	a change in its callback registration.  Then rebalance
 *	the interrupt pool.
 *
 *	NOTE: the request is not locked because the navail value
 *	      is not directly affected.  The balancing thread may
 *	      modify the navail value in the background after it
 *	      locks the request itself.
 */
void
i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nreq;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
	    (void *)dip, (int)has_cb_flag));

	/* Validate parameters */
	if (dip == NULL)
		return;

	/* Check for association with interrupt pool */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
		return;
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/*
	 * Update the request and the pool
	 */
	if (has_cb_flag) {

		/* Update pool statistics */
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);

		/* Update request */
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

		/* Rebalance in background */
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		/* Determine new request size */
		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);

#if defined(__i386) || defined(__amd64)
		/* Use the default static limit for non-IRM drivers */
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
			nreq = MIN(nreq, ddi_msix_alloc_limit);
#endif

		/* Update pool statistics */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
			pool_p->ipool_minno -= 1;
			pool_p->ipool_minno += nreq;
		} else {
			pool_p->ipool_minno -= req_p->ireq_nreq;
			pool_p->ipool_minno += nreq;
		}

		/* Update request size, and re-sort in pool */
		req_p->ireq_nreq = nreq;
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Rebalance synchronously, before losing callback */
		i_ddi_irm_enqueue(pool_p, B_TRUE);

		/* Remove callback flag */
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);
}

/*
 * i_ddi_irm_supported()
 *
 *	Query if IRM is supported by a driver using a specific interrupt type.
 *	Notice that IRM is limited to MSI-X users with registered callbacks.
 */
int
i_ddi_irm_supported(dev_info_t *dip, int type)
{
	ddi_cb_t	*cb_p = DEVI(dip)->devi_cb_p;

	return ((DDI_IRM_HAS_CB(cb_p) && (type == DDI_INTR_TYPE_MSIX)) ?
	    DDI_SUCCESS : DDI_ENOTSUP);
}
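
/*
 * Driver-side sketch (hypothetical driver 'xx'; xx_intr_cb follows the
 * ddi_cb_func_t(9F) signature): a driver opts in to IRM by registering
 * an interrupt callback before allocating its MSI-X vectors, e.g.:
 *
 *	ddi_cb_handle_t	cb_hdl;
 *
 *	if (ddi_cb_register(dip, DDI_CB_FLAG_INTR, xx_intr_cb,
 *	    xx_statep, NULL, &cb_hdl) != DDI_SUCCESS)
 *		... continue as a fixed-size, non-IRM consumer ...
 */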

/*
 * Interrupt Pool Balancing
 */

/*
 * irm_balance_thread()
 *
 *	One instance of this thread operates for each defined IRM pool.
 *	It does the initial activation of the pool, as well as balancing
 *	any requests that were queued up before the pool was active.
 *	Once active, it waits forever to service balance operations.
 */
static void
irm_balance_thread(ddi_irm_pool_t *pool_p)
{
	clock_t		interval;

	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
	    (void *)pool_p));

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Perform initial balance if required */
	if (pool_p->ipool_reqno > pool_p->ipool_resno)
		i_ddi_irm_balance(pool_p);

	/* Activate the pool */
	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;

	/*
	 * Main loop.
	 * Iterate once before waiting on the condition variable, in
	 * case a balance was queued before this thread was created.
	 */
	for (;;) {

		/* Compute the delay interval */
		interval = drv_usectohz(irm_balance_delay * 1000000);

		/* Wait one interval, or until there are waiters */
		if ((interval > 0) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
			(void) cv_reltimedwait(&pool_p->ipool_cv,
			    &pool_p->ipool_lock, interval, TR_CLOCK_TICK);
		}

		/* Check if awakened to exit */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "irm_balance_thread: exiting...\n"));
			mutex_exit(&pool_p->ipool_lock);
			thread_exit();
		}

		/* Balance the pool */
		i_ddi_irm_balance(pool_p);

		/* Notify waiters */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
			cv_broadcast(&pool_p->ipool_cv);
			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
		}

		/* Clear QUEUED condition */
		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);

		/* Sleep until queued */
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);

		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
	}
}

/*
 * i_ddi_irm_balance()
 *
 *	Balance a pool.  The general algorithm is to first reset all
 *	requests to their maximum size, use reduction algorithms to
 *	solve any imbalance, and then notify affected drivers.
 */
static void
i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
{
	ddi_irm_req_t	*req_p;

#ifdef	DEBUG
	uint_t		debug_totsz = 0;
	int		debug_policy = 0;
#endif	/* DEBUG */

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
	    (void *)pool_p));

#ifndef DEBUG
	if (pool_p->ipool_reqno == pool_p->ipool_resno) {
#else
	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
#endif  /* DEBUG */
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_balance: pool already balanced\n"));
		return;
	}

#ifdef	DEBUG	/* Adjust size and policy settings */
	if (irm_debug_size > pool_p->ipool_minno) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
		    irm_debug_size));
		debug_totsz = pool_p->ipool_totsz;
		pool_p->ipool_totsz = irm_debug_size;
	}
	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
		debug_policy = pool_p->ipool_policy;
		pool_p->ipool_policy = irm_debug_policy;
	}
#endif	/* DEBUG */

	/* Lock the availability lock */
	mutex_enter(&pool_p->ipool_navail_lock);

	/*
	 * Put all of the reducible requests into a scratch list.
	 * Reset each one of them to their maximum availability.
	 */
	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
			pool_p->ipool_resno -= req_p->ireq_navail;
			req_p->ireq_scratch = req_p->ireq_navail;
			req_p->ireq_navail = req_p->ireq_nreq;
			pool_p->ipool_resno += req_p->ireq_navail;
			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/* Balance the requests */
	i_ddi_irm_reduce(pool_p);

	/* Unlock the availability lock */
	mutex_exit(&pool_p->ipool_navail_lock);

	/*
	 * Process REMOVE notifications.
	 *
	 * If a driver fails to release interrupts: exclude it from
	 * further processing, correct the resulting imbalance, and
	 * start over again at the head of the scratch list.
	 */
	req_p = list_head(&pool_p->ipool_scratch_list);
	while (req_p) {
		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
			list_remove(&pool_p->ipool_scratch_list, req_p);
			mutex_enter(&pool_p->ipool_navail_lock);
			i_ddi_irm_reduce(pool_p);
			mutex_exit(&pool_p->ipool_navail_lock);
			req_p = list_head(&pool_p->ipool_scratch_list);
		} else {
			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/*
	 * Process ADD notifications.
	 *
	 * This is the last use of the scratch list, so empty it.
	 */
	while ((req_p = list_remove_head(&pool_p->ipool_scratch_list)) !=
	    NULL) {
		if (req_p->ireq_navail > req_p->ireq_scratch) {
			(void) i_ddi_irm_notify(pool_p, req_p);
		}
	}

#ifdef	DEBUG	/* Restore size and policy settings */
	if (debug_totsz != 0)
		pool_p->ipool_totsz = debug_totsz;
	if (debug_policy != 0)
		pool_p->ipool_policy = debug_policy;
#endif	/* DEBUG */
}

/*
 * i_ddi_irm_reduce()
 *
 *	Use reduction algorithms to correct an imbalance in a pool.
 */
static void
i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
{
	int	imbalance;

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
	    (void *)pool_p));

	/* Compute the imbalance.  Do nothing if already balanced. */
	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
		return;

	/*
	 * Try policy based reduction first.  If it fails, then
	 * possibly reduce new requests as a last resort.
	 */
	if (i_ddi_irm_reduce_by_policy(pool_p, imbalance, pool_p->ipool_policy)
	    != DDI_SUCCESS) {

		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_reduce: policy reductions failed.\n"));

		/* Compute remaining imbalance */
		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;

		ASSERT(imbalance > 0);

		i_ddi_irm_reduce_new(pool_p, imbalance);
	}
}

/*
 * i_ddi_irm_enqueue()
 *
 *	Queue a pool to be balanced.  Signals the balancing thread to wake
 *	up and process the pool.  If 'wait_flag' is true, then the current
 *	thread becomes a waiter and blocks until the balance is completed.
 */
static void
i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
{
	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
	    (void *)pool_p, (int)wait_flag));

	/* Do nothing if pool is already balanced */
#ifndef	DEBUG
	if (pool_p->ipool_reqno == pool_p->ipool_resno) {
#else
	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
#endif	/* DEBUG */
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_enqueue: pool already balanced\n"));
		return;
	}

	/* Avoid deadlocks when IRM is not active */
	if (!irm_active && wait_flag) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_enqueue: pool not active.\n"));
		return;
	}

	if (wait_flag)
		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;

	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
		cv_signal(&pool_p->ipool_cv);
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
	}

	if (wait_flag) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
	}
}

/*
 * i_ddi_irm_reduce_by_policy()
 *
 *	Reduces requests based on reduction policies.
 *
 *	For the DDI_IRM_POLICY_LARGE reduction policy, the algorithm
 *	generally reduces larger requests first, before advancing
 *	to smaller requests.
 *	For the DDI_IRM_POLICY_EVEN reduction policy, the algorithm
 *	reduces requests evenly, without giving a specific preference
 *	to smaller or larger requests.  Each iteration reduces all
 *	reducible requests by the same amount until the imbalance is
 *	corrected.
 *
 *	The scratch list is initially sorted in descending order by
 *	current navail values, which are maximized prior to reduction.
 *	This sorted order is preserved, and the algorithm avoids
 *	reducing requests below the interrupt pool's default allocation
 *	size (ipool_defsz).
 *
 *	Optimizations in this algorithm include trying to reduce
 *	multiple requests together, and reducing in larger increments
 *	when possible to minimize the total number of iterations.
 */
static int
i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *pool_p, int imbalance, int policy)
{
	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	while (imbalance > 0) {
		list_t		*slist_p = &pool_p->ipool_scratch_list;
		ddi_irm_req_t	*req_p = list_head(slist_p), *last_p;
		uint_t		nreduce = 0, nremain = 0, stop_navail;
		uint_t		pool_defsz = pool_p->ipool_defsz;
		uint_t		reduction, max_redu;

		/* Fail if none are reducible */
		if (!req_p || req_p->ireq_navail <= pool_defsz) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "i_ddi_irm_reduce_by_policy: Failure. "
			    "All requests have downsized to low limit.\n"));
			return (DDI_FAILURE);
		}

		/* Count reducible requests */
		stop_navail = (policy == DDI_IRM_POLICY_LARGE) ?
		    req_p->ireq_navail - 1 : pool_defsz;
		for (; req_p; req_p = list_next(slist_p, req_p)) {
			if (req_p->ireq_navail <= stop_navail)
				break;
			nreduce++;
		}

		/* Compute reduction */
		last_p = req_p ? list_prev(slist_p, req_p) : list_tail(slist_p);
		if ((policy == DDI_IRM_POLICY_LARGE) && req_p &&
		    req_p->ireq_navail > pool_defsz)
			reduction = last_p->ireq_navail - req_p->ireq_navail;
		else
			reduction = last_p->ireq_navail - pool_defsz;

		if ((max_redu = reduction * nreduce) > imbalance) {
			reduction = imbalance / nreduce;
			nremain = imbalance % nreduce;
			pool_p->ipool_resno -= imbalance;
			imbalance = 0;
		} else {
			pool_p->ipool_resno -= max_redu;
			imbalance -= max_redu;
		}

		/* Reduce */
		for (req_p = list_head(slist_p); (reduction != 0) && nreduce--;
		    req_p = list_next(slist_p, req_p)) {
			req_p->ireq_navail -= reduction;
		}

		for (req_p = last_p; nremain--;
		    req_p = list_prev(slist_p, req_p)) {
			req_p->ireq_navail--;
		}
	}

	return (DDI_SUCCESS);
}
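
/*
 * Worked example (hypothetical numbers): with ipool_defsz = 2, an
 * imbalance of 5, policy DDI_IRM_POLICY_LARGE, and a scratch list of
 * navail values {8, 6, 6, 2}:
 *
 *	Pass 1: only the first request exceeds stop_navail (7), so
 *	nreduce = 1 and reduction = 8 - 6 = 2; max_redu (2) is below
 *	the imbalance, leaving {6, 6, 6, 2} and an imbalance of 3.
 *
 *	Pass 2: three requests exceed stop_navail (5), so nreduce = 3;
 *	max_redu (3 * 4 = 12) exceeds the imbalance, so reduction
 *	becomes 3 / 3 = 1 with no remainder, leaving {5, 5, 5, 2} and
 *	a corrected imbalance of 0.
 */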

/*
 * i_ddi_irm_reduce_new()
 *
 *	Reduces new requests.  This is only used as a last resort
 *	after another reduction algorithm failed.
 *
 *	NOTE: The pool locking in i_ddi_irm_insert() ensures
 *	there can be only one new request at a time in a pool.
 */
static void
i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
{
	ddi_irm_req_t	*req_p;

	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT,
	    "i_ddi_irm_reduce_new: pool_p %p imbalance %d\n",
	    (void *)pool_p, imbalance));

	for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
			ASSERT(req_p->ireq_navail >= imbalance);
			req_p->ireq_navail -= imbalance;
			pool_p->ipool_resno -= imbalance;
			return;
		}
	}

	/* We should never get here */
	ASSERT(B_FALSE);
}

/*
 * Miscellaneous Helper Functions
 */

/*
 * i_ddi_intr_get_pool()
 *
 *	Get an IRM pool that supplies interrupts of a specified type.
 *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
 *	if no pool exists.
 */
ddi_irm_pool_t *
i_ddi_intr_get_pool(dev_info_t *dip, int type)
{
	devinfo_intr_t		*intr_p;
	ddi_irm_pool_t		*pool_p;
	ddi_irm_req_t		*req_p;
	ddi_intr_handle_impl_t	hdl;

	ASSERT(dip != NULL);
	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));

	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
	    (pool_p->ipool_types & type)) {
		return (pool_p);
	}

	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
	hdl.ih_dip = dip;
	hdl.ih_type = type;

	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
		return (pool_p);

	return (NULL);
}

/*
 * i_ddi_irm_insertion_sort()
 *
 *	Use the insertion sort method to insert a request into a list.
 *	The list is sorted in descending order by request size.
 */
static void
i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
{
	ddi_irm_req_t	*next_p;

	next_p = list_head(req_list);

	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
		next_p = list_next(req_list, next_p);

	list_insert_before(req_list, next_p, req_p);
}

/*
 * i_ddi_irm_notify()
 *
 *	Notify a driver of changes to its interrupt request using the
 *	generic callback mechanism.  Checks for errors in processing.
 */
static int
i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
{
	ddi_cb_action_t	action;
	ddi_cb_t	*cb_p;
	uint_t		nintrs;
	int		ret, count;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
	    (void *)pool_p, (void *)req_p));

	/* Do not notify new or unchanged requests */
	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
		return (DDI_SUCCESS);

	/* Determine action and count */
	if (req_p->ireq_navail > req_p->ireq_scratch) {
		action = DDI_CB_INTR_ADD;
		count = req_p->ireq_navail - req_p->ireq_scratch;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
		    count));
	} else {
		action = DDI_CB_INTR_REMOVE;
		count = req_p->ireq_scratch - req_p->ireq_navail;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
		    count));
	}

	/* Lookup driver callback */
	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
		return (DDI_FAILURE);
	}

	/* Do callback */
	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
	    cb_p->cb_arg1, cb_p->cb_arg2);

	/* Log callback errors */
	if (ret != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
	}

	/* Check if the driver exceeds its availability */
	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
	if (nintrs > req_p->ireq_navail) {
		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
		    "(nintrs=%d, navail=%d).\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), nintrs,
		    req_p->ireq_navail);
		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
		req_p->ireq_navail = nintrs;
		return (DDI_FAILURE);
	}

	/* Update request */
	req_p->ireq_scratch = req_p->ireq_navail;

	return (DDI_SUCCESS);
}
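
/*
 * Driver-side sketch of the callback itself (hypothetical helpers
 * xx_alloc_intrs()/xx_free_intrs(); the action values and the count
 * argument match the cb_func invocation above):
 *
 *	static int
 *	xx_intr_cb(dev_info_t *dip, ddi_cb_action_t action, void *cbarg,
 *	    void *arg1, void *arg2)
 *	{
 *		int count = (int)(uintptr_t)cbarg;
 *
 *		switch (action) {
 *		case DDI_CB_INTR_ADD:
 *			return (xx_alloc_intrs(arg1, count));
 *		case DDI_CB_INTR_REMOVE:
 *			return (xx_free_intrs(arg1, count));
 *		default:
 *			return (DDI_ENOTSUP);
 *		}
 *	}
 */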

/*
 * i_ddi_irm_debug_balance()
 *
 *	A debug/test only routine to force the immediate,
 *	synchronous rebalancing of an interrupt pool.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	if (((type = i_ddi_intr_get_current_type(dip)) != 0) &&
	    ((pool_p = i_ddi_intr_get_pool(dip, type)) != NULL)) {
		mutex_enter(&pool_p->ipool_lock);
		i_ddi_irm_enqueue(pool_p, wait_flag);
		mutex_exit(&pool_p->ipool_lock);
	}
}
#endif
