/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright 2019 Justin Hibbits
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_platform.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>

#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>

#ifdef POWERNV
#include <powerpc/powernv/opal.h>
#endif

#include "pic_if.h"

#define XIVE_PRIORITY	7	/* Random non-zero number */
#define MAX_XIVE_IRQS	(1<<24)	/* 24-bit XIRR field */

/* Registers */
#define	XIVE_TM_QW1_OS		0x010	/* Guest OS registers */
#define	XIVE_TM_QW2_HV_POOL	0x020	/* Hypervisor pool registers */
#define	XIVE_TM_QW3_HV		0x030	/* Hypervisor registers */

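/*
 * Byte offsets of the per-thread interrupt management (TIMA) registers
 * within a ring.  These are applied relative to one of the ring bases
 * above; this driver uses the QW3 hypervisor ring (see sc_offset in
 * xive_attach()).
 */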
#define	XIVE_TM_NSR	0x00
#define	XIVE_TM_CPPR	0x01
#define	XIVE_TM_IPB	0x02
#define	XIVE_TM_LSMFB	0x03
#define	XIVE_TM_ACK_CNT	0x04
#define	XIVE_TM_INC	0x05
#define	XIVE_TM_AGE	0x06
#define	XIVE_TM_PIPR	0x07

#define	TM_WORD0	0x0
#define	TM_WORD2	0x8
#define	  TM_QW2W2_VP	  0x80000000

#define	XIVE_TM_SPC_ACK			0x800
#define	  TM_QW3NSR_HE_SHIFT		  14
#define	  TM_QW3_NSR_HE_NONE		  0
#define	  TM_QW3_NSR_HE_POOL		  1
#define	  TM_QW3_NSR_HE_PHYS		  2
#define	  TM_QW3_NSR_HE_LSI		  3
#define	XIVE_TM_SPC_PULL_POOL_CTX	0x828

#define	XIVE_IRQ_LOAD_EOI	0x000
#define	XIVE_IRQ_STORE_EOI	0x400
#define	XIVE_IRQ_PQ_00		0xc00
#define	XIVE_IRQ_PQ_01		0xd00

#define	XIVE_IRQ_VAL_P		0x02
#define	XIVE_IRQ_VAL_Q		0x01
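
/*
 * Each interrupt source carries two ESB (Event State Buffer) bits: P, set
 * when an event has been presented to a queue, and Q, set when a further
 * trigger arrives while P is set.  A load from the PQ_00 or PQ_01 offset of
 * the EOI page atomically moves the source into that PQ state and returns
 * the previous P/Q bits; the mask, unmask, and EOI methods below are built
 * on this behavior.
 */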

struct xive_softc;
struct xive_irq;

extern void (*powernv_smp_ap_extra_init)(void);

/* Private support */
static void	xive_setup_cpu(void);
static void	xive_smp_cpu_startup(void);
static void	xive_init_irq(struct xive_irq *irqd, u_int irq);
static struct xive_irq	*xive_configure_irq(u_int irq);
static int	xive_provision_page(struct xive_softc *sc);

/* Interfaces */
static int	xive_probe(device_t);
static int	xive_attach(device_t);
static int	xics_probe(device_t);
static int	xics_attach(device_t);

static void	xive_bind(device_t, u_int, cpuset_t, void **);
static void	xive_dispatch(device_t, struct trapframe *);
static void	xive_enable(device_t, u_int, u_int, void **);
static void	xive_eoi(device_t, u_int, void *);
static void	xive_ipi(device_t, u_int);
static void	xive_mask(device_t, u_int, void *);
static void	xive_unmask(device_t, u_int, void *);
static void	xive_translate_code(device_t dev, u_int irq, int code,
		    enum intr_trigger *trig, enum intr_polarity *pol);

static device_method_t  xive_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		xive_probe),
	DEVMETHOD(device_attach,	xive_attach),

	/* PIC interface */
	DEVMETHOD(pic_bind,		xive_bind),
	DEVMETHOD(pic_dispatch,		xive_dispatch),
	DEVMETHOD(pic_enable,		xive_enable),
	DEVMETHOD(pic_eoi,		xive_eoi),
	DEVMETHOD(pic_ipi,		xive_ipi),
	DEVMETHOD(pic_mask,		xive_mask),
	DEVMETHOD(pic_unmask,		xive_unmask),
	DEVMETHOD(pic_translate_code,	xive_translate_code),

	DEVMETHOD_END
};

static device_method_t  xics_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		xics_probe),
	DEVMETHOD(device_attach,	xics_attach),

	DEVMETHOD_END
};

struct xive_softc {
	struct mtx sc_mtx;
	struct resource *sc_mem;
	vm_size_t	sc_prov_page_size;
	uint32_t	sc_offset;
};

struct xive_queue {
	uint32_t	*q_page;
	uint32_t	*q_eoi_page;
	uint32_t	 q_toggle;
	uint32_t	 q_size;
	uint32_t	 q_index;
	uint32_t	 q_mask;
};

struct xive_irq {
	uint32_t	girq;
	uint32_t	lirq;
	uint64_t	vp;
	uint64_t	flags;
#define	OPAL_XIVE_IRQ_SHIFT_BUG		0x00000008
#define	OPAL_XIVE_IRQ_LSI		0x00000004
#define	OPAL_XIVE_IRQ_STORE_EOI		0x00000002
#define	OPAL_XIVE_IRQ_TRIGGER_PAGE	0x00000001
	uint8_t	prio;
	vm_offset_t	eoi_page;
	vm_offset_t	trig_page;
	vm_size_t	esb_size;
	int		chip;
};

struct xive_cpu {
	uint64_t	vp;
	uint64_t	flags;
	struct xive_irq	ipi_data;
	struct xive_queue	queue; /* We only use a single queue for now. */
	uint64_t	cam;
	uint32_t	chip;
};

static driver_t xive_driver = {
	"xive",
	xive_methods,
	sizeof(struct xive_softc)
};

static driver_t xics_driver = {
	"xivevc",
	xics_methods,
	0
};

EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, 0, 0, BUS_PASS_INTERRUPT - 1);
EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, 0, 0, BUS_PASS_INTERRUPT);

MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory");

DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data);

static int xive_ipi_vector = -1;

/*
 * XIVE Exploitation mode driver.
 *
 * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation
 * mode, and "Exploitation mode".  XICS emulation mode is compatible with the
 * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate
 * hypervisor calls and memory accesses.  Exploitation mode gives us raw access
 * to the XIVE MMIO, improving performance significantly.
 *
 * The XIVE controller is a very bizarre interrupt controller.  It uses queues
 * in memory to pass interrupts around, and maps itself into 512GB of physical
 * device address space, giving each interrupt in the system one or more pages
 * of address space.  An IRQ is tied to a virtual processor, which could be a
 * physical CPU thread, or a guest CPU thread (LPAR running on a physical
 * thread).  Thus, the controller can route interrupts directly to guest OSes,
 * bypassing processing by the hypervisor and thereby improving performance of
 * the guest OS.
 *
 * An IRQ, in addition to being tied to a virtual processor, has one or two
 * page mappings: an EOI page, and an optional trigger page.  The trigger page
 * can be the same as the EOI page.  Level-sensitive interrupts (LSIs) don't
 * have a trigger page, as they're external interrupts controlled by physical
 * lines.  MSIs and IPIs have trigger pages.  An IPI is really just another IRQ
 * in the XIVE, one that is triggered by software.
 *
 * An interesting behavior of the XIVE controller is that the data involved in
 * an access often doesn't matter: what is significant is the direction of the
 * access (load vs store) and the address used.  Hence, masking and unmasking
 * an interrupt are done by reading different addresses in the EOI page, and
 * triggering an interrupt consists of writing to the trigger page.
 *
 * Additionally, the MMIO region mapped is CPU-sensitive, just like the
 * per-processor register space (private access) in OpenPIC.  In order for a
 * CPU to receive interrupts it must itself configure its CPPR (Current
 * Processor Priority Register); the CPPR cannot be set by any other
 * processor.  This necessitates the xive_smp_cpu_startup() function.
 *
 * Queues are power-of-two-sized pages of memory shared with the XIVE.  The
 * XIVE writes entries into a queue with an alternating polarity bit, which
 * flips each time the queue wraps.
 */
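
/*
 * A concrete example of the queue mechanics, with the one-page queue set up
 * in xive_attach(): the page holds 1024 4-byte entries, and the consumer
 * starts with q_index = 0 and q_toggle = 0.  An entry is valid only while
 * its top bit differs from q_toggle (see xive_read_eq()).  Once q_index
 * wraps past entry 1023 back to 0, q_toggle flips, so entries left over
 * from the previous pass are never mistaken for new ones.
 */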

/*
 * Offset-based read/write interfaces.
 */
static uint16_t
xive_read_2(struct xive_softc *sc, bus_size_t offset)
{

	return (bus_read_2(sc->sc_mem, sc->sc_offset + offset));
}

static void
xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val)
{

	bus_write_1(sc->sc_mem, sc->sc_offset + offset, val);
}

/* EOI and Trigger page access interfaces. */
static uint64_t
xive_read_mmap8(vm_offset_t addr)
{
	return (*(volatile uint64_t *)addr);
}

static void
xive_write_mmap8(vm_offset_t addr, uint64_t val)
{
	/* volatile: this is an MMIO store with side effects. */
	*(volatile uint64_t *)(addr) = val;
}

/* Device interfaces. */
static int
xive_probe(device_t dev)
{

	if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe"))
		return (ENXIO);

	device_set_desc(dev, "External Interrupt Virtualization Engine");

	/* Make sure we always win against the xicp driver. */
	return (BUS_PROBE_DEFAULT);
}

static int
xics_probe(device_t dev)
{

	if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc"))
		return (ENXIO);

	device_set_desc(dev, "External Interrupt Virtualization Engine Root");
	return (BUS_PROBE_DEFAULT);
}

static int
xive_attach(device_t dev)
{
	struct xive_softc *sc = device_get_softc(dev);
	struct xive_cpu *xive_cpud;
	phandle_t phandle = ofw_bus_get_node(dev);
	int64_t vp_block;
	int error;
	int rid;
	int i, order;
	uint64_t vp_id;
	int64_t ipi_irq;

	opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP);

	error = OF_getencprop(phandle, "ibm,xive-provision-page-size",
	    (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size));

	rid = 1;	/* Get the Hypervisor-level register set. */
	sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	sc->sc_offset = XIVE_TM_QW3_HV;

	mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF);

	/* Workaround for qemu single-thread powernv */
	if (mp_maxid == 0)
		order = 1;
	else
		order = fls(mp_maxid + (mp_maxid - 1)) - 1;

	do {
		vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order);
		if (vp_block == OPAL_BUSY)
			DELAY(10);
		else if (vp_block == OPAL_XIVE_PROVISIONING)
			xive_provision_page(sc);
		else
			break;
	} while (1);

	if (vp_block < 0) {
		device_printf(dev,
		    "Unable to allocate VP block.  Opal error %d\n",
		    (int)vp_block);
		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem);
		return (ENXIO);
	}

	/*
	 * Set up the VPs.  Try to do as much as we can in attach, to lessen
	 * what's needed at AP spawn time.
	 */
	CPU_FOREACH(i) {
		vp_id = pcpu_find(i)->pc_hwref;

		xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data);
		xive_cpud->vp = vp_id + vp_block;
		opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL,
		    vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip));

		xive_cpud->cam = be64toh(xive_cpud->cam);
		xive_cpud->chip = be64toh(xive_cpud->chip);

		/* Allocate the queue page and populate the queue state data. */
		xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE,
		    M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
		xive_cpud->queue.q_size = 1 << PAGE_SHIFT;
		xive_cpud->queue.q_mask =
		    ((xive_cpud->queue.q_size / sizeof(int)) - 1);
		xive_cpud->queue.q_toggle = 0;
		xive_cpud->queue.q_index = 0;
		do {
			error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp,
			    OPAL_XIVE_VP_ENABLED, 0);
		} while (error == OPAL_BUSY);
		error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id,
		    XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT,
		    OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED);

		do {
			ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ,
			    xive_cpud->chip);
		} while (ipi_irq == OPAL_BUSY);
		if (ipi_irq < 0)
			device_printf(dev,
			    "Failed allocating IPI.  OPAL error %d\n",
			    (int)ipi_irq);
		else {
			xive_init_irq(&xive_cpud->ipi_data, ipi_irq);
			xive_cpud->ipi_data.vp = vp_id;
			xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS;
			opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq,
			    xive_cpud->ipi_data.vp, XIVE_PRIORITY,
			    MAX_XIVE_IRQS);
		}
	}

	powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS,
	    1 /* Number of IPIs */, FALSE);
	root_pic = dev;

	xive_setup_cpu();
	powernv_smp_ap_extra_init = xive_smp_cpu_startup;

	return (0);
}

static int
xics_attach(device_t dev)
{
	phandle_t phandle = ofw_bus_get_node(dev);

	/* The XIVE (root PIC) will handle all our interrupts */
	powerpc_register_pic(root_pic, OF_xref_from_node(phandle),
	    MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE);

	return (0);
}

/*
 * PIC I/F methods.
 */

static void
xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv)
{
	struct xive_irq *irqd;
	int cpu;
	int ncpus, i, error;

	if (*priv == NULL)
		*priv = xive_configure_irq(irq);

	irqd = *priv;

	/*
	 * This doesn't appear to actually support affinity groups, so pick a
	 * random CPU.
	 */
	ncpus = 0;
	CPU_FOREACH(cpu)
		if (CPU_ISSET(cpu, &cpumask))
			ncpus++;

	i = mftb() % ncpus;
	ncpus = 0;
	CPU_FOREACH(cpu) {
		if (!CPU_ISSET(cpu, &cpumask))
			continue;
		if (ncpus == i)
			break;
		ncpus++;
	}

	opal_call(OPAL_XIVE_SYNC, OPAL_XIVE_SYNC_QUEUE, irq);

	irqd->vp = pcpu_find(cpu)->pc_hwref;
	error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp,
	    XIVE_PRIORITY, irqd->lirq);

	if (error < 0)
		panic("Cannot bind interrupt %d to CPU %d", irq, cpu);

	xive_eoi(dev, irq, irqd);
}

/* Read the next entry in the queue page and update the index. */
static int
xive_read_eq(struct xive_queue *q)
{
	uint32_t i = be32toh(q->q_page[q->q_index]);

	/* Check validity, using current queue polarity. */
	if ((i >> 31) == q->q_toggle)
		return (0);

	q->q_index = (q->q_index + 1) & q->q_mask;

	if (q->q_index == 0)
		q->q_toggle ^= 1;

	return (i & 0x7fffffff);
}

static void
xive_dispatch(device_t dev, struct trapframe *tf)
{
	struct xive_softc *sc;
	struct xive_cpu *xive_cpud;
	uint32_t vector;
	uint16_t ack;
	uint8_t cppr, he;

	sc = device_get_softc(dev);

	xive_cpud = DPCPU_PTR(xive_cpu_data);
	for (;;) {
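		/*
		 * A 16-bit load from the ACK offset acknowledges the pending
		 * interrupt and returns the NSR byte (high) and the CPPR
		 * byte (low); the HE field in the top two bits of the NSR
		 * indicates which ring the event targets.
		 */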
		ack = xive_read_2(sc, XIVE_TM_SPC_ACK);
		cppr = (ack & 0xff);

		he = ack >> TM_QW3NSR_HE_SHIFT;

		if (he == TM_QW3_NSR_HE_NONE)
			break;

		else if (__predict_false(he != TM_QW3_NSR_HE_PHYS)) {
			/*
			 * We don't support TM_QW3_NSR_HE_POOL or
			 * TM_QW3_NSR_HE_LSI interrupts.
			 */
			device_printf(dev,
			    "Unexpected interrupt he type: %d\n", he);
			goto end;
		}

		xive_write_1(sc, XIVE_TM_CPPR, cppr);

		for (;;) {
			vector = xive_read_eq(&xive_cpud->queue);

			if (vector == 0)
				break;

			if (vector == MAX_XIVE_IRQS)
				vector = xive_ipi_vector;

			powerpc_dispatch_intr(vector, tf);
		}
	}
end:
	xive_write_1(sc, XIVE_TM_CPPR, 0xff);
}

static void
xive_enable(device_t dev, u_int irq, u_int vector, void **priv)
{
	struct xive_irq *irqd;
	cell_t status, cpu;

	if (irq == MAX_XIVE_IRQS) {
		if (xive_ipi_vector == -1)
			xive_ipi_vector = vector;
		return;
	}
	if (*priv == NULL)
		*priv = xive_configure_irq(irq);

	irqd = *priv;

	/* Bind to this CPU to start */
	cpu = PCPU_GET(hwref);
	irqd->lirq = vector;

	for (;;) {
		status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu,
		    XIVE_PRIORITY, vector);
		if (status != OPAL_BUSY)
			break;
		DELAY(10);
	}

	if (status != 0)
		panic("OPAL_XIVE_SET_IRQ_CONFIG IRQ %d -> cpu %d failed: %d",
		    irq, cpu, status);

	xive_unmask(dev, irq, *priv);
}

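/*
 * Signal end-of-interrupt.  There are three mechanisms: a store to the
 * STORE_EOI offset for sources that support it, a load from the LOAD_EOI
 * offset for LSIs, and, for everything else, a reset of the source to
 * PQ=00, re-firing it through the trigger page if the returned Q bit shows
 * that another trigger arrived in the meantime.
 */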
static void
xive_eoi(device_t dev, u_int irq, void *priv)
{
	struct xive_irq *rirq;
	struct xive_cpu *cpud;
	uint8_t eoi_val;

	if (irq == MAX_XIVE_IRQS) {
		cpud = DPCPU_PTR(xive_cpu_data);
		rirq = &cpud->ipi_data;
	} else
		rirq = priv;

	if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI)
		xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0);
	else if (rirq->flags & OPAL_XIVE_IRQ_LSI)
		xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI);
	else {
		eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
		if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0)
			xive_write_mmap8(rirq->trig_page, 0);
	}
}

static void
xive_ipi(device_t dev, u_int cpu)
{
	struct xive_cpu *xive_cpud;

	xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data);

	if (xive_cpud->ipi_data.trig_page == 0)
		return;
	xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0);
}

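/*
 * Mask a source by parking it in the PQ=01 ("off") state.  Only LSIs are
 * masked here; for all other sources this is a no-op.
 */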
static void
xive_mask(device_t dev, u_int irq, void *priv)
{
	struct xive_irq *rirq;

	/* Never mask IPIs */
	if (irq == MAX_XIVE_IRQS)
		return;

	rirq = priv;

	if (!(rirq->flags & OPAL_XIVE_IRQ_LSI))
		return;
	xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01);
}

static void
xive_unmask(device_t dev, u_int irq, void *priv)
{
	struct xive_irq *rirq;

	rirq = priv;

	xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
}

static void
xive_translate_code(device_t dev, u_int irq, int code,
    enum intr_trigger *trig, enum intr_polarity *pol)
{
	switch (code) {
	case 0:
		/* L to H edge */
		*trig = INTR_TRIGGER_EDGE;
		*pol = INTR_POLARITY_HIGH;
		break;
	case 1:
		/* Active L level */
		*trig = INTR_TRIGGER_LEVEL;
		*pol = INTR_POLARITY_LOW;
		break;
	default:
		*trig = INTR_TRIGGER_CONFORM;
		*pol = INTR_POLARITY_CONFORM;
	}
}

/* Private functions. */
/*
 * Set up the current CPU.  Called by the BSP at driver attachment, and by
 * each AP at wakeup (via xive_smp_cpu_startup()).
 */
static void
xive_setup_cpu(void)
{
	struct xive_softc *sc;
	struct xive_cpu *cpup;
	uint32_t val;

	cpup = DPCPU_PTR(xive_cpu_data);

	sc = device_get_softc(root_pic);

	val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2);
	if (val & TM_QW2W2_VP)
		bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX);

	bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff);
	bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2,
	    TM_QW2W2_VP | cpup->cam);

	xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data);
	xive_write_1(sc, XIVE_TM_CPPR, 0xff);
}

/* Populate an IRQ structure, mapping the EOI and trigger pages. */
static void
xive_init_irq(struct xive_irq *irqd, u_int irq)
{
	uint64_t eoi_phys, trig_phys;
	uint32_t esb_shift;

	opal_call(OPAL_XIVE_GET_IRQ_INFO, irq,
	    vtophys(&irqd->flags), vtophys(&eoi_phys),
	    vtophys(&trig_phys), vtophys(&esb_shift),
	    vtophys(&irqd->chip));

	irqd->flags = be64toh(irqd->flags);
	eoi_phys = be64toh(eoi_phys);
	trig_phys = be64toh(trig_phys);
	esb_shift = be32toh(esb_shift);
	irqd->chip = be32toh(irqd->chip);

	irqd->girq = irq;
	irqd->esb_size = 1 << esb_shift;
	irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size);

	if (eoi_phys == trig_phys)
		irqd->trig_page = irqd->eoi_page;
	else if (trig_phys != 0)
		irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys,
		    irqd->esb_size);
	else
		irqd->trig_page = 0;

	opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp),
	    vtophys(&irqd->prio), vtophys(&irqd->lirq));

	irqd->vp = be64toh(irqd->vp);
	/* prio is written by OPAL as a single byte; no swap is needed. */
	irqd->lirq = be32toh(irqd->lirq);
}

/* Allocate an IRQ struct before populating it. */
static struct xive_irq *
xive_configure_irq(u_int irq)
{
	struct xive_irq *irqd;

	irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK);

	xive_init_irq(irqd, irq);

	return (irqd);
}

/*
 * Part of the OPAL API.  OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages,
 * provisioned through this call.
 */
static int
xive_provision_page(struct xive_softc *sc)
{
	void *prov_page;
	int error;

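	/*
	 * Pages handed to OPAL_XIVE_DONATE_PAGE become firmware-owned
	 * provisioning memory for the XIVE and are not returned to the
	 * kernel.
	 */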
	do {
		prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE, 0,
		    0, BUS_SPACE_MAXADDR,
		    sc->sc_prov_page_size, sc->sc_prov_page_size);

		error = opal_call(OPAL_XIVE_DONATE_PAGE, -1,
		    vtophys(prov_page));
	} while (error == OPAL_XIVE_PROVISIONING);

	return (0);
}

/* The XIVE_TM_CPPR register must be set by each thread */
static void
xive_smp_cpu_startup(void)
{

	xive_setup_cpu();
}