1/*
2 * Copyright (c) 2015 Jordan Hargrave <jordan_hargrave@hotmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17#include <sys/param.h>
18#include <sys/systm.h>
19#include <sys/kernel.h>
20#include <sys/device.h>
21#include <sys/malloc.h>
22#include <sys/queue.h>
23#include <sys/types.h>
24#include <sys/mbuf.h>
25#include <sys/proc.h>
26
27#include <uvm/uvm_extern.h>
28
29#include <machine/apicvar.h>
30#include <machine/biosvar.h>
31#include <machine/cpuvar.h>
32#include <machine/bus.h>
33
34#include <dev/acpi/acpireg.h>
35#include <dev/acpi/acpivar.h>
36#include <dev/acpi/acpidev.h>
37#include <dev/acpi/amltypes.h>
38#include <dev/acpi/dsdt.h>
39
40#include <machine/i8259.h>
41#include <machine/i82093reg.h>
42#include <machine/i82093var.h>
43#include <machine/i82489reg.h>
44#include <machine/i82489var.h>
45
46#include <machine/mpbiosvar.h>
47
48#include <dev/pci/pcireg.h>
49#include <dev/pci/pcivar.h>
50#include <dev/pci/pcidevs.h>
51#include <dev/pci/ppbreg.h>
52
53#include "ioapic.h"
54
55#include "acpidmar.h"
56#include "amd_iommu.h"
57
/* We don't want the IOMMU to remap MSI */
59#define MSI_BASE_ADDRESS	0xFEE00000L
60#define MSI_BASE_SIZE		0x00100000L
61#define MAX_DEVFN		65536
62
#ifdef IOMMU_DEBUG
int acpidmar_dbg_lvl = 0;
#define DPRINTF(lvl,x...)	do { if (acpidmar_dbg_lvl >= (lvl)) { printf(x); } } while (0)
#else
#define DPRINTF(lvl,x...)	do { } while (0)
#endif
69
70#ifdef DDB
71int	acpidmar_ddb = 0;
72#endif
73
74int	acpidmar_force_cm = 1;
75
76/* Page Table Entry per domain */
77struct iommu_softc;
78
79static inline int
80mksid(int b, int d, int f)
81{
82	return (b << 8) + (d << 3) + f;
83}
84
85static inline int
86sid_devfn(int sid)
87{
88	return sid & 0xff;
89}
90
91static inline int
92sid_bus(int sid)
93{
94	return (sid >> 8) & 0xff;
95}
96
97static inline int
98sid_dev(int sid)
99{
100	return (sid >> 3) & 0x1f;
101}
102
103static inline int
104sid_fun(int sid)
105{
106	return (sid >> 0) & 0x7;
107}
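
/*
 * Example source-id encoding: a device at bus 0x3a, device 0x1f,
 * function 2 gives
 *
 *	mksid(0x3a, 0x1f, 2) = 0x3afa
 *	sid_bus(0x3afa)   = 0x3a
 *	sid_devfn(0x3afa) = 0xfa
 *	sid_dev(0x3afa)   = 0x1f
 *	sid_fun(0x3afa)   = 0x2
 *
 * matching the PCI requester-id layout bus[15:8] dev[7:3] fn[2:0].
 */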
108
109/* Alias mapping */
110#define SID_INVALID 0x80000000L
111static uint32_t sid_flag[MAX_DEVFN];
112
113struct domain_dev {
114	int			sid;
115	int			sec;
116	int			sub;
117	TAILQ_ENTRY(domain_dev)	link;
118};
119
120struct domain {
121	struct iommu_softc	*iommu;
122	int			did;
123	int			gaw;
124	struct pte_entry	*pte;
125	paddr_t			ptep;
126	struct bus_dma_tag	dmat;
127	int			flag;
128
129	struct mutex		exlck;
130	char			exname[32];
131	struct extent		*iovamap;
132	TAILQ_HEAD(,domain_dev)	devices;
133	TAILQ_ENTRY(domain)	link;
134};
135
136#define DOM_DEBUG 0x1
137#define DOM_NOMAP 0x2
138
139struct dmar_devlist {
140	int				type;
141	int				bus;
142	int				ndp;
143	struct acpidmar_devpath		*dp;
144	TAILQ_ENTRY(dmar_devlist)	link;
145};
146
147TAILQ_HEAD(devlist_head, dmar_devlist);
148
149struct ivhd_devlist {
150	int				start_id;
151	int				end_id;
152	int				cfg;
153	TAILQ_ENTRY(ivhd_devlist)	link;
154};
155
156struct rmrr_softc {
157	TAILQ_ENTRY(rmrr_softc)	link;
158	struct devlist_head	devices;
159	int			segment;
160	uint64_t		start;
161	uint64_t		end;
162};
163
164struct atsr_softc {
165	TAILQ_ENTRY(atsr_softc)	link;
166	struct devlist_head	devices;
167	int			segment;
168	int			flags;
169};
170
171struct iommu_pic {
172	struct pic		pic;
173	struct iommu_softc	*iommu;
174};
175
176#define IOMMU_FLAGS_CATCHALL		0x1
177#define IOMMU_FLAGS_BAD			0x2
178#define IOMMU_FLAGS_SUSPEND		0x4
179
180struct iommu_softc {
181	TAILQ_ENTRY(iommu_softc)link;
182	struct devlist_head	devices;
183	int			id;
184	int			flags;
185	int			segment;
186
187	struct mutex		reg_lock;
188
189	bus_space_tag_t		iot;
190	bus_space_handle_t	ioh;
191
192	uint64_t		cap;
193	uint64_t		ecap;
194	uint32_t		gcmd;
195
196	int			mgaw;
197	int			agaw;
198	int			ndoms;
199
200	struct root_entry	*root;
201	struct context_entry	*ctx[256];
202
203	void			*intr;
204	struct iommu_pic	pic;
205	int			fedata;
206	uint64_t		feaddr;
207	uint64_t		rtaddr;
208
209	/* Queued Invalidation */
210	int			qi_head;
211	int			qi_tail;
212	paddr_t			qip;
213	struct qi_entry		*qi;
214
215	struct domain		*unity;
216	TAILQ_HEAD(,domain)	domains;
217
218	/* AMD iommu */
219	struct ivhd_dte		*dte;
220	void			*cmd_tbl;
221	void			*evt_tbl;
222	paddr_t			cmd_tblp;
223	paddr_t			evt_tblp;
224};
225
226static inline int iommu_bad(struct iommu_softc *sc)
227{
228	return (sc->flags & IOMMU_FLAGS_BAD);
229}
230
231static inline int iommu_enabled(struct iommu_softc *sc)
232{
233	if (sc->dte) {
234		return 1;
235	}
236	return (sc->gcmd & GCMD_TE);
237}
238
239struct acpidmar_softc {
240	struct device		sc_dev;
241
242	pci_chipset_tag_t	sc_pc;
243	bus_space_tag_t		sc_memt;
244	int			sc_haw;
245	int			sc_flags;
246	bus_dma_tag_t		sc_dmat;
247
248	struct ivhd_dte		*sc_hwdte;
249	paddr_t			sc_hwdtep;
250
251	TAILQ_HEAD(,iommu_softc)sc_drhds;
252	TAILQ_HEAD(,rmrr_softc)	sc_rmrrs;
253	TAILQ_HEAD(,atsr_softc)	sc_atsrs;
254};
255
256int		acpidmar_activate(struct device *, int);
257int		acpidmar_match(struct device *, void *, void *);
258void		acpidmar_attach(struct device *, struct device *, void *);
259struct domain	*acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
260
261const struct cfattach acpidmar_ca = {
262	sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach, NULL,
263	acpidmar_activate
264};
265
266struct cfdriver acpidmar_cd = {
267	NULL, "acpidmar", DV_DULL
268};
269
270struct		acpidmar_softc *acpidmar_sc;
271int		acpidmar_intr(void *);
272int		acpiivhd_intr(void *);
273
274#define DID_UNITY 0x1
275
276void _dumppte(struct pte_entry *, int, vaddr_t);
277
278struct domain *domain_create(struct iommu_softc *, int);
279struct domain *domain_lookup(struct acpidmar_softc *, int, int);
280
281void domain_unload_map(struct domain *, bus_dmamap_t);
282void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
283
284void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
285void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
286void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
287void domain_map_pthru(struct domain *, paddr_t, paddr_t);
288
289void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
290void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
291    struct devlist_head *);
292int acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
293
294void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
295void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
296void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
297void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
298void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
299
300void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
301    const char *);
302
303void iommu_write_4(struct iommu_softc *, int, uint32_t);
304uint32_t iommu_read_4(struct iommu_softc *, int);
305void iommu_write_8(struct iommu_softc *, int, uint64_t);
306uint64_t iommu_read_8(struct iommu_softc *, int);
307void iommu_showfault(struct iommu_softc *, int,
308    struct fault_entry *);
309void iommu_showcfg(struct iommu_softc *, int);
310
311int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
312    struct acpidmar_drhd *);
313int iommu_enable_translation(struct iommu_softc *, int);
314void iommu_enable_qi(struct iommu_softc *, int);
315void iommu_flush_cache(struct iommu_softc *, void *, size_t);
316void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
317void iommu_flush_write_buffer(struct iommu_softc *);
318void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
319
320void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
321void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
322void iommu_flush_tlb(struct iommu_softc *, int, int);
323void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
324
325void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
326
327void *iommu_alloc_hwdte(struct acpidmar_softc *, size_t, paddr_t *);
328
329const char *dmar_bdf(int);
330
331const char *
332dmar_bdf(int sid)
333{
334	static char	bdf[32];
335
336	snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
337	    sid_bus(sid), sid_dev(sid), sid_fun(sid));
338
339	return (bdf);
340}
341
342/* busdma */
343static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
344    bus_size_t, int, bus_dmamap_t *);
345static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
346static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
347    struct proc *, int);
348static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
349    int);
350static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
351static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
352    bus_dma_segment_t *, int, bus_size_t, int);
353static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
354static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
355    bus_size_t, int);
356static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
357    bus_dma_segment_t *, int, int *, int);
358static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
359static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
360    caddr_t *, int);
361static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
362static paddr_t	dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
363    int, int);
364
365static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
366const char *dom_bdf(struct domain *);
367void domain_map_check(struct domain *);
368
369struct pte_entry *pte_lvl(struct iommu_softc *, struct pte_entry *, vaddr_t, int, uint64_t);
370int  ivhd_poll_events(struct iommu_softc *);
371void ivhd_showreg(struct iommu_softc *);
372void ivhd_showdte(struct iommu_softc *);
373void ivhd_showcmd(struct iommu_softc *);
374
375static inline int
376debugme(struct domain *dom)
377{
	return 0;	/* XXX: debug output disabled; remove to honor DOM_DEBUG */
379	return (dom->flag & DOM_DEBUG);
380}
381
382void
383domain_map_check(struct domain *dom)
384{
385	struct iommu_softc *iommu;
386	struct domain_dev *dd;
387	struct context_entry *ctx;
388	int v;
389
390	iommu = dom->iommu;
391	TAILQ_FOREACH(dd, &dom->devices, link) {
392		acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
393
394		if (iommu->dte)
395			continue;
396
397		/* Check if this is the first time we are mapped */
398		ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
399		v = context_user(ctx);
400		if (v != 0xA) {
401			printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
402			    iommu->segment,
403			    sid_bus(dd->sid),
404			    sid_dev(dd->sid),
405			    sid_fun(dd->sid),
406			    iommu->id,
407			    dom->did);
408			context_set_user(ctx, 0xA);
409		}
410	}
411}
412
413/* Map a single page as passthrough - used for DRM */
414void
415dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
416{
417	struct domain *dom = tag->_cookie;
418
419	if (!acpidmar_sc)
420		return;
421	domain_map_check(dom);
422	domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
423}
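
/*
 * Usage sketch (hypothetical caller, placeholder names): a DRM driver
 * that hands the device a physical page `pa' outside the normal
 * bus_dma path can keep that page reachable once translation is on by
 * also mapping it 1:1 through the tag installed by acpidmar_pci_hook():
 *
 *	dmar_ptmap(sc->sc_dmat, trunc_page(pa));
 */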
424
425/* Map a range of pages 1:1 */
426void
427domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
428{
429	domain_map_check(dom);
430	while (start < end) {
431		domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
432		start += VTD_PAGE_SIZE;
433	}
434}
435
/* Map a single page: IOMMU virtual address to physical address (Intel) */
437void
438domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
439{
440	paddr_t paddr;
441	struct pte_entry *pte, *npte;
442	int lvl, idx;
443	struct iommu_softc *iommu;
444
445	iommu = dom->iommu;
446	/* Insert physical address into virtual address map
447	 * XXX: could we use private pmap here?
448	 * essentially doing a pmap_enter(map, va, pa, prot);
449	 */
450
451	/* Only handle 4k pages for now */
452	npte = dom->pte;
	for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl >= VTD_LEVEL0;
454	    lvl -= VTD_STRIDE_SIZE) {
455		idx = (va >> lvl) & VTD_STRIDE_MASK;
456		pte = &npte[idx];
457		if (lvl == VTD_LEVEL0) {
458			/* Level 1: Page Table - add physical address */
459			pte->val = pa | flags;
460			iommu_flush_cache(iommu, pte, sizeof(*pte));
461			break;
462		} else if (!(pte->val & PTE_P)) {
463			/* Level N: Point to lower level table */
464			iommu_alloc_page(iommu, &paddr);
465			pte->val = paddr | PTE_P | PTE_R | PTE_W;
466			iommu_flush_cache(iommu, pte, sizeof(*pte));
467		}
468		npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
469	}
470}
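
/*
 * Walk sketch for the Intel loop above, assuming agaw == 48 (4-level
 * tables, VTD_STRIDE_SIZE == 9, VTD_LEVEL0 == 12): the loop visits
 * lvl = 39, 30, 21, 12 and indexes each table with
 *
 *	idx = (va >> lvl) & VTD_STRIDE_MASK;	(9 bits per level)
 *
 * allocating intermediate tables on demand until the leaf entry at
 * lvl == 12 receives pa | flags.
 */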
471
/* Map a single page, IOMMU virtual address to physical address: AMD
 * virtual address breakdown into levels:
 * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
 *        5.55555555.44444444.43333333.33222222.22211111.1111----.--------
476 * mode:
477 *  000 = none   shift
478 *  001 = 1 [21].12
479 *  010 = 2 [30].21
480 *  011 = 3 [39].30
481 *  100 = 4 [48].39
482 *  101 = 5 [57]
483 *  110 = 6
484 *  111 = reserved
485 */
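
/*
 * Worked example of the (va >> shift) & VTD_STRIDE_MASK indexing done
 * by pte_lvl() and domain_map_page_amd() below, for the 3-level layout
 * (shifts 30/21/12) and va = 0x123456000:
 *
 *	level 3 index = (va >> 30) & 0x1FF = 0x004
 *	level 2 index = (va >> 21) & 0x1FF = 0x11a
 *	level 1 index = (va >> 12) & 0x1FF = 0x056
 *
 * with the low 12 bits (0x000 here) left as the page offset.
 */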
486struct pte_entry *
487pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
488	int shift, uint64_t flags)
489{
490	paddr_t paddr;
491	int idx;
492
493	idx = (va >> shift) & VTD_STRIDE_MASK;
494	if (!(pte[idx].val & PTE_P)) {
495		/* Page Table entry is not present... create a new page entry */
496		iommu_alloc_page(iommu, &paddr);
497		pte[idx].val = paddr | flags;
498		iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
499	}
500	return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
501}
502
503void
504domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
505{
506	struct pte_entry *pte;
507	struct iommu_softc *iommu;
508	int idx;
509
510	iommu = dom->iommu;
511	/* Insert physical address into virtual address map
512	 * XXX: could we use private pmap here?
513	 * essentially doing a pmap_enter(map, va, pa, prot);
514	 */
515
516	/* Always assume AMD levels=4                           */
517	/*        39        30        21        12              */
518	/* ---------|---------|---------|---------|------------ */
519	pte = dom->pte;
520	pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
521	pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
522	if (flags)
523		flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
524
525	/* Level 1: Page Table - add physical address */
526	idx = (va >> 12) & 0x1FF;
527	pte[idx].val = pa | flags;
528
529	iommu_flush_cache(iommu, pte, sizeof(*pte));
530}
531
532static void
533dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
534    const char *lbl)
535{
536	struct domain *dom = tag->_cookie;
537	int i;
538
	return;		/* XXX: segment dumps disabled; remove to honor debugme() */
540	if (!debugme(dom))
541		return;
542	printf("%s: %s\n", lbl, dom_bdf(dom));
543	for (i = 0; i < nseg; i++) {
544		printf("  %.16llx %.8x\n",
545		    (uint64_t)segs[i].ds_addr,
546		    (uint32_t)segs[i].ds_len);
547	}
548}
549
550/* Unload mapping */
551void
552domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
553{
554	bus_dma_segment_t	*seg;
555	paddr_t			base, end, idx;
556	psize_t			alen;
557	int			i;
558
559	if (iommu_bad(dom->iommu)) {
560		printf("unload map no iommu\n");
561		return;
562	}
563
564	for (i = 0; i < dmam->dm_nsegs; i++) {
565		seg = &dmam->dm_segs[i];
566
567		base = trunc_page(seg->ds_addr);
568		end = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
569		alen = end - base;
570
571		if (debugme(dom)) {
572			printf("  va:%.16llx len:%x\n",
573			    (uint64_t)base, (uint32_t)alen);
574		}
575
576		/* Clear PTE */
577		for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
578			domain_map_page(dom, base + idx, 0, 0);
579
580		if (dom->flag & DOM_NOMAP) {
581			printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
582			continue;
583		}
584
585		mtx_enter(&dom->exlck);
586		if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
587			panic("domain_unload_map: extent_free");
588		}
589		mtx_leave(&dom->exlck);
590	}
591}
592
/* map->dm_segs[x].ds_addr is rewritten to the IOMMU DMA virtual address */
594void
595domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
596{
597	bus_dma_segment_t	*seg;
598	struct iommu_softc	*iommu;
599	paddr_t			base, end, idx;
600	psize_t			alen;
601	u_long			res;
602	int			i;
603
604	iommu = dom->iommu;
605	if (!iommu_enabled(iommu)) {
606		/* Lazy enable translation when required */
607		if (iommu_enable_translation(iommu, 1)) {
608			return;
609		}
610	}
611	domain_map_check(dom);
612	for (i = 0; i < map->dm_nsegs; i++) {
613		seg = &map->dm_segs[i];
614
615		base = trunc_page(seg->ds_addr);
616		end = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
617		alen = end - base;
618		res = base;
619
620		if (dom->flag & DOM_NOMAP) {
621			goto nomap;
622		}
623
624		/* Allocate DMA Virtual Address */
625		mtx_enter(&dom->exlck);
626		if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
627		    map->_dm_boundary, EX_NOWAIT, &res)) {
628			panic("domain_load_map: extent_alloc");
629		}
630		if (res == -1) {
631			panic("got -1 address");
632		}
633		mtx_leave(&dom->exlck);
634
635		/* Reassign DMA address */
636		seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
637nomap:
638		if (debugme(dom)) {
639			printf("  LOADMAP: %.16llx %x => %.16llx\n",
640			    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
641			    (uint64_t)res);
642		}
643		for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
644			domain_map_page(dom, res + idx, base + idx,
645			    PTE_P | pteflag);
646		}
647	}
648	if ((iommu->cap & CAP_CM) || acpidmar_force_cm) {
649		iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
650	} else {
651		iommu_flush_write_buffer(iommu);
652	}
653}
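
/*
 * Rough DMA map lifecycle with remapping active: _bus_dmamap_load*()
 * fills dm_segs[] with physical addresses, domain_load_map() carves a
 * DMA virtual range out of dom->iovamap, rewrites each ds_addr to that
 * range and installs the PTEs; dmar_dmamap_unload() then has
 * domain_unload_map() clear the PTEs and return the range to the
 * extent.
 */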
654
655const char *
656dom_bdf(struct domain *dom)
657{
658	struct domain_dev *dd;
659	static char		mmm[48];
660
661	dd = TAILQ_FIRST(&dom->devices);
662	snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
663	    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
664	    dom->did == DID_UNITY ? " [unity]" : "");
665	return (mmm);
666}
667
668/* Bus DMA Map functions */
669static int
670dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
671    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
672{
673	int rc;
674
675	rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
676	    flags, dmamp);
677	if (!rc) {
678		dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
679		    __FUNCTION__);
680	}
681	return (rc);
682}
683
684static void
685dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
686{
687	dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
688	_bus_dmamap_destroy(tag, dmam);
689}
690
691static int
692dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
693    bus_size_t buflen, struct proc *p, int flags)
694{
695	struct domain *dom = tag->_cookie;
696	int		rc;
697
698	rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
699	if (!rc) {
700		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
701		    __FUNCTION__);
702		domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
703		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
704		    __FUNCTION__);
705	}
706	return (rc);
707}
708
709static int
710dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
711    int flags)
712{
713	struct domain	*dom = tag->_cookie;
714	int		rc;
715
716	rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
717	if (!rc) {
718		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
719		    __FUNCTION__);
720		domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
721		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
722		    __FUNCTION__);
723	}
724	return (rc);
725}
726
727static int
728dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
729    int flags)
730{
731	struct domain	*dom = tag->_cookie;
732	int		rc;
733
734	rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
735	if (!rc) {
736		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
737		    __FUNCTION__);
738		domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
739		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
740		    __FUNCTION__);
741	}
742	return (rc);
743}
744
745static int
746dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
747    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
748{
749	struct domain *dom = tag->_cookie;
750	int rc;
751
752	rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
753	if (!rc) {
754		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
755		    __FUNCTION__);
756		domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
757		dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
758		    __FUNCTION__);
759	}
760	return (rc);
761}
762
763static void
764dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
765{
766	struct domain *dom = tag->_cookie;
767
768	dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
769	domain_unload_map(dom, dmam);
770	_bus_dmamap_unload(tag, dmam);
771}
772
773static void
774dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
775    bus_size_t len, int ops)
776{
777#if 0
778	struct domain *dom = tag->_cookie;
779	int		flag;
780
781	flag = PTE_P;
782	if (ops == BUS_DMASYNC_PREREAD) {
783		/* make readable */
784		flag |= PTE_R;
785	}
786	else if (ops == BUS_DMASYNC_PREWRITE) {
787		/* make writeable */
788		flag |= PTE_W;
789	}
790	dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
791#endif
792	_bus_dmamap_sync(tag, dmam, offset, len, ops);
793}
794
795static int
796dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
797    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
798    int flags)
799{
800	int rc;
801
802	rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
803	    rsegs, flags);
804	if (!rc) {
805		dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
806	}
807	return (rc);
808}
809
810static void
811dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
812{
813	dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
814	_bus_dmamem_free(tag, segs, nsegs);
815}
816
817static int
818dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
819    size_t size, caddr_t *kvap, int flags)
820{
821	dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
822	return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
823}
824
825static void
826dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
827{
828	struct domain	*dom = tag->_cookie;
829
830	if (debugme(dom)) {
831		printf("dmamap_unmap: %s\n", dom_bdf(dom));
832	}
833	_bus_dmamem_unmap(tag, kva, size);
834}
835
836static paddr_t
837dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
838    off_t off, int prot, int flags)
839{
840	dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
841	return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
842}
843
844/*===================================
845 * IOMMU code
846 *===================================*/
847
848/* Intel: Set Context Root Address */
849void
850iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
851{
852	int i, sts;
853
854	mtx_enter(&iommu->reg_lock);
855	iommu_write_8(iommu, DMAR_RTADDR_REG, paddr);
856	iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
857	for (i = 0; i < 5; i++) {
858		sts = iommu_read_4(iommu, DMAR_GSTS_REG);
859		if (sts & GSTS_RTPS)
860			break;
861	}
862	mtx_leave(&iommu->reg_lock);
863
864	if (i == 5) {
865		printf("set_rtaddr fails\n");
866	}
867}
868
869/* Allocate contiguous memory (1Mb) for the Device Table Entries */
870void *
871iommu_alloc_hwdte(struct acpidmar_softc *sc, size_t size, paddr_t *paddr)
872{
873	caddr_t vaddr;
874	bus_dmamap_t map;
875	bus_dma_segment_t seg;
876	bus_dma_tag_t dmat = sc->sc_dmat;
877	int rc, nsegs;
878
879	rc = _bus_dmamap_create(dmat, size, 1, size, 0,
880	    BUS_DMA_NOWAIT, &map);
881	if (rc != 0) {
882		printf("hwdte_create fails\n");
883		return NULL;
884	}
885	rc = _bus_dmamem_alloc(dmat, size, 4, 0, &seg, 1,
886	    &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO);
887	if (rc != 0) {
888		printf("hwdte alloc fails\n");
889		return NULL;
890	}
891	rc = _bus_dmamem_map(dmat, &seg, 1, size, &vaddr,
892	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
893	if (rc != 0) {
894		printf("hwdte map fails\n");
895		return NULL;
896	}
897	rc = _bus_dmamap_load_raw(dmat, map, &seg, 1, size, BUS_DMA_NOWAIT);
898	if (rc != 0) {
899		printf("hwdte load raw fails\n");
900		return NULL;
901	}
902	*paddr = map->dm_segs[0].ds_addr;
903	return vaddr;
904}
905
906/* COMMON: Allocate a new memory page */
907void *
908iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
909{
910	void	*va;
911
912	*paddr = 0;
913	va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
914	if (va == NULL) {
915		panic("can't allocate page");
916	}
917	pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
918	return (va);
919}
920
921
922/* Intel: Issue command via queued invalidation */
923void
924iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
925{
926#if 0
927	struct qi_entry *pi, *pw;
928
929	idx = iommu->qi_head;
930	pi = &iommu->qi[idx];
931	pw = &iommu->qi[(idx+1) % MAXQ];
932	iommu->qi_head = (idx+2) % MAXQ;
933
934	memcpy(pw, &qi, sizeof(qi));
935	issue command;
936	while (pw->xxx)
937		;
938#endif
939}
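
/*
 * A working implementation would roughly: copy the descriptor into the
 * next free slot of iommu->qi[], append a wait descriptor whose status
 * write the CPU can poll, publish the new slots by advancing the queue
 * tail register (DMAR_IQT_REG, as in the disabled setup in
 * iommu_init()), and spin until the wait descriptor completes.  The
 * exact wait-descriptor encoding would need to follow the VT-d spec;
 * it is not defined in this driver.
 */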
940
941/* Intel: Flush TLB entries, Queued Invalidation mode */
942void
943iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
944{
945	struct qi_entry qi;
946
947	/* Use queued invalidation */
948	qi.hi = 0;
949	switch (mode) {
950	case IOTLB_GLOBAL:
951		qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
952		break;
953	case IOTLB_DOMAIN:
954		qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
955		    QI_IOTLB_DID(did);
956		break;
957	case IOTLB_PAGE:
958		qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
959		qi.hi = 0;
960		break;
961	}
962	if (iommu->cap & CAP_DRD)
963		qi.lo |= QI_IOTLB_DR;
964	if (iommu->cap & CAP_DWD)
965		qi.lo |= QI_IOTLB_DW;
966	iommu_issue_qi(iommu, &qi);
967}
968
969/* Intel: Flush Context entries, Queued Invalidation mode */
970void
971iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
972    int sid, int fm)
973{
974	struct qi_entry qi;
975
976	/* Use queued invalidation */
977	qi.hi = 0;
978	switch (mode) {
979	case CTX_GLOBAL:
980		qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
981		break;
982	case CTX_DOMAIN:
983		qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
984		break;
985	case CTX_DEVICE:
986		qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
987		    QI_CTX_SID(sid) | QI_CTX_FM(fm);
988		break;
989	}
990	iommu_issue_qi(iommu, &qi);
991}
992
993/* Intel: Flush write buffers */
994void
995iommu_flush_write_buffer(struct iommu_softc *iommu)
996{
997	int i, sts;
998
999	if (iommu->dte)
1000		return;
1001	if (!(iommu->cap & CAP_RWBF))
1002		return;
1003	DPRINTF(1,"writebuf\n");
1004	iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
1005	for (i = 0; i < 5; i++) {
1006		sts = iommu_read_4(iommu, DMAR_GSTS_REG);
1007		if (sts & GSTS_WBFS)
1008			break;
1009		delay(10000);
1010	}
1011	if (i == 5) {
1012		printf("write buffer flush fails\n");
1013	}
1014}
1015
1016void
1017iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
1018{
1019	if (iommu->dte) {
1020		pmap_flush_cache((vaddr_t)addr, size);
1021		return;
1022	}
1023	if (!(iommu->ecap & ECAP_C))
1024		pmap_flush_cache((vaddr_t)addr, size);
1025}
1026
1027/*
1028 * Intel: Flush IOMMU TLB Entries
1029 * Flushing can occur globally, per domain or per page
1030 */
1031void
1032iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
1033{
1034	int		n;
1035	uint64_t	val;
1036
1037	/* Call AMD */
1038	if (iommu->dte) {
1039		ivhd_invalidate_domain(iommu, did);
1040		return;
1041	}
1042	val = IOTLB_IVT;
1043	switch (mode) {
1044	case IOTLB_GLOBAL:
1045		val |= IIG_GLOBAL;
1046		break;
1047	case IOTLB_DOMAIN:
1048		val |= IIG_DOMAIN | IOTLB_DID(did);
1049		break;
1050	case IOTLB_PAGE:
1051		val |= IIG_PAGE | IOTLB_DID(did);
1052		break;
1053	}
1054
1055	/* Check for Read/Write Drain */
1056	if (iommu->cap & CAP_DRD)
1057		val |= IOTLB_DR;
1058	if (iommu->cap & CAP_DWD)
1059		val |= IOTLB_DW;
1060
1061	mtx_enter(&iommu->reg_lock);
1062
1063	iommu_write_8(iommu, DMAR_IOTLB_REG(iommu), val);
1064	n = 0;
1065	do {
1066		val = iommu_read_8(iommu, DMAR_IOTLB_REG(iommu));
1067	} while (n++ < 5 && val & IOTLB_IVT);
1068
1069	mtx_leave(&iommu->reg_lock);
1070}
1071
1072/* Intel: Flush IOMMU settings
1073 * Flushes can occur globally, per domain, or per device
1074 */
1075void
1076iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
1077{
1078	uint64_t	val;
1079	int		n;
1080
1081	if (iommu->dte)
1082		return;
1083	val = CCMD_ICC;
1084	switch (mode) {
1085	case CTX_GLOBAL:
1086		val |= CIG_GLOBAL;
1087		break;
1088	case CTX_DOMAIN:
1089		val |= CIG_DOMAIN | CCMD_DID(did);
1090		break;
1091	case CTX_DEVICE:
1092		val |= CIG_DEVICE | CCMD_DID(did) |
1093		    CCMD_SID(sid) | CCMD_FM(fm);
1094		break;
1095	}
1096
1097	mtx_enter(&iommu->reg_lock);
1098
1099	n = 0;
1100	iommu_write_8(iommu, DMAR_CCMD_REG, val);
1101	do {
1102		val = iommu_read_8(iommu, DMAR_CCMD_REG);
1103	} while (n++ < 5 && val & CCMD_ICC);
1104
1105	mtx_leave(&iommu->reg_lock);
1106}
1107
1108/* Intel: Enable Queued Invalidation */
1109void
1110iommu_enable_qi(struct iommu_softc *iommu, int enable)
1111{
1112	int	n = 0;
1113	int	sts;
1114
1115	if (!(iommu->ecap & ECAP_QI))
1116		return;
1117
1118	if (enable) {
1119		iommu->gcmd |= GCMD_QIE;
1120
1121		mtx_enter(&iommu->reg_lock);
1122
1123		iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
1124		do {
1125			sts = iommu_read_4(iommu, DMAR_GSTS_REG);
1126		} while (n++ < 5 && !(sts & GSTS_QIES));
1127
1128		mtx_leave(&iommu->reg_lock);
1129
1130		DPRINTF(1,"set.qie: %d\n", n);
1131	} else {
1132		iommu->gcmd &= ~GCMD_QIE;
1133
1134		mtx_enter(&iommu->reg_lock);
1135
1136		iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
1137		do {
1138			sts = iommu_read_4(iommu, DMAR_GSTS_REG);
1139		} while (n++ < 5 && sts & GSTS_QIES);
1140
1141		mtx_leave(&iommu->reg_lock);
1142
1143		DPRINTF(1,"clr.qie: %d\n", n);
1144	}
1145}
1146
1147/* Intel: Enable IOMMU translation */
1148int
1149iommu_enable_translation(struct iommu_softc *iommu, int enable)
1150{
1151	uint32_t	sts;
1152	uint64_t	reg;
1153	int		n = 0;
1154
1155	if (iommu->dte)
1156		return (0);
1157	reg = 0;
1158	if (enable) {
1159		DPRINTF(0,"enable iommu %d\n", iommu->id);
1160		iommu_showcfg(iommu, -1);
1161
1162		iommu->gcmd |= GCMD_TE;
1163
1164		/* Enable translation */
1165		printf(" pre tes: ");
1166
1167		mtx_enter(&iommu->reg_lock);
1168		iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
1169		printf("xxx");
1170		do {
1171			printf("yyy");
1172			sts = iommu_read_4(iommu, DMAR_GSTS_REG);
1173			delay(n * 10000);
1174		} while (n++ < 5 && !(sts & GSTS_TES));
1175		mtx_leave(&iommu->reg_lock);
1176
1177		printf(" set.tes: %d\n", n);
1178
1179		if (n >= 5) {
1180			printf("error.. unable to initialize iommu %d\n",
1181			    iommu->id);
1182			iommu->flags |= IOMMU_FLAGS_BAD;
1183
1184			/* Disable IOMMU */
1185			iommu->gcmd &= ~GCMD_TE;
1186			mtx_enter(&iommu->reg_lock);
1187			iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
1188			mtx_leave(&iommu->reg_lock);
1189
1190			return (1);
1191		}
1192
1193		iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
1194		iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
1195	} else {
1196		iommu->gcmd &= ~GCMD_TE;
1197
1198		mtx_enter(&iommu->reg_lock);
1199
1200		iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
1201		do {
1202			sts = iommu_read_4(iommu, DMAR_GSTS_REG);
1203		} while (n++ < 5 && sts & GSTS_TES);
1204		mtx_leave(&iommu->reg_lock);
1205
1206		printf(" clr.tes: %d\n", n);
1207	}
1208
1209	return (0);
1210}
1211
1212/* Intel: Initialize IOMMU */
1213int
1214iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
1215    struct acpidmar_drhd *dh)
1216{
1217	static int	niommu;
1218	int		len = VTD_PAGE_SIZE;
1219	int		i, gaw;
1220	uint32_t	sts;
1221	paddr_t		paddr;
1222
1223	if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
1224		return (-1);
1225	}
1226
1227	TAILQ_INIT(&iommu->domains);
1228	iommu->id = ++niommu;
1229	iommu->flags = dh->flags;
1230	iommu->segment = dh->segment;
1231	iommu->iot = sc->sc_memt;
1232
1233	iommu->cap = iommu_read_8(iommu, DMAR_CAP_REG);
1234	iommu->ecap = iommu_read_8(iommu, DMAR_ECAP_REG);
1235	iommu->ndoms = cap_nd(iommu->cap);
1236
1237	/* Print Capabilities & Extended Capabilities */
1238	DPRINTF(0, "  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
1239	    iommu->cap & CAP_AFL ? "afl " : "",		/* adv fault */
1240	    iommu->cap & CAP_RWBF ? "rwbf " : "",	/* write-buffer flush */
1241	    iommu->cap & CAP_PLMR ? "plmr " : "",	/* protected lo region */
1242	    iommu->cap & CAP_PHMR ? "phmr " : "",	/* protected hi region */
1243	    iommu->cap & CAP_CM ? "cm " : "",		/* caching mode */
1244	    iommu->cap & CAP_ZLR ? "zlr " : "",		/* zero-length read */
1245	    iommu->cap & CAP_PSI ? "psi " : "",		/* page invalidate */
1246	    iommu->cap & CAP_DWD ? "dwd " : "",		/* write drain */
1247	    iommu->cap & CAP_DRD ? "drd " : "",		/* read drain */
1248	    iommu->cap & CAP_FL1GP ? "Gb " : "",	/* 1Gb pages */
1249	    iommu->cap & CAP_PI ? "pi " : "");		/* posted interrupts */
1250	DPRINTF(0, "  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1251	    iommu->ecap & ECAP_C ? "c " : "",		/* coherent */
1252	    iommu->ecap & ECAP_QI ? "qi " : "",		/* queued invalidate */
1253	    iommu->ecap & ECAP_DT ? "dt " : "",		/* device iotlb */
1254	    iommu->ecap & ECAP_IR ? "ir " : "",		/* intr remap */
1255	    iommu->ecap & ECAP_EIM ? "eim " : "",	/* x2apic */
1256	    iommu->ecap & ECAP_PT ? "pt " : "",		/* passthrough */
1257	    iommu->ecap & ECAP_SC ? "sc " : "",		/* snoop control */
1258	    iommu->ecap & ECAP_ECS ? "ecs " : "",	/* extended context */
1259	    iommu->ecap & ECAP_MTS ? "mts " : "",	/* memory type */
1260	    iommu->ecap & ECAP_NEST ? "nest " : "",	/* nested translations */
1261	    iommu->ecap & ECAP_DIS ? "dis " : "",	/* deferred invalidation */
1262	    iommu->ecap & ECAP_PASID ? "pas " : "",	/* pasid */
1263	    iommu->ecap & ECAP_PRS ? "prs " : "",	/* page request */
1264	    iommu->ecap & ECAP_ERS ? "ers " : "",	/* execute request */
1265	    iommu->ecap & ECAP_SRS ? "srs " : "",	/* supervisor request */
1266	    iommu->ecap & ECAP_NWFS ? "nwfs " : "",	/* no write flag */
1267	    iommu->ecap & ECAP_EAFS ? "eafs " : "");	/* extended accessed flag */
1268
1269	mtx_init(&iommu->reg_lock, IPL_HIGH);
1270
	/* Clear pending fault status (these bits are write-1-to-clear) */
	iommu_write_4(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
1273
1274	iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
1275	    acpidmar_intr, iommu, "dmarintr");
1276
1277	/* Enable interrupts */
1278	sts = iommu_read_4(iommu, DMAR_FECTL_REG);
1279	iommu_write_4(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
1280
1281	/* Allocate root pointer */
1282	iommu->root = iommu_alloc_page(iommu, &paddr);
1283	DPRINTF(0, "Allocated root pointer: pa:%.16llx va:%p\n",
1284	    (uint64_t)paddr, iommu->root);
1285	iommu->rtaddr = paddr;
1286	iommu_flush_write_buffer(iommu);
1287	iommu_set_rtaddr(iommu, paddr);
1288
1289#if 0
1290	if (iommu->ecap & ECAP_QI) {
1291		/* Queued Invalidation support */
1292		iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
1293		iommu_write_8(iommu, DMAR_IQT_REG, 0);
1294		iommu_write_8(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
1295	}
1296	if (iommu->ecap & ECAP_IR) {
1297		/* Interrupt remapping support */
1298		iommu_write_8(iommu, DMAR_IRTA_REG, 0);
1299	}
1300#endif
1301
1302	/* Calculate guest address width and supported guest widths */
1303	gaw = -1;
1304	iommu->mgaw = cap_mgaw(iommu->cap);
1305	DPRINTF(0, "gaw: %d { ", iommu->mgaw);
1306	for (i = 0; i < 5; i++) {
1307		if (cap_sagaw(iommu->cap) & (1L << i)) {
1308			gaw = VTD_LEVELTOAW(i);
1309			DPRINTF(0, "%d ", gaw);
1310			iommu->agaw = gaw;
1311		}
1312	}
1313	DPRINTF(0, "}\n");
1314
1315	/* Cache current status register bits */
1316	sts = iommu_read_4(iommu, DMAR_GSTS_REG);
1317	if (sts & GSTS_TES)
1318		iommu->gcmd |= GCMD_TE;
1319	if (sts & GSTS_QIES)
1320		iommu->gcmd |= GCMD_QIE;
1321	if (sts & GSTS_IRES)
1322		iommu->gcmd |= GCMD_IRE;
1323	DPRINTF(0, "gcmd: %x preset\n", iommu->gcmd);
1324	acpidmar_intr(iommu);
1325	return (0);
1326}
1327
1328/* Read/Write IOMMU register */
1329uint32_t
1330iommu_read_4(struct iommu_softc *iommu, int reg)
1331{
1332	uint32_t	v;
1333
1334	v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
1335	return (v);
1336}
1337
1338
1339void
1340iommu_write_4(struct iommu_softc *iommu, int reg, uint32_t v)
1341{
1342	bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
1343}
1344
1345uint64_t
1346iommu_read_8(struct iommu_softc *iommu, int reg)
1347{
1348	uint64_t	v;
1349
1350	v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
1351	return (v);
1352}
1353
1354void
1355iommu_write_8(struct iommu_softc *iommu, int reg, uint64_t v)
1356{
1357	bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
1358}
1359
1360/* Check if a device is within a device scope */
1361int
1362acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
1363    int sid)
1364{
1365	struct dmar_devlist	*ds;
1366	int			sub, sec, i;
1367	int			bus, dev, fun, sbus;
1368	pcireg_t		reg;
1369	pcitag_t		tag;
1370
1371	sbus = sid_bus(sid);
1372	TAILQ_FOREACH(ds, devlist, link) {
1373		bus = ds->bus;
1374		dev = ds->dp[0].device;
1375		fun = ds->dp[0].function;
1376		/* Walk PCI bridges in path */
1377		for (i = 1; i < ds->ndp; i++) {
1378			tag = pci_make_tag(pc, bus, dev, fun);
1379			reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
1380			bus = PPB_BUSINFO_SECONDARY(reg);
1381			dev = ds->dp[i].device;
1382			fun = ds->dp[i].function;
1383		}
1384
1385		/* Check for device exact match */
1386		if (sid == mksid(bus, dev, fun)) {
1387			return DMAR_ENDPOINT;
1388		}
1389
1390		/* Check for device subtree match */
1391		if (ds->type == DMAR_BRIDGE) {
1392			tag = pci_make_tag(pc, bus, dev, fun);
1393			reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
1394			sec = PPB_BUSINFO_SECONDARY(reg);
1395			sub = PPB_BUSINFO_SUBORDINATE(reg);
1396			if (sec <= sbus && sbus <= sub) {
1397				return DMAR_BRIDGE;
1398			}
1399		}
1400	}
1401
1402	return (0);
1403}
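
/*
 * Example (hypothetical topology): a DMAR_BRIDGE scope with ds->bus 0
 * and a single path entry 1c.0 names the root port at 00:1c.0.  The
 * exact-match test returns DMAR_ENDPOINT only for 00:1c.0 itself, while
 * the bridge test reads that port's secondary/subordinate bus numbers
 * (say 02..04) and returns DMAR_BRIDGE for any sid on those buses.
 */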
1404
1405struct domain *
1406domain_create(struct iommu_softc *iommu, int did)
1407{
1408	struct domain	*dom;
1409	int gaw;
1410
1411	DPRINTF(0, "iommu%d: create domain: %.4x\n", iommu->id, did);
1412	dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
1413	dom->did = did;
1414	dom->iommu = iommu;
1415	dom->pte = iommu_alloc_page(iommu, &dom->ptep);
1416	TAILQ_INIT(&dom->devices);
1417
1418	/* Setup DMA */
1419	dom->dmat._cookie = dom;
1420	dom->dmat._dmamap_create = dmar_dmamap_create;		/* nop */
1421	dom->dmat._dmamap_destroy = dmar_dmamap_destroy;	/* nop */
1422	dom->dmat._dmamap_load = dmar_dmamap_load;		/* lm */
1423	dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf;	/* lm */
1424	dom->dmat._dmamap_load_uio = dmar_dmamap_load_uio;	/* lm */
1425	dom->dmat._dmamap_load_raw = dmar_dmamap_load_raw;	/* lm */
1426	dom->dmat._dmamap_unload = dmar_dmamap_unload;		/* um */
1427	dom->dmat._dmamap_sync = dmar_dmamap_sync;		/* lm */
1428	dom->dmat._dmamem_alloc = dmar_dmamem_alloc;		/* nop */
1429	dom->dmat._dmamem_free = dmar_dmamem_free;		/* nop */
1430	dom->dmat._dmamem_map = dmar_dmamem_map;		/* nop */
1431	dom->dmat._dmamem_unmap = dmar_dmamem_unmap;		/* nop */
1432	dom->dmat._dmamem_mmap = dmar_dmamem_mmap;
1433
1434	snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
1435	    iommu->id, dom->did);
1436
1437	/* Setup IOMMU address map */
1438	gaw = min(iommu->agaw, iommu->mgaw);
1439	dom->iovamap = extent_create(dom->exname, 0, (1LL << gaw)-1,
1440	    M_DEVBUF, NULL, 0, EX_WAITOK | EX_NOCOALESCE);
1441
1442	/* Reserve the first 16M */
1443	extent_alloc_region(dom->iovamap, 0, 16*1024*1024, EX_WAITOK);
1444
	/* Reserve MSI interrupt region so it is never used as DMA VA */
1446	extent_alloc_region(dom->iovamap, MSI_BASE_ADDRESS, MSI_BASE_SIZE,
1447	    EX_WAITOK);
1448	mtx_init(&dom->exlck, IPL_HIGH);
1449
1450	TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
1451
1452	return dom;
1453}
1454
1455void
1456domain_add_device(struct domain *dom, int sid)
1457{
1458	struct domain_dev *ddev;
1459
1460	DPRINTF(0, "add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
1461	ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
1462	ddev->sid = sid;
1463	TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
1464
1465	/* Should set context entry here?? */
1466}
1467
1468void
1469domain_remove_device(struct domain *dom, int sid)
1470{
1471	struct domain_dev *ddev, *tmp;
1472
1473	TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
1474		if (ddev->sid == sid) {
1475			TAILQ_REMOVE(&dom->devices, ddev, link);
1476			free(ddev, sizeof(*ddev), M_DEVBUF);
1477		}
1478	}
1479}
1480
1481/* Lookup domain by segment & source id (bus.device.function) */
1482struct domain *
1483domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
1484{
1485	struct iommu_softc	*iommu;
1486	struct domain_dev	*ddev;
1487	struct domain		*dom;
1488	int			rc;
1489
1490	if (sc == NULL) {
1491		return NULL;
1492	}
1493
1494	/* Lookup IOMMU for this device */
1495	TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
1496		if (iommu->segment != segment)
1497			continue;
1498		/* Check for devscope match or catchall iommu */
1499		rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
1500		if (rc != 0 || iommu->flags) {
1501			break;
1502		}
1503	}
1504	if (!iommu) {
1505		printf("%s: no iommu found\n", dmar_bdf(sid));
1506		return NULL;
1507	}
1508
1509	/* Search domain devices */
1510	TAILQ_FOREACH(dom, &iommu->domains, link) {
1511		TAILQ_FOREACH(ddev, &dom->devices, link) {
1512			/* XXX: match all functions? */
1513			if (ddev->sid == sid) {
1514				return dom;
1515			}
1516		}
1517	}
1518	if (iommu->ndoms <= 2) {
1519		/* Running out of domains.. create catchall domain */
1520		if (!iommu->unity) {
1521			iommu->unity = domain_create(iommu, 1);
1522		}
1523		dom = iommu->unity;
1524	} else {
1525		dom = domain_create(iommu, --iommu->ndoms);
1526	}
1527	if (!dom) {
1528		printf("no domain here\n");
1529		return NULL;
1530	}
1531
1532	/* Add device to domain */
1533	domain_add_device(dom, sid);
1534
1535	return dom;
1536}
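
/*
 * Domain ids are handed out from the top of the IOMMU's range: with
 * ndoms == 256 the first device gets did 255, the next 254, and so on.
 * Once only the reserved ids would remain (ndoms <= 2), later devices
 * all share the "unity" domain with did 1.
 */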
1537
1538/* Map Guest Pages into IOMMU */
1539void
1540_iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
1541{
1542	bus_size_t i;
1543	paddr_t hpa;
1544
1545	if (dom == NULL) {
1546		return;
1547	}
1548	DPRINTF(1, "Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
1549	for (i = 0; i < len; i += PAGE_SIZE) {
1550		hpa = 0;
1551		pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
1552		domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
1553		gpa += PAGE_SIZE;
1554		va  += PAGE_SIZE;
1555	}
1556}
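
/*
 * Usage sketch (hypothetical caller, e.g. a hypervisor backend): look
 * up the device's domain with _iommu_domain() below, then enter each
 * guest-physical range backed by host memory:
 *
 *	dom = _iommu_domain(seg, bus, dev, fun, &did);
 *	_iommu_map(dom, uvaddr, gpa, size);
 *
 * where uvaddr is the host virtual address of the backing buffer in
 * curproc's address space, as the pmap_extract() above assumes.
 */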
1557
1558/* Find IOMMU for a given PCI device */
void *
_iommu_domain(int segment, int bus, int dev, int func, int *id)
1561{
1562	struct domain *dom;
1563
1564	dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
1565	if (dom) {
1566		*id = dom->did;
1567	}
1568	return dom;
1569}
1570
1571void
1572domain_map_device(struct domain *dom, int sid);
1573
1574void
1575domain_map_device(struct domain *dom, int sid)
1576{
1577	struct iommu_softc	*iommu;
1578	struct context_entry	*ctx;
1579	paddr_t			paddr;
1580	int			bus, devfn;
1581	int			tt, lvl;
1582
1583	iommu = dom->iommu;
1584
1585	bus = sid_bus(sid);
1586	devfn = sid_devfn(sid);
1587	/* AMD attach device */
1588	if (iommu->dte) {
1589		struct ivhd_dte *dte = &iommu->dte[sid];
1590		if (!dte->dw0) {
1591			/* Setup Device Table Entry: bus.devfn */
1592			DPRINTF(1, "@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
1593			dte_set_host_page_table_root_ptr(dte, dom->ptep);
1594			dte_set_domain(dte, dom->did);
1595			dte_set_mode(dte, 3);  /* Set 3 level PTE */
1596			dte_set_tv(dte);
1597			dte_set_valid(dte);
1598			ivhd_flush_devtab(iommu, dom->did);
1599#ifdef IOMMU_DEBUG
1600			//ivhd_showreg(iommu);
1601			ivhd_showdte(iommu);
1602#endif
1603		}
1604		return;
1605	}
1606
1607	/* Create Bus mapping */
1608	if (!root_entry_is_valid(&iommu->root[bus])) {
1609		iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
1610		iommu->root[bus].lo = paddr | ROOT_P;
1611		iommu_flush_cache(iommu, &iommu->root[bus],
1612		    sizeof(struct root_entry));
1613		DPRINTF(0, "iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
1614		    iommu->id, bus, (uint64_t)paddr,
1615		    iommu->ctx[bus]);
1616	}
1617
1618	/* Create DevFn mapping */
1619	ctx = iommu->ctx[bus] + devfn;
1620	if (!context_entry_is_valid(ctx)) {
1621		tt = CTX_T_MULTI;
1622		lvl = VTD_AWTOLEVEL(iommu->agaw);
1623
1624		/* Initialize context */
1625		context_set_slpte(ctx, dom->ptep);
1626		context_set_translation_type(ctx, tt);
1627		context_set_domain_id(ctx, dom->did);
1628		context_set_address_width(ctx, lvl);
1629		context_set_present(ctx);
1630
1631		/* Flush it */
1632		iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
1633		if ((iommu->cap & CAP_CM) || acpidmar_force_cm) {
1634			iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
1635			iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
1636		} else {
1637			iommu_flush_write_buffer(iommu);
1638		}
1639		DPRINTF(0, "iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
1640		    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
1641		    dom->did, tt);
1642	}
1643}
1644
1645struct domain *
1646acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
1647{
1648	static struct domain	*dom;
1649
1650	dom = domain_lookup(sc, segment, sid);
1651	if (!dom) {
1652		printf("no domain: %s\n", dmar_bdf(sid));
1653		return NULL;
1654	}
1655
1656	if (mapctx) {
1657		domain_map_device(dom, sid);
1658	}
1659
1660	return dom;
1661}
1662
1663void
1664acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
1665{
1666	int		bus, dev, fun, sid;
1667	struct domain	*dom;
1668	pcireg_t	reg;
1669
1670	if (!acpidmar_sc) {
1671		/* No DMAR, ignore */
1672		return;
1673	}
1674
1675	/* Add device to our list if valid */
1676	pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
1677	sid = mksid(bus, dev, fun);
1678	if (sid_flag[sid] & SID_INVALID)
1679		return;
1680
1681	reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
1682
1683	/* Add device to domain */
1684	dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
1685	if (dom == NULL)
1686		return;
1687
1688	if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
1689	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
1690		dom->flag = DOM_NOMAP;
1691	}
1692	if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
1693	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
1694		/* For ISA Bridges, map 0-16Mb as 1:1 */
1695		printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
1696		    pa->pa_domain, bus, dev, fun);
1697		domain_map_pthru(dom, 0x00, 16*1024*1024);
1698	}
1699
1700	/* Change DMA tag */
1701	pa->pa_dmat = &dom->dmat;
1702}
1703
1704/* Create list of device scope entries from ACPI table */
1705void
1706acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
1707    struct devlist_head *devlist)
1708{
1709	struct acpidmar_devscope	*ds;
1710	struct dmar_devlist		*d;
1711	int				dplen, i;
1712
1713	TAILQ_INIT(devlist);
1714	while (off < de->length) {
1715		ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
1716		off += ds->length;
1717
1718		/* We only care about bridges and endpoints */
1719		if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
1720			continue;
1721
1722		dplen = ds->length - sizeof(*ds);
1723		d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
1724		d->bus  = ds->bus;
1725		d->type = ds->type;
1726		d->ndp  = dplen / 2;
1727		d->dp   = (void *)&d[1];
1728		memcpy(d->dp, &ds[1], dplen);
1729		TAILQ_INSERT_TAIL(devlist, d, link);
1730
1731		DPRINTF(1, "  %8s  %.4x:%.2x.%.2x.%x {",
1732		    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
1733		    segment, ds->bus,
1734		    d->dp[0].device,
1735		    d->dp[0].function);
1736
1737		for (i = 1; i < d->ndp; i++) {
1738			DPRINTF(1, " %2x.%x ",
1739			    d->dp[i].device,
1740			    d->dp[i].function);
1741		}
1742		DPRINTF(1, "}\n");
1743	}
1744}
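
/*
 * Example scope layout as consumed above (sizes per the DMAR spec, not
 * checked here): an entry of length 10 is the 6-byte acpidmar_devscope
 * header plus two 2-byte path hops, so dplen == 4 and d->ndp == 2; a
 * path of { 1c.0, 00.0 } describes device 00.0 behind the bridge at
 * <bus>:1c.0.
 */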
1745
1746/* DMA Remapping Hardware Unit */
1747void
1748acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
1749{
1750	struct iommu_softc	*iommu;
1751
1752	printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
1753	    de->drhd.segment,
1754	    de->drhd.address,
1755	    de->drhd.flags);
1756	iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
1757	acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
1758	    &iommu->devices);
1759	iommu_init(sc, iommu, &de->drhd);
1760
1761	if (de->drhd.flags) {
1762		/* Catchall IOMMU goes at end of list */
1763		TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
1764	} else {
1765		TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
1766	}
1767}
1768
1769/* Reserved Memory Region Reporting */
1770void
1771acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
1772{
1773	struct rmrr_softc	*rmrr;
1774	bios_memmap_t		*im, *jm;
1775	uint64_t		start, end;
1776
1777	printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
1778	    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
1779	if (de->rmrr.limit <= de->rmrr.base) {
1780		printf("  buggy BIOS\n");
1781		return;
1782	}
1783
1784	rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
1785	rmrr->start = trunc_page(de->rmrr.base);
1786	rmrr->end = round_page(de->rmrr.limit);
1787	rmrr->segment = de->rmrr.segment;
1788	acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
1789	    &rmrr->devices);
1790
1791	for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
1792		if (im->type != BIOS_MAP_RES)
1793			continue;
1794		/* Search for adjacent reserved regions */
1795		start = im->addr;
1796		end   = im->addr+im->size;
1797		for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
1798		    jm++) {
1799			end = jm->addr+jm->size;
1800		}
1801		printf("e820: %.16llx - %.16llx\n", start, end);
1802		if (start <= rmrr->start && rmrr->end <= end) {
1803			/* Bah.. some buggy BIOS stomp outside RMRR */
1804			printf("  ** inside E820 Reserved %.16llx %.16llx\n",
1805			    start, end);
1806			rmrr->start = trunc_page(start);
1807			rmrr->end   = round_page(end);
1808			break;
1809		}
1810	}
1811	TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
1812}
1813
1814/* Root Port ATS Reporting */
1815void
1816acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
1817{
1818	struct atsr_softc *atsr;
1819
1820	printf("ATSR: segment:%.4x flags:%x\n",
1821	    de->atsr.segment,
1822	    de->atsr.flags);
1823
1824	atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
1825	atsr->flags = de->atsr.flags;
1826	atsr->segment = de->atsr.segment;
1827	acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
1828	    &atsr->devices);
1829
1830	TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
1831}
1832
1833void
1834acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
1835{
1836	struct rmrr_softc	*rmrr;
1837	struct iommu_softc	*iommu;
1838	struct domain		*dom;
1839	struct dmar_devlist	*dl;
1840	union acpidmar_entry	*de;
1841	int			off, sid, rc;
1842
1843	domain_map_page = domain_map_page_intel;
1844	printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
1845	    dmar->haw+1,
1846	    !!(dmar->flags & 0x1),
1847	    !!(dmar->flags & 0x2));
1848	sc->sc_haw = dmar->haw+1;
1849	sc->sc_flags = dmar->flags;
1850
1851	TAILQ_INIT(&sc->sc_drhds);
1852	TAILQ_INIT(&sc->sc_rmrrs);
1853	TAILQ_INIT(&sc->sc_atsrs);
1854
1855	off = sizeof(*dmar);
1856	while (off < dmar->hdr.length) {
1857		de = (union acpidmar_entry *)((unsigned char *)dmar + off);
1858		switch (de->type) {
1859		case DMAR_DRHD:
1860			acpidmar_drhd(sc, de);
1861			break;
1862		case DMAR_RMRR:
1863			acpidmar_rmrr(sc, de);
1864			break;
1865		case DMAR_ATSR:
1866			acpidmar_atsr(sc, de);
1867			break;
1868		default:
1869			printf("DMAR: unknown %x\n", de->type);
1870			break;
1871		}
1872		off += de->length;
1873	}
1874
1875	/* Pre-create domains for iommu devices */
1876	TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
1877		TAILQ_FOREACH(dl, &iommu->devices, link) {
1878			sid = mksid(dl->bus, dl->dp[0].device,
1879			    dl->dp[0].function);
1880			dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
1881			if (dom != NULL) {
1882				printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
1883				    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
1884				    iommu->id, dom->did);
1885			}
1886		}
1887	}
1888	/* Map passthrough pages for RMRR */
1889	TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
1890		TAILQ_FOREACH(dl, &rmrr->devices, link) {
1891			sid = mksid(dl->bus, dl->dp[0].device,
1892			    dl->dp[0].function);
1893			dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
1894			if (dom != NULL) {
1895				printf("%s map ident: %.16llx %.16llx\n",
1896				    dom_bdf(dom), rmrr->start, rmrr->end);
1897				domain_map_pthru(dom, rmrr->start, rmrr->end);
1898				rc = extent_alloc_region(dom->iovamap,
1899				    rmrr->start, rmrr->end,
1900				    EX_WAITOK | EX_CONFLICTOK);
1901			}
1902		}
1903	}
1904}
1905
1906
1907/*=====================================================
1908 * AMD Vi
1909 *=====================================================*/
1910void	acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
1911int	ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
1912		struct acpi_ivhd *);
1913int	_ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
1914void	ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
1915int	ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
1916int	ivhd_invalidate_domain(struct iommu_softc *, int);
1917void	ivhd_intr_map(struct iommu_softc *, int);
1918void	ivhd_checkerr(struct iommu_softc *iommu);
1919int	acpiivhd_intr(void *);
1920
1921int
1922acpiivhd_intr(void *ctx)
1923{
1924	struct iommu_softc *iommu = ctx;
1925
1926	if (!iommu->dte)
1927		return (0);
1928	ivhd_poll_events(iommu);
1929	return (1);
1930}
1931
1932/* Setup interrupt for AMD */
1933void
ivhd_intr_map(struct iommu_softc *iommu, int devid)
{
1935	pci_intr_handle_t ih;
1936
1937	if (iommu->intr)
1938		return;
1939	ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
1940	ih.line = APIC_INT_VIA_MSG;
1941	ih.pin = 0;
1942	iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
1943				acpiivhd_intr, iommu, "amd_iommu");
1944	printf("amd iommu intr: %p\n", iommu->intr);
1945}
1946
1947void
1948_dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
1949{
1950	char *pfx[] = { "    ", "   ", "  ", " ", "" };
1951	uint64_t i, sh;
1952	struct pte_entry *npte;
1953
1954	for (i = 0; i < 512; i++) {
1955		sh = (i << (((lvl-1) * 9) + 12));
1956		if (pte[i].val & PTE_P) {
1957			if (lvl > 1) {
1958				npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
1959				printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
1960				    pte[i].val, (pte[i].val >> 9) & 7);
1961				_dumppte(npte, lvl-1, va | sh);
1962			} else {
1963				printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
1964				    pte[i].val, va | sh);
1965			}
1966		}
1967	}
1968}
1969
1970void
1971ivhd_showpage(struct iommu_softc *iommu, int sid, paddr_t paddr)
1972{
1973	struct domain *dom;
1974	static int show = 0;
1975
1976	if (show > 10)
1977		return;
1978	show++;
1979	dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
1980	if (!dom)
1981		return;
1982	printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1983	    iommu->dte[sid].dw0,
1984	    iommu->dte[sid].dw1,
1985	    iommu->dte[sid].dw2,
1986	    iommu->dte[sid].dw3,
1987	    iommu->dte[sid].dw4,
1988	    iommu->dte[sid].dw5,
1989	    iommu->dte[sid].dw6,
1990	    iommu->dte[sid].dw7);
1991	_dumppte(dom->pte, 3, 0);
1992}
1993
1994/* Display AMD IOMMU Error */
1995void
1996ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
1997{
1998	int type, sid, did, flag;
1999	uint64_t address;
2000
2001	/* Get Device, Domain, Address and Type of event */
2002	sid  = __EXTRACT(evt->dw0, EVT_SID);
2003	type = __EXTRACT(evt->dw1, EVT_TYPE);
2004	did  = __EXTRACT(evt->dw1, EVT_DID);
2005	flag = __EXTRACT(evt->dw1, EVT_FLAG);
2006	address = _get64(&evt->dw2);
2007
2008	printf("=== IOMMU Error[%.4x]: ", head);
2009	switch (type) {
2010	case ILLEGAL_DEV_TABLE_ENTRY:
2011		printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
2012		    dmar_bdf(sid), address,
2013		    evt->dw1 & EVT_TR ? "translation" : "transaction",
2014		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
2015		    evt->dw1 & EVT_RW ? "write" : "read",
2016		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
2017		ivhd_showdte(iommu);
2018		break;
2019	case IO_PAGE_FAULT:
2020		printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
2021		    dmar_bdf(sid), did, address,
2022		    evt->dw1 & EVT_TR ? "translation" : "transaction",
2023		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
2024		    evt->dw1 & EVT_PE ? "no perm" : "perm",
2025		    evt->dw1 & EVT_RW ? "write" : "read",
2026		    evt->dw1 & EVT_PR ? "present" : "not present",
2027		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
2028		ivhd_showdte(iommu);
2029		ivhd_showpage(iommu, sid, address);
2030		break;
2031	case DEV_TAB_HARDWARE_ERROR:
2032		printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
2033		    dmar_bdf(sid), address,
2034		    evt->dw1 & EVT_TR ? "translation" : "transaction",
2035		    evt->dw1 & EVT_RW ? "write" : "read",
2036		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
2037		ivhd_showdte(iommu);
2038		break;
2039	case PAGE_TAB_HARDWARE_ERROR:
2040		printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
2041		    dmar_bdf(sid), address,
2042		    evt->dw1 & EVT_TR ? "translation" : "transaction",
2043		    evt->dw1 & EVT_RW ? "write" : "read",
2044		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
2045		ivhd_showdte(iommu);
2046		break;
2047	case ILLEGAL_COMMAND_ERROR:
2048		printf("illegal command addr=0x%.16llx\n", address);
2049		ivhd_showcmd(iommu);
2050		break;
2051	case COMMAND_HARDWARE_ERROR:
2052		printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
2053		    address, flag);
2054		ivhd_showcmd(iommu);
2055		break;
2056	case IOTLB_INV_TIMEOUT:
2057		printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
2058		    dmar_bdf(sid), address);
2059		break;
2060	case INVALID_DEVICE_REQUEST:
2061		printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
2062		    dmar_bdf(sid), address, flag);
2063		break;
2064	default:
2065		printf("unknown type=0x%.2x\n", type);
2066		break;
2067	}
2068	/* Clear old event */
2069	evt->dw0 = 0;
2070	evt->dw1 = 0;
2071	evt->dw2 = 0;
2072	evt->dw3 = 0;
2073}
2074
2075/* AMD: Process IOMMU error from hardware */
2076int
2077ivhd_poll_events(struct iommu_softc *iommu)
2078{
2079	uint32_t head, tail;
2080	int sz;
2081
2082	sz = sizeof(struct ivhd_event);
2083	head = iommu_read_4(iommu, EVT_HEAD_REG);
2084	tail = iommu_read_4(iommu, EVT_TAIL_REG);
2085	if (head == tail) {
2086		/* No pending events */
2087		return (0);
2088	}
2089	while (head != tail) {
2090		ivhd_show_event(iommu, iommu->evt_tbl + head, head);
2091		head = (head + sz) % EVT_TBL_SIZE;
2092	}
2093	iommu_write_4(iommu, EVT_HEAD_REG, head);
2094	return (0);
2095}
2096
2097/* AMD: Issue command to IOMMU queue */
2098int
2099_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
2100{
2101	u_long rf;
2102	uint32_t head, tail, next;
2103	int sz;
2104
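	/*
	 * The command queue is a ring buffer: software appends 16-byte
	 * commands at the tail while the IOMMU consumes them at the head.
	 */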
2105	head = iommu_read_4(iommu, CMD_HEAD_REG);
2106	sz = sizeof(*cmd);
2107	rf = intr_disable();
2108	tail = iommu_read_4(iommu, CMD_TAIL_REG);
2109	next = (tail + sz) % CMD_TBL_SIZE;
2110	if (next == head) {
2111		printf("FULL\n");
2112		/* Queue is full */
2113		intr_restore(rf);
2114		return -EBUSY;
2115	}
2116	memcpy(iommu->cmd_tbl + tail, cmd, sz);
2117	iommu_write_4(iommu, CMD_TAIL_REG, next);
2118	intr_restore(rf);
2119	return (tail / sz);
2120}
2121
2122#define IVHD_MAXDELAY 8
2123
2124int
2125ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
2126{
2127	struct ivhd_command wq = { 0 };
2128	volatile uint64_t wv __aligned(16) = 0LL;
2129	paddr_t paddr;
2130	int rc, i;
2131
2132	rc = _ivhd_issue_command(iommu, cmd);
2133	if (rc >= 0 && wait) {
		/*
		 * Wait for previous commands to complete: append a
		 * COMPLETION_WAIT command that stores a marker to the
		 * physical address of the local completion variable.
		 */
2136		pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
2137		wq.dw0 = (paddr & ~0xF) | 0x1;
2138		wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
2139		wq.dw2 = 0xDEADBEEF;
2140		wq.dw3 = 0xFEEDC0DE;
2141
2142		rc = _ivhd_issue_command(iommu, &wq);
2143		/* wv will change to value in dw2/dw3 when command is complete */
2144		for (i = 0; i < IVHD_MAXDELAY && !wv; i++) {
2145			DELAY(10 << i);
2146		}
2147		if (i == IVHD_MAXDELAY) {
2148			printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
2149			    cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
2150		}
2151	}
	return rc;
}
2155
2156/* AMD: Flush changes to Device Table Entry for a specific domain */
2157int
2158ivhd_flush_devtab(struct iommu_softc *iommu, int did)
2159{
2160	struct ivhd_command cmd = {
2161	    .dw0 = did,
2162	    .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT
2163	};
2164
2165	return ivhd_issue_command(iommu, &cmd, 1);
2166}
2167
2168/* AMD: Invalidate all IOMMU device and page tables */
2169int
2170ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
2171{
2172	struct ivhd_command cmd = {
2173	    .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT
2174	};
2175
2176	return ivhd_issue_command(iommu, &cmd, 0);
2177}
2178
2179/* AMD: Invalidate interrupt remapping */
2180int
2181ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
2182{
2183	struct ivhd_command cmd = {
2184	    .dw0 = did,
2185	    .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT
2186	};
2187
2188	return ivhd_issue_command(iommu, &cmd, 0);
2189}
2190
2191/* AMD: Invalidate all page tables in a domain */
2192int
2193ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
2194{
2195	struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
2196
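	/*
	 * Per the AMD IOMMU spec, an address of all ones with the S (size)
	 * and PDE bits set invalidates every page in the domain.
	 */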
2197	cmd.dw2 = 0xFFFFF000 | 0x3;
2198	cmd.dw3 = 0x7FFFFFFF;
2199	return ivhd_issue_command(iommu, &cmd, 1);
2200}
2201
2202/* AMD: Display Registers */
2203void
2204ivhd_showreg(struct iommu_softc *iommu)
2205{
2206	printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
2207	    iommu_read_8(iommu, DEV_TAB_BASE_REG),
2208	    iommu_read_8(iommu, CMD_BASE_REG),
2209	    iommu_read_8(iommu, EVT_BASE_REG),
2210	    iommu_read_8(iommu, IOMMUCTL_REG),
2211	    iommu_read_8(iommu, IOMMUSTS_REG));
2212	printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
2213	    iommu_read_8(iommu, CMD_HEAD_REG),
2214	    iommu_read_8(iommu, CMD_TAIL_REG),
2215	    iommu_read_8(iommu, EVT_HEAD_REG),
2216	    iommu_read_8(iommu, EVT_TAIL_REG));
2217}
2218
2219/* AMD: Generate Errors to test event handler */
2220void
2221ivhd_checkerr(struct iommu_softc *iommu)
2222{
2223	struct ivhd_command cmd = { -1, -1, -1, -1 };
2224
2225	/* Generate ILLEGAL DEV TAB entry? */
2226	iommu->dte[0x2303].dw0 = -1;		/* invalid */
2227	iommu->dte[0x2303].dw2 = 0x1234;	/* domain */
2228	iommu->dte[0x2303].dw7 = -1;		/* reserved */
2229	ivhd_flush_devtab(iommu, 0x1234);
2230	ivhd_poll_events(iommu);
2231
2232	/* Generate ILLEGAL_COMMAND_ERROR : ok */
2233	ivhd_issue_command(iommu, &cmd, 0);
2234	ivhd_poll_events(iommu);
2235
2236	/* Generate page hardware error */
2237}
2238
2239/* AMD: Show Device Table Entry */
2240void
2241ivhd_showdte(struct iommu_softc *iommu)
2242{
2243	int i;
2244
2245	for (i = 0; i < 65536; i++) {
2246		if (iommu->dte[i].dw0) {
2247			printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
2248			    i >> 8, (i >> 3) & 0x1F, i & 0x7,
2249			    iommu->dte[i].dw0, iommu->dte[i].dw1,
2250			    iommu->dte[i].dw2, iommu->dte[i].dw3,
2251			    iommu->dte[i].dw4, iommu->dte[i].dw5,
2252			    iommu->dte[i].dw6, iommu->dte[i].dw7);
2253		}
2254	}
2255}
2256
2257/* AMD: Show command entries */
2258void
2259ivhd_showcmd(struct iommu_softc *iommu)
2260{
2261	struct ivhd_command *ihd;
2262	paddr_t phd;
2263	int i;
2264
2265	ihd = iommu->cmd_tbl;
2266	phd = iommu_read_8(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
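	/* Dump only the first 32 command slots of the 4k queue. */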
2267	for (i = 0; i < 4096 / 128; i++) {
2268		printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
2269		    (uint64_t)phd + i * sizeof(*ihd),
		    ihd[i].dw0, ihd[i].dw1, ihd[i].dw2, ihd[i].dw3);
2271	}
2272}
2273
2274#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
2275
2276/* AMD: Initialize IOMMU */
2277int
2278ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
2279	struct acpi_ivhd *ivhd)
2280{
2281	static int niommu;
2282	paddr_t paddr;
2283	uint64_t ov;
2284
2285	if (sc == NULL || iommu == NULL || ivhd == NULL) {
2286		printf("Bad pointer to iommu_init!\n");
2287		return -1;
2288	}
2289	if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
2290		printf("Bus Space Map fails\n");
2291		return -1;
2292	}
2293	TAILQ_INIT(&iommu->domains);
2294	TAILQ_INIT(&iommu->devices);
2295
2296	/* Setup address width and number of domains */
2297	iommu->id = ++niommu;
2298	iommu->iot = sc->sc_memt;
2299	iommu->mgaw = 48;
2300	iommu->agaw = 48;
2301	iommu->flags = 1;
2302	iommu->segment = 0;
2303	iommu->ndoms = 256;
2304
2305	printf(": AMD iommu%d at 0x%.8llx\n", iommu->id, ivhd->address);
2306
2307	iommu->ecap = iommu_read_8(iommu, EXTFEAT_REG);
2308	DPRINTF(0,"iommu%d: ecap:%.16llx ", iommu->id, iommu->ecap);
2309	DPRINTF(0,"%s%s%s%s%s%s%s%s\n",
2310	    iommu->ecap & EFR_PREFSUP ? "pref " : "",
2311	    iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
2312	    iommu->ecap & EFR_NXSUP   ? "nx " : "",
2313	    iommu->ecap & EFR_GTSUP   ? "gt " : "",
2314	    iommu->ecap & EFR_IASUP   ? "ia " : "",
2315	    iommu->ecap & EFR_GASUP   ? "ga " : "",
2316	    iommu->ecap & EFR_HESUP   ? "he " : "",
2317	    iommu->ecap & EFR_PCSUP   ? "pc " : "");
2318	DPRINTF(0,"hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
2319	    _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
2320	    _c(EFR_SMIFRC), _c(EFR_GAMSUP));
2321
2322	/* Turn off iommu */
2323	ov = iommu_read_8(iommu, IOMMUCTL_REG);
2324	iommu_write_8(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
2325		CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));
2326
2327	/* Enable intr, mark IOMMU device as invalid for remap */
2328	sid_flag[ivhd->devid] |= SID_INVALID;
2329	ivhd_intr_map(iommu, ivhd->devid);
2330
2331	/* Setup command buffer with 4k buffer (128 entries) */
2332	iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
2333	iommu_write_8(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
2334	iommu_write_4(iommu, CMD_HEAD_REG, 0x00);
2335	iommu_write_4(iommu, CMD_TAIL_REG, 0x00);
2336	iommu->cmd_tblp = paddr;
2337
2338	/* Setup event log with 4k buffer (128 entries) */
2339	iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
2340	iommu_write_8(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
2341	iommu_write_4(iommu, EVT_HEAD_REG, 0x00);
2342	iommu_write_4(iommu, EVT_TAIL_REG, 0x00);
2343	iommu->evt_tblp = paddr;
2344
	/*
	 * Setup device table: one 32-byte entry per source ID
	 * (bus:device:function), 64k entries, shared by all IOMMUs.
	 */
2348	iommu->dte = sc->sc_hwdte;
2349	iommu_write_8(iommu, DEV_TAB_BASE_REG, (sc->sc_hwdtep & DEV_TAB_MASK) | DEV_TAB_LEN);
2350
2351	/* Enable IOMMU */
2352	ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN);
2353	if (ivhd->flags & IVHD_COHERENT)
2354		ov |= CTL_COHERENT;
2355	if (ivhd->flags & IVHD_HTTUNEN)
2356		ov |= CTL_HTTUNEN;
2357	if (ivhd->flags & IVHD_RESPASSPW)
2358		ov |= CTL_RESPASSPW;
2359	if (ivhd->flags & IVHD_PASSPW)
2360		ov |= CTL_PASSPW;
2361	if (ivhd->flags & IVHD_ISOC)
2362		ov |= CTL_ISOC;
2363	ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
2364	ov |=  (CTL_INVTIMEOUT_10MS << CTL_INVTIMEOUT_SHIFT);
2365	iommu_write_8(iommu, IOMMUCTL_REG, ov);
2366
2367	ivhd_invalidate_iommu_all(iommu);
2368
2369	TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
2370	return 0;
2371}
2372
2373void
2374acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
2375{
2376	struct iommu_softc *iommu;
2377	struct acpi_ivhd_ext *ext;
2378	union acpi_ivhd_entry *ie;
2379	int start, off, dte, all_dte = 0;
2380
2381	if (ivhd->type == IVRS_IVHD_EXT) {
2382		ext = (struct acpi_ivhd_ext *)ivhd;
2383		DPRINTF(0,"ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
2384		    ext->type, ext->flags, ext->length,
2385		    ext->segment, dmar_bdf(ext->devid), ext->cap,
2386		    ext->address, ext->info,
2387		    ext->attrib, ext->efr);
2388		if (ext->flags & IVHD_PPRSUP)
2389			DPRINTF(0," PPRSup");
2390		if (ext->flags & IVHD_PREFSUP)
2391			DPRINTF(0," PreFSup");
2392		if (ext->flags & IVHD_COHERENT)
2393			DPRINTF(0," Coherent");
2394		if (ext->flags & IVHD_IOTLB)
2395			DPRINTF(0," Iotlb");
2396		if (ext->flags & IVHD_ISOC)
2397			DPRINTF(0," ISoc");
2398		if (ext->flags & IVHD_RESPASSPW)
2399			DPRINTF(0," ResPassPW");
2400		if (ext->flags & IVHD_PASSPW)
2401			DPRINTF(0," PassPW");
2402		if (ext->flags & IVHD_HTTUNEN)
2403			DPRINTF(0, " HtTunEn");
2404		if (ext->flags)
2405			DPRINTF(0,"\n");
2406		off = sizeof(*ext);
2407		iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
2408		ivhd_iommu_init(sc, iommu, ivhd);
2409	} else {
2410		DPRINTF(0,"ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
2411		    ivhd->type, ivhd->flags, ivhd->length,
2412		    ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
2413		    ivhd->address, ivhd->info,
2414		    ivhd->feature);
2415		if (ivhd->flags & IVHD_PPRSUP)
2416			DPRINTF(0," PPRSup");
2417		if (ivhd->flags & IVHD_PREFSUP)
2418			DPRINTF(0," PreFSup");
2419		if (ivhd->flags & IVHD_COHERENT)
2420			DPRINTF(0," Coherent");
2421		if (ivhd->flags & IVHD_IOTLB)
2422			DPRINTF(0," Iotlb");
2423		if (ivhd->flags & IVHD_ISOC)
2424			DPRINTF(0," ISoc");
2425		if (ivhd->flags & IVHD_RESPASSPW)
2426			DPRINTF(0," ResPassPW");
2427		if (ivhd->flags & IVHD_PASSPW)
2428			DPRINTF(0," PassPW");
2429		if (ivhd->flags & IVHD_HTTUNEN)
2430			DPRINTF(0, " HtTunEn");
2431		if (ivhd->flags)
2432			DPRINTF(0,"\n");
2433		off = sizeof(*ivhd);
2434	}
2435	while (off < ivhd->length) {
2436		ie = (void *)ivhd + off;
2437		switch (ie->type) {
2438		case IVHD_ALL:
2439			all_dte = ie->all.data;
2440			DPRINTF(0," ALL %.4x\n", dte);
2441			off += sizeof(ie->all);
2442			break;
2443		case IVHD_SEL:
2444			dte = ie->sel.data;
2445			DPRINTF(0," SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
2446			off += sizeof(ie->sel);
2447			break;
2448		case IVHD_SOR:
2449			dte = ie->sor.data;
2450			start = ie->sor.devid;
2451			DPRINTF(0," SOR: %s %.4x\n", dmar_bdf(start), dte);
2452			off += sizeof(ie->sor);
2453			break;
2454		case IVHD_EOR:
2455			DPRINTF(0," EOR: %s\n", dmar_bdf(ie->eor.devid));
2456			off += sizeof(ie->eor);
2457			break;
2458		case IVHD_ALIAS_SEL:
2459			dte = ie->alias.data;
2460			DPRINTF(0," ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
2461			DPRINTF(0," %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
2462			off += sizeof(ie->alias);
2463			break;
2464		case IVHD_ALIAS_SOR:
2465			dte = ie->alias.data;
2466			DPRINTF(0," ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
2467			DPRINTF(0," src=%s\n", dmar_bdf(ie->alias.srcid));
2468			off += sizeof(ie->alias);
2469			break;
2470		case IVHD_EXT_SEL:
2471			dte = ie->ext.data;
2472			DPRINTF(0," EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
2473			    dte, ie->ext.extdata);
2474			off += sizeof(ie->ext);
2475			break;
2476		case IVHD_EXT_SOR:
2477			dte = ie->ext.data;
2478			DPRINTF(0," EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
2479			    dte, ie->ext.extdata);
2480			off += sizeof(ie->ext);
2481			break;
2482		case IVHD_SPECIAL:
2483			DPRINTF(0," SPECIAL\n");
2484			off += sizeof(ie->special);
2485			break;
2486		default:
2487			DPRINTF(0," 2:unknown %x\n", ie->type);
2488			off = ivhd->length;
2489			break;
2490		}
2491	}
2492}
2493
2494void
2495acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
2496{
2497	union acpi_ivrs_entry *ie;
2498	int off;
2499
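	/*
	 * Allocate a single hardware device table, shared by all IOMMUs
	 * and indexed by source ID (bus:device:function).
	 */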
2500	if (!sc->sc_hwdte) {
2501		sc->sc_hwdte = iommu_alloc_hwdte(sc, HWDTE_SIZE, &sc->sc_hwdtep);
2502		if (sc->sc_hwdte == NULL)
2503			panic("Can't allocate HWDTE!");
2504	}
2505
2506	domain_map_page = domain_map_page_amd;
2507	DPRINTF(0,"IVRS Version: %d\n", ivrs->hdr.revision);
2508	DPRINTF(0," VA Size: %d\n",
2509	    (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
2510	DPRINTF(0," PA Size: %d\n",
2511	    (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);
2512
2513	TAILQ_INIT(&sc->sc_drhds);
2514	TAILQ_INIT(&sc->sc_rmrrs);
2515	TAILQ_INIT(&sc->sc_atsrs);
2516
2517	DPRINTF(0,"======== IVRS\n");
2518	off = sizeof(*ivrs);
2519	while (off < ivrs->hdr.length) {
2520		ie = (void *)ivrs + off;
2521		switch (ie->type) {
2522		case IVRS_IVHD:
2523		case IVRS_IVHD_EXT:
2524			acpiivrs_ivhd(sc, &ie->ivhd);
2525			break;
2526		case IVRS_IVMD_ALL:
2527		case IVRS_IVMD_SPECIFIED:
2528		case IVRS_IVMD_RANGE:
2529			DPRINTF(0,"ivmd\n");
2530			break;
2531		default:
2532			DPRINTF(0,"1:unknown: %x\n", ie->type);
2533			break;
2534		}
2535		off += ie->length;
2536	}
2537	DPRINTF(0,"======== End IVRS\n");
2538}
2539
2540static int
2541acpiivhd_activate(struct iommu_softc *iommu, int act)
2542{
2543	switch (act) {
2544	case DVACT_SUSPEND:
2545		iommu->flags |= IOMMU_FLAGS_SUSPEND;
2546		break;
2547	case DVACT_RESUME:
2548		iommu->flags &= ~IOMMU_FLAGS_SUSPEND;
2549		break;
2550	}
2551	return (0);
2552}
2553
2554int
2555acpidmar_activate(struct device *self, int act)
2556{
2557	struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
2558	struct iommu_softc *iommu;
2559
2560	printf("called acpidmar_activate %d %p\n", act, sc);
2561
2562	if (sc == NULL) {
2563		return (0);
2564	}
2565
2566	switch (act) {
2567	case DVACT_RESUME:
2568		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
2569			printf("iommu%d resume\n", iommu->id);
2570			if (iommu->dte) {
2571				acpiivhd_activate(iommu, act);
2572				continue;
2573			}
2574			iommu_flush_write_buffer(iommu);
2575			iommu_set_rtaddr(iommu, iommu->rtaddr);
2576			iommu_write_4(iommu, DMAR_FEDATA_REG, iommu->fedata);
2577			iommu_write_4(iommu, DMAR_FEADDR_REG, iommu->feaddr);
2578			iommu_write_4(iommu, DMAR_FEUADDR_REG,
2579			    iommu->feaddr >> 32);
2580			if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
2581			    IOMMU_FLAGS_SUSPEND) {
2582				printf("enable wakeup translation\n");
2583				iommu_enable_translation(iommu, 1);
2584			}
2585			iommu_showcfg(iommu, -1);
2586		}
2587		break;
2588	case DVACT_SUSPEND:
2589		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
2590			printf("iommu%d suspend\n", iommu->id);
2591			if (iommu->flags & IOMMU_FLAGS_BAD)
2592				continue;
2593			if (iommu->dte) {
2594				acpiivhd_activate(iommu, act);
2595				continue;
2596			}
2597			iommu->flags |= IOMMU_FLAGS_SUSPEND;
2598			iommu_enable_translation(iommu, 0);
2599			iommu_showcfg(iommu, -1);
2600		}
2601		break;
2602	}
2603	return (0);
2604}
2605
2606int
2607acpidmar_match(struct device *parent, void *match, void *aux)
2608{
2609	struct acpi_attach_args		*aaa = aux;
2610	struct acpi_table_header	*hdr;
2611
2612	/* If we do not have a table, it is not us */
2613	if (aaa->aaa_table == NULL)
2614		return (0);
2615
	/* If it is a DMAR or IVRS table, we can attach */
2617	hdr = (struct acpi_table_header *)aaa->aaa_table;
2618	if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0)
2619		return (1);
2620	if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
2621		return (1);
2622
2623	return (0);
2624}
2625
2626void
2627acpidmar_attach(struct device *parent, struct device *self, void *aux)
2628{
2629	struct acpidmar_softc *sc = (void *)self;
2630	struct acpi_attach_args	*aaa = aux;
2631	struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
2632	struct acpi_ivrs *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
2633	struct acpi_table_header *hdr;
2634
2635	hdr = (struct acpi_table_header *)aaa->aaa_table;
2636	sc->sc_memt = aaa->aaa_memt;
2637	sc->sc_dmat = aaa->aaa_dmat;
2638	if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
2639		acpidmar_sc = sc;
2640		acpidmar_init(sc, dmar);
2641	}
2642	if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
2643		acpidmar_sc = sc;
2644		acpiivrs_init(sc, ivrs);
2645	}
2646}
2647
/* Interrupt handling */
2649void acpidmar_msi_hwmask(struct pic *, int);
2650void acpidmar_msi_hwunmask(struct pic *, int);
2651void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
2652void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
2653
2654void
2655acpidmar_msi_hwmask(struct pic *pic, int pin)
2656{
2657	struct iommu_pic	*ip = (void *)pic;
2658	struct iommu_softc	*iommu = ip->iommu;
2659
2660	printf("msi_hwmask\n");
2661
2662	mtx_enter(&iommu->reg_lock);
2663
2664	iommu_write_4(iommu, DMAR_FECTL_REG, FECTL_IM);
2665	iommu_read_4(iommu, DMAR_FECTL_REG);
2666
2667	mtx_leave(&iommu->reg_lock);
2668}
2669
2670void
2671acpidmar_msi_hwunmask(struct pic *pic, int pin)
2672{
2673	struct iommu_pic	*ip = (void *)pic;
2674	struct iommu_softc	*iommu = ip->iommu;
2675
2676	printf("msi_hwunmask\n");
2677
2678	mtx_enter(&iommu->reg_lock);
2679
2680	iommu_write_4(iommu, DMAR_FECTL_REG, 0);
2681	iommu_read_4(iommu, DMAR_FECTL_REG);
2682
2683	mtx_leave(&iommu->reg_lock);
2684}
2685
2686void
2687acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
2688    int type)
2689{
2690	struct iommu_pic	*ip = (void *)pic;
2691	struct iommu_softc	*iommu = ip->iommu;
2692
2693	mtx_enter(&iommu->reg_lock);
2694
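	/*
	 * Program the fault-event MSI: the data register carries the vector,
	 * the address register targets the destination CPU's local APIC.
	 */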
2695	iommu->fedata = vec;
2696	iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
2697	iommu_write_4(iommu, DMAR_FEDATA_REG, vec);
2698	iommu_write_4(iommu, DMAR_FEADDR_REG, iommu->feaddr);
2699	iommu_write_4(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
2700
2701	mtx_leave(&iommu->reg_lock);
2702}
2703
2704void
2705acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
2706    int type)
2707{
2708	printf("msi_delroute\n");
2709}
2710
2711void *
2712acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
2713    void *arg, const char *what)
2714{
2715	struct iommu_softc	*iommu = ctx;
2716	struct pic		*pic;
2717
2718	pic = &iommu->pic.pic;
2719	iommu->pic.iommu = iommu;
2720
2721	strlcpy(pic->pic_dev.dv_xname, "dmarpic",
2722		sizeof(pic->pic_dev.dv_xname));
2723	pic->pic_type = PIC_MSI;
2724	pic->pic_hwmask = acpidmar_msi_hwmask;
2725	pic->pic_hwunmask = acpidmar_msi_hwunmask;
2726	pic->pic_addroute = acpidmar_msi_addroute;
2727	pic->pic_delroute = acpidmar_msi_delroute;
2728	pic->pic_edge_stubs = ioapic_edge_stubs;
2729#ifdef MULTIPROCESSOR
2730	mtx_init(&pic->pic_mutex, level);
2731#endif
2732
2733	return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
2734}
2735
2736/* Intel: Handle DMAR Interrupt */
2737int
2738acpidmar_intr(void *ctx)
2739{
2740	struct iommu_softc		*iommu = ctx;
2741	struct fault_entry		fe;
2742	static struct fault_entry	ofe;
2743	int				fro, nfr, fri, i;
2744	uint32_t			sts;
2745
2746	/*splassert(IPL_HIGH);*/
2747
2748	if (!(iommu->gcmd & GCMD_TE)) {
2749		return (1);
2750	}
2751	mtx_enter(&iommu->reg_lock);
2752	sts = iommu_read_4(iommu, DMAR_FECTL_REG);
2753	sts = iommu_read_4(iommu, DMAR_FSTS_REG);
2754
2755	if (!(sts & FSTS_PPF)) {
2756		mtx_leave(&iommu->reg_lock);
2757		return (1);
2758	}
2759
2760	nfr = cap_nfr(iommu->cap);
2761	fro = cap_fro(iommu->cap);
2762	fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
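	/*
	 * Scan the fault recording registers (16 bytes each) starting at the
	 * fault recording index; the F bit in the high qword marks a valid entry.
	 */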
2763	for (i = 0; i < nfr; i++) {
2764		fe.hi = iommu_read_8(iommu, fro + (fri*16) + 8);
2765		if (!(fe.hi & FRCD_HI_F))
2766			break;
2767
2768		fe.lo = iommu_read_8(iommu, fro + (fri*16));
2769		if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
2770			iommu_showfault(iommu, fri, &fe);
2771			ofe.hi = fe.hi;
2772			ofe.lo = fe.lo;
2773		}
2774		fri = (fri + 1) % nfr;
2775	}
2776
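	/* Write back the overflow/pending bits to acknowledge the faults. */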
2777	iommu_write_4(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
2778
2779	mtx_leave(&iommu->reg_lock);
2780
2781	return (1);
2782}
2783
2784const char *vtd_faults[] = {
2785	"Software",
2786	"Root Entry Not Present",	/* ok (rtaddr + 4096) */
2787	"Context Entry Not Present",	/* ok (no CTX_P) */
2788	"Context Entry Invalid",	/* ok (tt = 3) */
2789	"Address Beyond MGAW",
2790	"Write",			/* ok */
2791	"Read",				/* ok */
2792	"Paging Entry Invalid",		/* ok */
2793	"Root Table Invalid",
2794	"Context Table Invalid",
2795	"Root Entry Reserved",		/* ok (root.lo |= 0x4) */
2796	"Context Entry Reserved",
2797	"Paging Entry Reserved",
2798	"Context Entry TT",
2799	"Reserved",
2800};
2801
2802void iommu_showpte(uint64_t, int, uint64_t);
2803
2804/* Intel: Show IOMMU page table entry */
2805void
2806iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
2807{
2808	uint64_t nb, pb, i;
2809	struct pte_entry *pte;
2810
2811	pte = (void *)PMAP_DIRECT_MAP(ptep);
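	/* Walk all 512 entries at this level, recursing until VTD_LEVEL0. */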
2812	for (i = 0; i < 512; i++) {
2813		if (!(pte[i].val & PTE_P))
2814			continue;
2815		nb = base + (i << lvl);
2816		pb = pte[i].val & ~VTD_PAGE_MASK;
		if (lvl == VTD_LEVEL0) {
2818			printf("   %3llx %.16llx = %.16llx %c%c %s\n",
2819			    i, nb, pb,
			    pte[i].val & PTE_R ? 'r' : ' ',
2821			    pte[i].val & PTE_W ? 'w' : ' ',
2822			    (nb == pb) ? " ident" : "");
2823			if (nb == pb)
2824				return;
2825		} else {
2826			iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
2827		}
2828	}
2829}
2830
2831/* Intel: Show IOMMU configuration */
2832void
2833iommu_showcfg(struct iommu_softc *iommu, int sid)
2834{
2835	int i, j, sts, cmd;
2836	struct context_entry *ctx;
2837	pcitag_t tag;
2838	pcireg_t clc;
2839
2840	cmd = iommu_read_4(iommu, DMAR_GCMD_REG);
2841	sts = iommu_read_4(iommu, DMAR_GSTS_REG);
2842	printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
2843	    iommu->id, iommu->flags, iommu_read_8(iommu, DMAR_RTADDR_REG),
2844	    sts & GSTS_TES ? "enabled" : "disabled",
2845	    sts & GSTS_QIES ? "qi" : "ccmd",
2846	    sts & GSTS_IRES ? "ir" : "",
2847	    cmd, sts);
2848	for (i = 0; i < 256; i++) {
2849		if (!root_entry_is_valid(&iommu->root[i])) {
2850			continue;
2851		}
2852		for (j = 0; j < 256; j++) {
2853			ctx = iommu->ctx[i] + j;
2854			if (!context_entry_is_valid(ctx)) {
2855				continue;
2856			}
2857			tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
2858			clc = pci_conf_read(NULL, tag, 0x08) >> 8;
2859			printf("  %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
2860			    i, (j >> 3), j & 7,
2861			    context_address_width(ctx),
2862			    context_domain_id(ctx),
2863			    context_translation_type(ctx),
2864			    context_pte(ctx),
2865			    context_user(ctx),
2866			    clc);
2867#if 0
2868			/* dump pagetables */
2869			iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
2870			    VTD_STRIDE_SIZE, 0);
2871#endif
2872		}
2873	}
2874}
2875
2876/* Intel: Show IOMMU fault */
2877void
2878iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
2879{
2880	int bus, dev, fun, type, fr, df;
2881	bios_memmap_t	*im;
2882	const char *mapped;
2883
2884	if (!(fe->hi & FRCD_HI_F))
2885		return;
2886	type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
2887	fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
2888	bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
2889	dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
2890	fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
2891	df  = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
	iommu_showcfg(iommu, mksid(bus, dev, fun));
2893	if (!iommu->ctx[bus]) {
2894		/* Bus is not initialized */
2895		mapped = "nobus";
2896	} else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
2897		/* DevFn not initialized */
2898		mapped = "nodevfn";
2899	} else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
2900		/* no bus_space_map */
2901		mapped = "nomap";
2902	} else {
2903		/* bus_space_map */
2904		mapped = "mapped";
2905	}
2906	printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
2907	    fri, bus, dev, fun,
2908	    type == 'r' ? "read" : "write",
2909	    fe->lo,
2910	    fr, fr <= 13 ? vtd_faults[fr] : "unknown",
2911	    iommu->id,
2912	    mapped);
2913	for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
2914		if ((im->type == BIOS_MAP_RES) &&
2915		    (im->addr <= fe->lo) &&
2916		    (fe->lo <= im->addr+im->size)) {
2917			printf("mem in e820.reserved\n");
2918		}
2919	}
2920#ifdef DDB
2921	if (acpidmar_ddb)
2922		db_enter();
2923#endif
2924}
2925
2926