1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
5 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
6 * Copyright (c) 2000, BSDi
7 * Copyright (c) 2004, Scott Long <scottl@freebsd.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice unmodified, this list of conditions, and the following
15 *    disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/lock.h>
36#include <sys/kernel.h>
37#include <sys/mutex.h>
38#include <sys/malloc.h>
39#include <sys/queue.h>
40#include <sys/sysctl.h>
41#include <dev/pci/pcivar.h>
42#include <dev/pci/pcireg.h>
43#include <machine/pci_cfgreg.h>
44#include <machine/pc/bios.h>
45
46#include <vm/vm.h>
47#include <vm/vm_param.h>
48#include <vm/vm_kern.h>
49#include <vm/vm_extern.h>
50#include <vm/pmap.h>
51
/*
 * Print a message only when bootverbose is set.  The argument is a complete,
 * parenthesized printf() argument list, e.g. PRVERB(("value %d\n", x)).
 */
#define PRVERB(a) do {							\
	if (bootverbose)						\
		printf a ;						\
} while(0)
56
/*
 * One memory-mapped configuration region, as registered through
 * pcie_cfgregopen().
 */
struct pcie_mcfg_region {
	/* Physical address at which the config space of 'minbus' starts. */
	uint64_t base;
	/* PCI domain this region serves. */
	uint16_t domain;
	/* First bus number covered by the region (inclusive). */
	uint8_t minbus;
	/* Last bus number covered by the region (inclusive). */
	uint8_t maxbus;
};
63
/* Number of mapping slots kept in each per-CPU cache. */
#define PCIE_CACHE 8
/*
 * One slot of a per-CPU mapping cache: a reserved kernel virtual page and the
 * physical config-space page currently entered at it.
 */
struct pcie_cfg_elem {
	/* Linkage within the owning per-CPU list. */
	TAILQ_ENTRY(pcie_cfg_elem)	elem;
	/* Kernel virtual address of this slot's page. */
	vm_offset_t	vapage;
	/* Physical page mapped at vapage; 0 means nothing is mapped yet. */
	vm_paddr_t	papage;
};
70
SYSCTL_DECL(_hw_pci);

/* Registered MCFG regions and their count; extended by pcie_cfgregopen(). */
static struct pcie_mcfg_region *mcfg_regions;
static int mcfg_numregions;
/* Per-CPU mapping caches, indexed by CPU id. */
static TAILQ_HEAD(pcie_cfg_list, pcie_cfg_elem) pcie_list[MAXCPU];
/* 0 = caches not set up yet, 1 = set up, -1 = setup failed. */
static int pcie_cache_initted;
/* Bit N set: slot N on domain 0, bus 0 must be accessed with port I/O. */
static uint32_t pcie_badslots;
/* Configuration mechanism in use (a CFGMECH_* value). */
int cfgmech;
/* Exclusive upper bound on slot numbers accepted by pci_cfgenable(). */
static int devmax;
/* Spin mutex held around port-based configuration accesses. */
static struct mtx pcicfg_mtx;

/* When 0, pcie_cfgregopen() declines to register any region. */
static int mcfg_enable = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, mcfg, CTLFLAG_RDTUN, &mcfg_enable, 0,
    "Enable support for PCI-e memory mapped config access");

/* Forward declarations for the static helpers defined further down. */
static uint32_t	pci_docfgregread(int domain, int bus, int slot, int func,
		    int reg, int bytes);
static struct pcie_mcfg_region *pcie_lookup_region(int domain, int bus);
static int	pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
static void	pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
static int	pcireg_cfgopen(void);
static int	pciereg_cfgread(struct pcie_mcfg_region *region, int bus,
		    unsigned slot, unsigned func, unsigned reg, unsigned bytes);
static void	pciereg_cfgwrite(struct pcie_mcfg_region *region, int bus,
		    unsigned slot, unsigned func, unsigned reg, int data,
		    unsigned bytes);
97
98/*
99 * Some BIOS writers seem to want to ignore the spec and put
100 * 0 in the intline rather than 255 to indicate none.  Some use
101 * numbers in the range 128-254 to indicate something strange and
102 * apparently undocumented anywhere.  Assume these are completely bogus
103 * and map them to 255, which means "none".
104 */
105static __inline int
106pci_i386_map_intline(int line)
107{
108	if (line == 0 || line >= 128)
109		return (PCI_INVALID_IRQ);
110	return (line);
111}
112
113static u_int16_t
114pcibios_get_version(void)
115{
116	struct bios_regs args;
117
118	if (PCIbios.ventry == 0) {
119		PRVERB(("pcibios: No call entry point\n"));
120		return (0);
121	}
122	args.eax = PCIBIOS_BIOS_PRESENT;
123	if (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))) {
124		PRVERB(("pcibios: BIOS_PRESENT call failed\n"));
125		return (0);
126	}
127	if (args.edx != 0x20494350) {
128		PRVERB(("pcibios: BIOS_PRESENT didn't return 'PCI ' in edx\n"));
129		return (0);
130	}
131	return (args.ebx & 0xffff);
132}
133
134/*
135 * Initialise access to PCI configuration space
136 */
137int
138pci_cfgregopen(void)
139{
140	uint16_t v;
141	static int opened = 0;
142
143	if (opened)
144		return (1);
145
146	if (cfgmech == CFGMECH_NONE && pcireg_cfgopen() == 0)
147		return (0);
148
149	v = pcibios_get_version();
150	if (v > 0)
151		PRVERB(("pcibios: BIOS version %x.%02x\n", (v & 0xff00) >> 8,
152		    v & 0xff));
153	mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN);
154	opened = 1;
155
156	/* $PIR requires PCI BIOS 2.10 or greater. */
157	if (v >= 0x0210)
158		pci_pir_open();
159
160	return (1);
161}
162
163static struct pcie_mcfg_region *
164pcie_lookup_region(int domain, int bus)
165{
166	for (int i = 0; i < mcfg_numregions; i++)
167		if (mcfg_regions[i].domain == domain &&
168		    bus >= mcfg_regions[i].minbus &&
169		    bus <= mcfg_regions[i].maxbus)
170			return (&mcfg_regions[i]);
171	return (NULL);
172}
173
174static uint32_t
175pci_docfgregread(int domain, int bus, int slot, int func, int reg, int bytes)
176{
177	if (domain == 0 && bus == 0 && (1 << slot & pcie_badslots) != 0)
178		return (pcireg_cfgread(bus, slot, func, reg, bytes));
179
180	if (cfgmech == CFGMECH_PCIE) {
181		struct pcie_mcfg_region *region;
182
183		region = pcie_lookup_region(domain, bus);
184		if (region != NULL)
185			return (pciereg_cfgread(region, bus, slot, func, reg,
186			    bytes));
187	}
188
189	if (domain == 0)
190		return (pcireg_cfgread(bus, slot, func, reg, bytes));
191	else
192		return (-1);
193}
194
195/*
196 * Read configuration space register
197 */
198u_int32_t
199pci_cfgregread(int domain, int bus, int slot, int func, int reg, int bytes)
200{
201	uint32_t line;
202
203	/*
204	 * Some BIOS writers seem to want to ignore the spec and put
205	 * 0 in the intline rather than 255 to indicate none.  The rest of
206	 * the code uses 255 as an invalid IRQ.
207	 */
208	if (reg == PCIR_INTLINE && bytes == 1) {
209		line = pci_docfgregread(domain, bus, slot, func, PCIR_INTLINE,
210		    1);
211		return (pci_i386_map_intline(line));
212	}
213	return (pci_docfgregread(domain, bus, slot, func, reg, bytes));
214}
215
216/*
217 * Write configuration space register
218 */
219void
220pci_cfgregwrite(int domain, int bus, int slot, int func, int reg, uint32_t data,
221    int bytes)
222{
223	if (domain == 0 && bus == 0 && (1 << slot & pcie_badslots) != 0) {
224		pcireg_cfgwrite(bus, slot, func, reg, data, bytes);
225		return;
226	}
227
228	if (cfgmech == CFGMECH_PCIE) {
229		struct pcie_mcfg_region *region;
230
231		region = pcie_lookup_region(domain, bus);
232		if (region != NULL) {
233			pciereg_cfgwrite(region, bus, slot, func, reg, data,
234			    bytes);
235			return;
236		}
237	}
238
239	if (domain == 0)
240		pcireg_cfgwrite(bus, slot, func, reg, data, bytes);
241}
242
243/*
244 * Configuration space access using direct register operations
245 */
246
247/* enable configuration space accesses and return data port address */
248static int
249pci_cfgenable(unsigned bus, unsigned slot, unsigned func, int reg, int bytes)
250{
251	int dataport = 0;
252
253	if (bus <= PCI_BUSMAX
254	    && slot < devmax
255	    && func <= PCI_FUNCMAX
256	    && (unsigned)reg <= PCI_REGMAX
257	    && bytes != 3
258	    && (unsigned)bytes <= 4
259	    && (reg & (bytes - 1)) == 0) {
260		switch (cfgmech) {
261		case CFGMECH_PCIE:
262		case CFGMECH_1:
263			outl(CONF1_ADDR_PORT, (1U << 31)
264			    | (bus << 16) | (slot << 11)
265			    | (func << 8) | (reg & ~0x03));
266			dataport = CONF1_DATA_PORT + (reg & 0x03);
267			break;
268		case CFGMECH_2:
269			outb(CONF2_ENABLE_PORT, 0xf0 | (func << 1));
270			outb(CONF2_FORWARD_PORT, bus);
271			dataport = 0xc000 | (slot << 8) | reg;
272			break;
273		}
274	}
275	return (dataport);
276}
277
278/* disable configuration space accesses */
279static void
280pci_cfgdisable(void)
281{
282	switch (cfgmech) {
283	case CFGMECH_PCIE:
284	case CFGMECH_1:
285		/*
286		 * Do nothing for the config mechanism 1 case.
287		 * Writing a 0 to the address port can apparently
288		 * confuse some bridges and cause spurious
289		 * access failures.
290		 */
291		break;
292	case CFGMECH_2:
293		outb(CONF2_ENABLE_PORT, 0);
294		break;
295	}
296}
297
298static int
299pcireg_cfgread(int bus, int slot, int func, int reg, int bytes)
300{
301	int data = -1;
302	int port;
303
304	mtx_lock_spin(&pcicfg_mtx);
305	port = pci_cfgenable(bus, slot, func, reg, bytes);
306	if (port != 0) {
307		switch (bytes) {
308		case 1:
309			data = inb(port);
310			break;
311		case 2:
312			data = inw(port);
313			break;
314		case 4:
315			data = inl(port);
316			break;
317		}
318		pci_cfgdisable();
319	}
320	mtx_unlock_spin(&pcicfg_mtx);
321	return (data);
322}
323
324static void
325pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
326{
327	int port;
328
329	mtx_lock_spin(&pcicfg_mtx);
330	port = pci_cfgenable(bus, slot, func, reg, bytes);
331	if (port != 0) {
332		switch (bytes) {
333		case 1:
334			outb(port, data);
335			break;
336		case 2:
337			outw(port, data);
338			break;
339		case 4:
340			outl(port, data);
341			break;
342		}
343		pci_cfgdisable();
344	}
345	mtx_unlock_spin(&pcicfg_mtx);
346}
347
/*
 * Check whether the configuration mechanism has been correctly identified.
 *
 * Probes function 0 of slots 0 .. maxdev-1 on bus 0 with the currently
 * selected mechanism.  Returns 1 as soon as one slot yields an acceptable
 * ID, class and header value, or 0 if no slot does.  Progress is printed
 * when bootverbose is set.  The ports are accessed directly, without taking
 * pcicfg_mtx.
 */
static int
pci_cfgcheck(int maxdev)
{
	uint32_t id, class;
	uint8_t header;
	uint8_t device;
	int port;

	if (bootverbose)
		printf("pci_cfgcheck:\tdevice ");

	for (device = 0; device < maxdev; device++) {
		if (bootverbose)
			printf("%d ", device);

		/* Register 0: all zeros or all ones is not a usable ID. */
		port = pci_cfgenable(0, device, 0, 0, 4);
		id = inl(port);
		if (id == 0 || id == 0xffffffff)
			continue;

		/*
		 * Register 8: discard the low byte and keep the upper 24 bits.
		 * NOTE(review): the 0xf870ff mask presumably rejects class
		 * codes no real device would report -- confirm against the
		 * PCI class code table.
		 */
		port = pci_cfgenable(0, device, 0, 8, 4);
		class = inl(port) >> 8;
		if (bootverbose)
			printf("[class=%06x] ", class);
		if (class == 0 || (class & 0xf870ff) != 0)
			continue;

		/* Register 14: only bits 0 and 7 of this byte may be set. */
		port = pci_cfgenable(0, device, 0, 14, 1);
		header = inb(port);
		if (bootverbose)
			printf("[hdr=%02x] ", header);
		if ((header & 0x7e) != 0)
			continue;

		if (bootverbose)
			printf("is there (id=%08x)\n", id);

		pci_cfgdisable();
		return (1);
	}
	if (bootverbose)
		printf("-- nothing found\n");

	pci_cfgdisable();
	return (0);
}
395
/*
 * Probe for a port-based configuration mechanism.
 *
 * Tries mechanism 1 with two different check patterns, then mechanism 2.
 * Each candidate is confirmed with pci_cfgcheck().  On success cfgmech and
 * devmax are left set for the mechanism found and cfgmech is returned; on
 * failure cfgmech is set to CFGMECH_NONE, devmax to 0, and CFGMECH_NONE is
 * returned.
 */
static int
pcireg_cfgopen(void)
{
	uint32_t mode1res, oldval1;
	uint8_t mode2res, oldval2;

	/* Check for type #1 first. */
	oldval1 = inl(CONF1_ADDR_PORT);

	if (bootverbose) {
		printf("pci_open(1):\tmode 1 addr port (0x0cf8) is 0x%08x\n",
		    oldval1);
	}

	/* Select mechanism 1 up front so pci_cfgcheck() probes with it. */
	cfgmech = CFGMECH_1;
	devmax = 32;

	/* Write the check pattern, read it back, then restore the port. */
	outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK);
	DELAY(1);
	mode1res = inl(CONF1_ADDR_PORT);
	outl(CONF1_ADDR_PORT, oldval1);

	if (bootverbose)
		printf("pci_open(1a):\tmode1res=0x%08x (0x%08lx)\n",  mode1res,
		    CONF1_ENABLE_CHK);

	if (mode1res) {
		if (pci_cfgcheck(32))
			return (cfgmech);
	}

	/* Second attempt at mechanism 1 with the alternate check pattern. */
	outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK1);
	mode1res = inl(CONF1_ADDR_PORT);
	outl(CONF1_ADDR_PORT, oldval1);

	if (bootverbose)
		printf("pci_open(1b):\tmode1res=0x%08x (0x%08lx)\n",  mode1res,
		    CONF1_ENABLE_CHK1);

	if ((mode1res & CONF1_ENABLE_MSK1) == CONF1_ENABLE_RES1) {
		if (pci_cfgcheck(32))
			return (cfgmech);
	}

	/* Type #1 didn't work, so try type #2. */
	oldval2 = inb(CONF2_ENABLE_PORT);

	if (bootverbose) {
		printf("pci_open(2):\tmode 2 enable port (0x0cf8) is 0x%02x\n",
		    oldval2);
	}

	/* Mechanism 2 is only tried when the top nibble currently reads 0. */
	if ((oldval2 & 0xf0) == 0) {
		cfgmech = CFGMECH_2;
		devmax = 16;

		/* Write the check pattern, read it back, then restore. */
		outb(CONF2_ENABLE_PORT, CONF2_ENABLE_CHK);
		mode2res = inb(CONF2_ENABLE_PORT);
		outb(CONF2_ENABLE_PORT, oldval2);

		if (bootverbose)
			printf("pci_open(2a):\tmode2res=0x%02x (0x%02x)\n",
			    mode2res, CONF2_ENABLE_CHK);

		if (mode2res == CONF2_ENABLE_RES) {
			if (bootverbose)
				printf("pci_open(2a):\tnow trying mechanism 2\n");

			if (pci_cfgcheck(16))
				return (cfgmech);
		}
	}

	/* Nothing worked, so punt. */
	cfgmech = CFGMECH_NONE;
	devmax = 0;
	return (cfgmech);
}
474
/*
 * Set up the per-CPU caches of config-space mappings.
 *
 * For each CPU (only CPU 0 when built without SMP) this allocates an array
 * of PCIE_CACHE elements and PCIE_CACHE pages of kernel virtual address
 * space, then links every element, with no physical page assigned, onto
 * that CPU's list.  Returns true on success and false if either allocation
 * fails.
 *
 * NOTE(review): on a failure for a later CPU, the arrays and address space
 * already set up for earlier CPUs are not released here.
 */
static bool
pcie_init_cache(void)
{
	struct pcie_cfg_list *pcielist;
	struct pcie_cfg_elem *pcie_array, *elem;
#ifdef SMP
	struct pcpu *pc;
#endif
	vm_offset_t va;
	int i;

	/* With SMP the block below is the body of this loop; else it runs once. */
#ifdef SMP
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
#endif
	{
		pcie_array = malloc(sizeof(struct pcie_cfg_elem) * PCIE_CACHE,
		    M_DEVBUF, M_NOWAIT);
		if (pcie_array == NULL)
			return (false);

		/* Reserve address space only; pages are entered on demand. */
		va = kva_alloc(PCIE_CACHE * PAGE_SIZE);
		if (va == 0) {
			free(pcie_array, M_DEVBUF);
			return (false);
		}

#ifdef SMP
		pcielist = &pcie_list[pc->pc_cpuid];
#else
		pcielist = &pcie_list[0];
#endif
		TAILQ_INIT(pcielist);
		for (i = 0; i < PCIE_CACHE; i++) {
			elem = &pcie_array[i];
			elem->vapage = va + (i * PAGE_SIZE);
			/* papage == 0 marks the slot as not yet mapped. */
			elem->papage = 0;
			TAILQ_INSERT_HEAD(pcielist, elem, elem);
		}
	}
	return (true);
}
516
517static void
518pcie_init_badslots(struct pcie_mcfg_region *region)
519{
520	uint32_t val1, val2;
521	int slot;
522
523	/*
524	 * On some AMD systems, some of the devices on bus 0 are
525	 * inaccessible using memory-mapped PCI config access.  Walk
526	 * bus 0 looking for such devices.  For these devices, we will
527	 * fall back to using type 1 config access instead.
528	 */
529	if (pci_cfgregopen() != 0) {
530		for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
531			val1 = pcireg_cfgread(0, slot, 0, 0, 4);
532			if (val1 == 0xffffffff)
533				continue;
534
535			val2 = pciereg_cfgread(region, 0, slot, 0, 0, 4);
536			if (val2 != val1)
537				pcie_badslots |= (1 << slot);
538		}
539	}
540}
541
542int
543pcie_cfgregopen(uint64_t base, uint16_t domain, uint8_t minbus, uint8_t maxbus)
544{
545	struct pcie_mcfg_region *region;
546
547	if (!mcfg_enable)
548		return (0);
549
550	if (!pae_mode && base >= 0x100000000) {
551		if (bootverbose)
552			printf(
553	    "PCI: MCFG domain %u bus %u-%u base 0x%jx too high\n",
554			domain, minbus, maxbus, (uintmax_t)base);
555		return (0);
556	}
557
558	if (bootverbose)
559		printf("PCI: MCFG domain %u bus %u-%u base @ 0x%jx\n",
560		    domain, minbus, maxbus, (uintmax_t)base);
561
562	if (pcie_cache_initted == 0) {
563		if (!pcie_init_cache())
564			pcie_cache_initted = -1;
565		else
566			pcie_cache_initted = 1;
567	}
568
569	if (pcie_cache_initted == -1)
570		return (0);
571
572	/* Resize the array. */
573	mcfg_regions = realloc(mcfg_regions,
574	    sizeof(*mcfg_regions) * (mcfg_numregions + 1), M_DEVBUF, M_WAITOK);
575
576	region = &mcfg_regions[mcfg_numregions];
577	region->base = base + (minbus << 20);
578	region->domain = domain;
579	region->minbus = minbus;
580	region->maxbus = maxbus;
581	mcfg_numregions++;
582
583	cfgmech = CFGMECH_PCIE;
584	devmax = 32;
585
586	if (domain == 0 && minbus == 0)
587		pcie_init_badslots(region);
588
589	return (1);
590}
591
/*
 * Compute the physical address of a register within a memory-mapped
 * configuration region.  The offset added to 'base' is built from the bus in
 * bits 20-27, the slot in bits 15-19, the function in bits 12-14 and the
 * register in bits 0-11; each field is masked to its width.
 */
#define PCIE_PADDR(base, reg, bus, slot, func)	\
	((base)				+	\
	((((bus) & 0xff) << 20)		|	\
	(((slot) & 0x1f) << 15)		|	\
	(((func) & 0x7) << 12)		|	\
	((reg) & 0xfff)))
598
599static __inline vm_offset_t
600pciereg_findaddr(struct pcie_mcfg_region *region, int bus, unsigned slot,
601    unsigned func, unsigned reg)
602{
603	struct pcie_cfg_list *pcielist;
604	struct pcie_cfg_elem *elem;
605	vm_paddr_t pa, papage;
606
607	MPASS(bus >= region->minbus && bus <= region->maxbus);
608
609	pa = PCIE_PADDR(region->base, reg, bus - region->minbus, slot, func);
610	papage = pa & ~PAGE_MASK;
611
612	/*
613	 * Find an element in the cache that matches the physical page desired,
614	 * or create a new mapping from the least recently used element.
615	 * A very simple LRU algorithm is used here, does it need to be more
616	 * efficient?
617	 */
618	pcielist = &pcie_list[PCPU_GET(cpuid)];
619	TAILQ_FOREACH(elem, pcielist, elem) {
620		if (elem->papage == papage)
621			break;
622	}
623
624	if (elem == NULL) {
625		elem = TAILQ_LAST(pcielist, pcie_cfg_list);
626		if (elem->papage != 0) {
627			pmap_kremove(elem->vapage);
628			invlpg(elem->vapage);
629		}
630		pmap_kenter(elem->vapage, papage);
631		elem->papage = papage;
632	}
633
634	if (elem != TAILQ_FIRST(pcielist)) {
635		TAILQ_REMOVE(pcielist, elem, elem);
636		TAILQ_INSERT_HEAD(pcielist, elem, elem);
637	}
638	return (elem->vapage | (pa & PAGE_MASK));
639}
640
641/*
642 * AMD BIOS And Kernel Developer's Guides for CPU families starting with 10h
643 * have a requirement that all accesses to the memory mapped PCI configuration
644 * space are done using AX class of registers.
645 * Since other vendors do not currently have any contradicting requirements
646 * the AMD access pattern is applied universally.
647 */
648
/*
 * Read 1, 2 or 4 bytes from a configuration register through the
 * memory-mapped region.
 *
 * Returns -1 when slot, func or reg is out of range, and also when 'bytes'
 * is not 1, 2 or 4 (no access is made in that case).  One- and two-byte
 * reads are zero-extended.  The address lookup and the access run inside a
 * critical section.
 * NOTE(review): the critical section presumably keeps the thread on the CPU
 * whose mapping cache was consulted -- confirm.
 */
static int
pciereg_cfgread(struct pcie_mcfg_region *region, int bus, unsigned slot,
    unsigned func, unsigned reg, unsigned bytes)
{
	vm_offset_t va;
	int data = -1;

	if (slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
		return (-1);

	critical_enter();
	va = pciereg_findaddr(region, bus, slot, func, reg);

	/* The "=a" output constraint makes each load target the A register. */
	switch (bytes) {
	case 4:
		__asm("movl %1, %0" : "=a" (data)
		    : "m" (*(volatile uint32_t *)va));
		break;
	case 2:
		__asm("movzwl %1, %0" : "=a" (data)
		    : "m" (*(volatile uint16_t *)va));
		break;
	case 1:
		__asm("movzbl %1, %0" : "=a" (data)
		    : "m" (*(volatile uint8_t *)va));
		break;
	}

	critical_exit();
	return (data);
}
680
/*
 * Write 1, 2 or 4 bytes to a configuration register through the
 * memory-mapped region.
 *
 * The write is silently skipped when slot, func or reg is out of range;
 * when 'bytes' is not 1, 2 or 4 the address is still looked up but nothing
 * is stored.  The address lookup and the access run inside a critical
 * section.
 * NOTE(review): the critical section presumably keeps the thread on the CPU
 * whose mapping cache was consulted -- confirm.
 */
static void
pciereg_cfgwrite(struct pcie_mcfg_region *region, int bus, unsigned slot,
    unsigned func, unsigned reg, int data, unsigned bytes)
{
	vm_offset_t va;

	if (slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
		return;

	critical_enter();
	va = pciereg_findaddr(region, bus, slot, func, reg);

	/* The "a" input constraint makes each store source the A register. */
	switch (bytes) {
	case 4:
		__asm("movl %1, %0" : "=m" (*(volatile uint32_t *)va)
		    : "a" (data));
		break;
	case 2:
		__asm("movw %1, %0" : "=m" (*(volatile uint16_t *)va)
		    : "a" ((uint16_t)data));
		break;
	case 1:
		__asm("movb %1, %0" : "=m" (*(volatile uint8_t *)va)
		    : "a" ((uint8_t)data));
		break;
	}

	critical_exit();
}
710