pci.c revision 252166
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 252166 2013-06-24 18:30:44Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
73#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
74#define	PCI_DMA_BOUNDARY	0x100000000
75#endif
76
77#define	PCIR_IS_BIOS(cfg, reg)						\
78	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
79	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
80
81static pci_addr_t	pci_mapbase(uint64_t mapreg);
82static const char	*pci_maptype(uint64_t mapreg);
83static int		pci_mapsize(uint64_t testval);
84static int		pci_maprange(uint64_t mapreg);
85static pci_addr_t	pci_rombase(uint64_t mapreg);
86static int		pci_romsize(uint64_t testval);
87static void		pci_fixancient(pcicfgregs *cfg);
88static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
89
90static int		pci_porten(device_t dev);
91static int		pci_memen(device_t dev);
92static void		pci_assign_interrupt(device_t bus, device_t dev,
93			    int force_route);
94static int		pci_add_map(device_t bus, device_t dev, int reg,
95			    struct resource_list *rl, int force, int prefetch);
96static int		pci_probe(device_t dev);
97static int		pci_attach(device_t dev);
98static void		pci_load_vendor_data(void);
99static int		pci_describe_parse_line(char **ptr, int *vendor,
100			    int *device, char **desc);
101static char		*pci_describe_device(device_t dev);
102static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
103static int		pci_modevent(module_t mod, int what, void *arg);
104static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105			    pcicfgregs *cfg);
106static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108			    int reg, uint32_t *data);
109#if 0
110static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111			    int reg, uint32_t data);
112#endif
113static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114static void		pci_disable_msi(device_t dev);
115static void		pci_enable_msi(device_t dev, uint64_t address,
116			    uint16_t data);
117static void		pci_enable_msix(device_t dev, u_int index,
118			    uint64_t address, uint32_t data);
119static void		pci_mask_msix(device_t dev, u_int index);
120static void		pci_unmask_msix(device_t dev, u_int index);
121static int		pci_msi_blacklisted(void);
122static void		pci_resume_msi(device_t dev);
123static void		pci_resume_msix(device_t dev);
124static int		pci_remap_intr_method(device_t bus, device_t dev,
125			    u_int irq);
126
127static device_method_t pci_methods[] = {
128	/* Device interface */
129	DEVMETHOD(device_probe,		pci_probe),
130	DEVMETHOD(device_attach,	pci_attach),
131	DEVMETHOD(device_detach,	bus_generic_detach),
132	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
133	DEVMETHOD(device_suspend,	pci_suspend),
134	DEVMETHOD(device_resume,	pci_resume),
135
136	/* Bus interface */
137	DEVMETHOD(bus_print_child,	pci_print_child),
138	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
139	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
140	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
141	DEVMETHOD(bus_driver_added,	pci_driver_added),
142	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
143	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
144
145	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
146	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
147	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
148	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
149	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
150	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
151	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
152	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
153	DEVMETHOD(bus_activate_resource, pci_activate_resource),
154	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
155	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
156	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
157	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
158
159	/* PCI interface */
160	DEVMETHOD(pci_read_config,	pci_read_config_method),
161	DEVMETHOD(pci_write_config,	pci_write_config_method),
162	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
163	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
164	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
165	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
166	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
167	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
168	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
169	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
170	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
171	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
172	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
173	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
174	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
175	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
176	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
177	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
178	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
179	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
180
181	DEVMETHOD_END
182};
183
184DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
185
186static devclass_t pci_devclass;
187DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
188MODULE_VERSION(pci, 1);
189
190static char	*pci_vendordata;
191static size_t	pci_vendordata_size;
192
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;	/* type-specific argument; a config register offset for
			   the MAP_REG/UNMAP_REG quirks (see pci_quirks[]) */
	int	arg2;	/* second type-specific argument; currently unused (0) */
};
203
204static const struct pci_quirk pci_quirks[] = {
205	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
206	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
207	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
208	/* As does the Serverworks OSB4 (the SMBus mapping register) */
209	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
210
211	/*
212	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
213	 * or the CMIC-SL (AKA ServerWorks GC_LE).
214	 */
215	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217
218	/*
219	 * MSI doesn't work on earlier Intel chipsets including
220	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
221	 */
222	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
223	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
224	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
225	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
229
230	/*
231	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
232	 * bridge.
233	 */
234	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235
236	/*
237	 * MSI-X allocation doesn't work properly for devices passed through
238	 * by VMware up to at least ESXi 5.1.
239	 */
240	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCI/PCI-X */
241	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCIe */
242
243	/*
244	 * Some virtualization environments emulate an older chipset
245	 * but support MSI just fine.  QEMU uses the Intel 82440.
246	 */
247	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
248
249	/*
250	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
251	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
252	 * It prevents us from attaching hpet(4) when the bit is unset.
253	 * Note this quirk only affects SB600 revision A13 and earlier.
254	 * For SB600 A21 and later, firmware must set the bit to hide it.
255	 * For SB700 and later, it is unused and hardcoded to zero.
256	 */
257	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
258
259	{ 0 }
260};
261
262/* map register information */
263#define	PCI_MAPMEM	0x01	/* memory map */
264#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
265#define	PCI_MAPPORT	0x04	/* port map */
266
267struct devlist pci_devq;
268uint32_t pci_generation;
269uint32_t pci_numdevs = 0;
270static int pcie_chipset, pcix_chipset;
271
272/* sysctl vars */
273SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
274
275static int pci_enable_io_modes = 1;
276TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
277SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
278    &pci_enable_io_modes, 1,
279    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
280enable these bits correctly.  We'd like to do this all the time, but there\n\
281are some peripherals that this causes problems with.");
282
283static int pci_do_realloc_bars = 0;
284TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
285SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
286    &pci_do_realloc_bars, 0,
287    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
288
289static int pci_do_power_nodriver = 0;
290TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
291SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
292    &pci_do_power_nodriver, 0,
293  "Place a function into D3 state when no driver attaches to it.  0 means\n\
294disable.  1 means conservatively place devices into D3 state.  2 means\n\
295agressively place devices into D3 state.  3 means put absolutely everything\n\
296in D3 state.");
297
298int pci_do_power_resume = 1;
299TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
300SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
301    &pci_do_power_resume, 1,
302  "Transition from D3 -> D0 on resume.");
303
304int pci_do_power_suspend = 1;
305TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
306SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
307    &pci_do_power_suspend, 1,
308  "Transition from D0 -> D3 on suspend.");
309
310static int pci_do_msi = 1;
311TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
312SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
313    "Enable support for MSI interrupts");
314
315static int pci_do_msix = 1;
316TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
317SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
318    "Enable support for MSI-X interrupts");
319
320static int pci_honor_msi_blacklist = 1;
321TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
322SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
323    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
324
325#if defined(__i386__) || defined(__amd64__)
326static int pci_usb_takeover = 1;
327#else
328static int pci_usb_takeover = 0;
329#endif
330TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
331SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
332    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
333Disable this if you depend on BIOS emulation of USB devices, that is\n\
334you use USB devices (like keyboard or mouse) but do not load USB drivers");
335
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper around pci_find_dbsf() for domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
344
345/* Find a device_t by domain/bus/slot/function */
346
347device_t
348pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
349{
350	struct pci_devinfo *dinfo;
351
352	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
353		if ((dinfo->cfg.domain == domain) &&
354		    (dinfo->cfg.bus == bus) &&
355		    (dinfo->cfg.slot == slot) &&
356		    (dinfo->cfg.func == func)) {
357			return (dinfo->cfg.dev);
358		}
359	}
360
361	return (NULL);
362}
363
364/* Find a device_t by vendor/device ID */
365
366device_t
367pci_find_device(uint16_t vendor, uint16_t device)
368{
369	struct pci_devinfo *dinfo;
370
371	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
372		if ((dinfo->cfg.vendor == vendor) &&
373		    (dinfo->cfg.device == device)) {
374			return (dinfo->cfg.dev);
375		}
376	}
377
378	return (NULL);
379}
380
381device_t
382pci_find_class(uint8_t class, uint8_t subclass)
383{
384	struct pci_devinfo *dinfo;
385
386	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
387		if (dinfo->cfg.baseclass == class &&
388		    dinfo->cfg.subclass == subclass) {
389			return (dinfo->cfg.dev);
390		}
391	}
392
393	return (NULL);
394}
395
396static int
397pci_printf(pcicfgregs *cfg, const char *fmt, ...)
398{
399	va_list ap;
400	int retval;
401
402	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
403	    cfg->func);
404	va_start(ap, fmt);
405	retval += vprintf(fmt, ap);
406	va_end(ap);
407	return (retval);
408}
409
410/* return base address of memory or port map */
411
412static pci_addr_t
413pci_mapbase(uint64_t mapreg)
414{
415
416	if (PCI_BAR_MEM(mapreg))
417		return (mapreg & PCIM_BAR_MEM_BASE);
418	else
419		return (mapreg & PCIM_BAR_IO_BASE);
420}
421
422/* return map type of memory or port map */
423
424static const char *
425pci_maptype(uint64_t mapreg)
426{
427
428	if (PCI_BAR_IO(mapreg))
429		return ("I/O Port");
430	if (mapreg & PCIM_BAR_MEM_PREFETCH)
431		return ("Prefetchable Memory");
432	return ("Memory");
433}
434
/* return log2 of map size decoded for memory or port map */

static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/*
	 * After writing all-ones to a BAR, the number of low zero bits
	 * in the read-back address gives log2 of the decoded size.
	 */
	base = pci_mapbase(testval);
	ln2size = 0;
	while (base != 0 && (base & 1) == 0) {
		ln2size++;
		base >>= 1;
	}
	return (ln2size);
}
453
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the non-address bits of the expansion ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
462
/* return log2 of map size decoded for device ROM */

static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/* Same sizing trick as pci_mapsize(), applied to the ROM BAR. */
	base = pci_rombase(testval);
	ln2size = 0;
	while (base != 0 && (base & 1) == 0) {
		ln2size++;
		base >>= 1;
	}
	return (ln2size);
}
481
482/* return log2 of address range supported by map register */
483
484static int
485pci_maprange(uint64_t mapreg)
486{
487	int ln2range = 0;
488
489	if (PCI_BAR_IO(mapreg))
490		ln2range = 32;
491	else
492		switch (mapreg & PCIM_BAR_MEM_TYPE) {
493		case PCIM_BAR_MEM_32:
494			ln2range = 32;
495			break;
496		case PCIM_BAR_MEM_1MB:
497			ln2range = 20;
498			break;
499		case PCIM_BAR_MEM_64:
500			ln2range = 64;
501			break;
502		}
503	return (ln2range);
504}
505
506/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
507
508static void
509pci_fixancient(pcicfgregs *cfg)
510{
511	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
512		return;
513
514	/* PCI to PCI bridges use header type 1 */
515	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
516		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
517}
518
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * Subsystem IDs and the number of BARs live at different offsets
	 * (or do not exist) depending on the header type.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges may supply subvendor/subdevice via a capability
		   instead; see the PCIY_SUBVENDOR case in pci_read_cap(). */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
542
543/* read configuration header into pcicfgregs structure */
544struct pci_devinfo *
545pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
546{
547#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
548	pcicfgregs *cfg = NULL;
549	struct pci_devinfo *devlist_entry;
550	struct devlist *devlist_head;
551
552	devlist_head = &pci_devq;
553
554	devlist_entry = NULL;
555
556	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
557		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
558		if (devlist_entry == NULL)
559			return (NULL);
560
561		cfg = &devlist_entry->cfg;
562
563		cfg->domain		= d;
564		cfg->bus		= b;
565		cfg->slot		= s;
566		cfg->func		= f;
567		cfg->vendor		= REG(PCIR_VENDOR, 2);
568		cfg->device		= REG(PCIR_DEVICE, 2);
569		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
570		cfg->statreg		= REG(PCIR_STATUS, 2);
571		cfg->baseclass		= REG(PCIR_CLASS, 1);
572		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
573		cfg->progif		= REG(PCIR_PROGIF, 1);
574		cfg->revid		= REG(PCIR_REVID, 1);
575		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
576		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
577		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
578		cfg->intpin		= REG(PCIR_INTPIN, 1);
579		cfg->intline		= REG(PCIR_INTLINE, 1);
580
581		cfg->mingnt		= REG(PCIR_MINGNT, 1);
582		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
583
584		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
585		cfg->hdrtype		&= ~PCIM_MFDEV;
586		STAILQ_INIT(&cfg->maps);
587
588		pci_fixancient(cfg);
589		pci_hdrtypedata(pcib, b, s, f, cfg);
590
591		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
592			pci_read_cap(pcib, cfg);
593
594		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
595
596		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
597		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
598		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
599		devlist_entry->conf.pc_sel.pc_func = cfg->func;
600		devlist_entry->conf.pc_hdr = cfg->hdrtype;
601
602		devlist_entry->conf.pc_subvendor = cfg->subvendor;
603		devlist_entry->conf.pc_subdevice = cfg->subdevice;
604		devlist_entry->conf.pc_vendor = cfg->vendor;
605		devlist_entry->conf.pc_device = cfg->device;
606
607		devlist_entry->conf.pc_class = cfg->baseclass;
608		devlist_entry->conf.pc_subclass = cfg->subclass;
609		devlist_entry->conf.pc_progif = cfg->progif;
610		devlist_entry->conf.pc_revid = cfg->revid;
611
612		pci_numdevs++;
613		pci_generation++;
614	}
615	return (devlist_entry);
616#undef REG
617}
618
/*
 * Walk the device's PCI capability list and record the location and key
 * registers of each capability of interest (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor ID, PCI-X, PCI-express).
 * Called from pci_read_device() when the status register reports
 * PCIM_STATUS_CAPPRESENT.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each live at a BAR-relative offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
779
780/*
781 * PCI Vital Product Data
782 */
783
784#define	PCI_VPD_TIMEOUT		1000000
785
786static int
787pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
788{
789	int count = PCI_VPD_TIMEOUT;
790
791	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
792
793	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
794
795	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
796		if (--count < 0)
797			return (ENXIO);
798		DELAY(1);	/* limit looping */
799	}
800	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
801
802	return (0);
803}
804
#if 0
/*
 * Write one 32-bit word of VPD data: stores the word in the data
 * register, writes the address with the flag bit (0x8000) set, and
 * busy-waits for the hardware to clear the flag (write complete).
 * Compiled out: nothing in this file currently writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
824
825#undef PCI_VPD_TIMEOUT
826
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD byte address to fetch */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
835
836static int
837vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
838{
839	uint32_t reg;
840	uint8_t byte;
841
842	if (vrs->bytesinval == 0) {
843		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
844			return (ENXIO);
845		vrs->val = le32toh(reg);
846		vrs->off += 4;
847		byte = vrs->val & 0xff;
848		vrs->bytesinval = 3;
849	} else {
850		vrs->val = vrs->val >> 8;
851		byte = vrs->val & 0xff;
852		vrs->bytesinval--;
853	}
854
855	vrs->cksum += byte;
856	*data = byte;
857	return (0);
858}
859
860static void
861pci_read_vpd(device_t pcib, pcicfgregs *cfg)
862{
863	struct vpd_readstate vrs;
864	int state;
865	int name;
866	int remain;
867	int i;
868	int alloc, off;		/* alloc/off for RO/W arrays */
869	int cksumvalid;
870	int dflen;
871	uint8_t byte;
872	uint8_t byte2;
873
874	/* init vpd reader */
875	vrs.bytesinval = 0;
876	vrs.off = 0;
877	vrs.pcib = pcib;
878	vrs.cfg = cfg;
879	vrs.cksum = 0;
880
881	state = 0;
882	name = remain = i = 0;	/* shut up stupid gcc */
883	alloc = off = 0;	/* shut up stupid gcc */
884	dflen = 0;		/* shut up stupid gcc */
885	cksumvalid = -1;
886	while (state >= 0) {
887		if (vpd_nextbyte(&vrs, &byte)) {
888			state = -2;
889			break;
890		}
891#if 0
892		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
893		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
894		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
895#endif
896		switch (state) {
897		case 0:		/* item name */
898			if (byte & 0x80) {
899				if (vpd_nextbyte(&vrs, &byte2)) {
900					state = -2;
901					break;
902				}
903				remain = byte2;
904				if (vpd_nextbyte(&vrs, &byte2)) {
905					state = -2;
906					break;
907				}
908				remain |= byte2 << 8;
909				if (remain > (0x7f*4 - vrs.off)) {
910					state = -1;
911					pci_printf(cfg,
912					    "invalid VPD data, remain %#x\n",
913					    remain);
914				}
915				name = byte & 0x7f;
916			} else {
917				remain = byte & 0x7;
918				name = (byte >> 3) & 0xf;
919			}
920			switch (name) {
921			case 0x2:	/* String */
922				cfg->vpd.vpd_ident = malloc(remain + 1,
923				    M_DEVBUF, M_WAITOK);
924				i = 0;
925				state = 1;
926				break;
927			case 0xf:	/* End */
928				state = -1;
929				break;
930			case 0x10:	/* VPD-R */
931				alloc = 8;
932				off = 0;
933				cfg->vpd.vpd_ros = malloc(alloc *
934				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
935				    M_WAITOK | M_ZERO);
936				state = 2;
937				break;
938			case 0x11:	/* VPD-W */
939				alloc = 8;
940				off = 0;
941				cfg->vpd.vpd_w = malloc(alloc *
942				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
943				    M_WAITOK | M_ZERO);
944				state = 5;
945				break;
946			default:	/* Invalid data, abort */
947				state = -1;
948				break;
949			}
950			break;
951
952		case 1:	/* Identifier String */
953			cfg->vpd.vpd_ident[i++] = byte;
954			remain--;
955			if (remain == 0)  {
956				cfg->vpd.vpd_ident[i] = '\0';
957				state = 0;
958			}
959			break;
960
961		case 2:	/* VPD-R Keyword Header */
962			if (off == alloc) {
963				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
964				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
965				    M_DEVBUF, M_WAITOK | M_ZERO);
966			}
967			cfg->vpd.vpd_ros[off].keyword[0] = byte;
968			if (vpd_nextbyte(&vrs, &byte2)) {
969				state = -2;
970				break;
971			}
972			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
973			if (vpd_nextbyte(&vrs, &byte2)) {
974				state = -2;
975				break;
976			}
977			dflen = byte2;
978			if (dflen == 0 &&
979			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
980			    2) == 0) {
981				/*
982				 * if this happens, we can't trust the rest
983				 * of the VPD.
984				 */
985				pci_printf(cfg, "bad keyword length: %d\n",
986				    dflen);
987				cksumvalid = 0;
988				state = -1;
989				break;
990			} else if (dflen == 0) {
991				cfg->vpd.vpd_ros[off].value = malloc(1 *
992				    sizeof(*cfg->vpd.vpd_ros[off].value),
993				    M_DEVBUF, M_WAITOK);
994				cfg->vpd.vpd_ros[off].value[0] = '\x00';
995			} else
996				cfg->vpd.vpd_ros[off].value = malloc(
997				    (dflen + 1) *
998				    sizeof(*cfg->vpd.vpd_ros[off].value),
999				    M_DEVBUF, M_WAITOK);
1000			remain -= 3;
1001			i = 0;
1002			/* keep in sync w/ state 3's transistions */
1003			if (dflen == 0 && remain == 0)
1004				state = 0;
1005			else if (dflen == 0)
1006				state = 2;
1007			else
1008				state = 3;
1009			break;
1010
1011		case 3:	/* VPD-R Keyword Value */
1012			cfg->vpd.vpd_ros[off].value[i++] = byte;
1013			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1014			    "RV", 2) == 0 && cksumvalid == -1) {
1015				if (vrs.cksum == 0)
1016					cksumvalid = 1;
1017				else {
1018					if (bootverbose)
1019						pci_printf(cfg,
1020					    "bad VPD cksum, remain %hhu\n",
1021						    vrs.cksum);
1022					cksumvalid = 0;
1023					state = -1;
1024					break;
1025				}
1026			}
1027			dflen--;
1028			remain--;
1029			/* keep in sync w/ state 2's transistions */
1030			if (dflen == 0)
1031				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1032			if (dflen == 0 && remain == 0) {
1033				cfg->vpd.vpd_rocnt = off;
1034				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1035				    off * sizeof(*cfg->vpd.vpd_ros),
1036				    M_DEVBUF, M_WAITOK | M_ZERO);
1037				state = 0;
1038			} else if (dflen == 0)
1039				state = 2;
1040			break;
1041
1042		case 4:
1043			remain--;
1044			if (remain == 0)
1045				state = 0;
1046			break;
1047
1048		case 5:	/* VPD-W Keyword Header */
1049			if (off == alloc) {
1050				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1051				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1052				    M_DEVBUF, M_WAITOK | M_ZERO);
1053			}
1054			cfg->vpd.vpd_w[off].keyword[0] = byte;
1055			if (vpd_nextbyte(&vrs, &byte2)) {
1056				state = -2;
1057				break;
1058			}
1059			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1060			if (vpd_nextbyte(&vrs, &byte2)) {
1061				state = -2;
1062				break;
1063			}
1064			cfg->vpd.vpd_w[off].len = dflen = byte2;
1065			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1066			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1067			    sizeof(*cfg->vpd.vpd_w[off].value),
1068			    M_DEVBUF, M_WAITOK);
1069			remain -= 3;
1070			i = 0;
1071			/* keep in sync w/ state 6's transistions */
1072			if (dflen == 0 && remain == 0)
1073				state = 0;
1074			else if (dflen == 0)
1075				state = 5;
1076			else
1077				state = 6;
1078			break;
1079
1080		case 6:	/* VPD-W Keyword Value */
1081			cfg->vpd.vpd_w[off].value[i++] = byte;
1082			dflen--;
1083			remain--;
1084			/* keep in sync w/ state 5's transistions */
1085			if (dflen == 0)
1086				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1087			if (dflen == 0 && remain == 0) {
1088				cfg->vpd.vpd_wcnt = off;
1089				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1090				    off * sizeof(*cfg->vpd.vpd_w),
1091				    M_DEVBUF, M_WAITOK | M_ZERO);
1092				state = 0;
1093			} else if (dflen == 0)
1094				state = 5;
1095			break;
1096
1097		default:
1098			pci_printf(cfg, "invalid state: %d\n", state);
1099			state = -1;
1100			break;
1101		}
1102	}
1103
1104	if (cksumvalid == 0 || state < -1) {
1105		/* read-only data bad, clean up */
1106		if (cfg->vpd.vpd_ros != NULL) {
1107			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1108				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1109			free(cfg->vpd.vpd_ros, M_DEVBUF);
1110			cfg->vpd.vpd_ros = NULL;
1111		}
1112	}
1113	if (state < -1) {
1114		/* I/O error, clean up */
1115		pci_printf(cfg, "failed to read VPD data.\n");
1116		if (cfg->vpd.vpd_ident != NULL) {
1117			free(cfg->vpd.vpd_ident, M_DEVBUF);
1118			cfg->vpd.vpd_ident = NULL;
1119		}
1120		if (cfg->vpd.vpd_w != NULL) {
1121			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1122				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1123			free(cfg->vpd.vpd_w, M_DEVBUF);
1124			cfg->vpd.vpd_w = NULL;
1125		}
1126	}
1127	cfg->vpd.vpd_cached = 1;
1128#undef REG
1129#undef WREG
1130}
1131
1132int
1133pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1134{
1135	struct pci_devinfo *dinfo = device_get_ivars(child);
1136	pcicfgregs *cfg = &dinfo->cfg;
1137
1138	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1139		pci_read_vpd(device_get_parent(dev), cfg);
1140
1141	*identptr = cfg->vpd.vpd_ident;
1142
1143	if (*identptr == NULL)
1144		return (ENXIO);
1145
1146	return (0);
1147}
1148
1149int
1150pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1151	const char **vptr)
1152{
1153	struct pci_devinfo *dinfo = device_get_ivars(child);
1154	pcicfgregs *cfg = &dinfo->cfg;
1155	int i;
1156
1157	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1158		pci_read_vpd(device_get_parent(dev), cfg);
1159
1160	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1161		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1162		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1163			*vptr = cfg->vpd.vpd_ros[i].value;
1164			return (0);
1165		}
1166
1167	*vptr = NULL;
1168	return (ENXIO);
1169}
1170
1171/*
1172 * Find the requested HyperTransport capability and return the offset
1173 * in configuration space via the pointer provided.  The function
1174 * returns 0 on success and an error code otherwise.
1175 */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HyperTransport capability, if any. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Host/slave capability types are identified by only the
		 * top three bits of the command word; every other HT
		 * capability type uses the full capability-type mask.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			/*
			 * Advance first, then check: the loop leaves with
			 * either ptr == 0 (end of list, outer loop exits)
			 * or ptr at the next PCIY_HT entry.
			 */
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1213
1214/*
1215 * Find the requested capability and return the offset in
1216 * configuration space via the pointer provided.  The function returns
1217 * 0 on success and an error code otherwise.
1218 */
1219int
1220pci_find_cap_method(device_t dev, device_t child, int capability,
1221    int *capreg)
1222{
1223	struct pci_devinfo *dinfo = device_get_ivars(child);
1224	pcicfgregs *cfg = &dinfo->cfg;
1225	u_int32_t status;
1226	u_int8_t ptr;
1227
1228	/*
1229	 * Check the CAP_LIST bit of the PCI status register first.
1230	 */
1231	status = pci_read_config(child, PCIR_STATUS, 2);
1232	if (!(status & PCIM_STATUS_CAPPRESENT))
1233		return (ENXIO);
1234
1235	/*
1236	 * Determine the start pointer of the capabilities list.
1237	 */
1238	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1239	case PCIM_HDRTYPE_NORMAL:
1240	case PCIM_HDRTYPE_BRIDGE:
1241		ptr = PCIR_CAP_PTR;
1242		break;
1243	case PCIM_HDRTYPE_CARDBUS:
1244		ptr = PCIR_CAP_PTR_2;
1245		break;
1246	default:
1247		/* XXX: panic? */
1248		return (ENXIO);		/* no extended capabilities support */
1249	}
1250	ptr = pci_read_config(child, ptr, 1);
1251
1252	/*
1253	 * Traverse the capabilities list.
1254	 */
1255	while (ptr != 0) {
1256		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1257			if (capreg != NULL)
1258				*capreg = ptr;
1259			return (0);
1260		}
1261		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1262	}
1263
1264	return (ENOENT);
1265}
1266
1267/*
1268 * Find the requested extended capability and return the offset in
1269 * configuration space via the pointer provided.  The function returns
1270 * 0 on success and an error code otherwise.
1271 */
1272int
1273pci_find_extcap_method(device_t dev, device_t child, int capability,
1274    int *capreg)
1275{
1276	struct pci_devinfo *dinfo = device_get_ivars(child);
1277	pcicfgregs *cfg = &dinfo->cfg;
1278	uint32_t ecap;
1279	uint16_t ptr;
1280
1281	/* Only supported for PCI-express devices. */
1282	if (cfg->pcie.pcie_location == 0)
1283		return (ENXIO);
1284
1285	ptr = PCIR_EXTCAP;
1286	ecap = pci_read_config(child, ptr, 4);
1287	if (ecap == 0xffffffff || ecap == 0)
1288		return (ENOENT);
1289	for (;;) {
1290		if (PCI_EXTCAP_ID(ecap) == capability) {
1291			if (capreg != NULL)
1292				*capreg = ptr;
1293			return (0);
1294		}
1295		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1296		if (ptr == 0)
1297			break;
1298		ecap = pci_read_config(child, ptr, 4);
1299	}
1300
1301	return (ENOENT);
1302}
1303
1304/*
1305 * Support for MSI-X message interrupts.
1306 */
1307void
1308pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1309{
1310	struct pci_devinfo *dinfo = device_get_ivars(dev);
1311	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1312	uint32_t offset;
1313
1314	KASSERT(msix->msix_table_len > index, ("bogus index"));
1315	offset = msix->msix_table_offset + index * 16;
1316	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1317	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1318	bus_write_4(msix->msix_table_res, offset + 8, data);
1319
1320	/* Enable MSI -> HT mapping. */
1321	pci_ht_map_msi(dev, address);
1322}
1323
1324void
1325pci_mask_msix(device_t dev, u_int index)
1326{
1327	struct pci_devinfo *dinfo = device_get_ivars(dev);
1328	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1329	uint32_t offset, val;
1330
1331	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1332	offset = msix->msix_table_offset + index * 16 + 12;
1333	val = bus_read_4(msix->msix_table_res, offset);
1334	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1335		val |= PCIM_MSIX_VCTRL_MASK;
1336		bus_write_4(msix->msix_table_res, offset, val);
1337	}
1338}
1339
1340void
1341pci_unmask_msix(device_t dev, u_int index)
1342{
1343	struct pci_devinfo *dinfo = device_get_ivars(dev);
1344	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1345	uint32_t offset, val;
1346
1347	KASSERT(msix->msix_table_len > index, ("bogus index"));
1348	offset = msix->msix_table_offset + index * 16 + 12;
1349	val = bus_read_4(msix->msix_table_res, offset);
1350	if (val & PCIM_MSIX_VCTRL_MASK) {
1351		val &= ~PCIM_MSIX_VCTRL_MASK;
1352		bus_write_4(msix->msix_table_res, offset, val);
1353	}
1354}
1355
1356int
1357pci_pending_msix(device_t dev, u_int index)
1358{
1359	struct pci_devinfo *dinfo = device_get_ivars(dev);
1360	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1361	uint32_t offset, bit;
1362
1363	KASSERT(msix->msix_table_len > index, ("bogus index"));
1364	offset = msix->msix_pba_offset + (index / 32) * 4;
1365	bit = 1 << index % 32;
1366	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1367}
1368
1369/*
1370 * Restore MSI-X registers and table during resume.  If MSI-X is
1371 * enabled then walk the virtual table to restore the actual MSI-X
1372 * table.
1373 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			/* Skip table slots with no vector or no handlers. */
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved MSI-X control register (enable bit, etc.). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1401
1402/*
1403 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1404 * returned in *count.  After this function returns, each message will be
1405 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1406 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If table and PBA share a BAR, 'rle' still holds the table entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to min(requested, supported) messages, one at a time. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if we got no messages at all. */
			if (i == 0)
				return (error);
			break;
		}
		/* IRQ resources for MSI-X messages use rids starting at 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: table slot i uses vector i + 1. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1541
1542/*
1543 * By default, pci_alloc_msix() will assign the allocated IRQ
1544 * resources consecutively to the first N messages in the MSI-X table.
1545 * However, device drivers may want to use different layouts if they
1546 * either receive fewer messages than they asked for, or they wish to
1547 * populate the MSI-X table sparsely.  This method allows the driver
1548 * to specify what layout it wants.  It must be called after a
1549 * successful pci_alloc_msix() but before any of the associated
1550 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1551 *
1552 * The 'vectors' array contains 'count' message vectors.  The array
1553 * maps directly to the MSI-X table in that index 0 in the array
1554 * specifies the vector for the first message in the MSI-X table, etc.
1555 * The vector value in each array index can either be 0 to indicate
1556 * that no vector should be assigned to a message slot, or it can be a
1557 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1559 * vector (IRQ) to be used for the corresponding message.
1560 *
1561 * On successful return, each message with a non-zero vector will have
1562 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1563 * 1.  Additionally, if any of the IRQs allocated via the previous
1564 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1565 * will be freed back to the system automatically.
1566 *
1567 * For example, suppose a driver has a MSI-X table with 6 messages and
1568 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1569 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1570 * C.  After the call to pci_alloc_msix(), the device will be setup to
1571 * have an MSI-X table of ABC--- (where - means no vector assigned).
1572 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1573 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1574 * be freed back to the system.  This device will also have valid
1575 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1576 *
1577 * In any case, the SYS_RES_IRQ rid X will always map to the message
1578 * at MSI-X table index X - 1 and will only be valid if a vector is
1579 * assigned to that table entry.
1580 */
1581int
1582pci_remap_msix_method(device_t dev, device_t child, int count,
1583    const u_int *vectors)
1584{
1585	struct pci_devinfo *dinfo = device_get_ivars(child);
1586	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1587	struct resource_list_entry *rle;
1588	int i, irq, j, *used;
1589
1590	/*
1591	 * Have to have at least one message in the table but the
1592	 * table can't be bigger than the actual MSI-X table in the
1593	 * device.
1594	 */
1595	if (count == 0 || count > msix->msix_msgnum)
1596		return (EINVAL);
1597
1598	/* Sanity check the vectors. */
1599	for (i = 0; i < count; i++)
1600		if (vectors[i] > msix->msix_alloc)
1601			return (EINVAL);
1602
1603	/*
1604	 * Make sure there aren't any holes in the vectors to be used.
1605	 * It's a big pain to support it, and it doesn't really make
1606	 * sense anyway.  Also, at least one vector must be used.
1607	 */
1608	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1609	    M_ZERO);
1610	for (i = 0; i < count; i++)
1611		if (vectors[i] != 0)
1612			used[vectors[i] - 1] = 1;
1613	for (i = 0; i < msix->msix_alloc - 1; i++)
1614		if (used[i] == 0 && used[i + 1] == 1) {
1615			free(used, M_DEVBUF);
1616			return (EINVAL);
1617		}
1618	if (used[0] != 1) {
1619		free(used, M_DEVBUF);
1620		return (EINVAL);
1621	}
1622
1623	/* Make sure none of the resources are allocated. */
1624	for (i = 0; i < msix->msix_table_len; i++) {
1625		if (msix->msix_table[i].mte_vector == 0)
1626			continue;
1627		if (msix->msix_table[i].mte_handlers > 0)
1628			return (EBUSY);
1629		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1630		KASSERT(rle != NULL, ("missing resource"));
1631		if (rle->res != NULL)
1632			return (EBUSY);
1633	}
1634
1635	/* Free the existing resource list entries. */
1636	for (i = 0; i < msix->msix_table_len; i++) {
1637		if (msix->msix_table[i].mte_vector == 0)
1638			continue;
1639		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1640	}
1641
1642	/*
1643	 * Build the new virtual table keeping track of which vectors are
1644	 * used.
1645	 */
1646	free(msix->msix_table, M_DEVBUF);
1647	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1648	    M_DEVBUF, M_WAITOK | M_ZERO);
1649	for (i = 0; i < count; i++)
1650		msix->msix_table[i].mte_vector = vectors[i];
1651	msix->msix_table_len = count;
1652
1653	/* Free any unused IRQs and resize the vectors array if necessary. */
1654	j = msix->msix_alloc - 1;
1655	if (used[j] == 0) {
1656		struct msix_vector *vec;
1657
1658		while (used[j] == 0) {
1659			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1660			    msix->msix_vectors[j].mv_irq);
1661			j--;
1662		}
1663		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1664		    M_WAITOK);
1665		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1666		    (j + 1));
1667		free(msix->msix_vectors, M_DEVBUF);
1668		msix->msix_vectors = vec;
1669		msix->msix_alloc = j + 1;
1670	}
1671	free(used, M_DEVBUF);
1672
1673	/* Map the IRQs onto the rids. */
1674	for (i = 0; i < count; i++) {
1675		if (vectors[i] == 0)
1676			continue;
1677		irq = msix->msix_vectors[vectors[i]].mv_irq;
1678		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1679		    irq, 1);
1680	}
1681
1682	if (bootverbose) {
1683		device_printf(child, "Remapped MSI-X IRQs as: ");
1684		for (i = 0; i < count; i++) {
1685			if (i != 0)
1686				printf(", ");
1687			if (vectors[i] == 0)
1688				printf("---");
1689			else
1690				printf("%d",
1691				    msix->msix_vectors[vectors[i]].mv_irq);
1692		}
1693		printf("\n");
1694	}
1695
1696	return (0);
1697}
1698
/*
 * Release all MSI-X messages allocated for 'child': disable MSI-X in the
 * control register, delete the IRQ resource list entries, and hand the
 * IRQs back to the parent bridge.  Fails with EBUSY while any message
 * still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1745
1746/*
1747 * Return the max supported MSI-X messages this device supports.
1748 * Basically, assuming the MD code can alloc messages, this function
1749 * should return the maximum value that pci_alloc_msix() can return.
1750 * Thus, it is subject to the tunables, etc.
1751 */
1752int
1753pci_msix_count_method(device_t dev, device_t child)
1754{
1755	struct pci_devinfo *dinfo = device_get_ivars(child);
1756	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1757
1758	if (pci_do_msix && msix->msix_location != 0)
1759		return (msix->msix_msgnum);
1760	return (0);
1761}
1762
1763/*
1764 * HyperTransport MSI mapping control
1765 */
1766void
1767pci_ht_map_msi(device_t dev, uint64_t addr)
1768{
1769	struct pci_devinfo *dinfo = device_get_ivars(dev);
1770	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1771
1772	if (!ht->ht_msimap)
1773		return;
1774
1775	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1776	    ht->ht_msiaddr >> 20 == addr >> 20) {
1777		/* Enable MSI -> HT mapping. */
1778		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1779		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1780		    ht->ht_msictrl, 2);
1781	}
1782
1783	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1784		/* Disable MSI -> HT mapping. */
1785		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1786		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1787		    ht->ht_msictrl, 2);
1788	}
1789}
1790
1791int
1792pci_get_max_read_req(device_t dev)
1793{
1794	struct pci_devinfo *dinfo = device_get_ivars(dev);
1795	int cap;
1796	uint16_t val;
1797
1798	cap = dinfo->cfg.pcie.pcie_location;
1799	if (cap == 0)
1800		return (0);
1801	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1802	val &= PCIEM_CTL_MAX_READ_REQUEST;
1803	val >>= 12;
1804	return (1 << (val + 7));
1805}
1806
1807int
1808pci_set_max_read_req(device_t dev, int size)
1809{
1810	struct pci_devinfo *dinfo = device_get_ivars(dev);
1811	int cap;
1812	uint16_t val;
1813
1814	cap = dinfo->cfg.pcie.pcie_location;
1815	if (cap == 0)
1816		return (0);
1817	if (size < 128)
1818		size = 128;
1819	if (size > 4096)
1820		size = 4096;
1821	size = (1 << (fls(size) - 1));
1822	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1823	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1824	val |= (fls(size) - 8) << 12;
1825	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1826	return (size);
1827}
1828
1829/*
1830 * Support for MSI message signalled interrupts.
1831 */
1832void
1833pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1834{
1835	struct pci_devinfo *dinfo = device_get_ivars(dev);
1836	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1837
1838	/* Write data and address values. */
1839	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1840	    address & 0xffffffff, 4);
1841	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1842		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1843		    address >> 32, 4);
1844		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1845		    data, 2);
1846	} else
1847		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1848		    2);
1849
1850	/* Enable MSI in the control register. */
1851	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1852	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1853	    2);
1854
1855	/* Enable MSI -> HT mapping. */
1856	pci_ht_map_msi(dev, address);
1857}
1858
1859void
1860pci_disable_msi(device_t dev)
1861{
1862	struct pci_devinfo *dinfo = device_get_ivars(dev);
1863	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1864
1865	/* Disable MSI -> HT mapping. */
1866	pci_ht_map_msi(dev, 0);
1867
1868	/* Disable MSI in the control register. */
1869	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1870	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1871	    2);
1872}
1873
1874/*
1875 * Restore MSI registers during resume.  If MSI is enabled then
1876 * restore the data and address registers in addition to the control
1877 * register.
1878 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* 64-bit capable parts use a high-address register and a
		 * shifted data register location. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1904
1905static int
1906pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1907{
1908	struct pci_devinfo *dinfo = device_get_ivars(dev);
1909	pcicfgregs *cfg = &dinfo->cfg;
1910	struct resource_list_entry *rle;
1911	struct msix_table_entry *mte;
1912	struct msix_vector *mv;
1913	uint64_t addr;
1914	uint32_t data;
1915	int error, i, j;
1916
1917	/*
1918	 * Handle MSI first.  We try to find this IRQ among our list
1919	 * of MSI IRQs.  If we find it, we request updated address and
1920	 * data registers and apply the results.
1921	 */
1922	if (cfg->msi.msi_alloc > 0) {
1923
1924		/* If we don't have any active handlers, nothing to do. */
1925		if (cfg->msi.msi_handlers == 0)
1926			return (0);
1927		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1928			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1929			    i + 1);
1930			if (rle->start == irq) {
1931				error = PCIB_MAP_MSI(device_get_parent(bus),
1932				    dev, irq, &addr, &data);
1933				if (error)
1934					return (error);
1935				pci_disable_msi(dev);
1936				dinfo->cfg.msi.msi_addr = addr;
1937				dinfo->cfg.msi.msi_data = data;
1938				pci_enable_msi(dev, addr, data);
1939				return (0);
1940			}
1941		}
1942		return (ENOENT);
1943	}
1944
1945	/*
1946	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1947	 * we request the updated mapping info.  If that works, we go
1948	 * through all the slots that use this IRQ and update them.
1949	 */
1950	if (cfg->msix.msix_alloc > 0) {
1951		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1952			mv = &cfg->msix.msix_vectors[i];
1953			if (mv->mv_irq == irq) {
1954				error = PCIB_MAP_MSI(device_get_parent(bus),
1955				    dev, irq, &addr, &data);
1956				if (error)
1957					return (error);
1958				mv->mv_address = addr;
1959				mv->mv_data = data;
1960				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1961					mte = &cfg->msix.msix_table[j];
1962					if (mte->mte_vector != i + 1)
1963						continue;
1964					if (mte->mte_handlers == 0)
1965						continue;
1966					pci_mask_msix(dev, j);
1967					pci_enable_msix(dev, j, addr, data);
1968					pci_unmask_msix(dev, j);
1969				}
1970			}
1971		}
1972		return (ENOENT);
1973	}
1974
1975	return (ENOENT);
1976}
1977
1978/*
1979 * Returns true if the specified device is blacklisted because MSI
1980 * doesn't work.
1981 */
1982int
1983pci_msi_device_blacklisted(device_t dev)
1984{
1985	const struct pci_quirk *q;
1986
1987	if (!pci_honor_msi_blacklist)
1988		return (0);
1989
1990	for (q = &pci_quirks[0]; q->devid; q++) {
1991		if (q->devid == pci_get_devid(dev) &&
1992		    q->type == PCI_QUIRK_DISABLE_MSI)
1993			return (1);
1994	}
1995	return (0);
1996}
1997
1998/*
1999 * Returns true if a specified chipset supports MSI when it is
2000 * emulated hardware in a virtual machine.
2001 */
2002static int
2003pci_msi_vm_chipset(device_t dev)
2004{
2005	const struct pci_quirk *q;
2006
2007	for (q = &pci_quirks[0]; q->devid; q++) {
2008		if (q->devid == pci_get_devid(dev) &&
2009		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2010			return (1);
2011	}
2012	return (0);
2013}
2014
2015/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2017 * we just check for blacklisted chipsets as represented by the
2018 * host-PCI bridge at device 0:0:0.  In the future, it may become
2019 * necessary to check other system attributes, such as the kenv values
2020 * that give the motherboard manufacturer and model number.
2021 */
2022static int
2023pci_msi_blacklisted(void)
2024{
2025	device_t dev;
2026
2027	if (!pci_honor_msi_blacklist)
2028		return (0);
2029
2030	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2031	if (!(pcie_chipset || pcix_chipset)) {
2032		if (vm_guest != VM_GUEST_NO) {
2033			dev = pci_find_bsf(0, 0, 0);
2034			if (dev != NULL)
2035				return (pci_msi_vm_chipset(dev) == 0);
2036		}
2037		return (1);
2038	}
2039
2040	dev = pci_find_bsf(0, 0, 0);
2041	if (dev != NULL)
2042		return (pci_msi_device_blacklisted(dev));
2043	return (0);
2044}
2045
2046/*
2047 * Attempt to allocate *count MSI messages.  The actual number allocated is
2048 * returned in *count.  After this function returns, each message will be
2049 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2050 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request on each failure until a single message also fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2(actual) in bits 6:4, which
	 * is what (ffs(actual) - 1) << 4 computes for a power of 2.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2169
2170/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other value (success or a
	 * real error) ends the request here.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Snapshot the IRQ numbers before tearing anything down. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2218
2219/*
2220 * Return the max supported MSI messages this device supports.
2221 * Basically, assuming the MD code can alloc messages, this function
2222 * should return the maximum value that pci_alloc_msi() can return.
2223 * Thus, it is subject to the tunables, etc.
2224 */
2225int
2226pci_msi_count_method(device_t dev, device_t child)
2227{
2228	struct pci_devinfo *dinfo = device_get_ivars(child);
2229	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2230
2231	if (pci_do_msi && msi->msi_location != 0)
2232		return (msi->msi_msgnum);
2233	return (0);
2234}
2235
2236/* free pcicfgregs structure and all depending data structures */
2237
2238int
2239pci_freecfg(struct pci_devinfo *dinfo)
2240{
2241	struct devlist *devlist_head;
2242	struct pci_map *pm, *next;
2243	int i;
2244
2245	devlist_head = &pci_devq;
2246
2247	if (dinfo->cfg.vpd.vpd_reg) {
2248		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2249		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2250			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2251		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2252		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2253			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2254		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2255	}
2256	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2257		free(pm, M_DEVBUF);
2258	}
2259	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2260	free(dinfo, M_DEVBUF);
2261
2262	/* increment the generation count */
2263	pci_generation++;
2264
2265	/* we're losing one device */
2266	pci_numdevs--;
2267	return (0);
2268}
2269
2270/*
 * PCI power management
2272 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int result, oldstate, highest, delay;

	/* Without a power management capability we cannot change state. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	result = 0;
	/*
	 * Encode the requested state into the status register.  D1 and D2
	 * are optional and may only be used if the capability advertises
	 * support for them.
	 */
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	/* Write the new state, then honor the settle delay computed above. */
	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2345
2346int
2347pci_get_powerstate_method(device_t dev, device_t child)
2348{
2349	struct pci_devinfo *dinfo = device_get_ivars(child);
2350	pcicfgregs *cfg = &dinfo->cfg;
2351	uint16_t status;
2352	int result;
2353
2354	if (cfg->pp.pp_cap != 0) {
2355		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2356		switch (status & PCIM_PSTAT_DMASK) {
2357		case PCIM_PSTAT_D0:
2358			result = PCI_POWERSTATE_D0;
2359			break;
2360		case PCIM_PSTAT_D1:
2361			result = PCI_POWERSTATE_D1;
2362			break;
2363		case PCIM_PSTAT_D2:
2364			result = PCI_POWERSTATE_D2;
2365			break;
2366		case PCIM_PSTAT_D3:
2367			result = PCI_POWERSTATE_D3;
2368			break;
2369		default:
2370			result = PCI_POWERSTATE_UNKNOWN;
2371			break;
2372		}
2373	} else {
2374		/* No support, device is always at D0 */
2375		result = PCI_POWERSTATE_D0;
2376	}
2377	return (result);
2378}
2379
2380/*
2381 * Some convenience functions for PCI device drivers.
2382 */
2383
2384static __inline void
2385pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2386{
2387	uint16_t	command;
2388
2389	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2390	command |= bit;
2391	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2392}
2393
2394static __inline void
2395pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2396{
2397	uint16_t	command;
2398
2399	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2400	command &= ~bit;
2401	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2402}
2403
2404int
2405pci_enable_busmaster_method(device_t dev, device_t child)
2406{
2407	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2408	return (0);
2409}
2410
2411int
2412pci_disable_busmaster_method(device_t dev, device_t child)
2413{
2414	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2415	return (0);
2416}
2417
2418int
2419pci_enable_io_method(device_t dev, device_t child, int space)
2420{
2421	uint16_t bit;
2422
2423	switch(space) {
2424	case SYS_RES_IOPORT:
2425		bit = PCIM_CMD_PORTEN;
2426		break;
2427	case SYS_RES_MEMORY:
2428		bit = PCIM_CMD_MEMEN;
2429		break;
2430	default:
2431		return (EINVAL);
2432	}
2433	pci_set_command_bit(dev, child, bit);
2434	return (0);
2435}
2436
2437int
2438pci_disable_io_method(device_t dev, device_t child, int space)
2439{
2440	uint16_t bit;
2441
2442	switch(space) {
2443	case SYS_RES_IOPORT:
2444		bit = PCIM_CMD_PORTEN;
2445		break;
2446	case SYS_RES_MEMORY:
2447		bit = PCIM_CMD_MEMEN;
2448		break;
2449	default:
2450		return (EINVAL);
2451	}
2452	pci_clear_command_bit(dev, child, bit);
2453	return (0);
2454}
2455
2456/*
2457 * New style pci driver.  Parent device is either a pci-host-bridge or a
2458 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2459 */
2460
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity, location, and class of the device. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Legacy interrupt routing, if an INTx pin is wired up. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability summary. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary, including table/PBA BARs. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2517
2518static int
2519pci_porten(device_t dev)
2520{
2521	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2522}
2523
2524static int
2525pci_memen(device_t dev)
2526{
2527	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2528}
2529
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe: all address bits set, ROM enable bit clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR holds the upper address half in the next register. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds its original value. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2593
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	/* Program the BAR (both halves for a 64-bit BAR). */
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/*
	 * Read the value back so pm_value reflects what the hardware
	 * actually latched (read-only bits may differ from 'base').
	 */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2614
2615struct pci_map *
2616pci_find_bar(device_t dev, int reg)
2617{
2618	struct pci_devinfo *dinfo;
2619	struct pci_map *pm;
2620
2621	dinfo = device_get_ivars(dev);
2622	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2623		if (pm->pm_reg == reg)
2624			return (pm);
2625	}
2626	return (NULL);
2627}
2628
2629int
2630pci_bar_enabled(device_t dev, struct pci_map *pm)
2631{
2632	struct pci_devinfo *dinfo;
2633	uint16_t cmd;
2634
2635	dinfo = device_get_ivars(dev);
2636	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2637	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2638		return (0);
2639	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2640	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2641		return ((cmd & PCIM_CMD_MEMEN) != 0);
2642	else
2643		return ((cmd & PCIM_CMD_PORTEN) != 0);
2644}
2645
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Keep the list sorted by register offset: stop at the last
	 * entry whose successor (if any) has a larger offset, then
	 * insert after it.  An empty list falls through with
	 * prev == NULL and inserts at the tail.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2670
2671static void
2672pci_restore_bars(device_t dev)
2673{
2674	struct pci_devinfo *dinfo;
2675	struct pci_map *pm;
2676	int ln2range;
2677
2678	dinfo = device_get_ivars(dev);
2679	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2680		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2681			ln2range = 32;
2682		else
2683			ln2range = pci_maprange(pm->pm_value);
2684		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2685		if (ln2range == 64)
2686			pci_write_config(dev, pm->pm_reg + 4,
2687			    pm->pm_value >> 32, 4);
2688	}
2689}
2690
2691/*
2692 * Add a resource based on a pci map register. Return 1 if the map
2693 * register is a 32bit map register or 2 if it is a 64bit register.
2694 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Probe the BAR: current value plus the all-ones sizing readback. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR's size, derived from the stuck-zero bits. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject a base address wider than this platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/*
	 * If the firmware left the BAR unprogrammed (zero base, or the
	 * base equals the sizing readback), let the parent pick any
	 * range; otherwise ask for the exact range the BAR decodes.
	 */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, prefetch ? RF_PREFETCHABLE : 0);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with whatever range we actually got. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2859
2860/*
2861 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
2863 * same addresses that old ISA hardware did. This dictates that we use
2864 * those addresses and ignore the BAR's if we cannot set PCI native
2865 * addressing mode.
2866 */
2867static void
2868pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2869    uint32_t prefetchmask)
2870{
2871	struct resource *r;
2872	int rid, type, progif;
2873#if 0
2874	/* if this device supports PCI native addressing use it */
2875	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2876	if ((progif & 0x8a) == 0x8a) {
2877		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2878		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2879			printf("Trying ATA native PCI addressing mode\n");
2880			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2881		}
2882	}
2883#endif
2884	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2885	type = SYS_RES_IOPORT;
2886	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2887		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2888		    prefetchmask & (1 << 0));
2889		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2890		    prefetchmask & (1 << 1));
2891	} else {
2892		rid = PCIR_BAR(0);
2893		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2894		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2895		    0x1f7, 8, 0);
2896		rid = PCIR_BAR(1);
2897		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2898		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2899		    0x3f6, 1, 0);
2900	}
2901	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2902		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2903		    prefetchmask & (1 << 2));
2904		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2905		    prefetchmask & (1 << 3));
2906	} else {
2907		rid = PCIR_BAR(2);
2908		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2909		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2910		    0x177, 8, 0);
2911		rid = PCIR_BAR(3);
2912		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2913		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2914		    0x376, 1, 0);
2915	}
2916	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2917	    prefetchmask & (1 << 4));
2918	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2919	    prefetchmask & (1 << 5));
2920}
2921
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Discard tunable values outside the valid IRQ range (1-254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2969
2970/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the OHCI operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Ask the SMM firmware to relinquish the controller. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the SMM to clear the IR bit. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* SMM did not answer; force a controller reset instead. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3006
3007/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Zero the interrupt-enable register, then drop the mapping. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3030
3031/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the EHCI capability/operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the Extended Capabilities list in PCI config space. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore, then wait for BIOS to release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS semaphore to clear. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3086
/*
 * Perform early XHCI takeover from SMM: walk the xHCI extended
 * capability list in MMIO space looking for the USB legacy-support
 * capability, and if the BIOS currently owns the controller, request
 * ownership and stop the controller.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the xHCI capability registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Start with all bits set so the first "next" test passes. */
	eec = -1;

	/*
	 * Synchronise with the BIOS if it owns the controller.
	 *
	 * NOTE(review): eecp is a uint8_t but holds a capability byte
	 * offset (dword offset << 2), which looks like it could be
	 * truncated for capabilities beyond 255 bytes — TODO confirm
	 * against the XHCI_HCS0_XECP()/XHCI_XECP_NEXT() definitions.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		/* Only the USB legacy-support capability is of interest. */
		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		/* A clear BIOS semaphore means the BIOS does not own it. */
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3148
/*
 * Populate the resource list of a PCI child device from its BARs,
 * applying per-device quirks, assign its interrupt, and perform
 * early USB controller takeover from SMM where requested.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Combined device/vendor ID used for quirk table lookups. */
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices need special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			/* A non-zero devid here means the scan hit a quirk. */
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* pci_add_map() returns how many BAR slots it used. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/*
	 * Take USB controllers away from SMM early so legacy
	 * emulation cannot interfere with the OS drivers.
	 */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3222
3223void
3224pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3225{
3226#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3227	device_t pcib = device_get_parent(dev);
3228	struct pci_devinfo *dinfo;
3229	int maxslots;
3230	int s, f, pcifunchigh;
3231	uint8_t hdrtype;
3232
3233	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3234	    ("dinfo_size too small"));
3235	maxslots = PCIB_MAXSLOTS(pcib);
3236	for (s = 0; s <= maxslots; s++) {
3237		pcifunchigh = 0;
3238		f = 0;
3239		DELAY(1);
3240		hdrtype = REG(PCIR_HDRTYPE, 1);
3241		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3242			continue;
3243		if (hdrtype & PCIM_MFDEV)
3244			pcifunchigh = PCI_FUNCMAX;
3245		for (f = 0; f <= pcifunchigh; f++) {
3246			dinfo = pci_read_device(pcib, domain, busno, s, f,
3247			    dinfo_size);
3248			if (dinfo != NULL) {
3249				pci_add_child(dev, dinfo);
3250			}
3251		}
3252	}
3253#undef REG
3254}
3255
3256void
3257pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3258{
3259	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3260	device_set_ivars(dinfo->cfg.dev, dinfo);
3261	resource_list_init(&dinfo->resources);
3262	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3263	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3264	pci_print_verbose(dinfo);
3265	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3266}
3267
3268static int
3269pci_probe(device_t dev)
3270{
3271
3272	device_set_desc(dev, "PCI bus");
3273
3274	/* Allow other subclasses to override this driver. */
3275	return (BUS_PROBE_GENERIC);
3276}
3277
/*
 * Attach logic shared by the PCI bus driver and its subclasses:
 * report our domain/bus numbers and set up the DMA tag handed to
 * child devices.  Always returns 0.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Create a boundary-restricted DMA tag, but only on the
	 * top-level PCI bus (i.e. when the grandparent device is not
	 * itself a PCI bus); busses behind PCI-PCI bridges inherit
	 * the tag created by their ancestor.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* Fall back to the parent's DMA tag if no tag was created. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3312
3313static int
3314pci_attach(device_t dev)
3315{
3316	int busno, domain, error;
3317
3318	error = pci_attach_common(dev);
3319	if (error)
3320		return (error);
3321
3322	/*
3323	 * Since there can be multiple independantly numbered PCI
3324	 * busses on systems with multiple PCI domains, we can't use
3325	 * the unit number to decide which bus we are probing. We ask
3326	 * the parent pcib what our domain and bus numbers are.
3327	 */
3328	domain = pcib_get_domain(dev);
3329	busno = pcib_get_bus(dev);
3330	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3331	return (bus_generic_attach(dev));
3332}
3333
3334static void
3335pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3336    int state)
3337{
3338	device_t child, pcib;
3339	struct pci_devinfo *dinfo;
3340	int dstate, i;
3341
3342	/*
3343	 * Set the device to the given state.  If the firmware suggests
3344	 * a different power state, use it instead.  If power management
3345	 * is not present, the firmware is responsible for managing
3346	 * device power.  Skip children who aren't attached since they
3347	 * are handled separately.
3348	 */
3349	pcib = device_get_parent(dev);
3350	for (i = 0; i < numdevs; i++) {
3351		child = devlist[i];
3352		dinfo = device_get_ivars(child);
3353		dstate = state;
3354		if (device_is_attached(child) &&
3355		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3356			pci_set_powerstate(child, dstate);
3357	}
3358}
3359
3360int
3361pci_suspend(device_t dev)
3362{
3363	device_t child, *devlist;
3364	struct pci_devinfo *dinfo;
3365	int error, i, numdevs;
3366
3367	/*
3368	 * Save the PCI configuration space for each child and set the
3369	 * device in the appropriate power state for this sleep state.
3370	 */
3371	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3372		return (error);
3373	for (i = 0; i < numdevs; i++) {
3374		child = devlist[i];
3375		dinfo = device_get_ivars(child);
3376		pci_cfg_save(child, dinfo, 0);
3377	}
3378
3379	/* Suspend devices before potentially powering them down. */
3380	error = bus_generic_suspend(dev);
3381	if (error) {
3382		free(devlist, M_TEMP);
3383		return (error);
3384	}
3385	if (pci_do_power_suspend)
3386		pci_set_power_children(dev, devlist, numdevs,
3387		    PCI_POWERSTATE_D3);
3388	free(devlist, M_TEMP);
3389	return (0);
3390}
3391
3392int
3393pci_resume(device_t dev)
3394{
3395	device_t child, *devlist;
3396	struct pci_devinfo *dinfo;
3397	int error, i, numdevs;
3398
3399	/*
3400	 * Set each child to D0 and restore its PCI configuration space.
3401	 */
3402	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3403		return (error);
3404	if (pci_do_power_resume)
3405		pci_set_power_children(dev, devlist, numdevs,
3406		    PCI_POWERSTATE_D0);
3407
3408	/* Now the device is powered up, restore its config space. */
3409	for (i = 0; i < numdevs; i++) {
3410		child = devlist[i];
3411		dinfo = device_get_ivars(child);
3412
3413		pci_cfg_restore(child, dinfo);
3414		if (!device_is_attached(child))
3415			pci_cfg_save(child, dinfo, 1);
3416	}
3417
3418	/*
3419	 * Resume critical devices first, then everything else later.
3420	 */
3421	for (i = 0; i < numdevs; i++) {
3422		child = devlist[i];
3423		switch (pci_get_class(child)) {
3424		case PCIC_DISPLAY:
3425		case PCIC_MEMORY:
3426		case PCIC_BRIDGE:
3427		case PCIC_BASEPERIPH:
3428			DEVICE_RESUME(child);
3429			break;
3430		}
3431	}
3432	for (i = 0; i < numdevs; i++) {
3433		child = devlist[i];
3434		switch (pci_get_class(child)) {
3435		case PCIC_DISPLAY:
3436		case PCIC_MEMORY:
3437		case PCIC_BRIDGE:
3438		case PCIC_BASEPERIPH:
3439			break;
3440		default:
3441			DEVICE_RESUME(child);
3442		}
3443	}
3444	free(devlist, M_TEMP);
3445	return (0);
3446}
3447
3448static void
3449pci_load_vendor_data(void)
3450{
3451	caddr_t data;
3452	void *ptr;
3453	size_t sz;
3454
3455	data = preload_search_by_type("pci_vendor_data");
3456	if (data != NULL) {
3457		ptr = preload_fetch_addr(data);
3458		sz = preload_fetch_size(data);
3459		if (ptr != NULL && sz != 0) {
3460			pci_vendordata = ptr;
3461			pci_vendordata_size = sz;
3462			/* terminate the database */
3463			pci_vendordata[pci_vendordata_size] = '\n';
3464		}
3465	}
3466}
3467
3468void
3469pci_driver_added(device_t dev, driver_t *driver)
3470{
3471	int numdevs;
3472	device_t *devlist;
3473	device_t child;
3474	struct pci_devinfo *dinfo;
3475	int i;
3476
3477	if (bootverbose)
3478		device_printf(dev, "driver added\n");
3479	DEVICE_IDENTIFY(driver, dev);
3480	if (device_get_children(dev, &devlist, &numdevs) != 0)
3481		return;
3482	for (i = 0; i < numdevs; i++) {
3483		child = devlist[i];
3484		if (device_get_state(child) != DS_NOTPRESENT)
3485			continue;
3486		dinfo = device_get_ivars(child);
3487		pci_print_verbose(dinfo);
3488		if (bootverbose)
3489			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3490		pci_cfg_restore(child, dinfo);
3491		if (device_probe_and_attach(child) != 0)
3492			pci_cfg_save(child, dinfo, 1);
3493	}
3494	free(devlist, M_TEMP);
3495}
3496
/*
 * Bus method: set up an interrupt handler for a child device.  For
 * direct children this also programs the MSI/MSI-X address and data
 * registers (via the parent bridge) the first time a handler is
 * attached to a vector, and manages the INTx disable bit.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler arrives. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3588
3589int
3590pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3591    void *cookie)
3592{
3593	struct msix_table_entry *mte;
3594	struct resource_list_entry *rle;
3595	struct pci_devinfo *dinfo;
3596	int error, rid;
3597
3598	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3599		return (EINVAL);
3600
3601	/* If this isn't a direct child, just bail out */
3602	if (device_get_parent(child) != dev)
3603		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3604
3605	rid = rman_get_rid(irq);
3606	if (rid == 0) {
3607		/* Mask INTx */
3608		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3609	} else {
3610		/*
3611		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3612		 * decrement the appropriate handlers count and mask the
3613		 * MSI-X message, or disable MSI messages if the count
3614		 * drops to 0.
3615		 */
3616		dinfo = device_get_ivars(child);
3617		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3618		if (rle->res != irq)
3619			return (EINVAL);
3620		if (dinfo->cfg.msi.msi_alloc > 0) {
3621			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3622			    ("MSI-X index too high"));
3623			if (dinfo->cfg.msi.msi_handlers == 0)
3624				return (EINVAL);
3625			dinfo->cfg.msi.msi_handlers--;
3626			if (dinfo->cfg.msi.msi_handlers == 0)
3627				pci_disable_msi(child);
3628		} else {
3629			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3630			    ("No MSI or MSI-X interrupts allocated"));
3631			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3632			    ("MSI-X index too high"));
3633			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3634			if (mte->mte_handlers == 0)
3635				return (EINVAL);
3636			mte->mte_handlers--;
3637			if (mte->mte_handlers == 0)
3638				pci_mask_msix(child, rid - 1);
3639		}
3640	}
3641	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3642	if (rid > 0)
3643		KASSERT(error == 0,
3644		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3645	return (error);
3646}
3647
3648int
3649pci_print_child(device_t dev, device_t child)
3650{
3651	struct pci_devinfo *dinfo;
3652	struct resource_list *rl;
3653	int retval = 0;
3654
3655	dinfo = device_get_ivars(child);
3656	rl = &dinfo->resources;
3657
3658	retval += bus_print_child_header(dev, child);
3659
3660	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3661	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3662	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3663	if (device_get_flags(dev))
3664		retval += printf(" flags %#x", device_get_flags(dev));
3665
3666	retval += printf(" at device %d.%d", pci_get_slot(child),
3667	    pci_get_function(child));
3668
3669	retval += bus_print_child_footer(dev, child);
3670
3671	return (retval);
3672}
3673
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a generic description for devices without a driver.  An
 * entry with subclass -1 names the class as a whole; a NULL desc
 * terminates the table.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3766
3767void
3768pci_probe_nomatch(device_t dev, device_t child)
3769{
3770	int i;
3771	const char *cp, *scp;
3772	char *device;
3773
3774	/*
3775	 * Look for a listing for this device in a loaded device database.
3776	 */
3777	if ((device = pci_describe_device(child)) != NULL) {
3778		device_printf(dev, "<%s>", device);
3779		free(device, M_DEVBUF);
3780	} else {
3781		/*
3782		 * Scan the class/subclass descriptions for a general
3783		 * description.
3784		 */
3785		cp = "unknown";
3786		scp = NULL;
3787		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3788			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3789				if (pci_nomatch_tab[i].subclass == -1) {
3790					cp = pci_nomatch_tab[i].desc;
3791				} else if (pci_nomatch_tab[i].subclass ==
3792				    pci_get_subclass(child)) {
3793					scp = pci_nomatch_tab[i].desc;
3794				}
3795			}
3796		}
3797		device_printf(dev, "<%s%s%s>",
3798		    cp ? cp : "",
3799		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3800		    scp ? scp : "");
3801	}
3802	printf(" at device %d.%d (no driver attached)\n",
3803	    pci_get_slot(child), pci_get_function(child));
3804	pci_cfg_save(child, device_get_ivars(child), 1);
3805}
3806
3807/*
3808 * Parse the PCI device database, if loaded, and return a pointer to a
3809 * description of the device.
3810 *
3811 * The database is flat text formatted as follows:
3812 *
3813 * Any line not in a valid format is ignored.
3814 * Lines are terminated with newline '\n' characters.
3815 *
3816 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3817 * the vendor name.
3818 *
3819 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3820 * - devices cannot be listed without a corresponding VENDOR line.
3821 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3822 * another TAB, then the device name.
3823 */
3824
3825/*
3826 * Assuming (ptr) points to the beginning of a line in the database,
3827 * return the vendor or device and description of the next entry.
3828 * The value of (vendor) or (device) inappropriate for the entry type
3829 * is set to -1.  Returns nonzero at the end of the database.
3830 *
3831 * Note that this is slightly unrobust in the face of corrupt data;
3832 * we attempt to safeguard against this by spamming the end of the
3833 * database with a newline when we initialise.
3834 */
3835static int
3836pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3837{
3838	char	*cp = *ptr;
3839	int	left;
3840
3841	*device = -1;
3842	*vendor = -1;
3843	**desc = '\0';
3844	for (;;) {
3845		left = pci_vendordata_size - (cp - pci_vendordata);
3846		if (left <= 0) {
3847			*ptr = cp;
3848			return(1);
3849		}
3850
3851		/* vendor entry? */
3852		if (*cp != '\t' &&
3853		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3854			break;
3855		/* device entry? */
3856		if (*cp == '\t' &&
3857		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3858			break;
3859
3860		/* skip to next line */
3861		while (*cp != '\n' && left > 0) {
3862			cp++;
3863			left--;
3864		}
3865		if (*cp == '\n') {
3866			cp++;
3867			left--;
3868		}
3869	}
3870	/* skip to next line */
3871	while (*cp != '\n' && left > 0) {
3872		cp++;
3873		left--;
3874	}
3875	if (*cp == '\n' && left > 0)
3876		cp++;
3877	*ptr = cp;
3878	return(0);
3879}
3880
3881static char *
3882pci_describe_device(device_t dev)
3883{
3884	int	vendor, device;
3885	char	*desc, *vp, *dp, *line;
3886
3887	desc = vp = dp = NULL;
3888
3889	/*
3890	 * If we have no vendor data, we can't do anything.
3891	 */
3892	if (pci_vendordata == NULL)
3893		goto out;
3894
3895	/*
3896	 * Scan the vendor data looking for this device
3897	 */
3898	line = pci_vendordata;
3899	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3900		goto out;
3901	for (;;) {
3902		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3903			goto out;
3904		if (vendor == pci_get_vendor(dev))
3905			break;
3906	}
3907	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3908		goto out;
3909	for (;;) {
3910		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3911			*dp = 0;
3912			break;
3913		}
3914		if (vendor != -1) {
3915			*dp = 0;
3916			break;
3917		}
3918		if (device == pci_get_device(dev))
3919			break;
3920	}
3921	if (dp[0] == '\0')
3922		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3923	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3924	    NULL)
3925		sprintf(desc, "%s, %s", vp, dp);
3926out:
3927	if (vp != NULL)
3928		free(vp, M_DEVBUF);
3929	if (dp != NULL)
3930		free(dp, M_DEVBUF);
3931	return(desc);
3932}
3933
3934int
3935pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3936{
3937	struct pci_devinfo *dinfo;
3938	pcicfgregs *cfg;
3939
3940	dinfo = device_get_ivars(child);
3941	cfg = &dinfo->cfg;
3942
3943	switch (which) {
3944	case PCI_IVAR_ETHADDR:
3945		/*
3946		 * The generic accessor doesn't deal with failure, so
3947		 * we set the return value, then return an error.
3948		 */
3949		*((uint8_t **) result) = NULL;
3950		return (EINVAL);
3951	case PCI_IVAR_SUBVENDOR:
3952		*result = cfg->subvendor;
3953		break;
3954	case PCI_IVAR_SUBDEVICE:
3955		*result = cfg->subdevice;
3956		break;
3957	case PCI_IVAR_VENDOR:
3958		*result = cfg->vendor;
3959		break;
3960	case PCI_IVAR_DEVICE:
3961		*result = cfg->device;
3962		break;
3963	case PCI_IVAR_DEVID:
3964		*result = (cfg->device << 16) | cfg->vendor;
3965		break;
3966	case PCI_IVAR_CLASS:
3967		*result = cfg->baseclass;
3968		break;
3969	case PCI_IVAR_SUBCLASS:
3970		*result = cfg->subclass;
3971		break;
3972	case PCI_IVAR_PROGIF:
3973		*result = cfg->progif;
3974		break;
3975	case PCI_IVAR_REVID:
3976		*result = cfg->revid;
3977		break;
3978	case PCI_IVAR_INTPIN:
3979		*result = cfg->intpin;
3980		break;
3981	case PCI_IVAR_IRQ:
3982		*result = cfg->intline;
3983		break;
3984	case PCI_IVAR_DOMAIN:
3985		*result = cfg->domain;
3986		break;
3987	case PCI_IVAR_BUS:
3988		*result = cfg->bus;
3989		break;
3990	case PCI_IVAR_SLOT:
3991		*result = cfg->slot;
3992		break;
3993	case PCI_IVAR_FUNCTION:
3994		*result = cfg->func;
3995		break;
3996	case PCI_IVAR_CMDREG:
3997		*result = cfg->cmdreg;
3998		break;
3999	case PCI_IVAR_CACHELNSZ:
4000		*result = cfg->cachelnsz;
4001		break;
4002	case PCI_IVAR_MINGNT:
4003		*result = cfg->mingnt;
4004		break;
4005	case PCI_IVAR_MAXLAT:
4006		*result = cfg->maxlat;
4007		break;
4008	case PCI_IVAR_LATTIMER:
4009		*result = cfg->lattimer;
4010		break;
4011	default:
4012		return (ENOENT);
4013	}
4014	return (0);
4015}
4016
4017int
4018pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4019{
4020	struct pci_devinfo *dinfo;
4021
4022	dinfo = device_get_ivars(child);
4023
4024	switch (which) {
4025	case PCI_IVAR_INTPIN:
4026		dinfo->cfg.intpin = value;
4027		return (0);
4028	case PCI_IVAR_ETHADDR:
4029	case PCI_IVAR_SUBVENDOR:
4030	case PCI_IVAR_SUBDEVICE:
4031	case PCI_IVAR_VENDOR:
4032	case PCI_IVAR_DEVICE:
4033	case PCI_IVAR_DEVID:
4034	case PCI_IVAR_CLASS:
4035	case PCI_IVAR_SUBCLASS:
4036	case PCI_IVAR_PROGIF:
4037	case PCI_IVAR_REVID:
4038	case PCI_IVAR_IRQ:
4039	case PCI_IVAR_DOMAIN:
4040	case PCI_IVAR_BUS:
4041	case PCI_IVAR_SLOT:
4042	case PCI_IVAR_FUNCTION:
4043		return (EINVAL);	/* disallow for now */
4044
4045	default:
4046		return (ENOENT);
4047	}
4048}
4049
4050#include "opt_ddb.h"
4051#ifdef DDB
4052#include <ddb/ddb.h>
4053#include <sys/cons.h>
4054
4055/*
4056 * List resources based on pci map registers, used for within ddb
4057 */
4058
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one line per device with its location, class, subsystem,
 * vendor/device IDs, revision and header type.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unnamed devices print as "none<N>" via none_count. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4097}
4098#endif /* DDB */
4099
4100static struct resource *
4101pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4102    u_long start, u_long end, u_long count, u_int flags)
4103{
4104	struct pci_devinfo *dinfo = device_get_ivars(child);
4105	struct resource_list *rl = &dinfo->resources;
4106	struct resource_list_entry *rle;
4107	struct resource *res;
4108	struct pci_map *pm;
4109	pci_addr_t map, testval;
4110	int mapsize;
4111
4112	res = NULL;
4113	pm = pci_find_bar(child, *rid);
4114	if (pm != NULL) {
4115		/* This is a BAR that we failed to allocate earlier. */
4116		mapsize = pm->pm_size;
4117		map = pm->pm_value;
4118	} else {
4119		/*
4120		 * Weed out the bogons, and figure out how large the
4121		 * BAR/map is.  BARs that read back 0 here are bogus
4122		 * and unimplemented.  Note: atapci in legacy mode are
4123		 * special and handled elsewhere in the code.  If you
4124		 * have a atapci device in legacy mode and it fails
4125		 * here, that other code is broken.
4126		 */
4127		pci_read_bar(child, *rid, &map, &testval);
4128
4129		/*
4130		 * Determine the size of the BAR and ignore BARs with a size
4131		 * of 0.  Device ROM BARs use a different mask value.
4132		 */
4133		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4134			mapsize = pci_romsize(testval);
4135		else
4136			mapsize = pci_mapsize(testval);
4137		if (mapsize == 0)
4138			goto out;
4139		pm = pci_add_bar(child, *rid, map, mapsize);
4140	}
4141
4142	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4143		if (type != SYS_RES_MEMORY) {
4144			if (bootverbose)
4145				device_printf(dev,
4146				    "child %s requested type %d for rid %#x,"
4147				    " but the BAR says it is an memio\n",
4148				    device_get_nameunit(child), type, *rid);
4149			goto out;
4150		}
4151	} else {
4152		if (type != SYS_RES_IOPORT) {
4153			if (bootverbose)
4154				device_printf(dev,
4155				    "child %s requested type %d for rid %#x,"
4156				    " but the BAR says it is an ioport\n",
4157				    device_get_nameunit(child), type, *rid);
4158			goto out;
4159		}
4160	}
4161
4162	/*
4163	 * For real BARs, we need to override the size that
4164	 * the driver requests, because that's what the BAR
4165	 * actually uses and we would otherwise have a
4166	 * situation where we might allocate the excess to
4167	 * another driver, which won't work.
4168	 */
4169	count = (pci_addr_t)1 << mapsize;
4170	if (RF_ALIGNMENT(flags) < mapsize)
4171		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4172	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4173		flags |= RF_PREFETCHABLE;
4174
4175	/*
4176	 * Allocate enough resource, and then write back the
4177	 * appropriate BAR for that resource.
4178	 */
4179	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
4180	    start, end, count, flags & ~RF_ACTIVE);
4181	if (res == NULL) {
4182		device_printf(child,
4183		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4184		    count, *rid, type, start, end);
4185		goto out;
4186	}
4187	resource_list_add(rl, type, *rid, start, end, count);
4188	rle = resource_list_find(rl, type, *rid);
4189	if (rle == NULL)
4190		panic("pci_reserve_map: unexpectedly can't find resource.");
4191	rle->res = res;
4192	rle->start = rman_get_start(res);
4193	rle->end = rman_get_end(res);
4194	rle->count = count;
4195	rle->flags = RLE_RESERVED;
4196	if (bootverbose)
4197		device_printf(child,
4198		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4199		    count, *rid, type, rman_get_start(res));
4200	map = rman_get_start(res);
4201	pci_write_bar(child, pm, map);
4202out:
4203	return (res);
4204}
4205
4206struct resource *
4207pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4208		   u_long start, u_long end, u_long count, u_int flags)
4209{
4210	struct pci_devinfo *dinfo = device_get_ivars(child);
4211	struct resource_list *rl = &dinfo->resources;
4212	struct resource_list_entry *rle;
4213	struct resource *res;
4214	pcicfgregs *cfg = &dinfo->cfg;
4215
4216	if (device_get_parent(child) != dev)
4217		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4218		    type, rid, start, end, count, flags));
4219
4220	/*
4221	 * Perform lazy resource allocation
4222	 */
4223	switch (type) {
4224	case SYS_RES_IRQ:
4225		/*
4226		 * Can't alloc legacy interrupt once MSI messages have
4227		 * been allocated.
4228		 */
4229		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4230		    cfg->msix.msix_alloc > 0))
4231			return (NULL);
4232
4233		/*
4234		 * If the child device doesn't have an interrupt
4235		 * routed and is deserving of an interrupt, try to
4236		 * assign it one.
4237		 */
4238		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4239		    (cfg->intpin != 0))
4240			pci_assign_interrupt(dev, child, 0);
4241		break;
4242	case SYS_RES_IOPORT:
4243	case SYS_RES_MEMORY:
4244#ifdef NEW_PCIB
4245		/*
4246		 * PCI-PCI bridge I/O window resources are not BARs.
4247		 * For those allocations just pass the request up the
4248		 * tree.
4249		 */
4250		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4251			switch (*rid) {
4252			case PCIR_IOBASEL_1:
4253			case PCIR_MEMBASE_1:
4254			case PCIR_PMBASEL_1:
4255				/*
4256				 * XXX: Should we bother creating a resource
4257				 * list entry?
4258				 */
4259				return (bus_generic_alloc_resource(dev, child,
4260				    type, rid, start, end, count, flags));
4261			}
4262		}
4263#endif
4264		/* Reserve resources for this BAR if needed. */
4265		rle = resource_list_find(rl, type, *rid);
4266		if (rle == NULL) {
4267			res = pci_reserve_map(dev, child, type, rid, start, end,
4268			    count, flags);
4269			if (res == NULL)
4270				return (NULL);
4271		}
4272	}
4273	return (resource_list_alloc(rl, dev, child, type, rid,
4274	    start, end, count, flags));
4275}
4276
4277int
4278pci_activate_resource(device_t dev, device_t child, int type, int rid,
4279    struct resource *r)
4280{
4281	struct pci_devinfo *dinfo;
4282	int error;
4283
4284	error = bus_generic_activate_resource(dev, child, type, rid, r);
4285	if (error)
4286		return (error);
4287
4288	/* Enable decoding in the command register when activating BARs. */
4289	if (device_get_parent(child) == dev) {
4290		/* Device ROMs need their decoding explicitly enabled. */
4291		dinfo = device_get_ivars(child);
4292		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4293			pci_write_bar(child, pci_find_bar(child, rid),
4294			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4295		switch (type) {
4296		case SYS_RES_IOPORT:
4297		case SYS_RES_MEMORY:
4298			error = PCI_ENABLE_IO(dev, child, type);
4299			break;
4300		}
4301	}
4302	return (error);
4303}
4304
4305int
4306pci_deactivate_resource(device_t dev, device_t child, int type,
4307    int rid, struct resource *r)
4308{
4309	struct pci_devinfo *dinfo;
4310	int error;
4311
4312	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4313	if (error)
4314		return (error);
4315
4316	/* Disable decoding for device ROMs. */
4317	if (device_get_parent(child) == dev) {
4318		dinfo = device_get_ivars(child);
4319		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4320			pci_write_bar(child, pci_find_bar(child, rid),
4321			    rman_get_start(r));
4322	}
4323	return (0);
4324}
4325
4326void
4327pci_delete_child(device_t dev, device_t child)
4328{
4329	struct resource_list_entry *rle;
4330	struct resource_list *rl;
4331	struct pci_devinfo *dinfo;
4332
4333	dinfo = device_get_ivars(child);
4334	rl = &dinfo->resources;
4335
4336	if (device_is_attached(child))
4337		device_detach(child);
4338
4339	/* Turn off access to resources we're about to free */
4340	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4341	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4342
4343	/* Free all allocated resources */
4344	STAILQ_FOREACH(rle, rl, link) {
4345		if (rle->res) {
4346			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4347			    resource_list_busy(rl, rle->type, rle->rid)) {
4348				pci_printf(&dinfo->cfg,
4349				    "Resource still owned, oops. "
4350				    "(type=%d, rid=%d, addr=%lx)\n",
4351				    rle->type, rle->rid,
4352				    rman_get_start(rle->res));
4353				bus_release_resource(child, rle->type, rle->rid,
4354				    rle->res);
4355			}
4356			resource_list_unreserve(rl, dev, child, rle->type,
4357			    rle->rid);
4358		}
4359	}
4360	resource_list_free(rl);
4361
4362	device_delete_child(dev, child);
4363	pci_freecfg(dinfo);
4364}
4365
4366void
4367pci_delete_resource(device_t dev, device_t child, int type, int rid)
4368{
4369	struct pci_devinfo *dinfo;
4370	struct resource_list *rl;
4371	struct resource_list_entry *rle;
4372
4373	if (device_get_parent(child) != dev)
4374		return;
4375
4376	dinfo = device_get_ivars(child);
4377	rl = &dinfo->resources;
4378	rle = resource_list_find(rl, type, rid);
4379	if (rle == NULL)
4380		return;
4381
4382	if (rle->res) {
4383		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4384		    resource_list_busy(rl, type, rid)) {
4385			device_printf(dev, "delete_resource: "
4386			    "Resource still owned by child, oops. "
4387			    "(type=%d, rid=%d, addr=%lx)\n",
4388			    type, rid, rman_get_start(rle->res));
4389			return;
4390		}
4391		resource_list_unreserve(rl, dev, child, type, rid);
4392	}
4393	resource_list_delete(rl, type, rid);
4394}
4395
4396struct resource_list *
4397pci_get_resource_list (device_t dev, device_t child)
4398{
4399	struct pci_devinfo *dinfo = device_get_ivars(child);
4400
4401	return (&dinfo->resources);
4402}
4403
4404bus_dma_tag_t
4405pci_get_dma_tag(device_t bus, device_t dev)
4406{
4407	struct pci_softc *sc = device_get_softc(bus);
4408
4409	return (sc->sc_dma_tag);
4410}
4411
4412uint32_t
4413pci_read_config_method(device_t dev, device_t child, int reg, int width)
4414{
4415	struct pci_devinfo *dinfo = device_get_ivars(child);
4416	pcicfgregs *cfg = &dinfo->cfg;
4417
4418	return (PCIB_READ_CONFIG(device_get_parent(dev),
4419	    cfg->bus, cfg->slot, cfg->func, reg, width));
4420}
4421
4422void
4423pci_write_config_method(device_t dev, device_t child, int reg,
4424    uint32_t val, int width)
4425{
4426	struct pci_devinfo *dinfo = device_get_ivars(child);
4427	pcicfgregs *cfg = &dinfo->cfg;
4428
4429	PCIB_WRITE_CONFIG(device_get_parent(dev),
4430	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4431}
4432
4433int
4434pci_child_location_str_method(device_t dev, device_t child, char *buf,
4435    size_t buflen)
4436{
4437
4438	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4439	    pci_get_function(child));
4440	return (0);
4441}
4442
4443int
4444pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4445    size_t buflen)
4446{
4447	struct pci_devinfo *dinfo;
4448	pcicfgregs *cfg;
4449
4450	dinfo = device_get_ivars(child);
4451	cfg = &dinfo->cfg;
4452	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4453	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4454	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4455	    cfg->progif);
4456	return (0);
4457}
4458
4459int
4460pci_assign_interrupt_method(device_t dev, device_t child)
4461{
4462	struct pci_devinfo *dinfo = device_get_ivars(child);
4463	pcicfgregs *cfg = &dinfo->cfg;
4464
4465	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4466	    cfg->intpin));
4467}
4468
4469static int
4470pci_modevent(module_t mod, int what, void *arg)
4471{
4472	static struct cdev *pci_cdev;
4473
4474	switch (what) {
4475	case MOD_LOAD:
4476		STAILQ_INIT(&pci_devq);
4477		pci_generation = 0;
4478		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4479		    "pci");
4480		pci_load_vendor_data();
4481		break;
4482
4483	case MOD_UNLOAD:
4484		destroy_dev(pci_cdev);
4485		break;
4486	}
4487
4488	return (0);
4489}
4490
/*
 * Restore the PCI Express capability control registers previously
 * saved by pci_cfg_save_pcie().  Which registers are written depends
 * on the capability version and port type encoded in the saved flags:
 * for capability version 2 or later every register is written
 * unconditionally; for version 1 only the registers matching the
 * device's port type are written.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
	/* 16-bit write relative to the PCIe capability base. */
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every PCIe function. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: root ports and (legacy) endpoints, or any v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/*
	 * Slot control: root ports, or downstream ports that implement
	 * a slot (PCIEM_FLAGS_SLOT), or any v2+.
	 */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register block only exists in capability v2+. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4526
4527static void
4528pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4529{
4530	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4531	    dinfo->cfg.pcix.pcix_command,  2);
4532}
4533
/*
 * Restore a device's config-space state saved by pci_cfg_save():
 * BARs, the command register, interrupt/latency/cache-line registers,
 * and any PCIe, PCI-X, MSI, and MSI-X capability state.  Used on
 * resume and after power-state transitions.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Re-program the BARs, then the writable type-0 header fields. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4583
/*
 * Save the PCI Express capability control registers of a device so
 * that pci_cfg_restore_pcie() can reinstate them later.  The set of
 * registers read mirrors the set written on restore: for capability
 * version 2 or later everything is read; for version 1 only the
 * registers matching the device's port type.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
	/* 16-bit read relative to the PCIe capability base. */
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* Refresh the flags first; version and slot bits live here. */
	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every PCIe function. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: root ports and (legacy) endpoints, or any v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/*
	 * Slot control: root ports, or downstream ports that implement
	 * a slot (PCIEM_FLAGS_SLOT), or any v2+.
	 */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register block only exists in capability v2+. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4621
4622static void
4623pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4624{
4625	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4626	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4627}
4628
/*
 * Snapshot a device's writable config-space state (type-0 header
 * fields plus PCIe and PCI-X capability registers) into the cached
 * copy in dinfo, and optionally (setstate != 0) power the device down
 * to D3 according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Cases intentionally fall through: each level powers down more. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4714
4715/* Wrapper APIs suitable for device driver use. */
4716void
4717pci_save_state(device_t dev)
4718{
4719	struct pci_devinfo *dinfo;
4720
4721	dinfo = device_get_ivars(dev);
4722	pci_cfg_save(dev, dinfo, 0);
4723}
4724
4725void
4726pci_restore_state(device_t dev)
4727{
4728	struct pci_devinfo *dinfo;
4729
4730	dinfo = device_get_ivars(dev);
4731	pci_cfg_restore(dev, dinfo);
4732}
4733