1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: releng/10.3/sys/dev/pci/pci.c 295131 2016-02-01 23:07:31Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/limits.h>
39#include <sys/linker.h>
40#include <sys/fcntl.h>
41#include <sys/conf.h>
42#include <sys/kernel.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/endian.h>
46
47#include <vm/vm.h>
48#include <vm/pmap.h>
49#include <vm/vm_extern.h>
50
51#include <sys/bus.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#include <machine/resource.h>
55#include <machine/stdarg.h>
56
57#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58#include <machine/intr_machdep.h>
59#endif
60
61#include <sys/pciio.h>
62#include <dev/pci/pcireg.h>
63#include <dev/pci/pcivar.h>
64#include <dev/pci/pci_private.h>
65
66#include <dev/usb/controller/xhcireg.h>
67#include <dev/usb/controller/ehcireg.h>
68#include <dev/usb/controller/ohcireg.h>
69#include <dev/usb/controller/uhcireg.h>
70
71#include "pcib_if.h"
72#include "pci_if.h"
73
74#define	PCIR_IS_BIOS(cfg, reg)						\
75	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
76	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
77
78static int		pci_has_quirk(uint32_t devid, int quirk);
79static pci_addr_t	pci_mapbase(uint64_t mapreg);
80static const char	*pci_maptype(uint64_t mapreg);
81static int		pci_mapsize(uint64_t testval);
82static int		pci_maprange(uint64_t mapreg);
83static pci_addr_t	pci_rombase(uint64_t mapreg);
84static int		pci_romsize(uint64_t testval);
85static void		pci_fixancient(pcicfgregs *cfg);
86static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
87
88static int		pci_porten(device_t dev);
89static int		pci_memen(device_t dev);
90static void		pci_assign_interrupt(device_t bus, device_t dev,
91			    int force_route);
92static int		pci_add_map(device_t bus, device_t dev, int reg,
93			    struct resource_list *rl, int force, int prefetch);
94static int		pci_probe(device_t dev);
95static int		pci_attach(device_t dev);
96#ifdef PCI_RES_BUS
97static int		pci_detach(device_t dev);
98#endif
99static void		pci_load_vendor_data(void);
100static int		pci_describe_parse_line(char **ptr, int *vendor,
101			    int *device, char **desc);
102static char		*pci_describe_device(device_t dev);
103static int		pci_modevent(module_t mod, int what, void *arg);
104static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105			    pcicfgregs *cfg);
106static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108			    int reg, uint32_t *data);
109#if 0
110static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111			    int reg, uint32_t data);
112#endif
113static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static int		pci_msix_blacklisted(void);
118static void		pci_resume_msi(device_t dev);
119static void		pci_resume_msix(device_t dev);
120static int		pci_remap_intr_method(device_t bus, device_t dev,
121			    u_int irq);
122
123static uint16_t		pci_get_rid_method(device_t dev, device_t child);
124
/*
 * KOBJ method table for the "pci" bus driver: device life-cycle hooks,
 * the generic newbus bus interface, and the PCI-specific interface
 * (config space access, power states, MSI/MSI-X management).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	/* With bus-number resources a real detach routine is required. */
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),

	DEVMETHOD_END
};
193
/* Declare the "pci" driver class backed by the method table above. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
/* Attach pci buses under pcib (PCI-PCI / host bridge) parents. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor/device description database, populated by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
202
/*
 * One entry in the quirk table below.  A device matches when its
 * combined vendor/device ID equals 'devid' and the lookup asks for
 * the same quirk 'type'.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* quirk-specific argument (e.g. register offset) */
	int	arg2;	/* quirk-specific argument, unused by current quirks */
};
215
/* Table of known-broken (or known-good-in-VM) devices, scanned linearly. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator: devid 0 ends the table scan */
};
293
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every discovered PCI function (see pci_read_device()). */
struct devlist pci_devq;
/* Bumped whenever pci_devq changes, so userland snapshots can detect races. */
uint32_t pci_generation;
/* Number of entries on pci_devq. */
uint32_t pci_numdevs = 0;
/* Set when a PCIe / PCI-X capability is seen during capability scan. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* Non-static: also consulted by other PCI code (declared in pcivar.h). */
int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover defaults on only where a legacy BIOS may own the HC. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
TUNABLE_INT("hw.pci.clear_buses", &pci_clear_buses);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
TUNABLE_INT("hw.pci.enable_ari", &pci_enable_ari);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
384
385static int
386pci_has_quirk(uint32_t devid, int quirk)
387{
388	const struct pci_quirk *q;
389
390	for (q = &pci_quirks[0]; q->devid; q++) {
391		if (q->devid == devid && q->type == quirk)
392			return (1);
393	}
394	return (0);
395}
396
397/* Find a device_t by bus/slot/function in domain 0 */
398
399device_t
400pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
401{
402
403	return (pci_find_dbsf(0, bus, slot, func));
404}
405
406/* Find a device_t by domain/bus/slot/function */
407
408device_t
409pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
410{
411	struct pci_devinfo *dinfo;
412
413	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
414		if ((dinfo->cfg.domain == domain) &&
415		    (dinfo->cfg.bus == bus) &&
416		    (dinfo->cfg.slot == slot) &&
417		    (dinfo->cfg.func == func)) {
418			return (dinfo->cfg.dev);
419		}
420	}
421
422	return (NULL);
423}
424
425/* Find a device_t by vendor/device ID */
426
427device_t
428pci_find_device(uint16_t vendor, uint16_t device)
429{
430	struct pci_devinfo *dinfo;
431
432	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
433		if ((dinfo->cfg.vendor == vendor) &&
434		    (dinfo->cfg.device == device)) {
435			return (dinfo->cfg.dev);
436		}
437	}
438
439	return (NULL);
440}
441
442device_t
443pci_find_class(uint8_t class, uint8_t subclass)
444{
445	struct pci_devinfo *dinfo;
446
447	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
448		if (dinfo->cfg.baseclass == class &&
449		    dinfo->cfg.subclass == subclass) {
450			return (dinfo->cfg.dev);
451		}
452	}
453
454	return (NULL);
455}
456
457static int
458pci_printf(pcicfgregs *cfg, const char *fmt, ...)
459{
460	va_list ap;
461	int retval;
462
463	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
464	    cfg->func);
465	va_start(ap, fmt);
466	retval += vprintf(fmt, ap);
467	va_end(ap);
468	return (retval);
469}
470
471/* return base address of memory or port map */
472
473static pci_addr_t
474pci_mapbase(uint64_t mapreg)
475{
476
477	if (PCI_BAR_MEM(mapreg))
478		return (mapreg & PCIM_BAR_MEM_BASE);
479	else
480		return (mapreg & PCIM_BAR_IO_BASE);
481}
482
483/* return map type of memory or port map */
484
485static const char *
486pci_maptype(uint64_t mapreg)
487{
488
489	if (PCI_BAR_IO(mapreg))
490		return ("I/O Port");
491	if (mapreg & PCIM_BAR_MEM_PREFETCH)
492		return ("Prefetchable Memory");
493	return ("Memory");
494}
495
496/* return log2 of map size decoded for memory or port map */
497
498static int
499pci_mapsize(uint64_t testval)
500{
501	int ln2size;
502
503	testval = pci_mapbase(testval);
504	ln2size = 0;
505	if (testval != 0) {
506		while ((testval & 1) == 0)
507		{
508			ln2size++;
509			testval >>= 1;
510		}
511	}
512	return (ln2size);
513}
514
515/* return base address of device ROM */
516
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
523
/* return log2 of map size decoded for device ROM */
525
526static int
527pci_romsize(uint64_t testval)
528{
529	int ln2size;
530
531	testval = pci_rombase(testval);
532	ln2size = 0;
533	if (testval != 0) {
534		while ((testval & 1) == 0)
535		{
536			ln2size++;
537			testval >>= 1;
538		}
539	}
540	return (ln2size);
541}
542
543/* return log2 of address range supported by map register */
544
545static int
546pci_maprange(uint64_t mapreg)
547{
548	int ln2range = 0;
549
550	if (PCI_BAR_IO(mapreg))
551		ln2range = 32;
552	else
553		switch (mapreg & PCIM_BAR_MEM_TYPE) {
554		case PCIM_BAR_MEM_32:
555			ln2range = 32;
556			break;
557		case PCIM_BAR_MEM_1MB:
558			ln2range = 20;
559			break;
560		case PCIM_BAR_MEM_64:
561			ln2range = 64;
562			break;
563		}
564	return (ln2range);
565}
566
567/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
568
569static void
570pci_fixancient(pcicfgregs *cfg)
571{
572	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
573		return;
574
575	/* PCI to PCI bridges use header type 1 */
576	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
577		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
578}
579
580/* extract header type specific config data */
581
/*
 * Read the header-type-specific fields of the config header at b:s:f
 * into 'cfg': subsystem IDs and timing fields for normal headers,
 * and the number of BARs (nummaps) for each header type.  Unknown
 * header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges have only two BARs and no subsystem IDs here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
605
606/* read configuration header into pcicfgregs structure */
/*
 * Probe the function at domain d, bus b, slot s, function f.  If a
 * device responds (the vendor/device dword is not all-ones), allocate
 * a pci_devinfo of 'size' bytes (callers may request a larger derived
 * structure), fill its config registers, scan its capability list,
 * append it to the global pci_devq list, and mirror the data into the
 * exported pci_conf.  Returns the new entry, or NULL if no device.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An absent function reads back all-ones for vendor/device. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		/* Header-type-specific fields (subsystem IDs, nummaps). */
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror config data into the pci_conf exported to userland. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		/* Bump global counters so userland can detect list changes. */
		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
676
/*
 * Walk the device's PCI capability list and record the location and
 * key registers of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI Express)
 * into the corresponding sub-structures of 'cfg'.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type-dependent offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets must stay within config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of supported messages. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG are deliberately left defined for the VPD helpers below. */
}
837
838/*
839 * PCI Vital Product Data
840 */
841
842#define	PCI_VPD_TIMEOUT		1000000
843
844static int
845pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
846{
847	int count = PCI_VPD_TIMEOUT;
848
849	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
850
851	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
852
853	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
854		if (--count < 0)
855			return (ENXIO);
856		DELAY(1);	/* limit looping */
857	}
858	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
859
860	return (0);
861}
862
#if 0
/*
 * Write one 32-bit dword of VPD data at 4-byte-aligned address 'reg'.
 * Mirror of pci_read_vpd_reg(): writes the data, then the address
 * with the flag bit set (write request), and polls until the device
 * clears the flag.  Currently compiled out (no callers).
 * NOTE(review): the KASSERT message has the same "must by" typo as
 * the read path originally did; fix if this code is ever enabled.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Flag bit clears when the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
882
883#undef PCI_VPD_TIMEOUT
884
/*
 * Cursor for reading a device's VPD area one byte at a time via
 * vpd_nextbyte(): buffers one 32-bit word and tracks a running
 * checksum of every byte consumed.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched 32-bit word */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD address to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
893
894static int
895vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
896{
897	uint32_t reg;
898	uint8_t byte;
899
900	if (vrs->bytesinval == 0) {
901		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
902			return (ENXIO);
903		vrs->val = le32toh(reg);
904		vrs->off += 4;
905		byte = vrs->val & 0xff;
906		vrs->bytesinval = 3;
907	} else {
908		vrs->val = vrs->val >> 8;
909		byte = vrs->val & 0xff;
910		vrs->bytesinval--;
911	}
912
913	vrs->cksum += byte;
914	*data = byte;
915	return (0);
916}
917
/*
 * Parse the device's Vital Product Data (VPD) and cache the decoded
 * results in cfg->vpd.  Bytes are pulled through the bridge 'pcib'
 * via vpd_nextbyte() and fed to a small state machine:
 *
 *	state 0 - resource item header (small or large form)
 *	state 1 - identifier-string body
 *	state 2 - VPD-R keyword header    state 3 - VPD-R keyword value
 *	state 5 - VPD-W keyword header    state 6 - VPD-W keyword value
 *	state 4 - skip bytes until the current item is exhausted
 *
 * state -1 ends parsing cleanly; state -2 marks an I/O error, which
 * causes everything parsed so far to be discarded.  The VPD-R "RV"
 * keyword carries a checksum byte: the running byte sum kept in
 * vrs.cksum must be zero there or the read-only data is dropped.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/*
				 * Large resource data type: a 16-bit
				 * little-endian length follows the tag byte.
				 */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * VPD is addressed 4 bytes at a time through
				 * a 7-bit register index, so anything claiming
				 * to extend past that window is bogus.
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/*
				 * Small resource data type: 3-bit length and
				 * 4-bit name packed into the tag byte.
				 */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			/* Each header is a 2-byte keyword plus 1-byte length. */
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte is the checksum
			 * byte; the running sum over everything up to and
			 * including it must be zero (mod 256).
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the entries actually used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Same doubling growth as the read-only array. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where the writable value starts in VPD space. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the cache valid even on failure so we never re-parse. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1189
1190int
1191pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1192{
1193	struct pci_devinfo *dinfo = device_get_ivars(child);
1194	pcicfgregs *cfg = &dinfo->cfg;
1195
1196	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1197		pci_read_vpd(device_get_parent(dev), cfg);
1198
1199	*identptr = cfg->vpd.vpd_ident;
1200
1201	if (*identptr == NULL)
1202		return (ENXIO);
1203
1204	return (0);
1205}
1206
1207int
1208pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1209	const char **vptr)
1210{
1211	struct pci_devinfo *dinfo = device_get_ivars(child);
1212	pcicfgregs *cfg = &dinfo->cfg;
1213	int i;
1214
1215	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1216		pci_read_vpd(device_get_parent(dev), cfg);
1217
1218	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1219		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1220		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1221			*vptr = cfg->vpd.vpd_ros[i].value;
1222			return (0);
1223		}
1224
1225	*vptr = NULL;
1226	return (ENXIO);
1227}
1228
1229struct pcicfg_vpd *
1230pci_fetch_vpd_list(device_t dev)
1231{
1232	struct pci_devinfo *dinfo = device_get_ivars(dev);
1233	pcicfgregs *cfg = &dinfo->cfg;
1234
1235	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1236		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1237	return (&cfg->vpd);
1238}
1239
1240/*
1241 * Find the requested HyperTransport capability and return the offset
1242 * in configuration space via the pointer provided.  The function
1243 * returns 0 on success and an error code otherwise.
1244 */
1245int
1246pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1247{
1248	int ptr, error;
1249	uint16_t val;
1250
1251	error = pci_find_cap(child, PCIY_HT, &ptr);
1252	if (error)
1253		return (error);
1254
1255	/*
1256	 * Traverse the capabilities list checking each HT capability
1257	 * to see if it matches the requested HT capability.
1258	 */
1259	while (ptr != 0) {
1260		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1261		if (capability == PCIM_HTCAP_SLAVE ||
1262		    capability == PCIM_HTCAP_HOST)
1263			val &= 0xe000;
1264		else
1265			val &= PCIM_HTCMD_CAP_MASK;
1266		if (val == capability) {
1267			if (capreg != NULL)
1268				*capreg = ptr;
1269			return (0);
1270		}
1271
1272		/* Skip to the next HT capability. */
1273		while (ptr != 0) {
1274			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1275			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1276			    PCIY_HT)
1277				break;
1278		}
1279	}
1280	return (ENOENT);
1281}
1282
1283/*
1284 * Find the requested capability and return the offset in
1285 * configuration space via the pointer provided.  The function returns
1286 * 0 on success and an error code otherwise.
1287 */
1288int
1289pci_find_cap_method(device_t dev, device_t child, int capability,
1290    int *capreg)
1291{
1292	struct pci_devinfo *dinfo = device_get_ivars(child);
1293	pcicfgregs *cfg = &dinfo->cfg;
1294	u_int32_t status;
1295	u_int8_t ptr;
1296
1297	/*
1298	 * Check the CAP_LIST bit of the PCI status register first.
1299	 */
1300	status = pci_read_config(child, PCIR_STATUS, 2);
1301	if (!(status & PCIM_STATUS_CAPPRESENT))
1302		return (ENXIO);
1303
1304	/*
1305	 * Determine the start pointer of the capabilities list.
1306	 */
1307	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1308	case PCIM_HDRTYPE_NORMAL:
1309	case PCIM_HDRTYPE_BRIDGE:
1310		ptr = PCIR_CAP_PTR;
1311		break;
1312	case PCIM_HDRTYPE_CARDBUS:
1313		ptr = PCIR_CAP_PTR_2;
1314		break;
1315	default:
1316		/* XXX: panic? */
1317		return (ENXIO);		/* no extended capabilities support */
1318	}
1319	ptr = pci_read_config(child, ptr, 1);
1320
1321	/*
1322	 * Traverse the capabilities list.
1323	 */
1324	while (ptr != 0) {
1325		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1326			if (capreg != NULL)
1327				*capreg = ptr;
1328			return (0);
1329		}
1330		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1331	}
1332
1333	return (ENOENT);
1334}
1335
1336/*
1337 * Find the requested extended capability and return the offset in
1338 * configuration space via the pointer provided.  The function returns
1339 * 0 on success and an error code otherwise.
1340 */
1341int
1342pci_find_extcap_method(device_t dev, device_t child, int capability,
1343    int *capreg)
1344{
1345	struct pci_devinfo *dinfo = device_get_ivars(child);
1346	pcicfgregs *cfg = &dinfo->cfg;
1347	uint32_t ecap;
1348	uint16_t ptr;
1349
1350	/* Only supported for PCI-express devices. */
1351	if (cfg->pcie.pcie_location == 0)
1352		return (ENXIO);
1353
1354	ptr = PCIR_EXTCAP;
1355	ecap = pci_read_config(child, ptr, 4);
1356	if (ecap == 0xffffffff || ecap == 0)
1357		return (ENOENT);
1358	for (;;) {
1359		if (PCI_EXTCAP_ID(ecap) == capability) {
1360			if (capreg != NULL)
1361				*capreg = ptr;
1362			return (0);
1363		}
1364		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1365		if (ptr == 0)
1366			break;
1367		ecap = pci_read_config(child, ptr, 4);
1368	}
1369
1370	return (ENOENT);
1371}
1372
1373/*
1374 * Support for MSI-X message interrupts.
1375 */
1376void
1377pci_enable_msix_method(device_t dev, device_t child, u_int index,
1378    uint64_t address, uint32_t data)
1379{
1380	struct pci_devinfo *dinfo = device_get_ivars(child);
1381	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382	uint32_t offset;
1383
1384	KASSERT(msix->msix_table_len > index, ("bogus index"));
1385	offset = msix->msix_table_offset + index * 16;
1386	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1387	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1388	bus_write_4(msix->msix_table_res, offset + 8, data);
1389
1390	/* Enable MSI -> HT mapping. */
1391	pci_ht_map_msi(child, address);
1392}
1393
1394void
1395pci_mask_msix(device_t dev, u_int index)
1396{
1397	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399	uint32_t offset, val;
1400
1401	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1402	offset = msix->msix_table_offset + index * 16 + 12;
1403	val = bus_read_4(msix->msix_table_res, offset);
1404	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1405		val |= PCIM_MSIX_VCTRL_MASK;
1406		bus_write_4(msix->msix_table_res, offset, val);
1407	}
1408}
1409
1410void
1411pci_unmask_msix(device_t dev, u_int index)
1412{
1413	struct pci_devinfo *dinfo = device_get_ivars(dev);
1414	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1415	uint32_t offset, val;
1416
1417	KASSERT(msix->msix_table_len > index, ("bogus index"));
1418	offset = msix->msix_table_offset + index * 16 + 12;
1419	val = bus_read_4(msix->msix_table_res, offset);
1420	if (val & PCIM_MSIX_VCTRL_MASK) {
1421		val &= ~PCIM_MSIX_VCTRL_MASK;
1422		bus_write_4(msix->msix_table_res, offset, val);
1423	}
1424}
1425
1426int
1427pci_pending_msix(device_t dev, u_int index)
1428{
1429	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431	uint32_t offset, bit;
1432
1433	KASSERT(msix->msix_table_len > index, ("bogus index"));
1434	offset = msix->msix_pba_offset + (index / 32) * 4;
1435	bit = 1 << index % 32;
1436	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1437}
1438
1439/*
1440 * Restore MSI-X registers and table during resume.  If MSI-X is
1441 * enabled then walk the virtual table to restore the actual MSI-X
1442 * table.
1443 */
1444static void
1445pci_resume_msix(device_t dev)
1446{
1447	struct pci_devinfo *dinfo = device_get_ivars(dev);
1448	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1449	struct msix_table_entry *mte;
1450	struct msix_vector *mv;
1451	int i;
1452
1453	if (msix->msix_alloc > 0) {
1454		/* First, mask all vectors. */
1455		for (i = 0; i < msix->msix_msgnum; i++)
1456			pci_mask_msix(dev, i);
1457
1458		/* Second, program any messages with at least one handler. */
1459		for (i = 0; i < msix->msix_table_len; i++) {
1460			mte = &msix->msix_table[i];
1461			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1462				continue;
1463			mv = &msix->msix_vectors[mte->mte_vector - 1];
1464			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1465			pci_unmask_msix(dev, i);
1466		}
1467	}
1468	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1469	    msix->msix_ctrl, 2);
1470}
1471
1472/*
1473 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1474 * returned in *count.  After this function returns, each message will be
1475 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1476 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already activated the memory resources holding the MSI-X
	 * table and PBA; note that both may live in the same BAR, in
	 * which case 'rle' is reused for the PBA below.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/*
	 * Ask the parent bridge for up to 'max' messages, one at a time.
	 * Each allocated IRQ becomes a SYS_RES_IRQ resource with rid i+1.
	 * A partial allocation is accepted; only a failure to get the
	 * first message is fatal.
	 */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is the number of messages successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.  The
	 * default layout assigns vector i+1 to table entry i; drivers
	 * can change this later via pci_remap_msix().
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1611
1612/*
1613 * By default, pci_alloc_msix() will assign the allocated IRQ
1614 * resources consecutively to the first N messages in the MSI-X table.
1615 * However, device drivers may want to use different layouts if they
1616 * either receive fewer messages than they asked for, or they wish to
1617 * populate the MSI-X table sparsely.  This method allows the driver
1618 * to specify what layout it wants.  It must be called after a
1619 * successful pci_alloc_msix() but before any of the associated
1620 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1621 *
1622 * The 'vectors' array contains 'count' message vectors.  The array
1623 * maps directly to the MSI-X table in that index 0 in the array
1624 * specifies the vector for the first message in the MSI-X table, etc.
1625 * The vector value in each array index can either be 0 to indicate
1626 * that no vector should be assigned to a message slot, or it can be a
1627 * number from 1 to N (where N is the count returned from a
1628 * succcessful call to pci_alloc_msix()) to indicate which message
1629 * vector (IRQ) to be used for the corresponding message.
1630 *
1631 * On successful return, each message with a non-zero vector will have
1632 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1633 * 1.  Additionally, if any of the IRQs allocated via the previous
1634 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1635 * will be freed back to the system automatically.
1636 *
1637 * For example, suppose a driver has a MSI-X table with 6 messages and
1638 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1639 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1640 * C.  After the call to pci_alloc_msix(), the device will be setup to
1641 * have an MSI-X table of ABC--- (where - means no vector assigned).
1642 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1643 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1644 * be freed back to the system.  This device will also have valid
1645 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1646 *
1647 * In any case, the SYS_RES_IRQ rid X will always map to the message
1648 * at MSI-X table index X - 1 and will only be valid if a vector is
1649 * assigned to that table entry.
1650 */
1651int
1652pci_remap_msix_method(device_t dev, device_t child, int count,
1653    const u_int *vectors)
1654{
1655	struct pci_devinfo *dinfo = device_get_ivars(child);
1656	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1657	struct resource_list_entry *rle;
1658	int i, irq, j, *used;
1659
1660	/*
1661	 * Have to have at least one message in the table but the
1662	 * table can't be bigger than the actual MSI-X table in the
1663	 * device.
1664	 */
1665	if (count == 0 || count > msix->msix_msgnum)
1666		return (EINVAL);
1667
1668	/* Sanity check the vectors. */
1669	for (i = 0; i < count; i++)
1670		if (vectors[i] > msix->msix_alloc)
1671			return (EINVAL);
1672
1673	/*
1674	 * Make sure there aren't any holes in the vectors to be used.
1675	 * It's a big pain to support it, and it doesn't really make
1676	 * sense anyway.  Also, at least one vector must be used.
1677	 */
1678	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1679	    M_ZERO);
1680	for (i = 0; i < count; i++)
1681		if (vectors[i] != 0)
1682			used[vectors[i] - 1] = 1;
1683	for (i = 0; i < msix->msix_alloc - 1; i++)
1684		if (used[i] == 0 && used[i + 1] == 1) {
1685			free(used, M_DEVBUF);
1686			return (EINVAL);
1687		}
1688	if (used[0] != 1) {
1689		free(used, M_DEVBUF);
1690		return (EINVAL);
1691	}
1692
1693	/* Make sure none of the resources are allocated. */
1694	for (i = 0; i < msix->msix_table_len; i++) {
1695		if (msix->msix_table[i].mte_vector == 0)
1696			continue;
1697		if (msix->msix_table[i].mte_handlers > 0)
1698			return (EBUSY);
1699		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1700		KASSERT(rle != NULL, ("missing resource"));
1701		if (rle->res != NULL)
1702			return (EBUSY);
1703	}
1704
1705	/* Free the existing resource list entries. */
1706	for (i = 0; i < msix->msix_table_len; i++) {
1707		if (msix->msix_table[i].mte_vector == 0)
1708			continue;
1709		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1710	}
1711
1712	/*
1713	 * Build the new virtual table keeping track of which vectors are
1714	 * used.
1715	 */
1716	free(msix->msix_table, M_DEVBUF);
1717	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1718	    M_DEVBUF, M_WAITOK | M_ZERO);
1719	for (i = 0; i < count; i++)
1720		msix->msix_table[i].mte_vector = vectors[i];
1721	msix->msix_table_len = count;
1722
1723	/* Free any unused IRQs and resize the vectors array if necessary. */
1724	j = msix->msix_alloc - 1;
1725	if (used[j] == 0) {
1726		struct msix_vector *vec;
1727
1728		while (used[j] == 0) {
1729			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1730			    msix->msix_vectors[j].mv_irq);
1731			j--;
1732		}
1733		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1734		    M_WAITOK);
1735		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1736		    (j + 1));
1737		free(msix->msix_vectors, M_DEVBUF);
1738		msix->msix_vectors = vec;
1739		msix->msix_alloc = j + 1;
1740	}
1741	free(used, M_DEVBUF);
1742
1743	/* Map the IRQs onto the rids. */
1744	for (i = 0; i < count; i++) {
1745		if (vectors[i] == 0)
1746			continue;
1747		irq = msix->msix_vectors[vectors[i]].mv_irq;
1748		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1749		    irq, 1);
1750	}
1751
1752	if (bootverbose) {
1753		device_printf(child, "Remapped MSI-X IRQs as: ");
1754		for (i = 0; i < count; i++) {
1755			if (i != 0)
1756				printf(", ");
1757			if (vectors[i] == 0)
1758				printf("---");
1759			else
1760				printf("%d",
1761				    msix->msix_vectors[vectors[i]].mv_irq);
1762		}
1763		printf("\n");
1764	}
1765
1766	return (0);
1767}
1768
1769static int
1770pci_release_msix(device_t dev, device_t child)
1771{
1772	struct pci_devinfo *dinfo = device_get_ivars(child);
1773	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1774	struct resource_list_entry *rle;
1775	int i;
1776
1777	/* Do we have any messages to release? */
1778	if (msix->msix_alloc == 0)
1779		return (ENODEV);
1780
1781	/* Make sure none of the resources are allocated. */
1782	for (i = 0; i < msix->msix_table_len; i++) {
1783		if (msix->msix_table[i].mte_vector == 0)
1784			continue;
1785		if (msix->msix_table[i].mte_handlers > 0)
1786			return (EBUSY);
1787		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1788		KASSERT(rle != NULL, ("missing resource"));
1789		if (rle->res != NULL)
1790			return (EBUSY);
1791	}
1792
1793	/* Update control register to disable MSI-X. */
1794	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1795	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1796	    msix->msix_ctrl, 2);
1797
1798	/* Free the resource list entries. */
1799	for (i = 0; i < msix->msix_table_len; i++) {
1800		if (msix->msix_table[i].mte_vector == 0)
1801			continue;
1802		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1803	}
1804	free(msix->msix_table, M_DEVBUF);
1805	msix->msix_table_len = 0;
1806
1807	/* Release the IRQs. */
1808	for (i = 0; i < msix->msix_alloc; i++)
1809		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1810		    msix->msix_vectors[i].mv_irq);
1811	free(msix->msix_vectors, M_DEVBUF);
1812	msix->msix_alloc = 0;
1813	return (0);
1814}
1815
1816/*
1817 * Return the max supported MSI-X messages this device supports.
1818 * Basically, assuming the MD code can alloc messages, this function
1819 * should return the maximum value that pci_alloc_msix() can return.
1820 * Thus, it is subject to the tunables, etc.
1821 */
1822int
1823pci_msix_count_method(device_t dev, device_t child)
1824{
1825	struct pci_devinfo *dinfo = device_get_ivars(child);
1826	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1827
1828	if (pci_do_msix && msix->msix_location != 0)
1829		return (msix->msix_msgnum);
1830	return (0);
1831}
1832
1833int
1834pci_msix_pba_bar_method(device_t dev, device_t child)
1835{
1836	struct pci_devinfo *dinfo = device_get_ivars(child);
1837	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1838
1839	if (pci_do_msix && msix->msix_location != 0)
1840		return (msix->msix_pba_bar);
1841	return (-1);
1842}
1843
1844int
1845pci_msix_table_bar_method(device_t dev, device_t child)
1846{
1847	struct pci_devinfo *dinfo = device_get_ivars(child);
1848	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1849
1850	if (pci_do_msix && msix->msix_location != 0)
1851		return (msix->msix_table_bar);
1852	return (-1);
1853}
1854
1855/*
1856 * HyperTransport MSI mapping control
1857 */
1858void
1859pci_ht_map_msi(device_t dev, uint64_t addr)
1860{
1861	struct pci_devinfo *dinfo = device_get_ivars(dev);
1862	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1863
1864	if (!ht->ht_msimap)
1865		return;
1866
1867	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1868	    ht->ht_msiaddr >> 20 == addr >> 20) {
1869		/* Enable MSI -> HT mapping. */
1870		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1871		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1872		    ht->ht_msictrl, 2);
1873	}
1874
1875	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1876		/* Disable MSI -> HT mapping. */
1877		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1878		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1879		    ht->ht_msictrl, 2);
1880	}
1881}
1882
1883int
1884pci_get_max_read_req(device_t dev)
1885{
1886	struct pci_devinfo *dinfo = device_get_ivars(dev);
1887	int cap;
1888	uint16_t val;
1889
1890	cap = dinfo->cfg.pcie.pcie_location;
1891	if (cap == 0)
1892		return (0);
1893	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1894	val &= PCIEM_CTL_MAX_READ_REQUEST;
1895	val >>= 12;
1896	return (1 << (val + 7));
1897}
1898
1899int
1900pci_set_max_read_req(device_t dev, int size)
1901{
1902	struct pci_devinfo *dinfo = device_get_ivars(dev);
1903	int cap;
1904	uint16_t val;
1905
1906	cap = dinfo->cfg.pcie.pcie_location;
1907	if (cap == 0)
1908		return (0);
1909	if (size < 128)
1910		size = 128;
1911	if (size > 4096)
1912		size = 4096;
1913	size = (1 << (fls(size) - 1));
1914	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1915	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1916	val |= (fls(size) - 8) << 12;
1917	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1918	return (size);
1919}
1920
1921uint32_t
1922pcie_read_config(device_t dev, int reg, int width)
1923{
1924	struct pci_devinfo *dinfo = device_get_ivars(dev);
1925	int cap;
1926
1927	cap = dinfo->cfg.pcie.pcie_location;
1928	if (cap == 0) {
1929		if (width == 2)
1930			return (0xffff);
1931		return (0xffffffff);
1932	}
1933
1934	return (pci_read_config(dev, cap + reg, width));
1935}
1936
1937void
1938pcie_write_config(device_t dev, int reg, uint32_t value, int width)
1939{
1940	struct pci_devinfo *dinfo = device_get_ivars(dev);
1941	int cap;
1942
1943	cap = dinfo->cfg.pcie.pcie_location;
1944	if (cap == 0)
1945		return;
1946	pci_write_config(dev, cap + reg, value, width);
1947}
1948
1949/*
1950 * Adjusts a PCI-e capability register by clearing the bits in mask
1951 * and setting the bits in (value & mask).  Bits not set in mask are
1952 * not adjusted.
1953 *
1954 * Returns the old value on success or all ones on failure.
1955 */
1956uint32_t
1957pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
1958    int width)
1959{
1960	struct pci_devinfo *dinfo = device_get_ivars(dev);
1961	uint32_t old, new;
1962	int cap;
1963
1964	cap = dinfo->cfg.pcie.pcie_location;
1965	if (cap == 0) {
1966		if (width == 2)
1967			return (0xffff);
1968		return (0xffffffff);
1969	}
1970
1971	old = pci_read_config(dev, cap + reg, width);
1972	new = old & ~mask;
1973	new |= (value & mask);
1974	pci_write_config(dev, cap + reg, new, width);
1975	return (old);
1976}
1977
1978/*
1979 * Support for MSI message signalled interrupts.
1980 */
1981void
1982pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1983    uint16_t data)
1984{
1985	struct pci_devinfo *dinfo = device_get_ivars(child);
1986	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1987
1988	/* Write data and address values. */
1989	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
1990	    address & 0xffffffff, 4);
1991	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1992		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1993		    address >> 32, 4);
1994		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
1995		    data, 2);
1996	} else
1997		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
1998		    2);
1999
2000	/* Enable MSI in the control register. */
2001	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2002	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2003	    msi->msi_ctrl, 2);
2004
2005	/* Enable MSI -> HT mapping. */
2006	pci_ht_map_msi(child, address);
2007}
2008
2009void
2010pci_disable_msi_method(device_t dev, device_t child)
2011{
2012	struct pci_devinfo *dinfo = device_get_ivars(child);
2013	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2014
2015	/* Disable MSI -> HT mapping. */
2016	pci_ht_map_msi(child, 0);
2017
2018	/* Disable MSI in the control register. */
2019	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2020	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2021	    msi->msi_ctrl, 2);
2022}
2023
2024/*
2025 * Restore MSI registers during resume.  If MSI is enabled then
2026 * restore the data and address registers in addition to the control
2027 * register.
2028 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* MSI was enabled: replay the saved address and data. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored even when MSI is disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
2054
2055static int
2056pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2057{
2058	struct pci_devinfo *dinfo = device_get_ivars(dev);
2059	pcicfgregs *cfg = &dinfo->cfg;
2060	struct resource_list_entry *rle;
2061	struct msix_table_entry *mte;
2062	struct msix_vector *mv;
2063	uint64_t addr;
2064	uint32_t data;
2065	int error, i, j;
2066
2067	/*
2068	 * Handle MSI first.  We try to find this IRQ among our list
2069	 * of MSI IRQs.  If we find it, we request updated address and
2070	 * data registers and apply the results.
2071	 */
2072	if (cfg->msi.msi_alloc > 0) {
2073
2074		/* If we don't have any active handlers, nothing to do. */
2075		if (cfg->msi.msi_handlers == 0)
2076			return (0);
2077		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2078			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2079			    i + 1);
2080			if (rle->start == irq) {
2081				error = PCIB_MAP_MSI(device_get_parent(bus),
2082				    dev, irq, &addr, &data);
2083				if (error)
2084					return (error);
2085				pci_disable_msi(dev);
2086				dinfo->cfg.msi.msi_addr = addr;
2087				dinfo->cfg.msi.msi_data = data;
2088				pci_enable_msi(dev, addr, data);
2089				return (0);
2090			}
2091		}
2092		return (ENOENT);
2093	}
2094
2095	/*
2096	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2097	 * we request the updated mapping info.  If that works, we go
2098	 * through all the slots that use this IRQ and update them.
2099	 */
2100	if (cfg->msix.msix_alloc > 0) {
2101		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2102			mv = &cfg->msix.msix_vectors[i];
2103			if (mv->mv_irq == irq) {
2104				error = PCIB_MAP_MSI(device_get_parent(bus),
2105				    dev, irq, &addr, &data);
2106				if (error)
2107					return (error);
2108				mv->mv_address = addr;
2109				mv->mv_data = data;
2110				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2111					mte = &cfg->msix.msix_table[j];
2112					if (mte->mte_vector != i + 1)
2113						continue;
2114					if (mte->mte_handlers == 0)
2115						continue;
2116					pci_mask_msix(dev, j);
2117					pci_enable_msix(dev, j, addr, data);
2118					pci_unmask_msix(dev, j);
2119				}
2120			}
2121		}
2122		return (ENOENT);
2123	}
2124
2125	return (ENOENT);
2126}
2127
2128/*
2129 * Returns true if the specified device is blacklisted because MSI
2130 * doesn't work.
2131 */
2132int
2133pci_msi_device_blacklisted(device_t dev)
2134{
2135
2136	if (!pci_honor_msi_blacklist)
2137		return (0);
2138
2139	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2140}
2141
2142/*
2143 * Determine if MSI is blacklisted globally on this system.  Currently,
2144 * we just check for blacklisted chipsets as represented by the
2145 * host-PCI bridge at device 0:0:0.  In the future, it may become
2146 * necessary to check other system attributes, such as the kenv values
2147 * that give the motherboard manufacturer and model number.
2148 */
2149static int
2150pci_msi_blacklisted(void)
2151{
2152	device_t dev;
2153
2154	if (!pci_honor_msi_blacklist)
2155		return (0);
2156
2157	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2158	if (!(pcie_chipset || pcix_chipset)) {
2159		if (vm_guest != VM_GUEST_NO) {
2160			/*
2161			 * Whitelist older chipsets in virtual
2162			 * machines known to support MSI.
2163			 */
2164			dev = pci_find_bsf(0, 0, 0);
2165			if (dev != NULL)
2166				return (!pci_has_quirk(pci_get_devid(dev),
2167					PCI_QUIRK_ENABLE_MSI_VM));
2168		}
2169		return (1);
2170	}
2171
2172	dev = pci_find_bsf(0, 0, 0);
2173	if (dev != NULL)
2174		return (pci_msi_device_blacklisted(dev));
2175	return (0);
2176}
2177
2178/*
2179 * Returns true if the specified device is blacklisted because MSI-X
2180 * doesn't work.  Note that this assumes that if MSI doesn't work,
2181 * MSI-X doesn't either.
2182 */
2183int
2184pci_msix_device_blacklisted(device_t dev)
2185{
2186
2187	if (!pci_honor_msi_blacklist)
2188		return (0);
2189
2190	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2191		return (1);
2192
2193	return (pci_msi_device_blacklisted(dev));
2194}
2195
2196/*
2197 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2198 * is blacklisted, assume that MSI-X is as well.  Check for additional
2199 * chipsets where MSI works but MSI-X does not.
2200 */
2201static int
2202pci_msix_blacklisted(void)
2203{
2204	device_t dev;
2205
2206	if (!pci_honor_msi_blacklist)
2207		return (0);
2208
2209	dev = pci_find_bsf(0, 0, 0);
2210	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2211	    PCI_QUIRK_DISABLE_MSIX))
2212		return (1);
2213
2214	return (pci_msi_blacklisted());
2215}
2216
2217/*
2218 * Attempt to allocate *count MSI messages.  The actual number allocated is
2219 * returned in *count.  After this function returns, each message will be
2220 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2221 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	/* (rid 0 is the legacy INTx interrupt resource.) */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		/* (Halving keeps the count a power of 2.) */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	/* ffs(actual) - 1 == log2(actual) since actual is a power of 2. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2340
2341/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	/* (pci_release_msix() returns ENODEV if MSI-X is not in use.) */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQs so the bridge can release them below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2389
2390/*
2391 * Return the max supported MSI messages this device supports.
2392 * Basically, assuming the MD code can alloc messages, this function
2393 * should return the maximum value that pci_alloc_msi() can return.
2394 * Thus, it is subject to the tunables, etc.
2395 */
2396int
2397pci_msi_count_method(device_t dev, device_t child)
2398{
2399	struct pci_devinfo *dinfo = device_get_ivars(child);
2400	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2401
2402	if (pci_do_msi && msi->msi_location != 0)
2403		return (msi->msi_msgnum);
2404	return (0);
2405}
2406
2407/* free pcicfgregs structure and all depending data structures */
2408
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	if (dinfo->cfg.vpd.vpd_reg) {
		/* Free the VPD identifier plus the RO and RW keyword arrays. */
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the BAR records; the list head itself dies with dinfo. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2440
2441/*
2442 * PCI power manangement
2443 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	/* Power management capability is required. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Read the current status and clear the power-state field. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; refuse if not advertised. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is optional; refuse if not advertised. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2515
2516int
2517pci_get_powerstate_method(device_t dev, device_t child)
2518{
2519	struct pci_devinfo *dinfo = device_get_ivars(child);
2520	pcicfgregs *cfg = &dinfo->cfg;
2521	uint16_t status;
2522	int result;
2523
2524	if (cfg->pp.pp_cap != 0) {
2525		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2526		switch (status & PCIM_PSTAT_DMASK) {
2527		case PCIM_PSTAT_D0:
2528			result = PCI_POWERSTATE_D0;
2529			break;
2530		case PCIM_PSTAT_D1:
2531			result = PCI_POWERSTATE_D1;
2532			break;
2533		case PCIM_PSTAT_D2:
2534			result = PCI_POWERSTATE_D2;
2535			break;
2536		case PCIM_PSTAT_D3:
2537			result = PCI_POWERSTATE_D3;
2538			break;
2539		default:
2540			result = PCI_POWERSTATE_UNKNOWN;
2541			break;
2542		}
2543	} else {
2544		/* No support, device is always at D0 */
2545		result = PCI_POWERSTATE_D0;
2546	}
2547	return (result);
2548}
2549
2550/*
2551 * Some convenience functions for PCI device drivers.
2552 */
2553
2554static __inline void
2555pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2556{
2557	uint16_t	command;
2558
2559	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2560	command |= bit;
2561	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2562}
2563
2564static __inline void
2565pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2566{
2567	uint16_t	command;
2568
2569	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2570	command &= ~bit;
2571	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2572}
2573
/*
 * Set the bus-master enable bit in the child's command register so the
 * device may initiate DMA.  Always succeeds.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2580
/*
 * Clear the bus-master enable bit in the child's command register,
 * preventing the device from initiating DMA.  Always succeeds.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2587
2588int
2589pci_enable_io_method(device_t dev, device_t child, int space)
2590{
2591	uint16_t bit;
2592
2593	switch(space) {
2594	case SYS_RES_IOPORT:
2595		bit = PCIM_CMD_PORTEN;
2596		break;
2597	case SYS_RES_MEMORY:
2598		bit = PCIM_CMD_MEMEN;
2599		break;
2600	default:
2601		return (EINVAL);
2602	}
2603	pci_set_command_bit(dev, child, bit);
2604	return (0);
2605}
2606
2607int
2608pci_disable_io_method(device_t dev, device_t child, int space)
2609{
2610	uint16_t bit;
2611
2612	switch(space) {
2613	case SYS_RES_IOPORT:
2614		bit = PCIM_CMD_PORTEN;
2615		break;
2616	case SYS_RES_MEMORY:
2617		bit = PCIM_CMD_MEMEN;
2618		break;
2619	default:
2620		return (EINVAL);
2621	}
2622	pci_clear_command_bit(dev, child, bit);
2623	return (0);
2624}
2625
2626/*
2627 * New style pci driver.  Parent device is either a pci-host-bridge or a
2628 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2629 */
2630
/*
 * Print the standard config-header fields and a summary of the power
 * management, MSI and MSI-X capabilities of a device when booting
 * verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Report supported D-states and the current one. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2687
2688static int
2689pci_porten(device_t dev)
2690{
2691	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2692}
2693
2694static int
2695pci_memen(device_t dev)
2696{
2697	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2698}
2699
/*
 * Read and size a BAR: the BAR's current value is returned in *mapp
 * and the readback after writing all 1's (the sizing probe) in
 * *testvalp.  The BAR and command register are restored before
 * returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2763
/*
 * Program BAR 'pm' with 'base' and refresh the cached pm_value by
 * reading the register back, so pm_value reflects what the device
 * actually latched.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2784
2785struct pci_map *
2786pci_find_bar(device_t dev, int reg)
2787{
2788	struct pci_devinfo *dinfo;
2789	struct pci_map *pm;
2790
2791	dinfo = device_get_ivars(dev);
2792	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2793		if (pm->pm_reg == reg)
2794			return (pm);
2795	}
2796	return (NULL);
2797}
2798
2799int
2800pci_bar_enabled(device_t dev, struct pci_map *pm)
2801{
2802	struct pci_devinfo *dinfo;
2803	uint16_t cmd;
2804
2805	dinfo = device_get_ivars(dev);
2806	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2807	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2808		return (0);
2809	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2810	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2811		return ((cmd & PCIM_CMD_MEMEN) != 0);
2812	else
2813		return ((cmd & PCIM_CMD_PORTEN) != 0);
2814}
2815
/*
 * Allocate a record for BAR 'reg' and insert it into the device's map
 * list, which is kept sorted by register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list was empty; append in that case. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2840
/*
 * Rewrite every saved BAR value (pm_value) back to the hardware,
 * writing the high 32 bits separately for 64-bit BARs.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* The device ROM BAR is always a 32-bit memory BAR. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2860
2861/*
2862 * Add a resource based on a pci map register. Return 1 if the map
2863 * register is a 32bit map register or 2 if it is a 64bit register.
2864 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR length in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The BAR decodes above what a u_long can address here. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Reprogram the BAR with whatever range was reserved. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
3032
3033/*
3034 * For ATA devices we need to decide early what addressing mode to use.
3035 * Legacy demands that the primary and secondary ATA ports sits on the
3036 * same addresses that old ISA hardware did. This dictates that we use
3037 * those addresses and ignore the BAR's if we cannot set PCI native
3038 * addressing mode.
3039 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: use BAR(0)/BAR(1). */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Primary channel in compat mode: fixed legacy ISA ports. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: use BAR(2)/BAR(3). */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Secondary channel in compat mode: fixed legacy ISA ports. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/*
	 * BAR(4) and BAR(5) are always mapped normally (BAR(4) is
	 * typically the bus-master DMA register block).
	 */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
3093
/*
 * Determine the legacy INTx IRQ for a device and record it both in the
 * device's intline config register and as the rid 0 SYS_RES_IRQ entry.
 * A hw.pci<domain>.<bus>.<slot>.INT<pin>.irq tunable overrides everything;
 * otherwise the intline register or a PCI_ASSIGN_INTERRUPT routing from
 * the parent bridge is used.  If force_route is set, routing is attempted
 * even when intline already looks valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the valid IRQ range (1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3141
3142/* Perform early OHCI takeover from SMM. */
3143static void
3144ohci_early_takeover(device_t self)
3145{
3146	struct resource *res;
3147	uint32_t ctl;
3148	int rid;
3149	int i;
3150
3151	rid = PCIR_BAR(0);
3152	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3153	if (res == NULL)
3154		return;
3155
3156	ctl = bus_read_4(res, OHCI_CONTROL);
3157	if (ctl & OHCI_IR) {
3158		if (bootverbose)
3159			printf("ohci early: "
3160			    "SMM active, request owner change\n");
3161		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3162		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3163			DELAY(1000);
3164			ctl = bus_read_4(res, OHCI_CONTROL);
3165		}
3166		if (ctl & OHCI_IR) {
3167			if (bootverbose)
3168				printf("ohci early: "
3169				    "SMM does not respond, resetting\n");
3170			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3171		}
3172		/* Disable interrupts */
3173		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3174	}
3175
3176	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3177}
3178
3179/* Perform early UHCI takeover from SMM. */
3180static void
3181uhci_early_takeover(device_t self)
3182{
3183	struct resource *res;
3184	int rid;
3185
3186	/*
3187	 * Set the PIRQD enable bit and switch off all the others. We don't
3188	 * want legacy support to interfere with us XXX Does this also mean
3189	 * that the BIOS won't touch the keyboard anymore if it is connected
3190	 * to the ports of the root hub?
3191	 */
3192	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3193
3194	/* Disable interrupts */
3195	rid = PCI_UHCI_BASE_REG;
3196	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3197	if (res != NULL) {
3198		bus_write_2(res, UHCI_INTR, 0);
3199		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3200	}
3201}
3202
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register BAR; bail if unavailable. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the Extended Capabilities list in PCI config space,
	 * starting at the offset given by HCCPARAMS, looking for the
	 * Legacy Support capability.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* BIOS semaphore set means the BIOS owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms for the BIOS to release ownership. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3258
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register BAR; bail if unavailable. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Sentinel: XHCI_XECP_NEXT(-1) is nonzero, so the loop runs once. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the Extended Capability list in MMIO space (offsets are in
	 * dwords, hence the << 2), looking for USB Legacy Support.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* BIOS semaphore set means the BIOS owns the controller. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3320
3321#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a PCI-PCI or
 * CardBus bridge.  If the range is invalid or cannot be reserved from
 * the parent (or pci_clear_buses is set), the secbus/subbus registers
 * are cleared so the range will be renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary/subordinate bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/*
		 * Derive the bus range from the chipset's private
		 * register at 0x41 when it is programmed.
		 */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Only apply on matching Compal "08A0" planars per SMBIOS. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3426
/*
 * Allocate the secondary bus number range (rid 0, PCI_RES_BUS) for a
 * bridge child.  If the range has not been reserved yet, reserve it
 * lazily and program the bridge's secbus/subbus registers to match.
 * Returns the allocated resource or NULL on failure or for non-bridges.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary/subordinate bus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* The secondary bus range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve lazily, then program the bridge to match. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3477#endif
3478
/*
 * Populate a new child device's resource list: map its BARs (with
 * special handling for legacy-mode ATA controllers and quirked
 * registers), assign its INTx interrupt, perform early USB controller
 * takeover from SMM, and reserve bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns the BAR width in dwords (1 or 2). */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3560
3561static struct pci_devinfo *
3562pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3563    int slot, int func, size_t dinfo_size)
3564{
3565	struct pci_devinfo *dinfo;
3566
3567	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3568	if (dinfo != NULL)
3569		pci_add_child(dev, dinfo);
3570
3571	return (dinfo);
3572}
3573
/*
 * Scan every slot/function on a PCI bus and add a child device for each
 * function that responds.  ARI is enabled (if possible) before the full
 * scan since it changes the legal slot/function space.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func drops back to 0 after slot 0 has been handled. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots that return an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan all functions only on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3620
/*
 * Create the newbus child device for a discovered PCI function and set
 * up its state: ivars, resource list, a saved copy of its config
 * registers (saved before restore so a consistent snapshot exists),
 * and its BAR/interrupt resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3633
/*
 * Default implementation of the PCI_CHILD_ADDED bus method: a no-op
 * hook that subclasses of the PCI bus driver may override.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3639
/*
 * Device probe method: claim any PCI bus with a generic priority so
 * that more specific subclass drivers can take precedence.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3649
/*
 * Common attach logic shared by pci(4) and its subclasses: reserve this
 * bus's own bus number (when bus number resources are enabled) and
 * establish the DMA tag that child devices inherit.  Returns 0 on
 * success or ENXIO if the bus number cannot be allocated.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create a boundary-constrained DMA tag at the top-level
	 * PCI bus (one whose grandparent is not itself a PCI bus);
	 * nested buses fall through and inherit the parent's tag.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3696
/*
 * Device attach method: perform common setup, then enumerate and attach
 * all children on this bus.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3717
#ifdef PCI_RES_BUS
/*
 * Device detach method: detach all children first, then give back the
 * bus number reserved in pci_attach_common().
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *softc;
	int rv;

	rv = bus_generic_detach(dev);
	if (rv != 0)
		return (rv);

	softc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, softc->sc_bus));
}
#endif
3732
3733static void
3734pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3735    int state)
3736{
3737	device_t child, pcib;
3738	int dstate, i;
3739
3740	/*
3741	 * Set the device to the given state.  If the firmware suggests
3742	 * a different power state, use it instead.  If power management
3743	 * is not present, the firmware is responsible for managing
3744	 * device power.  Skip children who aren't attached since they
3745	 * are handled separately.
3746	 */
3747	pcib = device_get_parent(dev);
3748	for (i = 0; i < numdevs; i++) {
3749		child = devlist[i];
3750		dstate = state;
3751		if (device_is_attached(child) &&
3752		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3753			pci_set_powerstate(child, dstate);
3754	}
3755}
3756
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, and then (optionally) power them down to D3.  The config
 * save must happen before the devices lose power.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3788
/*
 * Bus resume method: power children back up to D0, restore their saved
 * config space, then resume them in two passes so that critical device
 * classes (display, memory, bridges, base peripherals) come back first.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-snapshot devices that have no driver attached. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not handled above. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3844
/*
 * Locate the preloaded PCI vendor database (loaded by the boot loader
 * as type "pci_vendor_data") and publish its address and size.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this writes at index [size], one
			 * byte past the fetched length — presumably the
			 * preload area guarantees that slack; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3864
3865void
3866pci_driver_added(device_t dev, driver_t *driver)
3867{
3868	int numdevs;
3869	device_t *devlist;
3870	device_t child;
3871	struct pci_devinfo *dinfo;
3872	int i;
3873
3874	if (bootverbose)
3875		device_printf(dev, "driver added\n");
3876	DEVICE_IDENTIFY(driver, dev);
3877	if (device_get_children(dev, &devlist, &numdevs) != 0)
3878		return;
3879	for (i = 0; i < numdevs; i++) {
3880		child = devlist[i];
3881		if (device_get_state(child) != DS_NOTPRESENT)
3882			continue;
3883		dinfo = device_get_ivars(child);
3884		pci_print_verbose(dinfo);
3885		if (bootverbose)
3886			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3887		pci_cfg_restore(child, dinfo);
3888		if (device_probe_and_attach(child) != 0)
3889			pci_child_detached(dev, child);
3890	}
3891	free(devlist, M_TEMP);
3892}
3893
/*
 * Bus setup_intr method.  Installs the handler via the generic bus code
 * and then, for direct children, performs PCI-specific bookkeeping:
 * rid 0 means legacy INTx (re-enable INTx decoding); a nonzero rid is
 * an MSI or MSI-X vector, which is lazily mapped through the parent
 * bridge and enabled on first use.  On mapping failure the handler is
 * torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI address/data pair on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in hardware when the first handler arrives. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based indexes into the table. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the MSI-X vector's address/data on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3993
3994int
3995pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3996    void *cookie)
3997{
3998	struct msix_table_entry *mte;
3999	struct resource_list_entry *rle;
4000	struct pci_devinfo *dinfo;
4001	int error, rid;
4002
4003	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4004		return (EINVAL);
4005
4006	/* If this isn't a direct child, just bail out */
4007	if (device_get_parent(child) != dev)
4008		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4009
4010	rid = rman_get_rid(irq);
4011	if (rid == 0) {
4012		/* Mask INTx */
4013		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4014	} else {
4015		/*
4016		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4017		 * decrement the appropriate handlers count and mask the
4018		 * MSI-X message, or disable MSI messages if the count
4019		 * drops to 0.
4020		 */
4021		dinfo = device_get_ivars(child);
4022		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4023		if (rle->res != irq)
4024			return (EINVAL);
4025		if (dinfo->cfg.msi.msi_alloc > 0) {
4026			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4027			    ("MSI-X index too high"));
4028			if (dinfo->cfg.msi.msi_handlers == 0)
4029				return (EINVAL);
4030			dinfo->cfg.msi.msi_handlers--;
4031			if (dinfo->cfg.msi.msi_handlers == 0)
4032				pci_disable_msi(child);
4033		} else {
4034			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4035			    ("No MSI or MSI-X interrupts allocated"));
4036			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4037			    ("MSI-X index too high"));
4038			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4039			if (mte->mte_handlers == 0)
4040				return (EINVAL);
4041			mte->mte_handlers--;
4042			if (mte->mte_handlers == 0)
4043				pci_mask_msix(child, rid - 1);
4044		}
4045	}
4046	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4047	if (rid > 0)
4048		KASSERT(error == 0,
4049		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4050	return (error);
4051}
4052
4053int
4054pci_print_child(device_t dev, device_t child)
4055{
4056	struct pci_devinfo *dinfo;
4057	struct resource_list *rl;
4058	int retval = 0;
4059
4060	dinfo = device_get_ivars(child);
4061	rl = &dinfo->resources;
4062
4063	retval += bus_print_child_header(dev, child);
4064
4065	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4066	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4067	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4068	if (device_get_flags(dev))
4069		retval += printf(" flags %#x", device_get_flags(dev));
4070
4071	retval += printf(" at device %d.%d", pci_get_slot(child),
4072	    pci_get_function(child));
4073
4074	retval += bus_print_child_domain(dev, child);
4075	retval += bus_print_child_footer(dev, child);
4076
4077	return (retval);
4078}
4079
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() when
 * a device has no entry in the loaded vendor database.  A subclass of
 * -1 is the catch-all entry for the whole class.  "report" selects
 * whether the device is announced unconditionally (1) or only when
 * booting verbosely (0).
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	/* Sentinel: desc == NULL terminates the table. */
	{0, 0, 0,		NULL}
};
4174
4175void
4176pci_probe_nomatch(device_t dev, device_t child)
4177{
4178	int i, report;
4179	const char *cp, *scp;
4180	char *device;
4181
4182	/*
4183	 * Look for a listing for this device in a loaded device database.
4184	 */
4185	report = 1;
4186	if ((device = pci_describe_device(child)) != NULL) {
4187		device_printf(dev, "<%s>", device);
4188		free(device, M_DEVBUF);
4189	} else {
4190		/*
4191		 * Scan the class/subclass descriptions for a general
4192		 * description.
4193		 */
4194		cp = "unknown";
4195		scp = NULL;
4196		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4197			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4198				if (pci_nomatch_tab[i].subclass == -1) {
4199					cp = pci_nomatch_tab[i].desc;
4200					report = pci_nomatch_tab[i].report;
4201				} else if (pci_nomatch_tab[i].subclass ==
4202				    pci_get_subclass(child)) {
4203					scp = pci_nomatch_tab[i].desc;
4204					report = pci_nomatch_tab[i].report;
4205				}
4206			}
4207		}
4208		if (report || bootverbose) {
4209			device_printf(dev, "<%s%s%s>",
4210			    cp ? cp : "",
4211			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4212			    scp ? scp : "");
4213		}
4214	}
4215	if (report || bootverbose) {
4216		printf(" at device %d.%d (no driver attached)\n",
4217		    pci_get_slot(child), pci_get_function(child));
4218	}
4219	pci_cfg_save(child, device_get_ivars(child), 1);
4220}
4221
4222void
4223pci_child_detached(device_t dev, device_t child)
4224{
4225	struct pci_devinfo *dinfo;
4226	struct resource_list *rl;
4227
4228	dinfo = device_get_ivars(child);
4229	rl = &dinfo->resources;
4230
4231	/*
4232	 * Have to deallocate IRQs before releasing any MSI messages and
4233	 * have to release MSI messages before deallocating any memory
4234	 * BARs.
4235	 */
4236	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4237		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4238	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4239		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4240		(void)pci_release_msi(child);
4241	}
4242	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4243		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4244	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4245		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4246#ifdef PCI_RES_BUS
4247	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4248		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4249#endif
4250
4251	pci_cfg_save(child, dinfo, 1);
4252}
4253
4254/*
4255 * Parse the PCI device database, if loaded, and return a pointer to a
4256 * description of the device.
4257 *
4258 * The database is flat text formatted as follows:
4259 *
4260 * Any line not in a valid format is ignored.
4261 * Lines are terminated with newline '\n' characters.
4262 *
4263 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4264 * the vendor name.
4265 *
4266 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4267 * - devices cannot be listed without a corresponding VENDOR line.
4268 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4269 * another TAB, then the device name.
4270 */
4271
4272/*
4273 * Assuming (ptr) points to the beginning of a line in the database,
4274 * return the vendor or device and description of the next entry.
4275 * The value of (vendor) or (device) inappropriate for the entry type
4276 * is set to -1.  Returns nonzero at the end of the database.
4277 *
4278 * Note that this is slightly unrobust in the face of corrupt data;
4279 * we attempt to safeguard against this by spamming the end of the
4280 * database with a newline when we initialise.
4281 */
4282static int
4283pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4284{
4285	char	*cp = *ptr;
4286	int	left;
4287
4288	*device = -1;
4289	*vendor = -1;
4290	**desc = '\0';
4291	for (;;) {
4292		left = pci_vendordata_size - (cp - pci_vendordata);
4293		if (left <= 0) {
4294			*ptr = cp;
4295			return(1);
4296		}
4297
4298		/* vendor entry? */
4299		if (*cp != '\t' &&
4300		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4301			break;
4302		/* device entry? */
4303		if (*cp == '\t' &&
4304		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4305			break;
4306
4307		/* skip to next line */
4308		while (*cp != '\n' && left > 0) {
4309			cp++;
4310			left--;
4311		}
4312		if (*cp == '\n') {
4313			cp++;
4314			left--;
4315		}
4316	}
4317	/* skip to next line */
4318	while (*cp != '\n' && left > 0) {
4319		cp++;
4320		left--;
4321	}
4322	if (*cp == '\n' && left > 0)
4323		cp++;
4324	*ptr = cp;
4325	return(0);
4326}
4327
4328static char *
4329pci_describe_device(device_t dev)
4330{
4331	int	vendor, device;
4332	char	*desc, *vp, *dp, *line;
4333
4334	desc = vp = dp = NULL;
4335
4336	/*
4337	 * If we have no vendor data, we can't do anything.
4338	 */
4339	if (pci_vendordata == NULL)
4340		goto out;
4341
4342	/*
4343	 * Scan the vendor data looking for this device
4344	 */
4345	line = pci_vendordata;
4346	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4347		goto out;
4348	for (;;) {
4349		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4350			goto out;
4351		if (vendor == pci_get_vendor(dev))
4352			break;
4353	}
4354	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4355		goto out;
4356	for (;;) {
4357		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4358			*dp = 0;
4359			break;
4360		}
4361		if (vendor != -1) {
4362			*dp = 0;
4363			break;
4364		}
4365		if (device == pci_get_device(dev))
4366			break;
4367	}
4368	if (dp[0] == '\0')
4369		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4370	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4371	    NULL)
4372		sprintf(desc, "%s, %s", vp, dp);
4373out:
4374	if (vp != NULL)
4375		free(vp, M_DEVBUF);
4376	if (dp != NULL)
4377		free(dp, M_DEVBUF);
4378	return(desc);
4379}
4380
4381int
4382pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4383{
4384	struct pci_devinfo *dinfo;
4385	pcicfgregs *cfg;
4386
4387	dinfo = device_get_ivars(child);
4388	cfg = &dinfo->cfg;
4389
4390	switch (which) {
4391	case PCI_IVAR_ETHADDR:
4392		/*
4393		 * The generic accessor doesn't deal with failure, so
4394		 * we set the return value, then return an error.
4395		 */
4396		*((uint8_t **) result) = NULL;
4397		return (EINVAL);
4398	case PCI_IVAR_SUBVENDOR:
4399		*result = cfg->subvendor;
4400		break;
4401	case PCI_IVAR_SUBDEVICE:
4402		*result = cfg->subdevice;
4403		break;
4404	case PCI_IVAR_VENDOR:
4405		*result = cfg->vendor;
4406		break;
4407	case PCI_IVAR_DEVICE:
4408		*result = cfg->device;
4409		break;
4410	case PCI_IVAR_DEVID:
4411		*result = (cfg->device << 16) | cfg->vendor;
4412		break;
4413	case PCI_IVAR_CLASS:
4414		*result = cfg->baseclass;
4415		break;
4416	case PCI_IVAR_SUBCLASS:
4417		*result = cfg->subclass;
4418		break;
4419	case PCI_IVAR_PROGIF:
4420		*result = cfg->progif;
4421		break;
4422	case PCI_IVAR_REVID:
4423		*result = cfg->revid;
4424		break;
4425	case PCI_IVAR_INTPIN:
4426		*result = cfg->intpin;
4427		break;
4428	case PCI_IVAR_IRQ:
4429		*result = cfg->intline;
4430		break;
4431	case PCI_IVAR_DOMAIN:
4432		*result = cfg->domain;
4433		break;
4434	case PCI_IVAR_BUS:
4435		*result = cfg->bus;
4436		break;
4437	case PCI_IVAR_SLOT:
4438		*result = cfg->slot;
4439		break;
4440	case PCI_IVAR_FUNCTION:
4441		*result = cfg->func;
4442		break;
4443	case PCI_IVAR_CMDREG:
4444		*result = cfg->cmdreg;
4445		break;
4446	case PCI_IVAR_CACHELNSZ:
4447		*result = cfg->cachelnsz;
4448		break;
4449	case PCI_IVAR_MINGNT:
4450		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4451			*result = -1;
4452			return (EINVAL);
4453		}
4454		*result = cfg->mingnt;
4455		break;
4456	case PCI_IVAR_MAXLAT:
4457		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
4458			*result = -1;
4459			return (EINVAL);
4460		}
4461		*result = cfg->maxlat;
4462		break;
4463	case PCI_IVAR_LATTIMER:
4464		*result = cfg->lattimer;
4465		break;
4466	default:
4467		return (ENOENT);
4468	}
4469	return (0);
4470}
4471
4472int
4473pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4474{
4475	struct pci_devinfo *dinfo;
4476
4477	dinfo = device_get_ivars(child);
4478
4479	switch (which) {
4480	case PCI_IVAR_INTPIN:
4481		dinfo->cfg.intpin = value;
4482		return (0);
4483	case PCI_IVAR_ETHADDR:
4484	case PCI_IVAR_SUBVENDOR:
4485	case PCI_IVAR_SUBDEVICE:
4486	case PCI_IVAR_VENDOR:
4487	case PCI_IVAR_DEVICE:
4488	case PCI_IVAR_DEVID:
4489	case PCI_IVAR_CLASS:
4490	case PCI_IVAR_SUBCLASS:
4491	case PCI_IVAR_PROGIF:
4492	case PCI_IVAR_REVID:
4493	case PCI_IVAR_IRQ:
4494	case PCI_IVAR_DOMAIN:
4495	case PCI_IVAR_BUS:
4496	case PCI_IVAR_SLOT:
4497	case PCI_IVAR_FUNCTION:
4498		return (EINVAL);	/* disallow for now */
4499
4500	default:
4501		return (ENOENT);
4502	}
4503}
4504
4505#include "opt_ddb.h"
4506#ifdef DDB
4507#include <ddb/ddb.h>
4508#include <sys/cons.h>
4509
4510/*
4511 * List resources based on pci map registers, used for within ddb
4512 */
4513
/*
 * "show pciregs" DDB command: walk the global PCI device list
 * (pci_devq) and print one identification line per device from its
 * cached config registers.  Stops early if the pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Devices with no attached driver are numbered separately. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Class/subclass/progif and IDs are packed into single words. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4553#endif /* DDB */
4554
/*
 * Lazily reserve the backing resource for a BAR that was not reserved
 * (or failed to be reserved) earlier.  Sizes the BAR, validates that
 * the requested resource type matches the BAR type, overrides the
 * requested size/alignment with the BAR's real values, reserves the
 * range from our parent, and writes the assigned base address back
 * into the BAR.  Returns NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Back out the list entry we just added. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address the parent actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4652
/*
 * Allocate a resource on behalf of (child).  Requests from
 * grandchildren are passed straight up to our own parent.  For direct
 * children we route a legacy interrupt on demand, reserve BAR-backed
 * port/memory ranges lazily via pci_reserve_map(), and otherwise
 * satisfy the request from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All cases fall through to allocate from the resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4731
4732int
4733pci_release_resource(device_t dev, device_t child, int type, int rid,
4734    struct resource *r)
4735{
4736	struct pci_devinfo *dinfo;
4737	struct resource_list *rl;
4738	pcicfgregs *cfg;
4739
4740	if (device_get_parent(child) != dev)
4741		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4742		    type, rid, r));
4743
4744	dinfo = device_get_ivars(child);
4745	cfg = &dinfo->cfg;
4746#ifdef NEW_PCIB
4747	/*
4748	 * PCI-PCI bridge I/O window resources are not BARs.  For
4749	 * those allocations just pass the request up the tree.
4750	 */
4751	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4752	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4753		switch (rid) {
4754		case PCIR_IOBASEL_1:
4755		case PCIR_MEMBASE_1:
4756		case PCIR_PMBASEL_1:
4757			return (bus_generic_release_resource(dev, child, type,
4758			    rid, r));
4759		}
4760	}
4761#endif
4762
4763	rl = &dinfo->resources;
4764	return (resource_list_release(rl, dev, child, type, rid, r));
4765}
4766
4767int
4768pci_activate_resource(device_t dev, device_t child, int type, int rid,
4769    struct resource *r)
4770{
4771	struct pci_devinfo *dinfo;
4772	int error;
4773
4774	error = bus_generic_activate_resource(dev, child, type, rid, r);
4775	if (error)
4776		return (error);
4777
4778	/* Enable decoding in the command register when activating BARs. */
4779	if (device_get_parent(child) == dev) {
4780		/* Device ROMs need their decoding explicitly enabled. */
4781		dinfo = device_get_ivars(child);
4782		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4783			pci_write_bar(child, pci_find_bar(child, rid),
4784			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4785		switch (type) {
4786		case SYS_RES_IOPORT:
4787		case SYS_RES_MEMORY:
4788			error = PCI_ENABLE_IO(dev, child, type);
4789			break;
4790		}
4791	}
4792	return (error);
4793}
4794
4795int
4796pci_deactivate_resource(device_t dev, device_t child, int type,
4797    int rid, struct resource *r)
4798{
4799	struct pci_devinfo *dinfo;
4800	int error;
4801
4802	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4803	if (error)
4804		return (error);
4805
4806	/* Disable decoding for device ROMs. */
4807	if (device_get_parent(child) == dev) {
4808		dinfo = device_get_ivars(child);
4809		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4810			pci_write_bar(child, pci_find_bar(child, rid),
4811			    rman_get_start(r));
4812	}
4813	return (0);
4814}
4815
/*
 * Detach and destroy a child device, forcibly reclaiming any resources
 * it still holds.  Decoding is disabled before resources are released
 * so the hardware cannot respond to freed ranges, and the config-space
 * bookkeeping (dinfo) is freed last.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				/* A leaked resource: complain, then force it back. */
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4855
4856void
4857pci_delete_resource(device_t dev, device_t child, int type, int rid)
4858{
4859	struct pci_devinfo *dinfo;
4860	struct resource_list *rl;
4861	struct resource_list_entry *rle;
4862
4863	if (device_get_parent(child) != dev)
4864		return;
4865
4866	dinfo = device_get_ivars(child);
4867	rl = &dinfo->resources;
4868	rle = resource_list_find(rl, type, rid);
4869	if (rle == NULL)
4870		return;
4871
4872	if (rle->res) {
4873		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4874		    resource_list_busy(rl, type, rid)) {
4875			device_printf(dev, "delete_resource: "
4876			    "Resource still owned by child, oops. "
4877			    "(type=%d, rid=%d, addr=%lx)\n",
4878			    type, rid, rman_get_start(rle->res));
4879			return;
4880		}
4881		resource_list_unreserve(rl, dev, child, type, rid);
4882	}
4883	resource_list_delete(rl, type, rid);
4884}
4885
4886struct resource_list *
4887pci_get_resource_list (device_t dev, device_t child)
4888{
4889	struct pci_devinfo *dinfo = device_get_ivars(child);
4890
4891	return (&dinfo->resources);
4892}
4893
4894bus_dma_tag_t
4895pci_get_dma_tag(device_t bus, device_t dev)
4896{
4897	struct pci_softc *sc = device_get_softc(bus);
4898
4899	return (sc->sc_dma_tag);
4900}
4901
4902uint32_t
4903pci_read_config_method(device_t dev, device_t child, int reg, int width)
4904{
4905	struct pci_devinfo *dinfo = device_get_ivars(child);
4906	pcicfgregs *cfg = &dinfo->cfg;
4907
4908	return (PCIB_READ_CONFIG(device_get_parent(dev),
4909	    cfg->bus, cfg->slot, cfg->func, reg, width));
4910}
4911
4912void
4913pci_write_config_method(device_t dev, device_t child, int reg,
4914    uint32_t val, int width)
4915{
4916	struct pci_devinfo *dinfo = device_get_ivars(child);
4917	pcicfgregs *cfg = &dinfo->cfg;
4918
4919	PCIB_WRITE_CONFIG(device_get_parent(dev),
4920	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4921}
4922
4923int
4924pci_child_location_str_method(device_t dev, device_t child, char *buf,
4925    size_t buflen)
4926{
4927
4928	snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
4929	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
4930	return (0);
4931}
4932
4933int
4934pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4935    size_t buflen)
4936{
4937	struct pci_devinfo *dinfo;
4938	pcicfgregs *cfg;
4939
4940	dinfo = device_get_ivars(child);
4941	cfg = &dinfo->cfg;
4942	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4943	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4944	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4945	    cfg->progif);
4946	return (0);
4947}
4948
4949int
4950pci_assign_interrupt_method(device_t dev, device_t child)
4951{
4952	struct pci_devinfo *dinfo = device_get_ivars(child);
4953	pcicfgregs *cfg = &dinfo->cfg;
4954
4955	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4956	    cfg->intpin));
4957}
4958
4959static void
4960pci_lookup(void *arg, const char *name, device_t *dev)
4961{
4962	long val;
4963	char *end;
4964	int domain, bus, slot, func;
4965
4966	if (*dev != NULL)
4967		return;
4968
4969	/*
4970	 * Accept pciconf-style selectors of either pciD:B:S:F or
4971	 * pciB:S:F.  In the latter case, the domain is assumed to
4972	 * be zero.
4973	 */
4974	if (strncmp(name, "pci", 3) != 0)
4975		return;
4976	val = strtol(name + 3, &end, 10);
4977	if (val < 0 || val > INT_MAX || *end != ':')
4978		return;
4979	domain = val;
4980	val = strtol(end + 1, &end, 10);
4981	if (val < 0 || val > INT_MAX || *end != ':')
4982		return;
4983	bus = val;
4984	val = strtol(end + 1, &end, 10);
4985	if (val < 0 || val > INT_MAX)
4986		return;
4987	slot = val;
4988	if (*end == ':') {
4989		val = strtol(end + 1, &end, 10);
4990		if (val < 0 || val > INT_MAX || *end != '\0')
4991			return;
4992		func = val;
4993	} else if (*end == '\0') {
4994		func = slot;
4995		slot = bus;
4996		bus = domain;
4997		domain = 0;
4998	} else
4999		return;
5000
5001	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
5002	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
5003		return;
5004
5005	*dev = pci_find_dbsf(domain, bus, slot, func);
5006}
5007
5008static int
5009pci_modevent(module_t mod, int what, void *arg)
5010{
5011	static struct cdev *pci_cdev;
5012	static eventhandler_tag tag;
5013
5014	switch (what) {
5015	case MOD_LOAD:
5016		STAILQ_INIT(&pci_devq);
5017		pci_generation = 0;
5018		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5019		    "pci");
5020		pci_load_vendor_data();
5021		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5022		    1000);
5023		break;
5024
5025	case MOD_UNLOAD:
5026		if (tag != NULL)
5027			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5028		destroy_dev(pci_cdev);
5029		break;
5030	}
5031
5032	return (0);
5033}
5034
/*
 * Rewrite the saved PCI Express control registers for (dev).  Which
 * registers exist depends on the capability version and the port
 * type, so the conditionals here must mirror those in
 * pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control exists for every version and port type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register block only exists from capability version 2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5070
5071static void
5072pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5073{
5074	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5075	    dinfo->cfg.pcix.pcix_command,  2);
5076}
5077
/*
 * Restore a type 0 device's saved configuration state (BARs, command
 * register, interrupt routing, latency/cacheline settings) and its
 * PCIe/PCI-X/MSI/MSI-X capability state after the device has been
 * powered down or reset.  The power state is raised to D0 first
 * because the later register writes would be lost otherwise.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5127
/*
 * Save the PCI Express control registers of (dev) into its devinfo so
 * pci_cfg_restore_pcie() can rewrite them later.  Which registers are
 * read depends on the capability version and the port type; the
 * conditionals must stay in sync with the restore side.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control exists for every version and port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register block only exists from capability version 2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5165
5166static void
5167pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5168{
5169	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5170	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5171}
5172
/*
 * Save the writable portions of the device's config space into dinfo so
 * pci_cfg_restore() can replay them.  If 'setstate' is non-zero, also
 * place the device into D3 (subject to the pci_do_power_nodriver policy
 * below); with setstate == 0 the power state is left untouched.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/* Also refresh the cached PCI-e and PCI-X control registers. */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/*
	 * pci_do_power_nodriver selects how aggressive the powerdown policy
	 * is (presumably a loader/sysctl tunable defined earlier in this
	 * file).  Note there is no default case: any value other than 0-3
	 * falls out of the switch and powers the device down, like case 3.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5258
5259/* Wrapper APIs suitable for device driver use. */
5260void
5261pci_save_state(device_t dev)
5262{
5263	struct pci_devinfo *dinfo;
5264
5265	dinfo = device_get_ivars(dev);
5266	pci_cfg_save(dev, dinfo, 0);
5267}
5268
5269void
5270pci_restore_state(device_t dev)
5271{
5272	struct pci_devinfo *dinfo;
5273
5274	dinfo = device_get_ivars(dev);
5275	pci_cfg_restore(dev, dinfo);
5276}
5277
5278static uint16_t
5279pci_get_rid_method(device_t dev, device_t child)
5280{
5281
5282	return (PCIB_GET_RID(device_get_parent(dev), child));
5283}
5284
5285/* Find the upstream port of a given PCI device in a root complex. */
5286device_t
5287pci_find_pcie_root_port(device_t dev)
5288{
5289	struct pci_devinfo *dinfo;
5290	devclass_t pci_class;
5291	device_t pcib, bus;
5292
5293	pci_class = devclass_find("pci");
5294	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5295	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5296
5297	/*
5298	 * Walk the bridge hierarchy until we find a PCI-e root
5299	 * port or a non-PCI device.
5300	 */
5301	for (;;) {
5302		bus = device_get_parent(dev);
5303		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5304		    device_get_nameunit(dev)));
5305
5306		pcib = device_get_parent(bus);
5307		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5308		    device_get_nameunit(bus)));
5309
5310		/*
5311		 * pcib's parent must be a PCI bus for this to be a
5312		 * PCI-PCI bridge.
5313		 */
5314		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5315			return (NULL);
5316
5317		dinfo = device_get_ivars(pcib);
5318		if (dinfo->cfg.pcie.pcie_location != 0 &&
5319		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5320			return (pcib);
5321
5322		dev = pcib;
5323	}
5324}
5325