pci.c revision 306520
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 306520 2016-09-30 18:47:34Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/limits.h>
39#include <sys/linker.h>
40#include <sys/fcntl.h>
41#include <sys/conf.h>
42#include <sys/kernel.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/endian.h>
46
47#include <vm/vm.h>
48#include <vm/pmap.h>
49#include <vm/vm_extern.h>
50
51#include <sys/bus.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#include <machine/resource.h>
55#include <machine/stdarg.h>
56
57#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58#include <machine/intr_machdep.h>
59#endif
60
61#include <sys/pciio.h>
62#include <dev/pci/pcireg.h>
63#include <dev/pci/pcivar.h>
64#include <dev/pci/pci_private.h>
65
66#include <dev/usb/controller/xhcireg.h>
67#include <dev/usb/controller/ehcireg.h>
68#include <dev/usb/controller/ohcireg.h>
69#include <dev/usb/controller/uhcireg.h>
70
71#include "pcib_if.h"
72#include "pci_if.h"
73
74#define	PCIR_IS_BIOS(cfg, reg)						\
75	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
76	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
77
78static int		pci_has_quirk(uint32_t devid, int quirk);
79static pci_addr_t	pci_mapbase(uint64_t mapreg);
80static const char	*pci_maptype(uint64_t mapreg);
81static int		pci_mapsize(uint64_t testval);
82static int		pci_maprange(uint64_t mapreg);
83static pci_addr_t	pci_rombase(uint64_t mapreg);
84static int		pci_romsize(uint64_t testval);
85static void		pci_fixancient(pcicfgregs *cfg);
86static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
87
88static int		pci_porten(device_t dev);
89static int		pci_memen(device_t dev);
90static void		pci_assign_interrupt(device_t bus, device_t dev,
91			    int force_route);
92static int		pci_add_map(device_t bus, device_t dev, int reg,
93			    struct resource_list *rl, int force, int prefetch);
94static int		pci_probe(device_t dev);
95static int		pci_attach(device_t dev);
96#ifdef PCI_RES_BUS
97static int		pci_detach(device_t dev);
98#endif
99static void		pci_load_vendor_data(void);
100static int		pci_describe_parse_line(char **ptr, int *vendor,
101			    int *device, char **desc);
102static char		*pci_describe_device(device_t dev);
103static int		pci_modevent(module_t mod, int what, void *arg);
104static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105			    pcicfgregs *cfg);
106static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108			    int reg, uint32_t *data);
109#if 0
110static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111			    int reg, uint32_t data);
112#endif
113static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static int		pci_msix_blacklisted(void);
118static void		pci_resume_msi(device_t dev);
119static void		pci_resume_msix(device_t dev);
120static int		pci_remap_intr_method(device_t bus, device_t dev,
121			    u_int irq);
122
123static uint16_t		pci_get_rid_method(device_t dev, device_t child);
124
125static device_method_t pci_methods[] = {
126	/* Device interface */
127	DEVMETHOD(device_probe,		pci_probe),
128	DEVMETHOD(device_attach,	pci_attach),
129#ifdef PCI_RES_BUS
130	DEVMETHOD(device_detach,	pci_detach),
131#else
132	DEVMETHOD(device_detach,	bus_generic_detach),
133#endif
134	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
135	DEVMETHOD(device_suspend,	pci_suspend),
136	DEVMETHOD(device_resume,	pci_resume),
137
138	/* Bus interface */
139	DEVMETHOD(bus_print_child,	pci_print_child),
140	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
141	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
142	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
143	DEVMETHOD(bus_driver_added,	pci_driver_added),
144	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
145	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
146
147	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
148	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
149	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
150	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
151	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
152	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
153	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
154	DEVMETHOD(bus_release_resource,	pci_release_resource),
155	DEVMETHOD(bus_activate_resource, pci_activate_resource),
156	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
157	DEVMETHOD(bus_child_detached,	pci_child_detached),
158	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
159	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
160	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
161
162	/* PCI interface */
163	DEVMETHOD(pci_read_config,	pci_read_config_method),
164	DEVMETHOD(pci_write_config,	pci_write_config_method),
165	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
166	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
167	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
168	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
169	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
170	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
171	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
172	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
173	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
174	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
175	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
176	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
177	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
178	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
179	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
180	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
181	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
182	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
183	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
184	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
185	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
186	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
187	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
188	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
189	DEVMETHOD(pci_child_added,	pci_child_added_method),
190
191	DEVMETHOD_END
192};
193
194DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
195
196static devclass_t pci_devclass;
197DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
198MODULE_VERSION(pci, 1);
199
200static char	*pci_vendordata;
201static size_t	pci_vendordata_size;
202
203struct pci_quirk {
204	uint32_t devid;	/* Vendor/device of the card */
205	int	type;
206#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
207#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
208#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
209#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
210#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
211#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
212	int	arg1;
213	int	arg2;
214};
215
216static const struct pci_quirk pci_quirks[] = {
217	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
218	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
219	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
220	/* As does the Serverworks OSB4 (the SMBus mapping register) */
221	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
222
223	/*
224	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
225	 * or the CMIC-SL (AKA ServerWorks GC_LE).
226	 */
227	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
229
230	/*
231	 * MSI doesn't work on earlier Intel chipsets including
232	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
233	 */
234	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
236	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
237	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
238	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
239	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241
242	/*
243	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
244	 * bridge.
245	 */
246	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247
248	/*
249	 * MSI-X allocation doesn't work properly for devices passed through
250	 * by VMware up to at least ESXi 5.1.
251	 */
252	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
253	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
254
255	/*
256	 * Some virtualization environments emulate an older chipset
257	 * but support MSI just fine.  QEMU uses the Intel 82440.
258	 */
259	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
260
261	/*
262	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
263	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
264	 * It prevents us from attaching hpet(4) when the bit is unset.
265	 * Note this quirk only affects SB600 revision A13 and earlier.
266	 * For SB600 A21 and later, firmware must set the bit to hide it.
267	 * For SB700 and later, it is unused and hardcoded to zero.
268	 */
269	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
270
271	/*
272	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
273	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
274	 * command register is set.
275	 */
276	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
277	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
278	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
279
280	/*
281	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
282	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
283	 */
284	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
285	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
286	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
287	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
288	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
289	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
290
291	{ 0 }
292};
293
294/* map register information */
295#define	PCI_MAPMEM	0x01	/* memory map */
296#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
297#define	PCI_MAPPORT	0x04	/* port map */
298
299struct devlist pci_devq;
300uint32_t pci_generation;
301uint32_t pci_numdevs = 0;
302static int pcie_chipset, pcix_chipset;
303
304/* sysctl vars */
305SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
306
307static int pci_enable_io_modes = 1;
308TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
309SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
310    &pci_enable_io_modes, 1,
311    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
312enable these bits correctly.  We'd like to do this all the time, but there\n\
313are some peripherals that this causes problems with.");
314
315static int pci_do_realloc_bars = 0;
316TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
317SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
318    &pci_do_realloc_bars, 0,
319    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
320
321static int pci_do_power_nodriver = 0;
322TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
323SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
324    &pci_do_power_nodriver, 0,
325  "Place a function into D3 state when no driver attaches to it.  0 means\n\
326disable.  1 means conservatively place devices into D3 state.  2 means\n\
327agressively place devices into D3 state.  3 means put absolutely everything\n\
328in D3 state.");
329
330int pci_do_power_resume = 1;
331TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
332SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
333    &pci_do_power_resume, 1,
334  "Transition from D3 -> D0 on resume.");
335
336int pci_do_power_suspend = 1;
337TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
338SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
339    &pci_do_power_suspend, 1,
340  "Transition from D0 -> D3 on suspend.");
341
342static int pci_do_msi = 1;
343TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
344SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
345    "Enable support for MSI interrupts");
346
347static int pci_do_msix = 1;
348TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
349SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
350    "Enable support for MSI-X interrupts");
351
352static int pci_honor_msi_blacklist = 1;
353TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
354SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
355    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
356
357#if defined(__i386__) || defined(__amd64__)
358static int pci_usb_takeover = 1;
359#else
360static int pci_usb_takeover = 0;
361#endif
362TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
363SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
364    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
365Disable this if you depend on BIOS emulation of USB devices, that is\n\
366you use USB devices (like keyboard or mouse) but do not load USB drivers");
367
368static int pci_clear_bars;
369TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
370SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
371    "Ignore firmware-assigned resources for BARs.");
372
373#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
374static int pci_clear_buses;
375TUNABLE_INT("hw.pci.clear_buses", &pci_clear_buses);
376SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
377    "Ignore firmware-assigned bus numbers.");
378#endif
379
380static int pci_enable_ari = 1;
381TUNABLE_INT("hw.pci.enable_ari", &pci_enable_ari);
382SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
383    0, "Enable support for PCIe Alternative RID Interpretation");
384
385static int
386pci_has_quirk(uint32_t devid, int quirk)
387{
388	const struct pci_quirk *q;
389
390	for (q = &pci_quirks[0]; q->devid; q++) {
391		if (q->devid == devid && q->type == quirk)
392			return (1);
393	}
394	return (0);
395}
396
397/* Find a device_t by bus/slot/function in domain 0 */
398
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: legacy callers always mean domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
405
406/* Find a device_t by domain/bus/slot/function */
407
408device_t
409pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
410{
411	struct pci_devinfo *dinfo;
412
413	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
414		if ((dinfo->cfg.domain == domain) &&
415		    (dinfo->cfg.bus == bus) &&
416		    (dinfo->cfg.slot == slot) &&
417		    (dinfo->cfg.func == func)) {
418			return (dinfo->cfg.dev);
419		}
420	}
421
422	return (NULL);
423}
424
425/* Find a device_t by vendor/device ID */
426
427device_t
428pci_find_device(uint16_t vendor, uint16_t device)
429{
430	struct pci_devinfo *dinfo;
431
432	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
433		if ((dinfo->cfg.vendor == vendor) &&
434		    (dinfo->cfg.device == device)) {
435			return (dinfo->cfg.dev);
436		}
437	}
438
439	return (NULL);
440}
441
442device_t
443pci_find_class(uint8_t class, uint8_t subclass)
444{
445	struct pci_devinfo *dinfo;
446
447	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
448		if (dinfo->cfg.baseclass == class &&
449		    dinfo->cfg.subclass == subclass) {
450			return (dinfo->cfg.dev);
451		}
452	}
453
454	return (NULL);
455}
456
/*
 * printf() wrapper that prefixes each message with the device's
 * "pciD:B:S:F: " config address.  Returns the total number of
 * characters printed, mirroring printf's contract.
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
470
471/* return base address of memory or port map */
472
473static pci_addr_t
474pci_mapbase(uint64_t mapreg)
475{
476
477	if (PCI_BAR_MEM(mapreg))
478		return (mapreg & PCIM_BAR_MEM_BASE);
479	else
480		return (mapreg & PCIM_BAR_IO_BASE);
481}
482
483/* return map type of memory or port map */
484
485static const char *
486pci_maptype(uint64_t mapreg)
487{
488
489	if (PCI_BAR_IO(mapreg))
490		return ("I/O Port");
491	if (mapreg & PCIM_BAR_MEM_PREFETCH)
492		return ("Prefetchable Memory");
493	return ("Memory");
494}
495
496/* return log2 of map size decoded for memory or port map */
497
498static int
499pci_mapsize(uint64_t testval)
500{
501	int ln2size;
502
503	testval = pci_mapbase(testval);
504	ln2size = 0;
505	if (testval != 0) {
506		while ((testval & 1) == 0)
507		{
508			ln2size++;
509			testval >>= 1;
510		}
511	}
512	return (ln2size);
513}
514
515/* return base address of device ROM */
516
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
523
/* return log2 of map size decoded for device ROM */
525
526static int
527pci_romsize(uint64_t testval)
528{
529	int ln2size;
530
531	testval = pci_rombase(testval);
532	ln2size = 0;
533	if (testval != 0) {
534		while ((testval & 1) == 0)
535		{
536			ln2size++;
537			testval >>= 1;
538		}
539	}
540	return (ln2size);
541}
542
543/* return log2 of address range supported by map register */
544
545static int
546pci_maprange(uint64_t mapreg)
547{
548	int ln2range = 0;
549
550	if (PCI_BAR_IO(mapreg))
551		ln2range = 32;
552	else
553		switch (mapreg & PCIM_BAR_MEM_TYPE) {
554		case PCIM_BAR_MEM_32:
555			ln2range = 32;
556			break;
557		case PCIM_BAR_MEM_1MB:
558			ln2range = 20;
559			break;
560		case PCIM_BAR_MEM_64:
561			ln2range = 64;
562			break;
563		}
564	return (ln2range);
565}
566
567/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
568
569static void
570pci_fixancient(pcicfgregs *cfg)
571{
572	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
573		return;
574
575	/* PCI to PCI bridges use header type 1 */
576	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
577		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
578}
579
580/* extract header type specific config data */
581
/*
 * Read the header-type-specific config registers (subsystem IDs and,
 * for type 0, the MIN_GNT/MAX_LAT timing hints) and record how many
 * BARs this header type provides in cfg->nummaps.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* PCI-PCI bridges carry no subsystem ID at this offset. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
605
606/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* A vendor/device word of all-ones means no function is present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* 'size' allows callers to embed extra state after cfg. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	/* NULL when no function exists at this config address. */
	return (devlist_entry);
#undef REG
}
676
/*
 * Walk the device's standard PCI capability list and record the
 * location (and key registers) of each capability this code knows
 * about: power management, HyperTransport, MSI, MSI-X, VPD, bridge
 * subvendor, PCI-X and PCI-express.  Also sets the file-global
 * pcix_chipset/pcie_chipset hints used by the MSI blacklist logic.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type-specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets are 8 bits, so > 255 is corrupt. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* The supported message count is encoded as log2. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
837
838/*
839 * PCI Vital Product Data
840 */
841
842#define	PCI_VPD_TIMEOUT		1000000
843
844static int
845pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
846{
847	int count = PCI_VPD_TIMEOUT;
848
849	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
850
851	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
852
853	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
854		if (--count < 0)
855			return (ENXIO);
856		DELAY(1);	/* limit looping */
857	}
858	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
859
860	return (0);
861}
862
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg'.  Compiled out;
 * kept for reference.  The data register is loaded first, then the
 * write is started by storing the address with flag bit 15 set; the
 * device clears bit 15 when the write cycle completes.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
882
883#undef PCI_VPD_TIMEOUT
884
/* Cursor state for reading a device's VPD one byte at a time. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* byte offset of the next fetch */
	uint8_t		cksum;		/* running 8-bit sum of bytes read */
};
893
894static int
895vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
896{
897	uint32_t reg;
898	uint8_t byte;
899
900	if (vrs->bytesinval == 0) {
901		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
902			return (ENXIO);
903		vrs->val = le32toh(reg);
904		vrs->off += 4;
905		byte = vrs->val & 0xff;
906		vrs->bytesinval = 3;
907	} else {
908		vrs->val = vrs->val >> 8;
909		byte = vrs->val & 0xff;
910		vrs->bytesinval--;
911	}
912
913	vrs->cksum += byte;
914	*data = byte;
915	return (0);
916}
917
/*
 * Read a device's Vital Product Data (VPD) out of PCI config space and
 * cache the parsed results in cfg->vpd.  VPD is a tag list containing an
 * identifier string, a read-only keyword section (VPD-R, whose "RV"
 * keyword carries a checksum byte), and an optional read/write keyword
 * section (VPD-W).  Parsing is driven one byte at a time from
 * vpd_nextbyte() by a small state machine; state -1 terminates parsing
 * normally (or on malformed data), state -2 flags a config read error.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet verified, 0 = bad, 1 = good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				/* Array full: double its size. */
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length keyword: store empty string. */
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword (2) + length byte (1) */
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				/* The "RV" byte must bring the sum to zero. */
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Section done: trim array to entries used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip remaining bytes of the current resource. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				/* Array full: double its size. */
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record config offset so callers can write back. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword (2) + length byte (1) */
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Section done: trim array to entries used. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the cache valid even on failure so we don't retry forever. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1189
1190int
1191pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1192{
1193	struct pci_devinfo *dinfo = device_get_ivars(child);
1194	pcicfgregs *cfg = &dinfo->cfg;
1195
1196	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1197		pci_read_vpd(device_get_parent(dev), cfg);
1198
1199	*identptr = cfg->vpd.vpd_ident;
1200
1201	if (*identptr == NULL)
1202		return (ENXIO);
1203
1204	return (0);
1205}
1206
1207int
1208pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1209	const char **vptr)
1210{
1211	struct pci_devinfo *dinfo = device_get_ivars(child);
1212	pcicfgregs *cfg = &dinfo->cfg;
1213	int i;
1214
1215	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1216		pci_read_vpd(device_get_parent(dev), cfg);
1217
1218	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1219		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1220		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1221			*vptr = cfg->vpd.vpd_ros[i].value;
1222			return (0);
1223		}
1224
1225	*vptr = NULL;
1226	return (ENXIO);
1227}
1228
1229struct pcicfg_vpd *
1230pci_fetch_vpd_list(device_t dev)
1231{
1232	struct pci_devinfo *dinfo = device_get_ivars(dev);
1233	pcicfgregs *cfg = &dinfo->cfg;
1234
1235	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1236		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1237	return (&cfg->vpd);
1238}
1239
1240/*
1241 * Find the requested HyperTransport capability and return the offset
1242 * in configuration space via the pointer provided.  The function
1243 * returns 0 on success and an error code otherwise.
1244 */
1245int
1246pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1247{
1248	int ptr, error;
1249	uint16_t val;
1250
1251	error = pci_find_cap(child, PCIY_HT, &ptr);
1252	if (error)
1253		return (error);
1254
1255	/*
1256	 * Traverse the capabilities list checking each HT capability
1257	 * to see if it matches the requested HT capability.
1258	 */
1259	while (ptr != 0) {
1260		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1261		if (capability == PCIM_HTCAP_SLAVE ||
1262		    capability == PCIM_HTCAP_HOST)
1263			val &= 0xe000;
1264		else
1265			val &= PCIM_HTCMD_CAP_MASK;
1266		if (val == capability) {
1267			if (capreg != NULL)
1268				*capreg = ptr;
1269			return (0);
1270		}
1271
1272		/* Skip to the next HT capability. */
1273		while (ptr != 0) {
1274			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1275			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1276			    PCIY_HT)
1277				break;
1278		}
1279	}
1280	return (ENOENT);
1281}
1282
1283/*
1284 * Find the requested capability and return the offset in
1285 * configuration space via the pointer provided.  The function returns
1286 * 0 on success and an error code otherwise.
1287 */
1288int
1289pci_find_cap_method(device_t dev, device_t child, int capability,
1290    int *capreg)
1291{
1292	struct pci_devinfo *dinfo = device_get_ivars(child);
1293	pcicfgregs *cfg = &dinfo->cfg;
1294	u_int32_t status;
1295	u_int8_t ptr;
1296
1297	/*
1298	 * Check the CAP_LIST bit of the PCI status register first.
1299	 */
1300	status = pci_read_config(child, PCIR_STATUS, 2);
1301	if (!(status & PCIM_STATUS_CAPPRESENT))
1302		return (ENXIO);
1303
1304	/*
1305	 * Determine the start pointer of the capabilities list.
1306	 */
1307	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1308	case PCIM_HDRTYPE_NORMAL:
1309	case PCIM_HDRTYPE_BRIDGE:
1310		ptr = PCIR_CAP_PTR;
1311		break;
1312	case PCIM_HDRTYPE_CARDBUS:
1313		ptr = PCIR_CAP_PTR_2;
1314		break;
1315	default:
1316		/* XXX: panic? */
1317		return (ENXIO);		/* no extended capabilities support */
1318	}
1319	ptr = pci_read_config(child, ptr, 1);
1320
1321	/*
1322	 * Traverse the capabilities list.
1323	 */
1324	while (ptr != 0) {
1325		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1326			if (capreg != NULL)
1327				*capreg = ptr;
1328			return (0);
1329		}
1330		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1331	}
1332
1333	return (ENOENT);
1334}
1335
1336/*
1337 * Find the requested extended capability and return the offset in
1338 * configuration space via the pointer provided.  The function returns
1339 * 0 on success and an error code otherwise.
1340 */
1341int
1342pci_find_extcap_method(device_t dev, device_t child, int capability,
1343    int *capreg)
1344{
1345	struct pci_devinfo *dinfo = device_get_ivars(child);
1346	pcicfgregs *cfg = &dinfo->cfg;
1347	uint32_t ecap;
1348	uint16_t ptr;
1349
1350	/* Only supported for PCI-express devices. */
1351	if (cfg->pcie.pcie_location == 0)
1352		return (ENXIO);
1353
1354	ptr = PCIR_EXTCAP;
1355	ecap = pci_read_config(child, ptr, 4);
1356	if (ecap == 0xffffffff || ecap == 0)
1357		return (ENOENT);
1358	for (;;) {
1359		if (PCI_EXTCAP_ID(ecap) == capability) {
1360			if (capreg != NULL)
1361				*capreg = ptr;
1362			return (0);
1363		}
1364		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1365		if (ptr == 0)
1366			break;
1367		ecap = pci_read_config(child, ptr, 4);
1368	}
1369
1370	return (ENOENT);
1371}
1372
1373/*
1374 * Support for MSI-X message interrupts.
1375 */
1376void
1377pci_enable_msix_method(device_t dev, device_t child, u_int index,
1378    uint64_t address, uint32_t data)
1379{
1380	struct pci_devinfo *dinfo = device_get_ivars(child);
1381	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382	uint32_t offset;
1383
1384	KASSERT(msix->msix_table_len > index, ("bogus index"));
1385	offset = msix->msix_table_offset + index * 16;
1386	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1387	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1388	bus_write_4(msix->msix_table_res, offset + 8, data);
1389
1390	/* Enable MSI -> HT mapping. */
1391	pci_ht_map_msi(child, address);
1392}
1393
1394void
1395pci_mask_msix(device_t dev, u_int index)
1396{
1397	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399	uint32_t offset, val;
1400
1401	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1402	offset = msix->msix_table_offset + index * 16 + 12;
1403	val = bus_read_4(msix->msix_table_res, offset);
1404	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1405		val |= PCIM_MSIX_VCTRL_MASK;
1406		bus_write_4(msix->msix_table_res, offset, val);
1407	}
1408}
1409
1410void
1411pci_unmask_msix(device_t dev, u_int index)
1412{
1413	struct pci_devinfo *dinfo = device_get_ivars(dev);
1414	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1415	uint32_t offset, val;
1416
1417	KASSERT(msix->msix_table_len > index, ("bogus index"));
1418	offset = msix->msix_table_offset + index * 16 + 12;
1419	val = bus_read_4(msix->msix_table_res, offset);
1420	if (val & PCIM_MSIX_VCTRL_MASK) {
1421		val &= ~PCIM_MSIX_VCTRL_MASK;
1422		bus_write_4(msix->msix_table_res, offset, val);
1423	}
1424}
1425
1426int
1427pci_pending_msix(device_t dev, u_int index)
1428{
1429	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431	uint32_t offset, bit;
1432
1433	KASSERT(msix->msix_table_len > index, ("bogus index"));
1434	offset = msix->msix_pba_offset + (index / 32) * 4;
1435	bit = 1 << index % 32;
1436	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1437}
1438
1439/*
1440 * Restore MSI-X registers and table during resume.  If MSI-X is
1441 * enabled then walk the virtual table to restore the actual MSI-X
1442 * table.
1443 */
1444static void
1445pci_resume_msix(device_t dev)
1446{
1447	struct pci_devinfo *dinfo = device_get_ivars(dev);
1448	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1449	struct msix_table_entry *mte;
1450	struct msix_vector *mv;
1451	int i;
1452
1453	if (msix->msix_alloc > 0) {
1454		/* First, mask all vectors. */
1455		for (i = 0; i < msix->msix_msgnum; i++)
1456			pci_mask_msix(dev, i);
1457
1458		/* Second, program any messages with at least one handler. */
1459		for (i = 0; i < msix->msix_table_len; i++) {
1460			mte = &msix->msix_table[i];
1461			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1462				continue;
1463			mv = &msix->msix_vectors[mte->mte_vector - 1];
1464			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1465			pci_unmask_msix(dev, i);
1466		}
1467	}
1468	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1469	    msix->msix_ctrl, 2);
1470}
1471
1472/*
1473 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1474 * returned in *count.  After this function returns, each message will be
1475 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1476 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	/* The PBA may live in a different BAR than the table. */
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to min(requested, supported) messages. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Partial allocations are fine past the first one. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	/* Default mapping: message i uses vector i + 1 (1-based). */
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1611
1612/*
1613 * By default, pci_alloc_msix() will assign the allocated IRQ
1614 * resources consecutively to the first N messages in the MSI-X table.
1615 * However, device drivers may want to use different layouts if they
1616 * either receive fewer messages than they asked for, or they wish to
1617 * populate the MSI-X table sparsely.  This method allows the driver
1618 * to specify what layout it wants.  It must be called after a
1619 * successful pci_alloc_msix() but before any of the associated
1620 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1621 *
1622 * The 'vectors' array contains 'count' message vectors.  The array
1623 * maps directly to the MSI-X table in that index 0 in the array
1624 * specifies the vector for the first message in the MSI-X table, etc.
1625 * The vector value in each array index can either be 0 to indicate
1626 * that no vector should be assigned to a message slot, or it can be a
1627 * number from 1 to N (where N is the count returned from a
1628 * succcessful call to pci_alloc_msix()) to indicate which message
1629 * vector (IRQ) to be used for the corresponding message.
1630 *
1631 * On successful return, each message with a non-zero vector will have
1632 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1633 * 1.  Additionally, if any of the IRQs allocated via the previous
1634 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1635 * will be freed back to the system automatically.
1636 *
1637 * For example, suppose a driver has a MSI-X table with 6 messages and
1638 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1639 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1640 * C.  After the call to pci_alloc_msix(), the device will be setup to
1641 * have an MSI-X table of ABC--- (where - means no vector assigned).
1642 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1643 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1644 * be freed back to the system.  This device will also have valid
1645 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1646 *
1647 * In any case, the SYS_RES_IRQ rid X will always map to the message
1648 * at MSI-X table index X - 1 and will only be valid if a vector is
1649 * assigned to that table entry.
1650 */
1651int
1652pci_remap_msix_method(device_t dev, device_t child, int count,
1653    const u_int *vectors)
1654{
1655	struct pci_devinfo *dinfo = device_get_ivars(child);
1656	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1657	struct resource_list_entry *rle;
1658	int i, irq, j, *used;
1659
1660	/*
1661	 * Have to have at least one message in the table but the
1662	 * table can't be bigger than the actual MSI-X table in the
1663	 * device.
1664	 */
1665	if (count == 0 || count > msix->msix_msgnum)
1666		return (EINVAL);
1667
1668	/* Sanity check the vectors. */
1669	for (i = 0; i < count; i++)
1670		if (vectors[i] > msix->msix_alloc)
1671			return (EINVAL);
1672
1673	/*
1674	 * Make sure there aren't any holes in the vectors to be used.
1675	 * It's a big pain to support it, and it doesn't really make
1676	 * sense anyway.  Also, at least one vector must be used.
1677	 */
1678	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1679	    M_ZERO);
1680	for (i = 0; i < count; i++)
1681		if (vectors[i] != 0)
1682			used[vectors[i] - 1] = 1;
1683	for (i = 0; i < msix->msix_alloc - 1; i++)
1684		if (used[i] == 0 && used[i + 1] == 1) {
1685			free(used, M_DEVBUF);
1686			return (EINVAL);
1687		}
1688	if (used[0] != 1) {
1689		free(used, M_DEVBUF);
1690		return (EINVAL);
1691	}
1692
1693	/* Make sure none of the resources are allocated. */
1694	for (i = 0; i < msix->msix_table_len; i++) {
1695		if (msix->msix_table[i].mte_vector == 0)
1696			continue;
1697		if (msix->msix_table[i].mte_handlers > 0)
1698			return (EBUSY);
1699		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1700		KASSERT(rle != NULL, ("missing resource"));
1701		if (rle->res != NULL)
1702			return (EBUSY);
1703	}
1704
1705	/* Free the existing resource list entries. */
1706	for (i = 0; i < msix->msix_table_len; i++) {
1707		if (msix->msix_table[i].mte_vector == 0)
1708			continue;
1709		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1710	}
1711
1712	/*
1713	 * Build the new virtual table keeping track of which vectors are
1714	 * used.
1715	 */
1716	free(msix->msix_table, M_DEVBUF);
1717	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1718	    M_DEVBUF, M_WAITOK | M_ZERO);
1719	for (i = 0; i < count; i++)
1720		msix->msix_table[i].mte_vector = vectors[i];
1721	msix->msix_table_len = count;
1722
1723	/* Free any unused IRQs and resize the vectors array if necessary. */
1724	j = msix->msix_alloc - 1;
1725	if (used[j] == 0) {
1726		struct msix_vector *vec;
1727
1728		while (used[j] == 0) {
1729			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1730			    msix->msix_vectors[j].mv_irq);
1731			j--;
1732		}
1733		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1734		    M_WAITOK);
1735		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1736		    (j + 1));
1737		free(msix->msix_vectors, M_DEVBUF);
1738		msix->msix_vectors = vec;
1739		msix->msix_alloc = j + 1;
1740	}
1741	free(used, M_DEVBUF);
1742
1743	/* Map the IRQs onto the rids. */
1744	for (i = 0; i < count; i++) {
1745		if (vectors[i] == 0)
1746			continue;
1747		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1748		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1749		    irq, 1);
1750	}
1751
1752	if (bootverbose) {
1753		device_printf(child, "Remapped MSI-X IRQs as: ");
1754		for (i = 0; i < count; i++) {
1755			if (i != 0)
1756				printf(", ");
1757			if (vectors[i] == 0)
1758				printf("---");
1759			else
1760				printf("%d",
1761				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1762		}
1763		printf("\n");
1764	}
1765
1766	return (0);
1767}
1768
/*
 * Release every MSI-X message allocated to 'child': disable MSI-X in
 * the capability's control register, delete the SYS_RES_IRQ resource
 * list entries, and hand the IRQs back to the parent bridge.  Fails
 * with EBUSY if any message still has an interrupt handler installed
 * or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1815
1816/*
1817 * Return the max supported MSI-X messages this device supports.
1818 * Basically, assuming the MD code can alloc messages, this function
1819 * should return the maximum value that pci_alloc_msix() can return.
1820 * Thus, it is subject to the tunables, etc.
1821 */
1822int
1823pci_msix_count_method(device_t dev, device_t child)
1824{
1825	struct pci_devinfo *dinfo = device_get_ivars(child);
1826	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1827
1828	if (pci_do_msix && msix->msix_location != 0)
1829		return (msix->msix_msgnum);
1830	return (0);
1831}
1832
1833int
1834pci_msix_pba_bar_method(device_t dev, device_t child)
1835{
1836	struct pci_devinfo *dinfo = device_get_ivars(child);
1837	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1838
1839	if (pci_do_msix && msix->msix_location != 0)
1840		return (msix->msix_pba_bar);
1841	return (-1);
1842}
1843
1844int
1845pci_msix_table_bar_method(device_t dev, device_t child)
1846{
1847	struct pci_devinfo *dinfo = device_get_ivars(child);
1848	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1849
1850	if (pci_do_msix && msix->msix_location != 0)
1851		return (msix->msix_table_bar);
1852	return (-1);
1853}
1854
1855/*
1856 * HyperTransport MSI mapping control
1857 */
1858void
1859pci_ht_map_msi(device_t dev, uint64_t addr)
1860{
1861	struct pci_devinfo *dinfo = device_get_ivars(dev);
1862	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1863
1864	if (!ht->ht_msimap)
1865		return;
1866
1867	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1868	    ht->ht_msiaddr >> 20 == addr >> 20) {
1869		/* Enable MSI -> HT mapping. */
1870		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1871		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1872		    ht->ht_msictrl, 2);
1873	}
1874
1875	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1876		/* Disable MSI -> HT mapping. */
1877		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1878		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1879		    ht->ht_msictrl, 2);
1880	}
1881}
1882
1883int
1884pci_get_max_payload(device_t dev)
1885{
1886	struct pci_devinfo *dinfo = device_get_ivars(dev);
1887	int cap;
1888	uint16_t val;
1889
1890	cap = dinfo->cfg.pcie.pcie_location;
1891	if (cap == 0)
1892		return (0);
1893	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1894	val &= PCIEM_CTL_MAX_PAYLOAD;
1895	val >>= 5;
1896	return (1 << (val + 7));
1897}
1898
1899int
1900pci_get_max_read_req(device_t dev)
1901{
1902	struct pci_devinfo *dinfo = device_get_ivars(dev);
1903	int cap;
1904	uint16_t val;
1905
1906	cap = dinfo->cfg.pcie.pcie_location;
1907	if (cap == 0)
1908		return (0);
1909	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1910	val &= PCIEM_CTL_MAX_READ_REQUEST;
1911	val >>= 12;
1912	return (1 << (val + 7));
1913}
1914
1915int
1916pci_set_max_read_req(device_t dev, int size)
1917{
1918	struct pci_devinfo *dinfo = device_get_ivars(dev);
1919	int cap;
1920	uint16_t val;
1921
1922	cap = dinfo->cfg.pcie.pcie_location;
1923	if (cap == 0)
1924		return (0);
1925	if (size < 128)
1926		size = 128;
1927	if (size > 4096)
1928		size = 4096;
1929	size = (1 << (fls(size) - 1));
1930	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1931	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1932	val |= (fls(size) - 8) << 12;
1933	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1934	return (size);
1935}
1936
1937uint32_t
1938pcie_read_config(device_t dev, int reg, int width)
1939{
1940	struct pci_devinfo *dinfo = device_get_ivars(dev);
1941	int cap;
1942
1943	cap = dinfo->cfg.pcie.pcie_location;
1944	if (cap == 0) {
1945		if (width == 2)
1946			return (0xffff);
1947		return (0xffffffff);
1948	}
1949
1950	return (pci_read_config(dev, cap + reg, width));
1951}
1952
1953void
1954pcie_write_config(device_t dev, int reg, uint32_t value, int width)
1955{
1956	struct pci_devinfo *dinfo = device_get_ivars(dev);
1957	int cap;
1958
1959	cap = dinfo->cfg.pcie.pcie_location;
1960	if (cap == 0)
1961		return;
1962	pci_write_config(dev, cap + reg, value, width);
1963}
1964
1965/*
1966 * Adjusts a PCI-e capability register by clearing the bits in mask
1967 * and setting the bits in (value & mask).  Bits not set in mask are
1968 * not adjusted.
1969 *
1970 * Returns the old value on success or all ones on failure.
1971 */
1972uint32_t
1973pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
1974    int width)
1975{
1976	struct pci_devinfo *dinfo = device_get_ivars(dev);
1977	uint32_t old, new;
1978	int cap;
1979
1980	cap = dinfo->cfg.pcie.pcie_location;
1981	if (cap == 0) {
1982		if (width == 2)
1983			return (0xffff);
1984		return (0xffffffff);
1985	}
1986
1987	old = pci_read_config(dev, cap + reg, width);
1988	new = old & ~mask;
1989	new |= (value & mask);
1990	pci_write_config(dev, cap + reg, new, width);
1991	return (old);
1992}
1993
1994/*
1995 * Support for MSI message signalled interrupts.
1996 */
1997void
1998pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
1999    uint16_t data)
2000{
2001	struct pci_devinfo *dinfo = device_get_ivars(child);
2002	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2003
2004	/* Write data and address values. */
2005	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2006	    address & 0xffffffff, 4);
2007	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2008		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2009		    address >> 32, 4);
2010		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2011		    data, 2);
2012	} else
2013		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2014		    2);
2015
2016	/* Enable MSI in the control register. */
2017	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2018	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2019	    msi->msi_ctrl, 2);
2020
2021	/* Enable MSI -> HT mapping. */
2022	pci_ht_map_msi(child, address);
2023}
2024
/*
 * Disable MSI for 'child': tear down any HyperTransport MSI mapping
 * first, then clear the enable bit in the MSI control register.  The
 * cached msi_ctrl shadow is kept in sync with the hardware.
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
2039
2040/*
2041 * Restore MSI registers during resume.  If MSI is enabled then
2042 * restore the data and address registers in addition to the control
2043 * register.
2044 */
2045static void
2046pci_resume_msi(device_t dev)
2047{
2048	struct pci_devinfo *dinfo = device_get_ivars(dev);
2049	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2050	uint64_t address;
2051	uint16_t data;
2052
2053	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2054		address = msi->msi_addr;
2055		data = msi->msi_data;
2056		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2057		    address & 0xffffffff, 4);
2058		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2059			pci_write_config(dev, msi->msi_location +
2060			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2061			pci_write_config(dev, msi->msi_location +
2062			    PCIR_MSI_DATA_64BIT, data, 2);
2063		} else
2064			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2065			    data, 2);
2066	}
2067	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2068	    2);
2069}
2070
/*
 * Reroute an already-allocated MSI or MSI-X vector of 'dev' to a new
 * target after the MD interrupt code has moved 'irq'.  Asks the
 * parent bridge for updated address/data values and reprograms the
 * matching vector.  Returns 0 on success, ENOENT if 'irq' is not one
 * of this device's message IRQs, or the PCIB_MAP_MSI error.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update shadow state, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Reprogram every active table slot that
				 * references this vector (1-based). */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2143
2144/*
2145 * Returns true if the specified device is blacklisted because MSI
2146 * doesn't work.
2147 */
2148int
2149pci_msi_device_blacklisted(device_t dev)
2150{
2151
2152	if (!pci_honor_msi_blacklist)
2153		return (0);
2154
2155	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2156}
2157
2158/*
2159 * Determine if MSI is blacklisted globally on this system.  Currently,
2160 * we just check for blacklisted chipsets as represented by the
2161 * host-PCI bridge at device 0:0:0.  In the future, it may become
2162 * necessary to check other system attributes, such as the kenv values
2163 * that give the motherboard manufacturer and model number.
2164 */
2165static int
2166pci_msi_blacklisted(void)
2167{
2168	device_t dev;
2169
2170	if (!pci_honor_msi_blacklist)
2171		return (0);
2172
2173	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2174	if (!(pcie_chipset || pcix_chipset)) {
2175		if (vm_guest != VM_GUEST_NO) {
2176			/*
2177			 * Whitelist older chipsets in virtual
2178			 * machines known to support MSI.
2179			 */
2180			dev = pci_find_bsf(0, 0, 0);
2181			if (dev != NULL)
2182				return (!pci_has_quirk(pci_get_devid(dev),
2183					PCI_QUIRK_ENABLE_MSI_VM));
2184		}
2185		return (1);
2186	}
2187
2188	dev = pci_find_bsf(0, 0, 0);
2189	if (dev != NULL)
2190		return (pci_msi_device_blacklisted(dev));
2191	return (0);
2192}
2193
2194/*
2195 * Returns true if the specified device is blacklisted because MSI-X
2196 * doesn't work.  Note that this assumes that if MSI doesn't work,
2197 * MSI-X doesn't either.
2198 */
2199int
2200pci_msix_device_blacklisted(device_t dev)
2201{
2202
2203	if (!pci_honor_msi_blacklist)
2204		return (0);
2205
2206	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2207		return (1);
2208
2209	return (pci_msi_device_blacklisted(dev));
2210}
2211
2212/*
2213 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2214 * is blacklisted, assume that MSI-X is as well.  Check for additional
2215 * chipsets where MSI works but MSI-X does not.
2216 */
2217static int
2218pci_msix_blacklisted(void)
2219{
2220	device_t dev;
2221
2222	if (!pci_honor_msi_blacklist)
2223		return (0);
2224
2225	dev = pci_find_bsf(0, 0, 0);
2226	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2227	    PCI_QUIRK_DISABLE_MSIX))
2228		return (1);
2229
2230	return (pci_msi_blacklisted());
2231}
2232
2233/*
2234 * Attempt to allocate *count MSI messages.  The actual number allocated is
2235 * returned in *count.  After this function returns, each message will be
2236 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2237 */
2238int
2239pci_alloc_msi_method(device_t dev, device_t child, int *count)
2240{
2241	struct pci_devinfo *dinfo = device_get_ivars(child);
2242	pcicfgregs *cfg = &dinfo->cfg;
2243	struct resource_list_entry *rle;
2244	int actual, error, i, irqs[32];
2245	uint16_t ctrl;
2246
2247	/* Don't let count == 0 get us into trouble. */
2248	if (*count == 0)
2249		return (EINVAL);
2250
2251	/* If rid 0 is allocated, then fail. */
2252	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2253	if (rle != NULL && rle->res != NULL)
2254		return (ENXIO);
2255
2256	/* Already have allocated messages? */
2257	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2258		return (ENXIO);
2259
2260	/* If MSI is blacklisted for this system, fail. */
2261	if (pci_msi_blacklisted())
2262		return (ENXIO);
2263
2264	/* MSI capability present? */
2265	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2266		return (ENODEV);
2267
2268	if (bootverbose)
2269		device_printf(child,
2270		    "attempting to allocate %d MSI vectors (%d supported)\n",
2271		    *count, cfg->msi.msi_msgnum);
2272
2273	/* Don't ask for more than the device supports. */
2274	actual = min(*count, cfg->msi.msi_msgnum);
2275
2276	/* Don't ask for more than 32 messages. */
2277	actual = min(actual, 32);
2278
2279	/* MSI requires power of 2 number of messages. */
2280	if (!powerof2(actual))
2281		return (EINVAL);
2282
2283	for (;;) {
2284		/* Try to allocate N messages. */
2285		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2286		    actual, irqs);
2287		if (error == 0)
2288			break;
2289		if (actual == 1)
2290			return (error);
2291
2292		/* Try N / 2. */
2293		actual >>= 1;
2294	}
2295
2296	/*
2297	 * We now have N actual messages mapped onto SYS_RES_IRQ
2298	 * resources in the irqs[] array, so add new resources
2299	 * starting at rid 1.
2300	 */
2301	for (i = 0; i < actual; i++)
2302		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2303		    irqs[i], irqs[i], 1);
2304
2305	if (bootverbose) {
2306		if (actual == 1)
2307			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2308		else {
2309			int run;
2310
2311			/*
2312			 * Be fancy and try to print contiguous runs
2313			 * of IRQ values as ranges.  'run' is true if
2314			 * we are in a range.
2315			 */
2316			device_printf(child, "using IRQs %d", irqs[0]);
2317			run = 0;
2318			for (i = 1; i < actual; i++) {
2319
2320				/* Still in a run? */
2321				if (irqs[i] == irqs[i - 1] + 1) {
2322					run = 1;
2323					continue;
2324				}
2325
2326				/* Finish previous range. */
2327				if (run) {
2328					printf("-%d", irqs[i - 1]);
2329					run = 0;
2330				}
2331
2332				/* Start new range. */
2333				printf(",%d", irqs[i]);
2334			}
2335
2336			/* Unfinished range? */
2337			if (run)
2338				printf("-%d", irqs[actual - 1]);
2339			printf(" for MSI\n");
2340		}
2341	}
2342
2343	/* Update control register with actual count. */
2344	ctrl = cfg->msi.msi_ctrl;
2345	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2346	ctrl |= (ffs(actual) - 1) << 4;
2347	cfg->msi.msi_ctrl = ctrl;
2348	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2349
2350	/* Update counts of alloc'd messages. */
2351	cfg->msi.msi_alloc = actual;
2352	cfg->msi.msi_handlers = 0;
2353	*count = actual;
2354	return (0);
2355}
2356
/* Release the MSI messages associated with this device. */
/*
 * Tries MSI-X release first; if the device holds MSI messages
 * instead, verifies that none are in use (no handlers, no allocated
 * resources), zeroes the MME field in the control register, hands the
 * IRQs back to the parent bridge, and clears the cached MSI state.
 * Returns EBUSY while any message is still in use.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2405
2406/*
2407 * Return the max supported MSI messages this device supports.
2408 * Basically, assuming the MD code can alloc messages, this function
2409 * should return the maximum value that pci_alloc_msi() can return.
2410 * Thus, it is subject to the tunables, etc.
2411 */
2412int
2413pci_msi_count_method(device_t dev, device_t child)
2414{
2415	struct pci_devinfo *dinfo = device_get_ivars(child);
2416	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2417
2418	if (pci_do_msi && msi->msi_location != 0)
2419		return (msi->msi_msgnum);
2420	return (0);
2421}
2422
/* free pcicfgregs structure and all depending data structures */
/*
 * Releases the VPD strings, the BAR map list, and the devinfo itself,
 * then unlinks the device from the global device queue and updates
 * the generation count and device count.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* VPD data is only present when the VPD capability was parsed. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Safe iteration: each entry is freed while walking the list. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2456
2457/*
2458 * PCI power manangement
2459 */
2460int
2461pci_set_powerstate_method(device_t dev, device_t child, int state)
2462{
2463	struct pci_devinfo *dinfo = device_get_ivars(child);
2464	pcicfgregs *cfg = &dinfo->cfg;
2465	uint16_t status;
2466	int oldstate, highest, delay;
2467
2468	if (cfg->pp.pp_cap == 0)
2469		return (EOPNOTSUPP);
2470
2471	/*
2472	 * Optimize a no state change request away.  While it would be OK to
2473	 * write to the hardware in theory, some devices have shown odd
2474	 * behavior when going from D3 -> D3.
2475	 */
2476	oldstate = pci_get_powerstate(child);
2477	if (oldstate == state)
2478		return (0);
2479
2480	/*
2481	 * The PCI power management specification states that after a state
2482	 * transition between PCI power states, system software must
2483	 * guarantee a minimal delay before the function accesses the device.
2484	 * Compute the worst case delay that we need to guarantee before we
2485	 * access the device.  Many devices will be responsive much more
2486	 * quickly than this delay, but there are some that don't respond
2487	 * instantly to state changes.  Transitions to/from D3 state require
2488	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2489	 * is done below with DELAY rather than a sleeper function because
2490	 * this function can be called from contexts where we cannot sleep.
2491	 */
2492	highest = (oldstate > state) ? oldstate : state;
2493	if (highest == PCI_POWERSTATE_D3)
2494	    delay = 10000;
2495	else if (highest == PCI_POWERSTATE_D2)
2496	    delay = 200;
2497	else
2498	    delay = 0;
2499	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2500	    & ~PCIM_PSTAT_DMASK;
2501	switch (state) {
2502	case PCI_POWERSTATE_D0:
2503		status |= PCIM_PSTAT_D0;
2504		break;
2505	case PCI_POWERSTATE_D1:
2506		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2507			return (EOPNOTSUPP);
2508		status |= PCIM_PSTAT_D1;
2509		break;
2510	case PCI_POWERSTATE_D2:
2511		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2512			return (EOPNOTSUPP);
2513		status |= PCIM_PSTAT_D2;
2514		break;
2515	case PCI_POWERSTATE_D3:
2516		status |= PCIM_PSTAT_D3;
2517		break;
2518	default:
2519		return (EINVAL);
2520	}
2521
2522	if (bootverbose)
2523		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2524		    state);
2525
2526	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2527	if (delay)
2528		DELAY(delay);
2529	return (0);
2530}
2531
2532int
2533pci_get_powerstate_method(device_t dev, device_t child)
2534{
2535	struct pci_devinfo *dinfo = device_get_ivars(child);
2536	pcicfgregs *cfg = &dinfo->cfg;
2537	uint16_t status;
2538	int result;
2539
2540	if (cfg->pp.pp_cap != 0) {
2541		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2542		switch (status & PCIM_PSTAT_DMASK) {
2543		case PCIM_PSTAT_D0:
2544			result = PCI_POWERSTATE_D0;
2545			break;
2546		case PCIM_PSTAT_D1:
2547			result = PCI_POWERSTATE_D1;
2548			break;
2549		case PCIM_PSTAT_D2:
2550			result = PCI_POWERSTATE_D2;
2551			break;
2552		case PCIM_PSTAT_D3:
2553			result = PCI_POWERSTATE_D3;
2554			break;
2555		default:
2556			result = PCI_POWERSTATE_UNKNOWN;
2557			break;
2558		}
2559	} else {
2560		/* No support, device is always at D0 */
2561		result = PCI_POWERSTATE_D0;
2562	}
2563	return (result);
2564}
2565
2566/*
2567 * Some convenience functions for PCI device drivers.
2568 */
2569
2570static __inline void
2571pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2572{
2573	uint16_t	command;
2574
2575	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2576	command |= bit;
2577	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2578}
2579
2580static __inline void
2581pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2582{
2583	uint16_t	command;
2584
2585	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2586	command &= ~bit;
2587	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2588}
2589
2590int
2591pci_enable_busmaster_method(device_t dev, device_t child)
2592{
2593	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2594	return (0);
2595}
2596
2597int
2598pci_disable_busmaster_method(device_t dev, device_t child)
2599{
2600	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2601	return (0);
2602}
2603
2604int
2605pci_enable_io_method(device_t dev, device_t child, int space)
2606{
2607	uint16_t bit;
2608
2609	switch(space) {
2610	case SYS_RES_IOPORT:
2611		bit = PCIM_CMD_PORTEN;
2612		break;
2613	case SYS_RES_MEMORY:
2614		bit = PCIM_CMD_MEMEN;
2615		break;
2616	default:
2617		return (EINVAL);
2618	}
2619	pci_set_command_bit(dev, child, bit);
2620	return (0);
2621}
2622
2623int
2624pci_disable_io_method(device_t dev, device_t child, int space)
2625{
2626	uint16_t bit;
2627
2628	switch(space) {
2629	case SYS_RES_IOPORT:
2630		bit = PCIM_CMD_PORTEN;
2631		break;
2632	case SYS_RES_MEMORY:
2633		bit = PCIM_CMD_MEMEN;
2634		break;
2635	default:
2636		return (EINVAL);
2637	}
2638	pci_clear_command_bit(dev, child, bit);
2639	return (0);
2640}
2641
2642/*
2643 * New style pci driver.  Parent device is either a pci-host-bridge or a
2644 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2645 */
2646
/*
 * Dump a multi-line description of a newly-found device to the
 * console: IDs, location, class, command/status, latency settings,
 * interrupt routing, and the power-management/MSI/MSI-X capability
 * summaries that were parsed into 'dinfo'.  Only active when booting
 * verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1-4 maps to INTA-INTD; 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2703
2704static int
2705pci_porten(device_t dev)
2706{
2707	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2708}
2709
2710static int
2711pci_memen(device_t dev)
2712{
2713	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2714}
2715
/*
 * Read a BAR's current value and size it by the standard write-all-
 * ones probe.  On return *mapp holds the original (possibly 64-bit)
 * BAR value and *testvalp the value read back after the probe; the
 * BAR itself is restored.  Decoding is disabled around the probe so
 * the temporarily-bogus BAR cannot match a bus cycle.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2779
/*
 * Program 'base' into the BAR described by 'pm' (both halves for a
 * 64-bit BAR) and refresh pm->pm_value by reading the register back,
 * so the cached value reflects what the hardware actually latched.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2800
2801struct pci_map *
2802pci_find_bar(device_t dev, int reg)
2803{
2804	struct pci_devinfo *dinfo;
2805	struct pci_map *pm;
2806
2807	dinfo = device_get_ivars(dev);
2808	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2809		if (pm->pm_reg == reg)
2810			return (pm);
2811	}
2812	return (NULL);
2813}
2814
2815int
2816pci_bar_enabled(device_t dev, struct pci_map *pm)
2817{
2818	struct pci_devinfo *dinfo;
2819	uint16_t cmd;
2820
2821	dinfo = device_get_ivars(dev);
2822	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2823	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2824		return (0);
2825	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2826	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2827		return ((cmd & PCIM_CMD_MEMEN) != 0);
2828	else
2829		return ((cmd & PCIM_CMD_PORTEN) != 0);
2830}
2831
/*
 * Record a newly-sized BAR in the device's map list.  'value' is the
 * raw BAR contents and 'size' is log2 of the BAR's length.  The list
 * is kept sorted by register offset; duplicates are a bug (asserted).
 * Returns the new entry.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2856
/*
 * Rewrite every recorded BAR with its cached value, e.g. after a
 * power-state transition or resume wiped the configuration space.
 * 64-bit BARs get both halves written.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* ROM BARs are always 32-bit memory BARs. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2876
2877/*
2878 * Add a resource based on a pci map register. Return 1 if the map
2879 * register is a 32bit map register or 2 if it is a 64bit register.
2880 */
2881static int
2882pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2883    int force, int prefetch)
2884{
2885	struct pci_map *pm;
2886	pci_addr_t base, map, testval;
2887	pci_addr_t start, end, count;
2888	int barlen, basezero, flags, maprange, mapsize, type;
2889	uint16_t cmd;
2890	struct resource *res;
2891
2892	/*
2893	 * The BAR may already exist if the device is a CardBus card
2894	 * whose CIS is stored in this BAR.
2895	 */
2896	pm = pci_find_bar(dev, reg);
2897	if (pm != NULL) {
2898		maprange = pci_maprange(pm->pm_value);
2899		barlen = maprange == 64 ? 2 : 1;
2900		return (barlen);
2901	}
2902
2903	pci_read_bar(dev, reg, &map, &testval);
2904	if (PCI_BAR_MEM(map)) {
2905		type = SYS_RES_MEMORY;
2906		if (map & PCIM_BAR_MEM_PREFETCH)
2907			prefetch = 1;
2908	} else
2909		type = SYS_RES_IOPORT;
2910	mapsize = pci_mapsize(testval);
2911	base = pci_mapbase(map);
2912#ifdef __PCI_BAR_ZERO_VALID
2913	basezero = 0;
2914#else
2915	basezero = base == 0;
2916#endif
2917	maprange = pci_maprange(map);
2918	barlen = maprange == 64 ? 2 : 1;
2919
2920	/*
2921	 * For I/O registers, if bottom bit is set, and the next bit up
2922	 * isn't clear, we know we have a BAR that doesn't conform to the
2923	 * spec, so ignore it.  Also, sanity check the size of the data
2924	 * areas to the type of memory involved.  Memory must be at least
2925	 * 16 bytes in size, while I/O ranges must be at least 4.
2926	 */
2927	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2928		return (barlen);
2929	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2930	    (type == SYS_RES_IOPORT && mapsize < 2))
2931		return (barlen);
2932
2933	/* Save a record of this BAR. */
2934	pm = pci_add_bar(dev, reg, map, mapsize);
2935	if (bootverbose) {
2936		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2937		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2938		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2939			printf(", port disabled\n");
2940		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2941			printf(", memory disabled\n");
2942		else
2943			printf(", enabled\n");
2944	}
2945
2946	/*
2947	 * If base is 0, then we have problems if this architecture does
2948	 * not allow that.  It is best to ignore such entries for the
2949	 * moment.  These will be allocated later if the driver specifically
2950	 * requests them.  However, some removable busses look better when
2951	 * all resources are allocated, so allow '0' to be overriden.
2952	 *
2953	 * Similarly treat maps whose values is the same as the test value
2954	 * read back.  These maps have had all f's written to them by the
2955	 * BIOS in an attempt to disable the resources.
2956	 */
2957	if (!force && (basezero || map == testval))
2958		return (barlen);
2959	if ((u_long)base != base) {
2960		device_printf(bus,
2961		    "pci%d:%d:%d:%d bar %#x too many address bits",
2962		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2963		    pci_get_function(dev), reg);
2964		return (barlen);
2965	}
2966
2967	/*
2968	 * This code theoretically does the right thing, but has
2969	 * undesirable side effects in some cases where peripherals
2970	 * respond oddly to having these bits enabled.  Let the user
2971	 * be able to turn them off (since pci_enable_io_modes is 1 by
2972	 * default).
2973	 */
2974	if (pci_enable_io_modes) {
2975		/* Turn on resources that have been left off by a lazy BIOS */
2976		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2977			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2978			cmd |= PCIM_CMD_PORTEN;
2979			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2980		}
2981		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2982			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2983			cmd |= PCIM_CMD_MEMEN;
2984			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2985		}
2986	} else {
2987		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2988			return (barlen);
2989		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2990			return (barlen);
2991	}
2992
2993	count = (pci_addr_t)1 << mapsize;
2994	flags = RF_ALIGNMENT_LOG2(mapsize);
2995	if (prefetch)
2996		flags |= RF_PREFETCHABLE;
2997	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2998		start = 0;	/* Let the parent decide. */
2999		end = ~0ul;
3000	} else {
3001		start = base;
3002		end = base + count - 1;
3003	}
3004	resource_list_add(rl, type, reg, start, end, count);
3005
3006	/*
3007	 * Try to allocate the resource for this BAR from our parent
3008	 * so that this resource range is already reserved.  The
3009	 * driver for this device will later inherit this resource in
3010	 * pci_alloc_resource().
3011	 */
3012	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3013	    flags);
3014	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
3015		/*
3016		 * If the allocation fails, try to allocate a resource for
3017		 * this BAR using any available range.  The firmware felt
3018		 * it was important enough to assign a resource, so don't
3019		 * disable decoding if we can help it.
3020		 */
3021		resource_list_delete(rl, type, reg);
3022		resource_list_add(rl, type, reg, 0, ~0ul, count);
3023		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
3024		    count, flags);
3025	}
3026	if (res == NULL) {
3027		/*
3028		 * If the allocation fails, delete the resource list entry
3029		 * and disable decoding for this device.
3030		 *
3031		 * If the driver requests this resource in the future,
3032		 * pci_reserve_map() will try to allocate a fresh
3033		 * resource range.
3034		 */
3035		resource_list_delete(rl, type, reg);
3036		pci_disable_io(dev, type);
3037		if (bootverbose)
3038			device_printf(bus,
3039			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3040			    pci_get_domain(dev), pci_get_bus(dev),
3041			    pci_get_slot(dev), pci_get_function(dev), reg);
3042	} else {
3043		start = rman_get_start(res);
3044		pci_write_bar(dev, pm, start);
3045	}
3046	return (barlen);
3047}
3048
3049/*
3050 * For ATA devices we need to decide early what addressing mode to use.
3051 * Legacy demands that the primary and secondary ATA ports sits on the
3052 * same addresses that old ISA hardware did. This dictates that we use
3053 * those addresses and ignore the BAR's if we cannot set PCI native
3054 * addressing mode.
3055 */
3056static void
3057pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3058    uint32_t prefetchmask)
3059{
3060	int rid, type, progif;
3061#if 0
3062	/* if this device supports PCI native addressing use it */
3063	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3064	if ((progif & 0x8a) == 0x8a) {
3065		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3066		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3067			printf("Trying ATA native PCI addressing mode\n");
3068			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3069		}
3070	}
3071#endif
3072	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3073	type = SYS_RES_IOPORT;
3074	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3075		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3076		    prefetchmask & (1 << 0));
3077		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3078		    prefetchmask & (1 << 1));
3079	} else {
3080		rid = PCIR_BAR(0);
3081		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3082		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3083		    0x1f7, 8, 0);
3084		rid = PCIR_BAR(1);
3085		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3086		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3087		    0x3f6, 1, 0);
3088	}
3089	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3090		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3091		    prefetchmask & (1 << 2));
3092		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3093		    prefetchmask & (1 << 3));
3094	} else {
3095		rid = PCIR_BAR(2);
3096		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3097		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3098		    0x177, 8, 0);
3099		rid = PCIR_BAR(3);
3100		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3101		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3102		    0x376, 1, 0);
3103	}
3104	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3105	    prefetchmask & (1 << 4));
3106	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3107	    prefetchmask & (1 << 5));
3108}
3109
/*
 * Determine the legacy INTx IRQ for a device and record it both in the
 * INTLINE config register and as the rid 0 SYS_RES_IRQ entry in the
 * device's resource list.  The order of precedence is: user tunable,
 * then (depending on force_route) the bus's interrupt routing or the
 * existing INTLINE value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1-254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3157
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* The BIOS/SMM owns the controller; request a handoff. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the BIOS to drop OHCI_IR. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* BIOS never released the controller: reset it. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3194
3195/* Perform early UHCI takeover from SMM. */
3196static void
3197uhci_early_takeover(device_t self)
3198{
3199	struct resource *res;
3200	int rid;
3201
3202	/*
3203	 * Set the PIRQD enable bit and switch off all the others. We don't
3204	 * want legacy support to interfere with us XXX Does this also mean
3205	 * that the BIOS won't touch the keyboard anymore if it is connected
3206	 * to the ports of the root hub?
3207	 */
3208	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3209
3210	/* Disable interrupts */
3211	rid = PCI_UHCI_BASE_REG;
3212	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3213	if (res != NULL) {
3214		bus_write_2(res, UHCI_INTR, 0);
3215		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3216	}
3217}
3218
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;	/* EHCI EECP is an 8-bit config-space pointer. */
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the extended capability list for the legacy-support cap. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A set BIOS semaphore means the BIOS owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to release the semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3274
3275/* Perform early XHCI takeover from SMM. */
3276static void
3277xhci_early_takeover(device_t self)
3278{
3279	struct resource *res;
3280	uint32_t cparams;
3281	uint32_t eec;
3282	uint8_t eecp;
3283	uint8_t bios_sem;
3284	uint8_t offs;
3285	int rid;
3286	int i;
3287
3288	rid = PCIR_BAR(0);
3289	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3290	if (res == NULL)
3291		return;
3292
3293	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3294
3295	eec = -1;
3296
3297	/* Synchronise with the BIOS if it owns the controller. */
3298	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3299	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3300		eec = bus_read_4(res, eecp);
3301
3302		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3303			continue;
3304
3305		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3306		if (bios_sem == 0)
3307			continue;
3308
3309		if (bootverbose)
3310			printf("xhci early: "
3311			    "SMM active, request owner change\n");
3312
3313		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3314
3315		/* wait a maximum of 5 second */
3316
3317		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3318			DELAY(1000);
3319			bios_sem = bus_read_1(res, eecp +
3320			    XHCI_XECP_BIOS_SEM);
3321		}
3322
3323		if (bios_sem != 0) {
3324			if (bootverbose)
3325				printf("xhci early: "
3326				    "SMM does not respond\n");
3327		}
3328
3329		/* Disable interrupts */
3330		offs = bus_read_1(res, XHCI_CAPLENGTH);
3331		bus_write_4(res, offs + XHCI_USBCMD, 0);
3332		bus_read_4(res, offs + XHCI_USBSTS);
3333	}
3334	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3335}
3336
3337#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus range programmed into a bridge (PCI-PCI or
 * CardBus) from the parent so that later bus-number allocations do not
 * collide with it.  If the range is invalid or cannot be reserved, the
 * secbus/subbus registers are cleared so that the range is renumbered
 * lazily in pci_alloc_secbus().
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary/subordinate bus regs. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	/* An invalid range falls through to "clear:" below. */
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3442
/*
 * Allocate the secondary bus-number range for a bridge child (rid 0
 * only).  If the range was not reserved earlier (e.g. it was cleared
 * by pci_reserve_secbus()), lazily reserve a fresh range from the
 * parent and program the bridge's secbus/subbus registers to match.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary/subordinate bus regs. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Bus-number ranges are only exposed as rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without RF_ACTIVE so the child can activate it. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to match the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3493#endif
3494
/*
 * Populate a new child device's resource list: map its BARs (with
 * quirk handling and special-casing for legacy ATA), assign its INTx
 * interrupt, perform early USB controller takeover from SMM, and
 * reserve bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * pci_add_map() returns the number of BAR slots
			 * consumed (64-bit BARs take two), so it drives
			 * the loop increment.
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from the BIOS/SMM early if asked. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3576
3577static struct pci_devinfo *
3578pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3579    int slot, int func, size_t dinfo_size)
3580{
3581	struct pci_devinfo *dinfo;
3582
3583	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3584	if (dinfo != NULL)
3585		pci_add_child(dev, dinfo);
3586
3587	return (dinfo);
3588}
3589
/*
 * Enumerate all slots and functions on a single PCI bus and add a
 * child device for each function found.  dinfo_size lets subclassed
 * buses embed struct pci_devinfo in a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots reporting an unsupported header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may expose functions beyond 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3636
/*
 * Attach a newly-discovered PCI function as a child device: create the
 * device_t, hook up the devinfo as ivars, initialize its resource
 * list, snapshot and restore its config registers, and populate its
 * resources.  The save/restore pair runs before pci_add_resources()
 * so that the config state the BARs are read from is consistent.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	/* Notify the bus (or a subclass override) of the new child. */
	pci_child_added(dinfo->cfg.dev);
}
3649
/*
 * Default implementation of the pci_child_added bus method.  The
 * generic PCI bus has nothing to do here; subclasses may override.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3655
/*
 * Probe method for the generic PCI bus driver.  Always matches, but
 * at BUS_PROBE_GENERIC priority so more specific drivers win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3665
/*
 * Attach logic shared by the generic PCI bus driver and subclasses:
 * reserves this bus's own bus number from the parent bridge (when
 * PCI_RES_BUS is enabled) and establishes the DMA tag handed to
 * children.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number; released again in pci_detach(). */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only create a PCI_DMA_BOUNDARY-restricted tag for a top-level
	 * PCI bus (one whose grandparent is not itself in the "pci"
	 * devclass); nested buses inherit the parent tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3712
/*
 * Attach method for the generic PCI bus: perform the common setup,
 * then enumerate and attach all children on this bus.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3733
#ifdef PCI_RES_BUS
/*
 * Detach method: tear down all children first, then give the bus
 * number reserved in pci_attach_common() back to the parent bridge.
 */
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc = device_get_softc(dev);
	int error;

	error = bus_generic_detach(dev);
	if (error != 0)
		return (error);

	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
#endif
3748
3749static void
3750pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3751    int state)
3752{
3753	device_t child, pcib;
3754	int dstate, i;
3755
3756	/*
3757	 * Set the device to the given state.  If the firmware suggests
3758	 * a different power state, use it instead.  If power management
3759	 * is not present, the firmware is responsible for managing
3760	 * device power.  Skip children who aren't attached since they
3761	 * are handled separately.
3762	 */
3763	pcib = device_get_parent(dev);
3764	for (i = 0; i < numdevs; i++) {
3765		child = devlist[i];
3766		dstate = state;
3767		if (device_is_attached(child) &&
3768		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3769			pci_set_powerstate(child, dstate);
3770	}
3771}
3772
/*
 * Suspend method for the PCI bus: save each child's config space,
 * suspend the children, and (if pci_do_power_suspend is set) place
 * them in D3.  Config state is saved before the power-down so it can
 * be restored in pci_resume().
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3804
/*
 * Resume method for the PCI bus: power children back to D0, restore
 * their saved config space, then resume drivers.  Devices in classes
 * other drivers commonly depend on (display, memory, bridges, base
 * peripherals) are resumed before everything else.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save state for children that have no driver attached. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3860
3861static void
3862pci_load_vendor_data(void)
3863{
3864	caddr_t data;
3865	void *ptr;
3866	size_t sz;
3867
3868	data = preload_search_by_type("pci_vendor_data");
3869	if (data != NULL) {
3870		ptr = preload_fetch_addr(data);
3871		sz = preload_fetch_size(data);
3872		if (ptr != NULL && sz != 0) {
3873			pci_vendordata = ptr;
3874			pci_vendordata_size = sz;
3875			/* terminate the database */
3876			pci_vendordata[pci_vendordata_size] = '\n';
3877		}
3878	}
3879}
3880
/*
 * Bus callback invoked when a new PCI driver is registered: give the
 * driver a chance to identify devices, then reprobe every child that
 * currently has no driver (DS_NOTPRESENT), restoring its config state
 * first in case it was powered down.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children without an attached driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3909
/*
 * Bus method to set up an interrupt handler for a child.  In addition
 * to the generic setup, this maintains the MSI/MSI-X bookkeeping for
 * direct children: rid 0 is legacy INTx (ensure INTx decoding is
 * enabled); rid > 0 is an MSI or MSI-X vector, which is mapped via the
 * parent bridge and enabled on first use, with per-vector handler
 * reference counts kept in the devinfo.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first handler. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* On mapping failure, undo the generic setup above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4009
/*
 * Bus method to tear down a child's interrupt handler, undoing the
 * MSI/MSI-X (or INTx) bookkeeping performed in pci_setup_intr():
 * handler counts are decremented and the vector is masked/disabled
 * when the last handler goes away.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is assumed non-NULL here — any rid > 0
		 * IRQ should have a resource list entry; confirm callers
		 * cannot reach this with an unknown rid.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
4068
4069int
4070pci_print_child(device_t dev, device_t child)
4071{
4072	struct pci_devinfo *dinfo;
4073	struct resource_list *rl;
4074	int retval = 0;
4075
4076	dinfo = device_get_ivars(child);
4077	rl = &dinfo->resources;
4078
4079	retval += bus_print_child_header(dev, child);
4080
4081	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4082	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4083	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4084	if (device_get_flags(dev))
4085		retval += printf(" flags %#x", device_get_flags(dev));
4086
4087	retval += printf(" at device %d.%d", pci_get_slot(child),
4088	    pci_get_function(child));
4089
4090	retval += bus_print_child_domain(dev, child);
4091	retval += bus_print_child_footer(dev, child);
4092
4093	return (retval);
4094}
4095
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch()
 * when no driver attaches and the device is not in the vendor
 * database.  A subclass of -1 is the class-wide default entry;
 * 'report' selects whether the device is announced unconditionally
 * (1) or only under bootverbose (0).  Terminated by a NULL 'desc'.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4190
/*
 * Report a PCI device for which no driver probe succeeded.  The
 * description comes from the loaded vendor database if available,
 * otherwise from the pci_nomatch_tab class/subclass table.  Finally
 * the device's config state is saved via pci_cfg_save() with
 * setstate=1.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				/*
				 * A subclass of -1 is the class-wide entry;
				 * keep scanning for a more specific subclass
				 * match, which overrides 'report'.
				 */
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4237
/*
 * Bus callback invoked after a child's driver has detached.  Releases
 * any resources the driver leaked, warning about each kind, then
 * saves the device's config state.  The release ordering below is
 * significant (see the comment in the body).
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4269
4270/*
4271 * Parse the PCI device database, if loaded, and return a pointer to a
4272 * description of the device.
4273 *
4274 * The database is flat text formatted as follows:
4275 *
4276 * Any line not in a valid format is ignored.
4277 * Lines are terminated with newline '\n' characters.
4278 *
4279 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4280 * the vendor name.
4281 *
4282 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4283 * - devices cannot be listed without a corresponding VENDOR line.
4284 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4285 * another TAB, then the device name.
4286 */
4287
4288/*
4289 * Assuming (ptr) points to the beginning of a line in the database,
4290 * return the vendor or device and description of the next entry.
4291 * The value of (vendor) or (device) inappropriate for the entry type
4292 * is set to -1.  Returns nonzero at the end of the database.
4293 *
 * Note that this is slightly fragile in the face of corrupt data;
4295 * we attempt to safeguard against this by spamming the end of the
4296 * database with a newline when we initialise.
4297 */
4298static int
4299pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4300{
4301	char	*cp = *ptr;
4302	int	left;
4303
4304	*device = -1;
4305	*vendor = -1;
4306	**desc = '\0';
4307	for (;;) {
4308		left = pci_vendordata_size - (cp - pci_vendordata);
4309		if (left <= 0) {
4310			*ptr = cp;
4311			return(1);
4312		}
4313
4314		/* vendor entry? */
4315		if (*cp != '\t' &&
4316		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4317			break;
4318		/* device entry? */
4319		if (*cp == '\t' &&
4320		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4321			break;
4322
4323		/* skip to next line */
4324		while (*cp != '\n' && left > 0) {
4325			cp++;
4326			left--;
4327		}
4328		if (*cp == '\n') {
4329			cp++;
4330			left--;
4331		}
4332	}
4333	/* skip to next line */
4334	while (*cp != '\n' && left > 0) {
4335		cp++;
4336		left--;
4337	}
4338	if (*cp == '\n' && left > 0)
4339		cp++;
4340	*ptr = cp;
4341	return(0);
4342}
4343
4344static char *
4345pci_describe_device(device_t dev)
4346{
4347	int	vendor, device;
4348	char	*desc, *vp, *dp, *line;
4349
4350	desc = vp = dp = NULL;
4351
4352	/*
4353	 * If we have no vendor data, we can't do anything.
4354	 */
4355	if (pci_vendordata == NULL)
4356		goto out;
4357
4358	/*
4359	 * Scan the vendor data looking for this device
4360	 */
4361	line = pci_vendordata;
4362	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4363		goto out;
4364	for (;;) {
4365		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4366			goto out;
4367		if (vendor == pci_get_vendor(dev))
4368			break;
4369	}
4370	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4371		goto out;
4372	for (;;) {
4373		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4374			*dp = 0;
4375			break;
4376		}
4377		if (vendor != -1) {
4378			*dp = 0;
4379			break;
4380		}
4381		if (device == pci_get_device(dev))
4382			break;
4383	}
4384	if (dp[0] == '\0')
4385		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4386	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4387	    NULL)
4388		sprintf(desc, "%s, %s", vp, dp);
4389out:
4390	if (vp != NULL)
4391		free(vp, M_DEVBUF);
4392	if (dp != NULL)
4393		free(dp, M_DEVBUF);
4394	return(desc);
4395}
4396
/*
 * Bus method: read one of a child's instance variables (attributes
 * cached from PCI config space) into *result.  Returns 0 on success,
 * EINVAL for attributes not applicable to this device, or ENOENT for
 * unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* MIN_GNT only exists for header type 0 devices. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* MAX_LAT only exists for header type 0 devices. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4487
/*
 * Bus method: write one of a child's instance variables.  Only the
 * interrupt pin is writable; the identity and location ivars are
 * read-only and return EINVAL, anything unknown returns ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4520
4521#include "opt_ddb.h"
4522#ifdef DDB
4523#include <ddb/ddb.h>
4524#include <sys/cons.h>
4525
4526/*
4527 * List resources based on pci map registers, used for within ddb
4528 */
4529
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one line per device with its location, class, subsystem,
 * vendor/device IDs, revision and header type.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without an attached driver print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4569#endif /* DDB */
4570
/*
 * Lazily reserve the resource backing a BAR that has no resource list
 * entry yet.  Sizes the BAR (or reuses a previously probed entry),
 * checks that the requested resource type matches what the BAR
 * decodes, overrides the caller's size and alignment with the BAR's
 * real decoded size, reserves the range, and programs the BAR with
 * the assigned address.  Returns the reserved (inactive) resource or
 * NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type disagrees with the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Keep the list consistent: drop the entry we just added. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4668
/*
 * Bus method: allocate a resource for a child device.  Requests from
 * grandchildren are passed straight to our parent.  For our own
 * children this performs lazy allocation: route a legacy INTx on the
 * first IRQ request, pass bridge window registers up the tree, and
 * reserve BAR-backed ranges on demand via pci_reserve_map() before
 * satisfying the request from the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4747
/*
 * Bus method: release a child's resource.  Requests from
 * grandchildren go to our parent.  Bridge I/O window registers are
 * not BARs and are passed up the tree; everything else is released
 * through the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4782
/*
 * Bus method: activate a child's resource.  After generic activation
 * succeeds, enable decoding: device ROM BARs get PCIM_BIOS_ENABLE set
 * explicitly, and I/O or memory decoding is enabled in the command
 * register via PCI_ENABLE_IO().
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4810
/*
 * Bus method: deactivate a child's resource.  For device ROM BARs the
 * enable bit is cleared again by rewriting the BAR with just the base
 * address (without PCIM_BIOS_ENABLE).
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4831
/*
 * Detach and destroy a child device: disable its memory and I/O
 * decoding, release every resource remaining on its resource list
 * (complaining about any a driver left active), then delete the
 * device and free its config structures.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4871
/*
 * Bus method: delete one entry from a child's resource list.  Does
 * nothing for grandchildren or unknown entries, and refuses (with a
 * diagnostic) if the resource is still active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4901
4902struct resource_list *
4903pci_get_resource_list (device_t dev, device_t child)
4904{
4905	struct pci_devinfo *dinfo = device_get_ivars(child);
4906
4907	return (&dinfo->resources);
4908}
4909
4910bus_dma_tag_t
4911pci_get_dma_tag(device_t bus, device_t dev)
4912{
4913	struct pci_softc *sc = device_get_softc(bus);
4914
4915	return (sc->sc_dma_tag);
4916}
4917
4918uint32_t
4919pci_read_config_method(device_t dev, device_t child, int reg, int width)
4920{
4921	struct pci_devinfo *dinfo = device_get_ivars(child);
4922	pcicfgregs *cfg = &dinfo->cfg;
4923
4924	return (PCIB_READ_CONFIG(device_get_parent(dev),
4925	    cfg->bus, cfg->slot, cfg->func, reg, width));
4926}
4927
4928void
4929pci_write_config_method(device_t dev, device_t child, int reg,
4930    uint32_t val, int width)
4931{
4932	struct pci_devinfo *dinfo = device_get_ivars(child);
4933	pcicfgregs *cfg = &dinfo->cfg;
4934
4935	PCIB_WRITE_CONFIG(device_get_parent(dev),
4936	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4937}
4938
4939int
4940pci_child_location_str_method(device_t dev, device_t child, char *buf,
4941    size_t buflen)
4942{
4943
4944	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
4945	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
4946	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
4947	return (0);
4948}
4949
4950int
4951pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4952    size_t buflen)
4953{
4954	struct pci_devinfo *dinfo;
4955	pcicfgregs *cfg;
4956
4957	dinfo = device_get_ivars(child);
4958	cfg = &dinfo->cfg;
4959	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4960	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4961	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4962	    cfg->progif);
4963	return (0);
4964}
4965
4966int
4967pci_assign_interrupt_method(device_t dev, device_t child)
4968{
4969	struct pci_devinfo *dinfo = device_get_ivars(child);
4970	pcicfgregs *cfg = &dinfo->cfg;
4971
4972	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4973	    cfg->intpin));
4974}
4975
/*
 * dev_lookup eventhandler: translate a pciconf-style selector string
 * ("pciD:B:S:F" or "pciB:S:F") into the matching device_t, if any.
 * Leaves *dev untouched when the name does not parse or a field is
 * out of range.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler already resolved the name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift them down and default domain 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* Functions above PCI_FUNCMAX are only valid (ARI) on slot 0. */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5024
/*
 * Module event handler.  On load: initialize the global device queue,
 * create the /dev/pci control device, load the vendor data file and
 * register the dev_lookup hook.  On unload: deregister the hook and
 * destroy the device.  Always returns 0.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5051
/*
 * Restore the PCI Express capability control registers saved by
 * pci_cfg_save_pcie().  Which registers exist depends on the
 * capability version and port type, so each write is gated by the
 * same tests used when saving.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist for capability version 2+. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5087
/*
 * Restore the PCI-X command register saved by pci_cfg_save_pcix().
 */
static void
pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
{
	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
	    dinfo->cfg.pcix.pcix_command,  2);
}
5094
/*
 * Restore a header type 0 device's saved config state (BARs, command
 * register, interrupt routing, timing registers, and PCIe/PCI-X/MSI/
 * MSI-X capability state), e.g. after a reset or power transition.
 * The device is first forced to D0, since the registers are only
 * meaningful there.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
5144
5145static void
5146pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5147{
5148#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5149	struct pcicfg_pcie *cfg;
5150	int version, pos;
5151
5152	cfg = &dinfo->cfg.pcie;
5153	pos = cfg->pcie_location;
5154
5155	cfg->pcie_flags = RREG(PCIER_FLAGS);
5156
5157	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5158
5159	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5160
5161	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5162	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5163	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5164		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5165
5166	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5167	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5168	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5169		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5170
5171	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5172	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5173		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5174
5175	if (version > 1) {
5176		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5177		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5178		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5179	}
5180#undef RREG
5181}
5182
5183static void
5184pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5185{
5186	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5187	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5188}
5189
/*
 * Cache a device's standard (type 0) configuration registers and any
 * PCI-e/PCI-X capability state into its pci_devinfo so they can be
 * restored by pci_cfg_restore().  If setstate is non-zero, the device
 * may additionally be powered down to D3 subject to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5275
5276/* Wrapper APIs suitable for device driver use. */
5277void
5278pci_save_state(device_t dev)
5279{
5280	struct pci_devinfo *dinfo;
5281
5282	dinfo = device_get_ivars(dev);
5283	pci_cfg_save(dev, dinfo, 0);
5284}
5285
5286void
5287pci_restore_state(device_t dev)
5288{
5289	struct pci_devinfo *dinfo;
5290
5291	dinfo = device_get_ivars(dev);
5292	pci_cfg_restore(dev, dinfo);
5293}
5294
5295static uint16_t
5296pci_get_rid_method(device_t dev, device_t child)
5297{
5298
5299	return (PCIB_GET_RID(device_get_parent(dev), child));
5300}
5301
5302/* Find the upstream port of a given PCI device in a root complex. */
5303device_t
5304pci_find_pcie_root_port(device_t dev)
5305{
5306	struct pci_devinfo *dinfo;
5307	devclass_t pci_class;
5308	device_t pcib, bus;
5309
5310	pci_class = devclass_find("pci");
5311	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5312	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5313
5314	/*
5315	 * Walk the bridge hierarchy until we find a PCI-e root
5316	 * port or a non-PCI device.
5317	 */
5318	for (;;) {
5319		bus = device_get_parent(dev);
5320		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5321		    device_get_nameunit(dev)));
5322
5323		pcib = device_get_parent(bus);
5324		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5325		    device_get_nameunit(bus)));
5326
5327		/*
5328		 * pcib's parent must be a PCI bus for this to be a
5329		 * PCI-PCI bridge.
5330		 */
5331		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5332			return (NULL);
5333
5334		dinfo = device_get_ivars(pcib);
5335		if (dinfo->cfg.pcie.pcie_location != 0 &&
5336		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5337			return (pcib);
5338
5339		dev = pcib;
5340	}
5341}
5342
5343/*
5344 * Wait for pending transactions to complete on a PCI-express function.
5345 *
5346 * The maximum delay is specified in milliseconds in max_delay.  Note
5347 * that this function may sleep.
5348 *
5349 * Returns true if the function is idle and false if the timeout is
5350 * exceeded.  If dev is not a PCI-express function, this returns true.
5351 */
5352bool
5353pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5354{
5355	struct pci_devinfo *dinfo = device_get_ivars(dev);
5356	uint16_t sta;
5357	int cap;
5358
5359	cap = dinfo->cfg.pcie.pcie_location;
5360	if (cap == 0)
5361		return (true);
5362
5363	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5364	while (sta & PCIEM_STA_TRANSACTION_PND) {
5365		if (max_delay == 0)
5366			return (false);
5367
5368		/* Poll once every 100 milliseconds up to the timeout. */
5369		if (max_delay > 100) {
5370			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5371			max_delay -= 100;
5372		} else {
5373			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5374			    C_HARDCLOCK);
5375			max_delay = 0;
5376		}
5377		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5378	}
5379
5380	return (true);
5381}
5382
5383/*
5384 * Determine the maximum Completion Timeout in microseconds.
5385 *
5386 * For non-PCI-express functions this returns 0.
5387 */
5388int
5389pcie_get_max_completion_timeout(device_t dev)
5390{
5391	struct pci_devinfo *dinfo = device_get_ivars(dev);
5392	int cap;
5393
5394	cap = dinfo->cfg.pcie.pcie_location;
5395	if (cap == 0)
5396		return (0);
5397
5398	/*
5399	 * Functions using the 1.x spec use the default timeout range of
5400	 * 50 microseconds to 50 milliseconds.  Functions that do not
5401	 * support programmable timeouts also use this range.
5402	 */
5403	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
5404	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
5405	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
5406		return (50 * 1000);
5407
5408	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
5409	    PCIEM_CTL2_COMP_TIMO_VAL) {
5410	case PCIEM_CTL2_COMP_TIMO_100US:
5411		return (100);
5412	case PCIEM_CTL2_COMP_TIMO_10MS:
5413		return (10 * 1000);
5414	case PCIEM_CTL2_COMP_TIMO_55MS:
5415		return (55 * 1000);
5416	case PCIEM_CTL2_COMP_TIMO_210MS:
5417		return (210 * 1000);
5418	case PCIEM_CTL2_COMP_TIMO_900MS:
5419		return (900 * 1000);
5420	case PCIEM_CTL2_COMP_TIMO_3500MS:
5421		return (3500 * 1000);
5422	case PCIEM_CTL2_COMP_TIMO_13S:
5423		return (13 * 1000 * 1000);
5424	case PCIEM_CTL2_COMP_TIMO_64S:
5425		return (64 * 1000 * 1000);
5426	default:
5427		return (50 * 1000);
5428	}
5429}
5430
5431/*
5432 * Perform a Function Level Reset (FLR) on a device.
5433 *
5434 * This function first waits for any pending transactions to complete
5435 * within the timeout specified by max_delay.  If transactions are
5436 * still pending, the function will return false without attempting a
5437 * reset.
5438 *
5439 * If dev is not a PCI-express function or does not support FLR, this
5440 * function returns false.
5441 *
5442 * Note that no registers are saved or restored.  The caller is
5443 * responsible for saving and restoring any registers including
5444 * PCI-standard registers via pci_save_state() and
5445 * pci_restore_state().
5446 */
5447bool
5448pcie_flr(device_t dev, u_int max_delay, bool force)
5449{
5450	struct pci_devinfo *dinfo = device_get_ivars(dev);
5451	uint16_t cmd, ctl;
5452	int compl_delay;
5453	int cap;
5454
5455	cap = dinfo->cfg.pcie.pcie_location;
5456	if (cap == 0)
5457		return (false);
5458
5459	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
5460		return (false);
5461
5462	/*
5463	 * Disable busmastering to prevent generation of new
5464	 * transactions while waiting for the device to go idle.  If
5465	 * the idle timeout fails, the command register is restored
5466	 * which will re-enable busmastering.
5467	 */
5468	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
5469	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
5470	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
5471		if (!force) {
5472			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
5473			return (false);
5474		}
5475		pci_printf(&dinfo->cfg,
5476		    "Resetting with transactions pending after %d ms\n",
5477		    max_delay);
5478
5479		/*
5480		 * Extend the post-FLR delay to cover the maximum
5481		 * Completion Timeout delay of anything in flight
5482		 * during the FLR delay.  Enforce a minimum delay of
5483		 * at least 10ms.
5484		 */
5485		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
5486		if (compl_delay < 10)
5487			compl_delay = 10;
5488	} else
5489		compl_delay = 0;
5490
5491	/* Initiate the reset. */
5492	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
5493	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
5494	    PCIEM_CTL_INITIATE_FLR, 2);
5495
5496	/* Wait for 100ms. */
5497	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
5498
5499	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
5500	    PCIEM_STA_TRANSACTION_PND)
5501		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
5502	return (true);
5503}
5504