pci.c revision 306520
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/dev/pci/pci.c 306520 2016-09-30 18:47:34Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/limits.h>
39#include <sys/linker.h>
40#include <sys/fcntl.h>
41#include <sys/conf.h>
42#include <sys/kernel.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/endian.h>
46
47#include <vm/vm.h>
48#include <vm/pmap.h>
49#include <vm/vm_extern.h>
50
51#include <sys/bus.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#include <machine/resource.h>
55#include <machine/stdarg.h>
56
57#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58#include <machine/intr_machdep.h>
59#endif
60
61#include <sys/pciio.h>
62#include <dev/pci/pcireg.h>
63#include <dev/pci/pcivar.h>
64#include <dev/pci/pci_private.h>
65
66#ifdef PCI_IOV
67#include <sys/nv.h>
68#include <dev/pci/pci_iov_private.h>
69#endif
70
71#include <dev/usb/controller/xhcireg.h>
72#include <dev/usb/controller/ehcireg.h>
73#include <dev/usb/controller/ohcireg.h>
74#include <dev/usb/controller/uhcireg.h>
75
76#include "pcib_if.h"
77#include "pci_if.h"
78
/*
 * True when config register offset 'reg' is the expansion-ROM BAR for
 * the header type recorded in 'cfg'.  Note: 'cfg' and 'reg' may be
 * evaluated more than once.  Both parameters are fully parenthesized so
 * that arbitrary expressions are safe as arguments.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
82
83static int		pci_has_quirk(uint32_t devid, int quirk);
84static pci_addr_t	pci_mapbase(uint64_t mapreg);
85static const char	*pci_maptype(uint64_t mapreg);
86static int		pci_maprange(uint64_t mapreg);
87static pci_addr_t	pci_rombase(uint64_t mapreg);
88static int		pci_romsize(uint64_t testval);
89static void		pci_fixancient(pcicfgregs *cfg);
90static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91
92static int		pci_porten(device_t dev);
93static int		pci_memen(device_t dev);
94static void		pci_assign_interrupt(device_t bus, device_t dev,
95			    int force_route);
96static int		pci_add_map(device_t bus, device_t dev, int reg,
97			    struct resource_list *rl, int force, int prefetch);
98static int		pci_probe(device_t dev);
99static int		pci_attach(device_t dev);
100static int		pci_detach(device_t dev);
101static void		pci_load_vendor_data(void);
102static int		pci_describe_parse_line(char **ptr, int *vendor,
103			    int *device, char **desc);
104static char		*pci_describe_device(device_t dev);
105static int		pci_modevent(module_t mod, int what, void *arg);
106static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
107			    pcicfgregs *cfg);
108static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
109static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
110			    int reg, uint32_t *data);
111#if 0
112static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
113			    int reg, uint32_t data);
114#endif
115static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
116static void		pci_mask_msix(device_t dev, u_int index);
117static void		pci_unmask_msix(device_t dev, u_int index);
118static int		pci_msi_blacklisted(void);
119static int		pci_msix_blacklisted(void);
120static void		pci_resume_msi(device_t dev);
121static void		pci_resume_msix(device_t dev);
122static int		pci_remap_intr_method(device_t bus, device_t dev,
123			    u_int irq);
124
125static int		pci_get_id_method(device_t dev, device_t child,
126			    enum pci_id_type type, uintptr_t *rid);
127
128static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
129    int b, int s, int f, uint16_t vid, uint16_t did);
130
/*
 * kobj method dispatch table for the PCI bus driver: the standard
 * device and bus interfaces plus the PCI-specific interface
 * (pci_if.m).  Terminated by DEVMETHOD_END.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach_name,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* pci attaches below pcib (PCI-to-PCI / host bridge) drivers. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * In-memory vendor description database; presumably filled in by
 * pci_load_vendor_data() (declared above) — contents not visible here.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
214
/*
 * Table of known-broken (or known-good-despite-appearances) devices,
 * keyed by the combined vendor/device ID.  Matched by pci_has_quirk().
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* quirk-specific (e.g. config offset for *_MAP_REG) */
	int	arg2;
};

static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* All-zero sentinel terminates the table (pci_has_quirk()). */
	{ 0 }
};
305
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every enumerated PCI function (see pci_fill_devinfo()). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;
/* Set in pci_read_cap() once a PCIe / PCI-X capability is seen anywhere. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB takeover defaults on only where a legacy BIOS may own the controller. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
385
386static int
387pci_has_quirk(uint32_t devid, int quirk)
388{
389	const struct pci_quirk *q;
390
391	for (q = &pci_quirks[0]; q->devid; q++) {
392		if (q->devid == devid && q->type == quirk)
393			return (1);
394	}
395	return (0);
396}
397
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: same lookup restricted to PCI domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
406
407/* Find a device_t by domain/bus/slot/function */
408
409device_t
410pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
411{
412	struct pci_devinfo *dinfo;
413
414	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
415		if ((dinfo->cfg.domain == domain) &&
416		    (dinfo->cfg.bus == bus) &&
417		    (dinfo->cfg.slot == slot) &&
418		    (dinfo->cfg.func == func)) {
419			return (dinfo->cfg.dev);
420		}
421	}
422
423	return (NULL);
424}
425
426/* Find a device_t by vendor/device ID */
427
428device_t
429pci_find_device(uint16_t vendor, uint16_t device)
430{
431	struct pci_devinfo *dinfo;
432
433	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
434		if ((dinfo->cfg.vendor == vendor) &&
435		    (dinfo->cfg.device == device)) {
436			return (dinfo->cfg.dev);
437		}
438	}
439
440	return (NULL);
441}
442
443device_t
444pci_find_class(uint8_t class, uint8_t subclass)
445{
446	struct pci_devinfo *dinfo;
447
448	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
449		if (dinfo->cfg.baseclass == class &&
450		    dinfo->cfg.subclass == subclass) {
451			return (dinfo->cfg.dev);
452		}
453	}
454
455	return (NULL);
456}
457
/*
 * printf(9) wrapper that prefixes the message with the device's
 * "pci<domain>:<bus>:<slot>:<func>: " selector.  Returns the total
 * number of characters printed (prefix included).
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
471
472/* return base address of memory or port map */
473
474static pci_addr_t
475pci_mapbase(uint64_t mapreg)
476{
477
478	if (PCI_BAR_MEM(mapreg))
479		return (mapreg & PCIM_BAR_MEM_BASE);
480	else
481		return (mapreg & PCIM_BAR_IO_BASE);
482}
483
484/* return map type of memory or port map */
485
486static const char *
487pci_maptype(uint64_t mapreg)
488{
489
490	if (PCI_BAR_IO(mapreg))
491		return ("I/O Port");
492	if (mapreg & PCIM_BAR_MEM_PREFETCH)
493		return ("Prefetchable Memory");
494	return ("Memory");
495}
496
497/* return log2 of map size decoded for memory or port map */
498
499int
500pci_mapsize(uint64_t testval)
501{
502	int ln2size;
503
504	testval = pci_mapbase(testval);
505	ln2size = 0;
506	if (testval != 0) {
507		while ((testval & 1) == 0)
508		{
509			ln2size++;
510			testval >>= 1;
511		}
512	}
513	return (ln2size);
514}
515
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the non-address bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
524
/* return log2 of map size decoded for device ROM */
526
527static int
528pci_romsize(uint64_t testval)
529{
530	int ln2size;
531
532	testval = pci_rombase(testval);
533	ln2size = 0;
534	if (testval != 0) {
535		while ((testval & 1) == 0)
536		{
537			ln2size++;
538			testval >>= 1;
539		}
540	}
541	return (ln2size);
542}
543
544/* return log2 of address range supported by map register */
545
546static int
547pci_maprange(uint64_t mapreg)
548{
549	int ln2range = 0;
550
551	if (PCI_BAR_IO(mapreg))
552		ln2range = 32;
553	else
554		switch (mapreg & PCIM_BAR_MEM_TYPE) {
555		case PCIM_BAR_MEM_32:
556			ln2range = 32;
557			break;
558		case PCIM_BAR_MEM_1MB:
559			ln2range = 20;
560			break;
561		case PCIM_BAR_MEM_64:
562			ln2range = 64;
563			break;
564		}
565	return (ln2range);
566}
567
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only normal (type 0) headers are candidates for this fixup. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
580
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: plain function with up to PCI_MAXMAPS_0 BARs. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge; record bus routing registers. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge; also carries a subvendor ID. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
616
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
/* NB: REG stays defined for pci_fill_devinfo() below; #undef follows it. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	/* Vendor 0xffff means no function present at this b/s/f. */
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}
631
/* Default PCI_ALLOC_DEVINFO method: a plain zeroed allocation. */
struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
	    M_WAITOK | M_ZERO));
}
639
/*
 * Allocate a devinfo for the function at d/b/s/f, read its standard
 * config header (using the REG macro defined in pci_read_device()),
 * parse its capability list, and link it onto the global device list.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Split the multi-function bit out of the raw header type. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the selector and IDs into the pci_conf snapshot. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG
705
/*
 * Parse the Enhanced Allocation (EA) capability at cfg->ea.ea_location
 * and append one pci_ea_entry per EA entry to cfg->ea.ea_entries.
 * No-op when the capability has not been located yet.
 */
static void
pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
    cfg->ea.ea_location + (n), w)
	int num_ent;
	int ptr;
	int a, b;
	uint32_t val;
	int ent_size;
	uint32_t dw[4];
	uint64_t base, max_offset;
	struct pci_ea_entry *eae;

	if (cfg->ea.ea_location == 0)
		return;

	STAILQ_INIT(&cfg->ea.ea_entries);

	/* Determine the number of entries */
	num_ent = REG(PCIR_EA_NUM_ENT, 2);
	num_ent &= PCIM_EA_NUM_ENT_MASK;

	/* Find the first entry to care of */
	ptr = PCIR_EA_FIRST_ENT;

	/* Skip DWORD 2 for type 1 functions */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
		ptr += 4;

	for (a = 0; a < num_ent; a++) {

		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;

		/* Read a number of dwords in the entry */
		val = REG(ptr, 4);
		ptr += 4;
		ent_size = (val & PCIM_EA_ES);

		for (b = 0; b < ent_size; b++) {
			dw[b] = REG(ptr, 4);
			ptr += 4;
		}

		eae->eae_flags = val;
		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;

		/* Low dwords first; upper halves follow when IS_64 is set. */
		base = dw[0] & PCIM_EA_FIELD_MASK;
		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
		b = 2;
		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
			base |= (uint64_t)dw[b] << 32UL;
			b++;
		}
		if (((dw[1] & PCIM_EA_IS_64) != 0)
		    && (b < ent_size)) {
			max_offset |= (uint64_t)dw[b] << 32UL;
			b++;
		}

		eae->eae_base = base;
		eae->eae_max_offset = max_offset;

		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);

		if (bootverbose) {
			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
		}
	}
}
#undef REG
780
/*
 * Walk the function's PCI capability list and record the location (and
 * key fields) of each recognized capability in 'cfg': power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express and
 * Enhanced Allocation.  Also sets the global pcix_chipset/pcie_chipset
 * hints as a side effect.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
945
946/*
947 * PCI Vital Product Data
948 */
949
950#define	PCI_VPD_TIMEOUT		1000000
951
952static int
953pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
954{
955	int count = PCI_VPD_TIMEOUT;
956
957	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
958
959	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
960
961	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
962		if (--count < 0)
963			return (ENXIO);
964		DELAY(1);	/* limit looping */
965	}
966	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
967
968	return (0);
969}
970
#if 0
/*
 * Write one 32-bit VPD dword at 'reg' and poll for completion.
 * Setting bit 15 of the address register starts the write; the loop
 * below waits for the hardware to clear it.  Compiled out; kept for
 * reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
990
991#undef PCI_VPD_TIMEOUT
992
/* Iterator state for reading a device's VPD one byte at a time. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for VPD register access */
	pcicfgregs	*cfg;		/* config regs of the device being read */
	uint32_t	val;		/* current 32-bit read buffer */
	int		bytesinval;	/* unconsumed bytes remaining in 'val' */
	int		off;		/* next VPD address to fetch */
	uint8_t		cksum;		/* running sum of every byte consumed */
};
1001
1002static int
1003vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1004{
1005	uint32_t reg;
1006	uint8_t byte;
1007
1008	if (vrs->bytesinval == 0) {
1009		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1010			return (ENXIO);
1011		vrs->val = le32toh(reg);
1012		vrs->off += 4;
1013		byte = vrs->val & 0xff;
1014		vrs->bytesinval = 3;
1015	} else {
1016		vrs->val = vrs->val >> 8;
1017		byte = vrs->val & 0xff;
1018		vrs->bytesinval--;
1019	}
1020
1021	vrs->cksum += byte;
1022	*data = byte;
1023	return (0);
1024}
1025
/*
 * Parse a device's Vital Product Data into cfg->vpd.  A small state
 * machine walks the VPD resource list, allocating the identifier
 * string (vpd_ident), the read-only keyword array (vpd_ros), and the
 * write keyword array (vpd_w).  On checksum failure the read-only
 * data is discarded; on I/O error everything parsed so far is freed.
 * vpd_cached is set in all cases so parsing is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	/*
	 * States: 0 = item header, 1 = ident string, 2/3 = VPD-R keyword
	 * header/value, 5/6 = VPD-W keyword header/value.  -1 ends the
	 * walk normally; -2 signals an I/O error.
	 */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: length in the low 3 bits. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the keyword array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum: the sum of all bytes through it must
			 * be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * NOTE(review): no transition in this function sets
			 * state 4; this skip-bytes state appears unreachable.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the cache valid (even on failure) so we never re-parse. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1297
1298int
1299pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1300{
1301	struct pci_devinfo *dinfo = device_get_ivars(child);
1302	pcicfgregs *cfg = &dinfo->cfg;
1303
1304	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1305		pci_read_vpd(device_get_parent(dev), cfg);
1306
1307	*identptr = cfg->vpd.vpd_ident;
1308
1309	if (*identptr == NULL)
1310		return (ENXIO);
1311
1312	return (0);
1313}
1314
1315int
1316pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1317	const char **vptr)
1318{
1319	struct pci_devinfo *dinfo = device_get_ivars(child);
1320	pcicfgregs *cfg = &dinfo->cfg;
1321	int i;
1322
1323	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1324		pci_read_vpd(device_get_parent(dev), cfg);
1325
1326	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1327		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1328		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1329			*vptr = cfg->vpd.vpd_ros[i].value;
1330			return (0);
1331		}
1332
1333	*vptr = NULL;
1334	return (ENXIO);
1335}
1336
1337struct pcicfg_vpd *
1338pci_fetch_vpd_list(device_t dev)
1339{
1340	struct pci_devinfo *dinfo = device_get_ivars(dev);
1341	pcicfgregs *cfg = &dinfo->cfg;
1342
1343	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1344		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1345	return (&cfg->vpd);
1346}
1347
1348/*
1349 * Find the requested HyperTransport capability and return the offset
1350 * in configuration space via the pointer provided.  The function
1351 * returns 0 on success and an error code otherwise.
1352 */
1353int
1354pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1355{
1356	int ptr, error;
1357	uint16_t val;
1358
1359	error = pci_find_cap(child, PCIY_HT, &ptr);
1360	if (error)
1361		return (error);
1362
1363	/*
1364	 * Traverse the capabilities list checking each HT capability
1365	 * to see if it matches the requested HT capability.
1366	 */
1367	while (ptr != 0) {
1368		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1369		if (capability == PCIM_HTCAP_SLAVE ||
1370		    capability == PCIM_HTCAP_HOST)
1371			val &= 0xe000;
1372		else
1373			val &= PCIM_HTCMD_CAP_MASK;
1374		if (val == capability) {
1375			if (capreg != NULL)
1376				*capreg = ptr;
1377			return (0);
1378		}
1379
1380		/* Skip to the next HT capability. */
1381		while (ptr != 0) {
1382			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1383			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1384			    PCIY_HT)
1385				break;
1386		}
1387	}
1388	return (ENOENT);
1389}
1390
1391/*
1392 * Find the requested capability and return the offset in
1393 * configuration space via the pointer provided.  The function returns
1394 * 0 on success and an error code otherwise.
1395 */
1396int
1397pci_find_cap_method(device_t dev, device_t child, int capability,
1398    int *capreg)
1399{
1400	struct pci_devinfo *dinfo = device_get_ivars(child);
1401	pcicfgregs *cfg = &dinfo->cfg;
1402	u_int32_t status;
1403	u_int8_t ptr;
1404
1405	/*
1406	 * Check the CAP_LIST bit of the PCI status register first.
1407	 */
1408	status = pci_read_config(child, PCIR_STATUS, 2);
1409	if (!(status & PCIM_STATUS_CAPPRESENT))
1410		return (ENXIO);
1411
1412	/*
1413	 * Determine the start pointer of the capabilities list.
1414	 */
1415	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1416	case PCIM_HDRTYPE_NORMAL:
1417	case PCIM_HDRTYPE_BRIDGE:
1418		ptr = PCIR_CAP_PTR;
1419		break;
1420	case PCIM_HDRTYPE_CARDBUS:
1421		ptr = PCIR_CAP_PTR_2;
1422		break;
1423	default:
1424		/* XXX: panic? */
1425		return (ENXIO);		/* no extended capabilities support */
1426	}
1427	ptr = pci_read_config(child, ptr, 1);
1428
1429	/*
1430	 * Traverse the capabilities list.
1431	 */
1432	while (ptr != 0) {
1433		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1434			if (capreg != NULL)
1435				*capreg = ptr;
1436			return (0);
1437		}
1438		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1439	}
1440
1441	return (ENOENT);
1442}
1443
1444/*
1445 * Find the requested extended capability and return the offset in
1446 * configuration space via the pointer provided.  The function returns
1447 * 0 on success and an error code otherwise.
1448 */
1449int
1450pci_find_extcap_method(device_t dev, device_t child, int capability,
1451    int *capreg)
1452{
1453	struct pci_devinfo *dinfo = device_get_ivars(child);
1454	pcicfgregs *cfg = &dinfo->cfg;
1455	uint32_t ecap;
1456	uint16_t ptr;
1457
1458	/* Only supported for PCI-express devices. */
1459	if (cfg->pcie.pcie_location == 0)
1460		return (ENXIO);
1461
1462	ptr = PCIR_EXTCAP;
1463	ecap = pci_read_config(child, ptr, 4);
1464	if (ecap == 0xffffffff || ecap == 0)
1465		return (ENOENT);
1466	for (;;) {
1467		if (PCI_EXTCAP_ID(ecap) == capability) {
1468			if (capreg != NULL)
1469				*capreg = ptr;
1470			return (0);
1471		}
1472		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1473		if (ptr == 0)
1474			break;
1475		ecap = pci_read_config(child, ptr, 4);
1476	}
1477
1478	return (ENOENT);
1479}
1480
1481/*
1482 * Support for MSI-X message interrupts.
1483 */
/*
 * Program MSI-X table entry 'index' with the given message address and
 * data, then enable any HyperTransport MSI mapping for the address.
 */
void
pci_enable_msix_method(device_t dev, device_t child, u_int index,
    uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each table entry is 16 bytes: addr lo, addr hi, data, vector ctrl. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
1501
1502void
1503pci_mask_msix(device_t dev, u_int index)
1504{
1505	struct pci_devinfo *dinfo = device_get_ivars(dev);
1506	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1507	uint32_t offset, val;
1508
1509	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1510	offset = msix->msix_table_offset + index * 16 + 12;
1511	val = bus_read_4(msix->msix_table_res, offset);
1512	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1513		val |= PCIM_MSIX_VCTRL_MASK;
1514		bus_write_4(msix->msix_table_res, offset, val);
1515	}
1516}
1517
1518void
1519pci_unmask_msix(device_t dev, u_int index)
1520{
1521	struct pci_devinfo *dinfo = device_get_ivars(dev);
1522	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1523	uint32_t offset, val;
1524
1525	KASSERT(msix->msix_table_len > index, ("bogus index"));
1526	offset = msix->msix_table_offset + index * 16 + 12;
1527	val = bus_read_4(msix->msix_table_res, offset);
1528	if (val & PCIM_MSIX_VCTRL_MASK) {
1529		val &= ~PCIM_MSIX_VCTRL_MASK;
1530		bus_write_4(msix->msix_table_res, offset, val);
1531	}
1532}
1533
1534int
1535pci_pending_msix(device_t dev, u_int index)
1536{
1537	struct pci_devinfo *dinfo = device_get_ivars(dev);
1538	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1539	uint32_t offset, bit;
1540
1541	KASSERT(msix->msix_table_len > index, ("bogus index"));
1542	offset = msix->msix_pba_offset + (index / 32) * 4;
1543	bit = 1 << index % 32;
1544	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1545}
1546
1547/*
1548 * Restore MSI-X registers and table during resume.  If MSI-X is
1549 * enabled then walk the virtual table to restore the actual MSI-X
1550 * table.
1551 */
1552static void
1553pci_resume_msix(device_t dev)
1554{
1555	struct pci_devinfo *dinfo = device_get_ivars(dev);
1556	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1557	struct msix_table_entry *mte;
1558	struct msix_vector *mv;
1559	int i;
1560
1561	if (msix->msix_alloc > 0) {
1562		/* First, mask all vectors. */
1563		for (i = 0; i < msix->msix_msgnum; i++)
1564			pci_mask_msix(dev, i);
1565
1566		/* Second, program any messages with at least one handler. */
1567		for (i = 0; i < msix->msix_table_len; i++) {
1568			mte = &msix->msix_table[i];
1569			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1570				continue;
1571			mv = &msix->msix_vectors[mte->mte_vector - 1];
1572			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1573			pci_unmask_msix(dev, i);
1574		}
1575	}
1576	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1577	    msix->msix_ctrl, 2);
1578}
1579
1580/*
1581 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1582 * returned in *count.  After this function returns, each message will be
1583 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1584 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, rle still names the table res. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Partial allocations are OK past the first message. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is the number of messages actually obtained. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1719
1720/*
1721 * By default, pci_alloc_msix() will assign the allocated IRQ
1722 * resources consecutively to the first N messages in the MSI-X table.
1723 * However, device drivers may want to use different layouts if they
1724 * either receive fewer messages than they asked for, or they wish to
1725 * populate the MSI-X table sparsely.  This method allows the driver
1726 * to specify what layout it wants.  It must be called after a
1727 * successful pci_alloc_msix() but before any of the associated
1728 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1729 *
1730 * The 'vectors' array contains 'count' message vectors.  The array
1731 * maps directly to the MSI-X table in that index 0 in the array
1732 * specifies the vector for the first message in the MSI-X table, etc.
1733 * The vector value in each array index can either be 0 to indicate
1734 * that no vector should be assigned to a message slot, or it can be a
1735 * number from 1 to N (where N is the count returned from a
1736 * succcessful call to pci_alloc_msix()) to indicate which message
1737 * vector (IRQ) to be used for the corresponding message.
1738 *
1739 * On successful return, each message with a non-zero vector will have
1740 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1741 * 1.  Additionally, if any of the IRQs allocated via the previous
1742 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1743 * will be freed back to the system automatically.
1744 *
1745 * For example, suppose a driver has a MSI-X table with 6 messages and
1746 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1747 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1748 * C.  After the call to pci_alloc_msix(), the device will be setup to
1749 * have an MSI-X table of ABC--- (where - means no vector assigned).
1750 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1751 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1752 * be freed back to the system.  This device will also have valid
1753 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1754 *
1755 * In any case, the SYS_RES_IRQ rid X will always map to the message
1756 * at MSI-X table index X - 1 and will only be valid if a vector is
1757 * assigned to that table entry.
1758 */
int
pci_remap_msix_method(device_t dev, device_t child, int count,
    const u_int *vectors)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i, irq, j, *used;

	/*
	 * Have to have at least one message in the table but the
	 * table can't be bigger than the actual MSI-X table in the
	 * device.
	 */
	if (count == 0 || count > msix->msix_msgnum)
		return (EINVAL);

	/* Sanity check the vectors. */
	for (i = 0; i < count; i++)
		if (vectors[i] > msix->msix_alloc)
			return (EINVAL);

	/*
	 * Make sure there aren't any holes in the vectors to be used.
	 * It's a big pain to support it, and it doesn't really make
	 * sense anyway.  Also, at least one vector must be used.
	 */
	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++)
		if (vectors[i] != 0)
			used[vectors[i] - 1] = 1;
	/* A hole exists wherever an unused vector precedes a used one. */
	for (i = 0; i < msix->msix_alloc - 1; i++)
		if (used[i] == 0 && used[i + 1] == 1) {
			free(used, M_DEVBUF);
			return (EINVAL);
		}
	if (used[0] != 1) {
		free(used, M_DEVBUF);
		return (EINVAL);
	}

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
	}

	/* Free the existing resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}

	/*
	 * Build the new virtual table keeping track of which vectors are
	 * used.
	 */
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < count; i++)
		msix->msix_table[i].mte_vector = vectors[i];
	msix->msix_table_len = count;

	/* Free any unused IRQs and resize the vectors array if necessary. */
	j = msix->msix_alloc - 1;
	if (used[j] == 0) {
		struct msix_vector *vec;

		/* Unused vectors form a contiguous tail (no holes allowed). */
		while (used[j] == 0) {
			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
			    msix->msix_vectors[j].mv_irq);
			j--;
		}
		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
		    M_WAITOK);
		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
		    (j + 1));
		free(msix->msix_vectors, M_DEVBUF);
		msix->msix_vectors = vec;
		msix->msix_alloc = j + 1;
	}
	free(used, M_DEVBUF);

	/* Map the IRQs onto the rids. */
	for (i = 0; i < count; i++) {
		if (vectors[i] == 0)
			continue;
		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}

	if (bootverbose) {
		device_printf(child, "Remapped MSI-X IRQs as: ");
		for (i = 0; i < count; i++) {
			if (i != 0)
				printf(", ");
			if (vectors[i] == 0)
				printf("---");
			else
				printf("%d",
				    msix->msix_vectors[vectors[i] - 1].mv_irq);
		}
		printf("\n");
	}

	return (0);
}
1880
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, tear down the virtual table, and hand every
 * IRQ back to the parent bridge.  Fails with EBUSY if any message
 * still has handlers or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1927
1928/*
1929 * Return the max supported MSI-X messages this device supports.
1930 * Basically, assuming the MD code can alloc messages, this function
1931 * should return the maximum value that pci_alloc_msix() can return.
1932 * Thus, it is subject to the tunables, etc.
1933 */
1934int
1935pci_msix_count_method(device_t dev, device_t child)
1936{
1937	struct pci_devinfo *dinfo = device_get_ivars(child);
1938	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1939
1940	if (pci_do_msix && msix->msix_location != 0)
1941		return (msix->msix_msgnum);
1942	return (0);
1943}
1944
1945int
1946pci_msix_pba_bar_method(device_t dev, device_t child)
1947{
1948	struct pci_devinfo *dinfo = device_get_ivars(child);
1949	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1950
1951	if (pci_do_msix && msix->msix_location != 0)
1952		return (msix->msix_pba_bar);
1953	return (-1);
1954}
1955
1956int
1957pci_msix_table_bar_method(device_t dev, device_t child)
1958{
1959	struct pci_devinfo *dinfo = device_get_ivars(child);
1960	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1961
1962	if (pci_do_msix && msix->msix_location != 0)
1963		return (msix->msix_table_bar);
1964	return (-1);
1965}
1966
1967/*
1968 * HyperTransport MSI mapping control
1969 */
1970void
1971pci_ht_map_msi(device_t dev, uint64_t addr)
1972{
1973	struct pci_devinfo *dinfo = device_get_ivars(dev);
1974	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1975
1976	if (!ht->ht_msimap)
1977		return;
1978
1979	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1980	    ht->ht_msiaddr >> 20 == addr >> 20) {
1981		/* Enable MSI -> HT mapping. */
1982		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1983		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1984		    ht->ht_msictrl, 2);
1985	}
1986
1987	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1988		/* Disable MSI -> HT mapping. */
1989		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1990		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1991		    ht->ht_msictrl, 2);
1992	}
1993}
1994
1995int
1996pci_get_max_payload(device_t dev)
1997{
1998	struct pci_devinfo *dinfo = device_get_ivars(dev);
1999	int cap;
2000	uint16_t val;
2001
2002	cap = dinfo->cfg.pcie.pcie_location;
2003	if (cap == 0)
2004		return (0);
2005	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2006	val &= PCIEM_CTL_MAX_PAYLOAD;
2007	val >>= 5;
2008	return (1 << (val + 7));
2009}
2010
2011int
2012pci_get_max_read_req(device_t dev)
2013{
2014	struct pci_devinfo *dinfo = device_get_ivars(dev);
2015	int cap;
2016	uint16_t val;
2017
2018	cap = dinfo->cfg.pcie.pcie_location;
2019	if (cap == 0)
2020		return (0);
2021	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2022	val &= PCIEM_CTL_MAX_READ_REQUEST;
2023	val >>= 12;
2024	return (1 << (val + 7));
2025}
2026
2027int
2028pci_set_max_read_req(device_t dev, int size)
2029{
2030	struct pci_devinfo *dinfo = device_get_ivars(dev);
2031	int cap;
2032	uint16_t val;
2033
2034	cap = dinfo->cfg.pcie.pcie_location;
2035	if (cap == 0)
2036		return (0);
2037	if (size < 128)
2038		size = 128;
2039	if (size > 4096)
2040		size = 4096;
2041	size = (1 << (fls(size) - 1));
2042	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2043	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2044	val |= (fls(size) - 8) << 12;
2045	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2046	return (size);
2047}
2048
2049uint32_t
2050pcie_read_config(device_t dev, int reg, int width)
2051{
2052	struct pci_devinfo *dinfo = device_get_ivars(dev);
2053	int cap;
2054
2055	cap = dinfo->cfg.pcie.pcie_location;
2056	if (cap == 0) {
2057		if (width == 2)
2058			return (0xffff);
2059		return (0xffffffff);
2060	}
2061
2062	return (pci_read_config(dev, cap + reg, width));
2063}
2064
2065void
2066pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2067{
2068	struct pci_devinfo *dinfo = device_get_ivars(dev);
2069	int cap;
2070
2071	cap = dinfo->cfg.pcie.pcie_location;
2072	if (cap == 0)
2073		return;
2074	pci_write_config(dev, cap + reg, value, width);
2075}
2076
2077/*
2078 * Adjusts a PCI-e capability register by clearing the bits in mask
2079 * and setting the bits in (value & mask).  Bits not set in mask are
2080 * not adjusted.
2081 *
2082 * Returns the old value on success or all ones on failure.
2083 */
2084uint32_t
2085pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2086    int width)
2087{
2088	struct pci_devinfo *dinfo = device_get_ivars(dev);
2089	uint32_t old, new;
2090	int cap;
2091
2092	cap = dinfo->cfg.pcie.pcie_location;
2093	if (cap == 0) {
2094		if (width == 2)
2095			return (0xffff);
2096		return (0xffffffff);
2097	}
2098
2099	old = pci_read_config(dev, cap + reg, width);
2100	new = old & ~mask;
2101	new |= (value & mask);
2102	pci_write_config(dev, cap + reg, new, width);
2103	return (old);
2104}
2105
2106/*
2107 * Support for MSI message signalled interrupts.
2108 */
2109void
2110pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2111    uint16_t data)
2112{
2113	struct pci_devinfo *dinfo = device_get_ivars(child);
2114	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2115
2116	/* Write data and address values. */
2117	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2118	    address & 0xffffffff, 4);
2119	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2120		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2121		    address >> 32, 4);
2122		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2123		    data, 2);
2124	} else
2125		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2126		    2);
2127
2128	/* Enable MSI in the control register. */
2129	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2130	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2131	    msi->msi_ctrl, 2);
2132
2133	/* Enable MSI -> HT mapping. */
2134	pci_ht_map_msi(child, address);
2135}
2136
2137void
2138pci_disable_msi_method(device_t dev, device_t child)
2139{
2140	struct pci_devinfo *dinfo = device_get_ivars(child);
2141	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2142
2143	/* Disable MSI -> HT mapping. */
2144	pci_ht_map_msi(child, 0);
2145
2146	/* Disable MSI in the control register. */
2147	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2148	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2149	    msi->msi_ctrl, 2);
2150}
2151
2152/*
2153 * Restore MSI registers during resume.  If MSI is enabled then
2154 * restore the data and address registers in addition to the control
2155 * register.
2156 */
2157static void
2158pci_resume_msi(device_t dev)
2159{
2160	struct pci_devinfo *dinfo = device_get_ivars(dev);
2161	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2162	uint64_t address;
2163	uint16_t data;
2164
2165	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2166		address = msi->msi_addr;
2167		data = msi->msi_data;
2168		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2169		    address & 0xffffffff, 4);
2170		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2171			pci_write_config(dev, msi->msi_location +
2172			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2173			pci_write_config(dev, msi->msi_location +
2174			    PCIR_MSI_DATA_64BIT, data, 2);
2175		} else
2176			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2177			    data, 2);
2178	}
2179	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2180	    2);
2181}
2182
/*
 * Reprogram the MSI or MSI-X address/data pair for 'irq' after it has
 * been remapped, fetching the new values from the parent bridge via
 * PCIB_MAP_MSI().  Returns 0 on success, an error from PCIB_MAP_MSI(),
 * or ENOENT if 'irq' is not one of this device's message interrupts.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/*
				 * Disable MSI while the address/data
				 * pair is rewritten, then re-enable
				 * with the new values.
				 */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/*
				 * One vector may back several table
				 * slots; rewrite every active slot
				 * that references vector i + 1, masking
				 * it around the update.
				 */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2255
2256/*
2257 * Returns true if the specified device is blacklisted because MSI
2258 * doesn't work.
2259 */
2260int
2261pci_msi_device_blacklisted(device_t dev)
2262{
2263
2264	if (!pci_honor_msi_blacklist)
2265		return (0);
2266
2267	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2268}
2269
2270/*
2271 * Determine if MSI is blacklisted globally on this system.  Currently,
2272 * we just check for blacklisted chipsets as represented by the
2273 * host-PCI bridge at device 0:0:0.  In the future, it may become
2274 * necessary to check other system attributes, such as the kenv values
2275 * that give the motherboard manufacturer and model number.
2276 */
2277static int
2278pci_msi_blacklisted(void)
2279{
2280	device_t dev;
2281
2282	if (!pci_honor_msi_blacklist)
2283		return (0);
2284
2285	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2286	if (!(pcie_chipset || pcix_chipset)) {
2287		if (vm_guest != VM_GUEST_NO) {
2288			/*
2289			 * Whitelist older chipsets in virtual
2290			 * machines known to support MSI.
2291			 */
2292			dev = pci_find_bsf(0, 0, 0);
2293			if (dev != NULL)
2294				return (!pci_has_quirk(pci_get_devid(dev),
2295					PCI_QUIRK_ENABLE_MSI_VM));
2296		}
2297		return (1);
2298	}
2299
2300	dev = pci_find_bsf(0, 0, 0);
2301	if (dev != NULL)
2302		return (pci_msi_device_blacklisted(dev));
2303	return (0);
2304}
2305
2306/*
2307 * Returns true if the specified device is blacklisted because MSI-X
2308 * doesn't work.  Note that this assumes that if MSI doesn't work,
2309 * MSI-X doesn't either.
2310 */
2311int
2312pci_msix_device_blacklisted(device_t dev)
2313{
2314
2315	if (!pci_honor_msi_blacklist)
2316		return (0);
2317
2318	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2319		return (1);
2320
2321	return (pci_msi_device_blacklisted(dev));
2322}
2323
2324/*
2325 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2326 * is blacklisted, assume that MSI-X is as well.  Check for additional
2327 * chipsets where MSI works but MSI-X does not.
2328 */
2329static int
2330pci_msix_blacklisted(void)
2331{
2332	device_t dev;
2333
2334	if (!pci_honor_msi_blacklist)
2335		return (0);
2336
2337	dev = pci_find_bsf(0, 0, 0);
2338	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2339	    PCI_QUIRK_DISABLE_MSIX))
2340		return (1);
2341
2342	return (pci_msi_blacklisted());
2343}
2344
2345/*
2346 * Attempt to allocate *count MSI messages.  The actual number allocated is
2347 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2349 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];	/* MSI allows at most 32 messages */
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Keep halving the request until the parent can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count: the Multiple
	 * Message Enable field holds log2(actual) in bits 6:4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2468
2469/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated: refuse to
	 * release while a driver still has handlers or has the IRQ
	 * resources themselves allocated.  Collect the IRQ numbers
	 * while walking the list so they can be handed back below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2517
2518/*
2519 * Return the max supported MSI messages this device supports.
2520 * Basically, assuming the MD code can alloc messages, this function
2521 * should return the maximum value that pci_alloc_msi() can return.
2522 * Thus, it is subject to the tunables, etc.
2523 */
2524int
2525pci_msi_count_method(device_t dev, device_t child)
2526{
2527	struct pci_devinfo *dinfo = device_get_ivars(child);
2528	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2529
2530	if (pci_do_msi && msi->msi_location != 0)
2531		return (msi->msi_msgnum);
2532	return (0);
2533}
2534
2535/* free pcicfgregs structure and all depending data structures */
2536
2537int
2538pci_freecfg(struct pci_devinfo *dinfo)
2539{
2540	struct devlist *devlist_head;
2541	struct pci_map *pm, *next;
2542	int i;
2543
2544	devlist_head = &pci_devq;
2545
2546	if (dinfo->cfg.vpd.vpd_reg) {
2547		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2548		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2549			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2550		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2551		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2552			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2553		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2554	}
2555	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2556		free(pm, M_DEVBUF);
2557	}
2558	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2559	free(dinfo, M_DEVBUF);
2560
2561	/* increment the generation count */
2562	pci_generation++;
2563
2564	/* we're losing one device */
2565	pci_numdevs--;
2566	return (0);
2567}
2568
2569/*
 * PCI power management
2571 */
/*
 * Transition 'child' to power state 'state' (PCI_POWERSTATE_D[0-3]).
 * Returns EOPNOTSUPP if the device has no PM capability or does not
 * support the requested state, EINVAL for an unknown state, 0 on
 * success.  Busy-waits (DELAY) for the spec-mandated settle time.
 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 and D2 support is optional; check the capability bits. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2643
2644int
2645pci_get_powerstate_method(device_t dev, device_t child)
2646{
2647	struct pci_devinfo *dinfo = device_get_ivars(child);
2648	pcicfgregs *cfg = &dinfo->cfg;
2649	uint16_t status;
2650	int result;
2651
2652	if (cfg->pp.pp_cap != 0) {
2653		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2654		switch (status & PCIM_PSTAT_DMASK) {
2655		case PCIM_PSTAT_D0:
2656			result = PCI_POWERSTATE_D0;
2657			break;
2658		case PCIM_PSTAT_D1:
2659			result = PCI_POWERSTATE_D1;
2660			break;
2661		case PCIM_PSTAT_D2:
2662			result = PCI_POWERSTATE_D2;
2663			break;
2664		case PCIM_PSTAT_D3:
2665			result = PCI_POWERSTATE_D3;
2666			break;
2667		default:
2668			result = PCI_POWERSTATE_UNKNOWN;
2669			break;
2670		}
2671	} else {
2672		/* No support, device is always at D0 */
2673		result = PCI_POWERSTATE_D0;
2674	}
2675	return (result);
2676}
2677
2678/*
2679 * Some convenience functions for PCI device drivers.
2680 */
2681
2682static __inline void
2683pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2684{
2685	uint16_t	command;
2686
2687	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2688	command |= bit;
2689	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2690}
2691
2692static __inline void
2693pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2694{
2695	uint16_t	command;
2696
2697	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2698	command &= ~bit;
2699	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2700}
2701
/* Turn on bus mastering for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2708
/* Turn off bus mastering for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2715
2716int
2717pci_enable_io_method(device_t dev, device_t child, int space)
2718{
2719	uint16_t bit;
2720
2721	switch(space) {
2722	case SYS_RES_IOPORT:
2723		bit = PCIM_CMD_PORTEN;
2724		break;
2725	case SYS_RES_MEMORY:
2726		bit = PCIM_CMD_MEMEN;
2727		break;
2728	default:
2729		return (EINVAL);
2730	}
2731	pci_set_command_bit(dev, child, bit);
2732	return (0);
2733}
2734
2735int
2736pci_disable_io_method(device_t dev, device_t child, int space)
2737{
2738	uint16_t bit;
2739
2740	switch(space) {
2741	case SYS_RES_IOPORT:
2742		bit = PCIM_CMD_PORTEN;
2743		break;
2744	case SYS_RES_MEMORY:
2745		bit = PCIM_CMD_MEMEN;
2746		break;
2747	default:
2748		return (EINVAL);
2749	}
2750	pci_clear_command_bit(dev, child, bit);
2751	return (0);
2752}
2753
2754/*
2755 * New style pci driver.  Parent device is either a pci-host-bridge or a
2756 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2757 */
2758
/*
 * Dump a device's config-header details to the console; does nothing
 * unless booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2815
2816static int
2817pci_porten(device_t dev)
2818{
2819	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2820}
2821
2822static int
2823pci_memen(device_t dev)
2824{
2825	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2826}
2827
/*
 * Probe the BAR at config offset 'reg'.  On return, *mapp holds the
 * BAR's current raw value, *testvalp the value read back after writing
 * all 1's (used by callers to size the BAR), and *bar64 (if non-NULL)
 * whether the BAR is a 64-bit one.  I/O or memory decoding is disabled
 * around the probe and the original BAR value is restored afterwards.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2896
/*
 * Program the BAR described by 'pm' with 'base' and refresh the cached
 * pm->pm_value by reading the register(s) back from the device.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Cache what the device actually latched, not what was requested. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2917
2918struct pci_map *
2919pci_find_bar(device_t dev, int reg)
2920{
2921	struct pci_devinfo *dinfo;
2922	struct pci_map *pm;
2923
2924	dinfo = device_get_ivars(dev);
2925	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2926		if (pm->pm_reg == reg)
2927			return (pm);
2928	}
2929	return (NULL);
2930}
2931
2932int
2933pci_bar_enabled(device_t dev, struct pci_map *pm)
2934{
2935	struct pci_devinfo *dinfo;
2936	uint16_t cmd;
2937
2938	dinfo = device_get_ivars(dev);
2939	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2940	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2941		return (0);
2942	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2943	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2944		return ((cmd & PCIM_CMD_MEMEN) != 0);
2945	else
2946		return ((cmd & PCIM_CMD_PORTEN) != 0);
2947}
2948
/*
 * Record a newly discovered BAR: allocate a pci_map for it and insert
 * it into the device's map list, which is kept sorted by register
 * offset.  Duplicate registrations trip a KASSERT.  Returns the new
 * entry.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record belongs. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2973
2974static void
2975pci_restore_bars(device_t dev)
2976{
2977	struct pci_devinfo *dinfo;
2978	struct pci_map *pm;
2979	int ln2range;
2980
2981	dinfo = device_get_ivars(dev);
2982	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2983		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2984			ln2range = 32;
2985		else
2986			ln2range = pci_maprange(pm->pm_value);
2987		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2988		if (ln2range == 64)
2989			pci_write_config(dev, pm->pm_reg + 4,
2990			    pm->pm_value >> 32, 4);
2991	}
2992}
2993
2994/*
2995 * Add a resource based on a pci map register. Return 1 if the map
2996 * register is a 32bit map register or 2 if it is a 64bit register.
2997 */
2998static int
2999pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3000    int force, int prefetch)
3001{
3002	struct pci_map *pm;
3003	pci_addr_t base, map, testval;
3004	pci_addr_t start, end, count;
3005	int barlen, basezero, flags, maprange, mapsize, type;
3006	uint16_t cmd;
3007	struct resource *res;
3008
3009	/*
3010	 * The BAR may already exist if the device is a CardBus card
3011	 * whose CIS is stored in this BAR.
3012	 */
3013	pm = pci_find_bar(dev, reg);
3014	if (pm != NULL) {
3015		maprange = pci_maprange(pm->pm_value);
3016		barlen = maprange == 64 ? 2 : 1;
3017		return (barlen);
3018	}
3019
3020	pci_read_bar(dev, reg, &map, &testval, NULL);
3021	if (PCI_BAR_MEM(map)) {
3022		type = SYS_RES_MEMORY;
3023		if (map & PCIM_BAR_MEM_PREFETCH)
3024			prefetch = 1;
3025	} else
3026		type = SYS_RES_IOPORT;
3027	mapsize = pci_mapsize(testval);
3028	base = pci_mapbase(map);
3029#ifdef __PCI_BAR_ZERO_VALID
3030	basezero = 0;
3031#else
3032	basezero = base == 0;
3033#endif
3034	maprange = pci_maprange(map);
3035	barlen = maprange == 64 ? 2 : 1;
3036
3037	/*
3038	 * For I/O registers, if bottom bit is set, and the next bit up
3039	 * isn't clear, we know we have a BAR that doesn't conform to the
3040	 * spec, so ignore it.  Also, sanity check the size of the data
3041	 * areas to the type of memory involved.  Memory must be at least
3042	 * 16 bytes in size, while I/O ranges must be at least 4.
3043	 */
3044	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3045		return (barlen);
3046	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3047	    (type == SYS_RES_IOPORT && mapsize < 2))
3048		return (barlen);
3049
3050	/* Save a record of this BAR. */
3051	pm = pci_add_bar(dev, reg, map, mapsize);
3052	if (bootverbose) {
3053		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3054		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3055		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3056			printf(", port disabled\n");
3057		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3058			printf(", memory disabled\n");
3059		else
3060			printf(", enabled\n");
3061	}
3062
3063	/*
3064	 * If base is 0, then we have problems if this architecture does
3065	 * not allow that.  It is best to ignore such entries for the
3066	 * moment.  These will be allocated later if the driver specifically
3067	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
3071	 * read back.  These maps have had all f's written to them by the
3072	 * BIOS in an attempt to disable the resources.
3073	 */
3074	if (!force && (basezero || map == testval))
3075		return (barlen);
3076	if ((u_long)base != base) {
3077		device_printf(bus,
3078		    "pci%d:%d:%d:%d bar %#x too many address bits",
3079		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3080		    pci_get_function(dev), reg);
3081		return (barlen);
3082	}
3083
3084	/*
3085	 * This code theoretically does the right thing, but has
3086	 * undesirable side effects in some cases where peripherals
3087	 * respond oddly to having these bits enabled.  Let the user
3088	 * be able to turn them off (since pci_enable_io_modes is 1 by
3089	 * default).
3090	 */
3091	if (pci_enable_io_modes) {
3092		/* Turn on resources that have been left off by a lazy BIOS */
3093		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3094			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3095			cmd |= PCIM_CMD_PORTEN;
3096			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3097		}
3098		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3099			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3100			cmd |= PCIM_CMD_MEMEN;
3101			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3102		}
3103	} else {
3104		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3105			return (barlen);
3106		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3107			return (barlen);
3108	}
3109
3110	count = (pci_addr_t)1 << mapsize;
3111	flags = RF_ALIGNMENT_LOG2(mapsize);
3112	if (prefetch)
3113		flags |= RF_PREFETCHABLE;
3114	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3115		start = 0;	/* Let the parent decide. */
3116		end = ~0;
3117	} else {
3118		start = base;
3119		end = base + count - 1;
3120	}
3121	resource_list_add(rl, type, reg, start, end, count);
3122
3123	/*
3124	 * Try to allocate the resource for this BAR from our parent
3125	 * so that this resource range is already reserved.  The
3126	 * driver for this device will later inherit this resource in
3127	 * pci_alloc_resource().
3128	 */
3129	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3130	    flags);
3131	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3132		/*
3133		 * If the allocation fails, try to allocate a resource for
3134		 * this BAR using any available range.  The firmware felt
3135		 * it was important enough to assign a resource, so don't
3136		 * disable decoding if we can help it.
3137		 */
3138		resource_list_delete(rl, type, reg);
3139		resource_list_add(rl, type, reg, 0, ~0, count);
3140		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3141		    count, flags);
3142	}
3143	if (res == NULL) {
3144		/*
3145		 * If the allocation fails, delete the resource list entry
3146		 * and disable decoding for this device.
3147		 *
3148		 * If the driver requests this resource in the future,
3149		 * pci_reserve_map() will try to allocate a fresh
3150		 * resource range.
3151		 */
3152		resource_list_delete(rl, type, reg);
3153		pci_disable_io(dev, type);
3154		if (bootverbose)
3155			device_printf(bus,
3156			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3157			    pci_get_domain(dev), pci_get_bus(dev),
3158			    pci_get_slot(dev), pci_get_function(dev), reg);
3159	} else {
3160		start = rman_get_start(res);
3161		pci_write_bar(dev, pm, start);
3162	}
3163	return (barlen);
3164}
3165
3166/*
3167 * For ATA devices we need to decide early what addressing mode to use.
3168 * Legacy demands that the primary and secondary ATA ports sits on the
3169 * same addresses that old ISA hardware did. This dictates that we use
3170 * those addresses and ignore the BAR's if we cannot set PCI native
3171 * addressing mode.
3172 */
3173static void
3174pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3175    uint32_t prefetchmask)
3176{
3177	int rid, type, progif;
3178#if 0
3179	/* if this device supports PCI native addressing use it */
3180	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3181	if ((progif & 0x8a) == 0x8a) {
3182		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3183		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3184			printf("Trying ATA native PCI addressing mode\n");
3185			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3186		}
3187	}
3188#endif
3189	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3190	type = SYS_RES_IOPORT;
3191	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3192		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3193		    prefetchmask & (1 << 0));
3194		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3195		    prefetchmask & (1 << 1));
3196	} else {
3197		rid = PCIR_BAR(0);
3198		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3199		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3200		    0x1f7, 8, 0);
3201		rid = PCIR_BAR(1);
3202		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3203		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3204		    0x3f6, 1, 0);
3205	}
3206	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3207		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3208		    prefetchmask & (1 << 2));
3209		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3210		    prefetchmask & (1 << 3));
3211	} else {
3212		rid = PCIR_BAR(2);
3213		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3214		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3215		    0x177, 8, 0);
3216		rid = PCIR_BAR(3);
3217		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3218		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3219		    0x376, 1, 0);
3220	}
3221	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3222	    prefetchmask & (1 << 4));
3223	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3224	    prefetchmask & (1 << 5));
3225}
3226
3227static void
3228pci_assign_interrupt(device_t bus, device_t dev, int force_route)
3229{
3230	struct pci_devinfo *dinfo = device_get_ivars(dev);
3231	pcicfgregs *cfg = &dinfo->cfg;
3232	char tunable_name[64];
3233	int irq;
3234
3235	/* Has to have an intpin to have an interrupt. */
3236	if (cfg->intpin == 0)
3237		return;
3238
3239	/* Let the user override the IRQ with a tunable. */
3240	irq = PCI_INVALID_IRQ;
3241	snprintf(tunable_name, sizeof(tunable_name),
3242	    "hw.pci%d.%d.%d.INT%c.irq",
3243	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
3244	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
3245		irq = PCI_INVALID_IRQ;
3246
3247	/*
3248	 * If we didn't get an IRQ via the tunable, then we either use the
3249	 * IRQ value in the intline register or we ask the bus to route an
3250	 * interrupt for us.  If force_route is true, then we only use the
3251	 * value in the intline register if the bus was unable to assign an
3252	 * IRQ.
3253	 */
3254	if (!PCI_INTERRUPT_VALID(irq)) {
3255		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
3256			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
3257		if (!PCI_INTERRUPT_VALID(irq))
3258			irq = cfg->intline;
3259	}
3260
3261	/* If after all that we don't have an IRQ, just bail. */
3262	if (!PCI_INTERRUPT_VALID(irq))
3263		return;
3264
3265	/* Update the config register if it changed. */
3266	if (irq != cfg->intline) {
3267		cfg->intline = irq;
3268		pci_write_config(dev, PCIR_INTLINE, irq, 1);
3269	}
3270
3271	/* Add this IRQ as rid 0 interrupt resource. */
3272	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
3273}
3274
3275/* Perform early OHCI takeover from SMM. */
3276static void
3277ohci_early_takeover(device_t self)
3278{
3279	struct resource *res;
3280	uint32_t ctl;
3281	int rid;
3282	int i;
3283
3284	rid = PCIR_BAR(0);
3285	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3286	if (res == NULL)
3287		return;
3288
3289	ctl = bus_read_4(res, OHCI_CONTROL);
3290	if (ctl & OHCI_IR) {
3291		if (bootverbose)
3292			printf("ohci early: "
3293			    "SMM active, request owner change\n");
3294		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3295		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3296			DELAY(1000);
3297			ctl = bus_read_4(res, OHCI_CONTROL);
3298		}
3299		if (ctl & OHCI_IR) {
3300			if (bootverbose)
3301				printf("ohci early: "
3302				    "SMM does not respond, resetting\n");
3303			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3304		}
3305		/* Disable interrupts */
3306		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3307	}
3308
3309	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3310}
3311
3312/* Perform early UHCI takeover from SMM. */
3313static void
3314uhci_early_takeover(device_t self)
3315{
3316	struct resource *res;
3317	int rid;
3318
3319	/*
3320	 * Set the PIRQD enable bit and switch off all the others. We don't
3321	 * want legacy support to interfere with us XXX Does this also mean
3322	 * that the BIOS won't touch the keyboard anymore if it is connected
3323	 * to the ports of the root hub?
3324	 */
3325	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3326
3327	/* Disable interrupts */
3328	rid = PCI_UHCI_BASE_REG;
3329	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3330	if (res != NULL) {
3331		bus_write_2(res, UHCI_INTR, 0);
3332		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3333	}
3334}
3335
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the EHCI extended capability list in config space. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Nonzero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Set the OS semaphore, then poll up to 100ms for release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3391
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones seed so the first XHCI_XECP_NEXT(eec) test passes. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the MMIO extended capability list; the next-pointer is in
	 * 32-bit words, hence the << 2 conversions to byte offsets.
	 * NOTE(review): eecp is uint8_t but the xECP byte offset can
	 * exceed 255 — possible truncation; confirm against xHCI spec.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* Nonzero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Set the OS semaphore to request ownership. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3453
3454#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus range programmed into a PCI-PCI or CardBus
 * bridge.  If the range cannot be reserved (or clearing is requested),
 * zero the secbus/subbus registers so the range is renumbered later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary/subordinate bus regs. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		/* Entry covers the full range so only the size is pinned. */
		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/*
	 * Reached when the programmed range is invalid, clearing was
	 * requested, or the reservation failed: zero the registers so a
	 * fresh range is allocated lazily by pci_alloc_secbus().
	 */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3559
/*
 * Allocate the secondary bus range resource (rid 0) for a bridge child.
 * On first use the range is lazily reserved from the parent and the
 * secbus/subbus config registers are programmed to match.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary/subordinate bus regs. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* The bus range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve inactive, then program the bridge registers. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	/* Hand the (possibly pre-reserved) range to the caller. */
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3610#endif
3611
3612static int
3613pci_ea_bei_to_rid(device_t dev, int bei)
3614{
3615#ifdef PCI_IOV
3616	struct pci_devinfo *dinfo;
3617	int iov_pos;
3618	struct pcicfg_iov *iov;
3619
3620	dinfo = device_get_ivars(dev);
3621	iov = dinfo->cfg.iov;
3622	if (iov != NULL)
3623		iov_pos = iov->iov_pos;
3624	else
3625		iov_pos = 0;
3626#endif
3627
3628	/* Check if matches BAR */
3629	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3630	    (bei <= PCIM_EA_BEI_BAR_5))
3631		return (PCIR_BAR(bei));
3632
3633	/* Check ROM */
3634	if (bei == PCIM_EA_BEI_ROM)
3635		return (PCIR_BIOS);
3636
3637#ifdef PCI_IOV
3638	/* Check if matches VF_BAR */
3639	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3640	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3641		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3642		    iov_pos);
3643#endif
3644
3645	return (-1);
3646}
3647
3648int
3649pci_ea_is_enabled(device_t dev, int rid)
3650{
3651	struct pci_ea_entry *ea;
3652	struct pci_devinfo *dinfo;
3653
3654	dinfo = device_get_ivars(dev);
3655
3656	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3657		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3658			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3659	}
3660
3661	return (0);
3662}
3663
3664void
3665pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3666{
3667	struct pci_ea_entry *ea;
3668	struct pci_devinfo *dinfo;
3669	pci_addr_t start, end, count;
3670	struct resource_list *rl;
3671	int type, flags, rid;
3672	struct resource *res;
3673	uint32_t tmp;
3674#ifdef PCI_IOV
3675	struct pcicfg_iov *iov;
3676#endif
3677
3678	dinfo = device_get_ivars(dev);
3679	rl = &dinfo->resources;
3680	flags = 0;
3681
3682#ifdef PCI_IOV
3683	iov = dinfo->cfg.iov;
3684#endif
3685
3686	if (dinfo->cfg.ea.ea_location == 0)
3687		return;
3688
3689	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3690
3691		/*
3692		 * TODO: Ignore EA-BAR if is not enabled.
3693		 *   Currently the EA implementation supports
3694		 *   only situation, where EA structure contains
3695		 *   predefined entries. In case they are not enabled
3696		 *   leave them unallocated and proceed with
3697		 *   a legacy-BAR mechanism.
3698		 */
3699		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3700			continue;
3701
3702		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3703		case PCIM_EA_P_MEM_PREFETCH:
3704		case PCIM_EA_P_VF_MEM_PREFETCH:
3705			flags = RF_PREFETCHABLE;
3706			/* FALLTHROUGH */
3707		case PCIM_EA_P_VF_MEM:
3708		case PCIM_EA_P_MEM:
3709			type = SYS_RES_MEMORY;
3710			break;
3711		case PCIM_EA_P_IO:
3712			type = SYS_RES_IOPORT;
3713			break;
3714		default:
3715			continue;
3716		}
3717
3718		if (alloc_iov != 0) {
3719#ifdef PCI_IOV
3720			/* Allocating IOV, confirm BEI matches */
3721			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3722			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3723				continue;
3724#else
3725			continue;
3726#endif
3727		} else {
3728			/* Allocating BAR, confirm BEI matches */
3729			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3730			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3731			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3732				continue;
3733		}
3734
3735		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3736		if (rid < 0)
3737			continue;
3738
3739		/* Skip resources already allocated by EA */
3740		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3741		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3742			continue;
3743
3744		start = ea->eae_base;
3745		count = ea->eae_max_offset + 1;
3746#ifdef PCI_IOV
3747		if (iov != NULL)
3748			count = count * iov->iov_num_vfs;
3749#endif
3750		end = start + count - 1;
3751		if (count == 0)
3752			continue;
3753
3754		resource_list_add(rl, type, rid, start, end, count);
3755		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3756		    flags);
3757		if (res == NULL) {
3758			resource_list_delete(rl, type, rid);
3759
3760			/*
3761			 * Failed to allocate using EA, disable entry.
3762			 * Another attempt to allocation will be performed
3763			 * further, but this time using legacy BAR registers
3764			 */
3765			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3766			tmp &= ~PCIM_EA_ENABLE;
3767			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3768
3769			/*
3770			 * Disabling entry might fail in case it is hardwired.
3771			 * Read flags again to match current status.
3772			 */
3773			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3774
3775			continue;
3776		}
3777
3778		/* As per specification, fill BAR with zeros */
3779		pci_write_config(dev, rid, 0, 4);
3780	}
3781}
3782
/*
 * Populate a device's resource list: EA entries first, then BARs (with
 * ATA legacy handling and quirks), the INTx interrupt, early USB
 * takeover, and bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * pci_add_map() returns how many BAR registers
			 * the map consumed, so i advances past them.
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from the SMM BIOS if configured. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3875
3876static struct pci_devinfo *
3877pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3878    int slot, int func)
3879{
3880	struct pci_devinfo *dinfo;
3881
3882	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3883	if (dinfo != NULL)
3884		pci_add_child(dev, dinfo);
3885
3886	return (dinfo);
3887}
3888
/*
 * Enumerate every slot/function on a bus and add the devices found as
 * children of dev.
 */
void
pci_add_children(device_t dev, int domain, int busno)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): purpose of this 1us delay undocumented. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function bit means all functions must be scanned. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f);
	}
#undef REG
}
3931
3932int
3933pci_rescan_method(device_t dev)
3934{
3935#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3936	device_t pcib = device_get_parent(dev);
3937	struct pci_softc *sc;
3938	device_t child, *devlist, *unchanged;
3939	int devcount, error, i, j, maxslots, oldcount;
3940	int busno, domain, s, f, pcifunchigh;
3941	uint8_t hdrtype;
3942
3943	/* No need to check for ARI on a rescan. */
3944	error = device_get_children(dev, &devlist, &devcount);
3945	if (error)
3946		return (error);
3947	if (devcount != 0) {
3948		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3949		    M_NOWAIT | M_ZERO);
3950		if (unchanged == NULL) {
3951			free(devlist, M_TEMP);
3952			return (ENOMEM);
3953		}
3954	} else
3955		unchanged = NULL;
3956
3957	sc = device_get_softc(dev);
3958	domain = pcib_get_domain(dev);
3959	busno = pcib_get_bus(dev);
3960	maxslots = PCIB_MAXSLOTS(pcib);
3961	for (s = 0; s <= maxslots; s++) {
3962		/* If function 0 is not present, skip to the next slot. */
3963		f = 0;
3964		if (REG(PCIR_VENDOR, 2) == 0xffff)
3965			continue;
3966		pcifunchigh = 0;
3967		hdrtype = REG(PCIR_HDRTYPE, 1);
3968		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3969			continue;
3970		if (hdrtype & PCIM_MFDEV)
3971			pcifunchigh = PCIB_MAXFUNCS(pcib);
3972		for (f = 0; f <= pcifunchigh; f++) {
3973			if (REG(PCIR_VENDOR, 2) == 0xfff)
3974				continue;
3975
3976			/*
3977			 * Found a valid function.  Check if a
3978			 * device_t for this device already exists.
3979			 */
3980			for (i = 0; i < devcount; i++) {
3981				child = devlist[i];
3982				if (child == NULL)
3983					continue;
3984				if (pci_get_slot(child) == s &&
3985				    pci_get_function(child) == f) {
3986					unchanged[i] = child;
3987					goto next_func;
3988				}
3989			}
3990
3991			pci_identify_function(pcib, dev, domain, busno, s, f);
3992		next_func:;
3993		}
3994	}
3995
3996	/* Remove devices that are no longer present. */
3997	for (i = 0; i < devcount; i++) {
3998		if (unchanged[i] != NULL)
3999			continue;
4000		device_delete_child(dev, devlist[i]);
4001	}
4002
4003	free(devlist, M_TEMP);
4004	oldcount = devcount;
4005
4006	/* Try to attach the devices just added. */
4007	error = device_get_children(dev, &devlist, &devcount);
4008	if (error) {
4009		free(unchanged, M_TEMP);
4010		return (error);
4011	}
4012
4013	for (i = 0; i < devcount; i++) {
4014		for (j = 0; j < oldcount; j++) {
4015			if (devlist[i] == unchanged[j])
4016				goto next_device;
4017		}
4018
4019		device_probe_and_attach(devlist[i]);
4020	next_device:;
4021	}
4022
4023	free(unchanged, M_TEMP);
4024	free(devlist, M_TEMP);
4025	return (0);
4026#undef REG
4027}
4028
4029#ifdef PCI_IOV
4030device_t
4031pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4032    uint16_t did)
4033{
4034	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4035	device_t pcib;
4036	int busno, slot, func;
4037
4038	pf_dinfo = device_get_ivars(pf);
4039
4040	pcib = device_get_parent(bus);
4041
4042	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4043
4044	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4045	    slot, func, vid, did);
4046
4047	vf_dinfo->cfg.flags |= PCICFG_VF;
4048	pci_add_child(bus, vf_dinfo);
4049
4050	return (vf_dinfo->cfg.dev);
4051}
4052
/*
 * Default implementation of the pci_create_iov_child bus method: simply
 * delegates to pci_add_iov_child().
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, rid, vid, did));
}
4060#endif
4061
/*
 * Attach a discovered PCI function to the bus: create its device_t,
 * wire up the devinfo, add its resources, and fire the add event.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	/* Create the device_t and link it to its config-space info. */
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * NOTE(review): the save-then-restore pair appears to normalize
	 * the device's config/power state before resources are added —
	 * confirm against pci_cfg_save()/pci_cfg_restore().
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	/* BARs, interrupt, USB takeover, bridge bus ranges. */
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
}
4075
/*
 * Default pci_child_added bus method: intentionally empty; subclasses
 * may override it to react to newly added children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4081
/*
 * Generic probe for a PCI bus device: always matches, but at a low
 * priority so more specific bus drivers win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
4091
/*
 * Common attach work shared by pci(4) and its subclasses: claim our bus
 * number from the parent and set up the bus DMA tag.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number; released again in pci_detach(). */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only the top-level PCI bus (whose grandparent is not itself a
	 * pci devclass member) creates a boundary-constrained DMA tag;
	 * nested busses inherit the parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4138
4139static int
4140pci_attach(device_t dev)
4141{
4142	int busno, domain, error;
4143
4144	error = pci_attach_common(dev);
4145	if (error)
4146		return (error);
4147
4148	/*
4149	 * Since there can be multiple independently numbered PCI
4150	 * busses on systems with multiple PCI domains, we can't use
4151	 * the unit number to decide which bus we are probing. We ask
4152	 * the parent pcib what our domain and bus numbers are.
4153	 */
4154	domain = pcib_get_domain(dev);
4155	busno = pcib_get_bus(dev);
4156	pci_add_children(dev, domain, busno);
4157	return (bus_generic_attach(dev));
4158}
4159
/*
 * Detach method: detach all children first, then release the bus
 * number reservation (if bus numbers are managed as resources) and
 * finally delete the child devices.
 */
static int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
	struct pci_softc *sc;
#endif
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
#ifdef PCI_RES_BUS
	/* Give the bus number allocated in pci_attach_common() back. */
	sc = device_get_softc(dev);
	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
	if (error)
		return (error);
#endif
	return (device_delete_children(dev));
}
4179
4180static void
4181pci_set_power_child(device_t dev, device_t child, int state)
4182{
4183	device_t pcib;
4184	int dstate;
4185
4186	/*
4187	 * Set the device to the given state.  If the firmware suggests
4188	 * a different power state, use it instead.  If power management
4189	 * is not present, the firmware is responsible for managing
4190	 * device power.  Skip children who aren't attached since they
4191	 * are handled separately.
4192	 */
4193	pcib = device_get_parent(dev);
4194	dstate = state;
4195	if (device_is_attached(child) &&
4196	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4197		pci_set_powerstate(child, dstate);
4198}
4199
4200int
4201pci_suspend_child(device_t dev, device_t child)
4202{
4203	struct pci_devinfo *dinfo;
4204	int error;
4205
4206	dinfo = device_get_ivars(child);
4207
4208	/*
4209	 * Save the PCI configuration space for the child and set the
4210	 * device in the appropriate power state for this sleep state.
4211	 */
4212	pci_cfg_save(child, dinfo, 0);
4213
4214	/* Suspend devices before potentially powering them down. */
4215	error = bus_generic_suspend_child(dev, child);
4216
4217	if (error)
4218		return (error);
4219
4220	if (pci_do_power_suspend)
4221		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4222
4223	return (0);
4224}
4225
4226int
4227pci_resume_child(device_t dev, device_t child)
4228{
4229	struct pci_devinfo *dinfo;
4230
4231	if (pci_do_power_resume)
4232		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4233
4234	dinfo = device_get_ivars(child);
4235	pci_cfg_restore(child, dinfo);
4236	if (!device_is_attached(child))
4237		pci_cfg_save(child, dinfo, 1);
4238
4239	bus_generic_resume_child(dev, child);
4240
4241	return (0);
4242}
4243
4244int
4245pci_resume(device_t dev)
4246{
4247	device_t child, *devlist;
4248	int error, i, numdevs;
4249
4250	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4251		return (error);
4252
4253	/*
4254	 * Resume critical devices first, then everything else later.
4255	 */
4256	for (i = 0; i < numdevs; i++) {
4257		child = devlist[i];
4258		switch (pci_get_class(child)) {
4259		case PCIC_DISPLAY:
4260		case PCIC_MEMORY:
4261		case PCIC_BRIDGE:
4262		case PCIC_BASEPERIPH:
4263			BUS_RESUME_CHILD(dev, child);
4264			break;
4265		}
4266	}
4267	for (i = 0; i < numdevs; i++) {
4268		child = devlist[i];
4269		switch (pci_get_class(child)) {
4270		case PCIC_DISPLAY:
4271		case PCIC_MEMORY:
4272		case PCIC_BRIDGE:
4273		case PCIC_BASEPERIPH:
4274			break;
4275		default:
4276			BUS_RESUME_CHILD(dev, child);
4277		}
4278	}
4279	free(devlist, M_TEMP);
4280	return (0);
4281}
4282
/*
 * Locate the preloaded PCI vendor/device description database (loader
 * type "pci_vendor_data") and publish it via pci_vendordata /
 * pci_vendordata_size for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database so the parser can never
			 * run off the end of a corrupt file.
			 * NOTE(review): this writes at index sz, one byte
			 * past the fetched size; presumably the preload
			 * area has slack for this -- confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
4302
4303void
4304pci_driver_added(device_t dev, driver_t *driver)
4305{
4306	int numdevs;
4307	device_t *devlist;
4308	device_t child;
4309	struct pci_devinfo *dinfo;
4310	int i;
4311
4312	if (bootverbose)
4313		device_printf(dev, "driver added\n");
4314	DEVICE_IDENTIFY(driver, dev);
4315	if (device_get_children(dev, &devlist, &numdevs) != 0)
4316		return;
4317	for (i = 0; i < numdevs; i++) {
4318		child = devlist[i];
4319		if (device_get_state(child) != DS_NOTPRESENT)
4320			continue;
4321		dinfo = device_get_ivars(child);
4322		pci_print_verbose(dinfo);
4323		if (bootverbose)
4324			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4325		pci_cfg_restore(child, dinfo);
4326		if (device_probe_and_attach(child) != 0)
4327			pci_child_detached(dev, child);
4328	}
4329	free(devlist, M_TEMP);
4330}
4331
/*
 * Bus method for hooking up an interrupt handler on behalf of a child.
 * After the generic setup succeeds, direct children get additional PCI
 * bookkeeping: INTx is unmasked for legacy interrupts (rid 0), while
 * MSI/MSI-X vectors (rid > 0) are lazily mapped through the parent
 * bridge, enabled on first use, and reference-counted per handler.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI vectors on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is attached. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map this MSI-X vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * On the success path we fall through to here with
		 * error == 0, so the teardown only runs after a failed
		 * PCIB_MAP_MSI() above.
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4431
/*
 * Bus method for removing a child's interrupt handler.  For direct
 * children this mirrors pci_setup_intr(): INTx is masked for legacy
 * interrupts, and MSI/MSI-X handler reference counts are dropped,
 * disabling/masking the message when the count reaches zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is assumed non-NULL here; an active
		 * IRQ resource for this rid should always have a resource
		 * list entry -- confirm.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last handler gone: turn MSI off entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler gone: mask this MSI-X entry. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
4490
4491int
4492pci_print_child(device_t dev, device_t child)
4493{
4494	struct pci_devinfo *dinfo;
4495	struct resource_list *rl;
4496	int retval = 0;
4497
4498	dinfo = device_get_ivars(child);
4499	rl = &dinfo->resources;
4500
4501	retval += bus_print_child_header(dev, child);
4502
4503	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4504	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4505	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4506	if (device_get_flags(dev))
4507		retval += printf(" flags %#x", device_get_flags(dev));
4508
4509	retval += printf(" at device %d.%d", pci_get_slot(child),
4510	    pci_get_function(child));
4511
4512	retval += bus_print_child_domain(dev, child);
4513	retval += bus_print_child_footer(dev, child);
4514
4515	return (retval);
4516}
4517
/*
 * Class/subclass description table used by pci_probe_nomatch() to print
 * a human-readable name for devices without a driver.  An entry with
 * subclass -1 is the generic fallback for its class; "report" controls
 * whether the message is printed unconditionally (1) or only under
 * bootverbose (0).  The table is terminated by an all-zero sentinel.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4612
/*
 * Bus method called when no driver claims a child: print a description
 * of the device (from the loaded vendor database if available, else
 * from the class/subclass table) and power it down by saving its
 * config state.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				/* Subclass -1 is the generic class entry. */
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	/* Power the device down since nothing is driving it. */
	pci_cfg_save(child, device_get_ivars(child), 1);
}
4659
/*
 * Bus method run after a child driver detaches: reclaim any resources
 * the driver leaked (warning about each kind) and power the device
 * down.  The release order matters -- see the comment below.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4691
4692/*
4693 * Parse the PCI device database, if loaded, and return a pointer to a
4694 * description of the device.
4695 *
4696 * The database is flat text formatted as follows:
4697 *
4698 * Any line not in a valid format is ignored.
4699 * Lines are terminated with newline '\n' characters.
4700 *
4701 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4702 * the vendor name.
4703 *
4704 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4705 * - devices cannot be listed without a corresponding VENDOR line.
4706 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4707 * another TAB, then the device name.
4708 */
4709
4710/*
4711 * Assuming (ptr) points to the beginning of a line in the database,
4712 * return the vendor or device and description of the next entry.
4713 * The value of (vendor) or (device) inappropriate for the entry type
4714 * is set to -1.  Returns nonzero at the end of the database.
4715 *
4716 * Note that this is slightly unrobust in the face of corrupt data;
4717 * we attempt to safeguard against this by spamming the end of the
4718 * database with a newline when we initialise.
4719 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;	/* current scan position in the database */
	int	left;		/* bytes remaining before end of database */

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			/* Hit the end of the database. */
			*ptr = cp;
			return(1);
		}

		/*
		 * vendor entry?  (no leading tab)
		 * NOTE: the %80[^\n] conversion can store up to 80
		 * characters plus a NUL, so *desc must be at least
		 * 81 bytes long.
		 */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry?  (leading tab) */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line so the next call starts on a fresh entry */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
4765
4766static char *
4767pci_describe_device(device_t dev)
4768{
4769	int	vendor, device;
4770	char	*desc, *vp, *dp, *line;
4771
4772	desc = vp = dp = NULL;
4773
4774	/*
4775	 * If we have no vendor data, we can't do anything.
4776	 */
4777	if (pci_vendordata == NULL)
4778		goto out;
4779
4780	/*
4781	 * Scan the vendor data looking for this device
4782	 */
4783	line = pci_vendordata;
4784	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4785		goto out;
4786	for (;;) {
4787		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4788			goto out;
4789		if (vendor == pci_get_vendor(dev))
4790			break;
4791	}
4792	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4793		goto out;
4794	for (;;) {
4795		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4796			*dp = 0;
4797			break;
4798		}
4799		if (vendor != -1) {
4800			*dp = 0;
4801			break;
4802		}
4803		if (device == pci_get_device(dev))
4804			break;
4805	}
4806	if (dp[0] == '\0')
4807		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4808	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4809	    NULL)
4810		sprintf(desc, "%s, %s", vp, dp);
4811out:
4812	if (vp != NULL)
4813		free(vp, M_DEVBUF);
4814	if (dp != NULL)
4815		free(dp, M_DEVBUF);
4816	return(desc);
4817}
4818
/*
 * Bus instance-variable read accessor for PCI children.  Most ivars
 * are served straight from the cached config-space snapshot in
 * dinfo->cfg.  Returns 0 on success, EINVAL for values unavailable on
 * this device, or ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device:vendor identifier. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* MIN_GNT only exists in type-0 (normal) config headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* MAX_LAT only exists in type-0 (normal) config headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4909
4910int
4911pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4912{
4913	struct pci_devinfo *dinfo;
4914
4915	dinfo = device_get_ivars(child);
4916
4917	switch (which) {
4918	case PCI_IVAR_INTPIN:
4919		dinfo->cfg.intpin = value;
4920		return (0);
4921	case PCI_IVAR_ETHADDR:
4922	case PCI_IVAR_SUBVENDOR:
4923	case PCI_IVAR_SUBDEVICE:
4924	case PCI_IVAR_VENDOR:
4925	case PCI_IVAR_DEVICE:
4926	case PCI_IVAR_DEVID:
4927	case PCI_IVAR_CLASS:
4928	case PCI_IVAR_SUBCLASS:
4929	case PCI_IVAR_PROGIF:
4930	case PCI_IVAR_REVID:
4931	case PCI_IVAR_IRQ:
4932	case PCI_IVAR_DOMAIN:
4933	case PCI_IVAR_BUS:
4934	case PCI_IVAR_SLOT:
4935	case PCI_IVAR_FUNCTION:
4936		return (EINVAL);	/* disallow for now */
4937
4938	default:
4939		return (ENOENT);
4940	}
4941}
4942
4943#include "opt_ddb.h"
4944#ifdef DDB
4945#include <ddb/ddb.h>
4946#include <sys/cons.h>
4947
4948/*
4949 * List resources based on pci map registers, used for within ddb
4950 */
4951
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (name/unit, location, class, card
 * and chip IDs, revision, header type).  Unnamed devices are shown as
 * "none<N>" with a running counter.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4991#endif /* DDB */
4992
/*
 * Lazily reserve the resource backing a BAR: size the BAR (probing the
 * hardware if it wasn't sized before), validate that the requested
 * resource type matches the BAR type, reserve an appropriately sized
 * and aligned range from the resource list, and program the BAR with
 * the address that was assigned.  Returns the reserved resource or
 * NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was just reserved. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
5096
/*
 * Core of the bus alloc_resource method: lazily route interrupts and
 * reserve BAR ranges as needed before handing the request to the
 * resource list.  "num" is the number of contiguous BAR-sized units to
 * reserve (used by SR-IOV for VF BARs).
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Satisfy the request from the (possibly just-populated) list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5172
/*
 * Bus alloc_resource method.  Requests from grandchildren are passed
 * straight up the tree; SR-IOV virtual functions get special handling
 * for memory BARs (and can't have I/O BARs at all); everything else
 * goes through pci_alloc_multi_resource() with a unit count of 1.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5204
/*
 * BUS_RELEASE_RESOURCE() method for the PCI bus: release a resource
 * previously obtained through pci_alloc_resource(), dispatching the
 * same special cases (grandchildren, SR-IOV VFs, bridge windows) to
 * the code that allocated it.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	/* Not our direct child: let our parent handle it. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	/* Ordinary BAR/IRQ resource tracked on the child's resource list. */
	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5255
/*
 * BUS_ACTIVATE_RESOURCE() method: activate the resource and, for our
 * own children, enable the matching decoding so the hardware actually
 * responds at the programmed addresses.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
5283
/*
 * BUS_DEACTIVATE_RESOURCE() method: deactivate the resource and, for a
 * device ROM belonging to one of our own children, clear the ROM
 * enable bit by rewriting the BAR with just the base address.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
5304
5305void
5306pci_child_deleted(device_t dev, device_t child)
5307{
5308	struct resource_list_entry *rle;
5309	struct resource_list *rl;
5310	struct pci_devinfo *dinfo;
5311
5312	dinfo = device_get_ivars(child);
5313	rl = &dinfo->resources;
5314
5315	EVENTHANDLER_INVOKE(pci_delete_device, child);
5316
5317	/* Turn off access to resources we're about to free */
5318	if (bus_child_present(child) != 0) {
5319		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5320		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5321
5322		pci_disable_busmaster(child);
5323	}
5324
5325	/* Free all allocated resources */
5326	STAILQ_FOREACH(rle, rl, link) {
5327		if (rle->res) {
5328			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5329			    resource_list_busy(rl, rle->type, rle->rid)) {
5330				pci_printf(&dinfo->cfg,
5331				    "Resource still owned, oops. "
5332				    "(type=%d, rid=%d, addr=%lx)\n",
5333				    rle->type, rle->rid,
5334				    rman_get_start(rle->res));
5335				bus_release_resource(child, rle->type, rle->rid,
5336				    rle->res);
5337			}
5338			resource_list_unreserve(rl, dev, child, rle->type,
5339			    rle->rid);
5340		}
5341	}
5342	resource_list_free(rl);
5343
5344	pci_freecfg(dinfo);
5345}
5346
/*
 * Delete the (type, rid) entry from a direct child's resource list,
 * unreserving the backing resource first if one was allocated.  The
 * entry is left alone (with a diagnostic) if the resource is still
 * active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only entries for our own children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%jx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
5376
5377struct resource_list *
5378pci_get_resource_list (device_t dev, device_t child)
5379{
5380	struct pci_devinfo *dinfo = device_get_ivars(child);
5381
5382	return (&dinfo->resources);
5383}
5384
5385bus_dma_tag_t
5386pci_get_dma_tag(device_t bus, device_t dev)
5387{
5388	struct pci_softc *sc = device_get_softc(bus);
5389
5390	return (sc->sc_dma_tag);
5391}
5392
/*
 * Config space read method: forward the access to the parent bridge
 * driver, with special-case emulation of the vendor/device ID
 * registers for SR-IOV virtual functions.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* Device ID in the high word, vendor ID low. */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5433
5434void
5435pci_write_config_method(device_t dev, device_t child, int reg,
5436    uint32_t val, int width)
5437{
5438	struct pci_devinfo *dinfo = device_get_ivars(child);
5439	pcicfgregs *cfg = &dinfo->cfg;
5440
5441	PCIB_WRITE_CONFIG(device_get_parent(dev),
5442	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5443}
5444
5445int
5446pci_child_location_str_method(device_t dev, device_t child, char *buf,
5447    size_t buflen)
5448{
5449
5450	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5451	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5452	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5453	return (0);
5454}
5455
5456int
5457pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5458    size_t buflen)
5459{
5460	struct pci_devinfo *dinfo;
5461	pcicfgregs *cfg;
5462
5463	dinfo = device_get_ivars(child);
5464	cfg = &dinfo->cfg;
5465	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5466	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5467	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5468	    cfg->progif);
5469	return (0);
5470}
5471
5472int
5473pci_assign_interrupt_method(device_t dev, device_t child)
5474{
5475	struct pci_devinfo *dinfo = device_get_ivars(child);
5476	pcicfgregs *cfg = &dinfo->cfg;
5477
5478	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5479	    cfg->intpin));
5480}
5481
/*
 * dev_lookup event handler: translate a "pciD:B:S:F" or "pciB:S:F"
 * selector string into the matching device_t, if any.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* A previous handler already resolved the name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/*
		 * Only three numbers were given, so they were really
		 * bus:slot:function; shift them down and use domain 0.
		 */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/*
	 * Range-check each component.  Slot 0 is allowed the larger
	 * ARI function-number range; other slots are limited to the
	 * conventional maximum.
	 */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5530
/*
 * Module event handler: set up the global PCI device list, the
 * /dev/pci control device and the device-name lookup hook on load, and
 * tear them down on unload.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		/* Control device used by userland PCI utilities. */
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		/* Resolve "pciD:B:S:F" style names to devices. */
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5557
/*
 * Write the PCI-express control registers saved by pci_cfg_save_pcie()
 * back to the device.  Which registers are written depends on the
 * capability version and port type: the version 1 capability only
 * implements the registers relevant to the port type, while version 2
 * and later implement the full set.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every version and port type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: root ports and endpoints, or any v2+ device. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: ports that can have a slot, or any v2+ device. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: root ports and event collectors, or any v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register set only exists from capability version 2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5593
5594static void
5595pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5596{
5597	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5598	    dinfo->cfg.pcix.pcix_command,  2);
5599}
5600
/*
 * Restore a device's configuration registers from the copy cached by
 * pci_cfg_save().  The device is first returned to D0, since leaving
 * D3 resets BARs and other registers to unknown values; only then are
 * the standard header, BARs, and extended capabilities rewritten.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/*
	 * The remaining header registers live at offsets that differ
	 * between type 0 (normal), type 1 (bridge) and type 2
	 * (CardBus) headers.
	 */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_restore(dev, dinfo);
#endif
}
5673
/*
 * Save the writable PCI-express control registers into the cached
 * config state.  The conditions mirror pci_cfg_restore_pcie(): a
 * version 1 capability only implements the registers relevant to its
 * port type, while version 2 and later implement them all.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every version and port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: root ports and endpoints, or any v2+ device. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: ports that can have a slot, or any v2+ device. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and event collectors, or any v2+. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register set only exists from capability version 2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5711
5712static void
5713pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5714{
5715	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5716	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5717}
5718
/*
 * Save the normally-writable portion of a device's standard config
 * header (plus PCIe/PCI-X/IOV state) into dinfo so pci_cfg_restore()
 * can rewrite it later.  If setstate is non-zero the device may also
 * be powered down to D3, subject to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/*
	 * The remaining header registers live at offsets that differ
	 * between type 0 (normal), type 1 (bridge) and type 2
	 * (CardBus) headers.
	 */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_save(dev, dinfo);
#endif

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5826
5827/* Wrapper APIs suitable for device driver use. */
5828void
5829pci_save_state(device_t dev)
5830{
5831	struct pci_devinfo *dinfo;
5832
5833	dinfo = device_get_ivars(dev);
5834	pci_cfg_save(dev, dinfo, 0);
5835}
5836
5837void
5838pci_restore_state(device_t dev)
5839{
5840	struct pci_devinfo *dinfo;
5841
5842	dinfo = device_get_ivars(dev);
5843	pci_cfg_restore(dev, dinfo);
5844}
5845
5846static int
5847pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5848    uintptr_t *id)
5849{
5850
5851	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5852}
5853
/*
 * Find the upstream port of a given PCI device in a root complex.
 *
 * Returns the PCI-e root port the device ultimately hangs off of, or
 * NULL if the bridge hierarchy leaves PCI before a root port is found.
 */
device_t
pci_find_pcie_root_port(device_t dev)
{
	struct pci_devinfo *dinfo;
	devclass_t pci_class;
	device_t pcib, bus;

	pci_class = devclass_find("pci");
	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));

	/*
	 * Walk the bridge hierarchy until we find a PCI-e root
	 * port or a non-PCI device.
	 */
	for (;;) {
		/* dev -> its pci bus -> the bridge driving that bus. */
		bus = device_get_parent(dev);
		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
		    device_get_nameunit(dev)));

		pcib = device_get_parent(bus);
		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
		    device_get_nameunit(bus)));

		/*
		 * pcib's parent must be a PCI bus for this to be a
		 * PCI-PCI bridge.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			return (NULL);

		dinfo = device_get_ivars(pcib);
		if (dinfo->cfg.pcie.pcie_location != 0 &&
		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
			return (pcib);

		/* Not a root port yet; continue up from the bridge. */
		dev = pcib;
	}
}
5894
5895/*
5896 * Wait for pending transactions to complete on a PCI-express function.
5897 *
5898 * The maximum delay is specified in milliseconds in max_delay.  Note
5899 * that this function may sleep.
5900 *
5901 * Returns true if the function is idle and false if the timeout is
5902 * exceeded.  If dev is not a PCI-express function, this returns true.
5903 */
5904bool
5905pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5906{
5907	struct pci_devinfo *dinfo = device_get_ivars(dev);
5908	uint16_t sta;
5909	int cap;
5910
5911	cap = dinfo->cfg.pcie.pcie_location;
5912	if (cap == 0)
5913		return (true);
5914
5915	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5916	while (sta & PCIEM_STA_TRANSACTION_PND) {
5917		if (max_delay == 0)
5918			return (false);
5919
5920		/* Poll once every 100 milliseconds up to the timeout. */
5921		if (max_delay > 100) {
5922			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5923			max_delay -= 100;
5924		} else {
5925			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5926			    C_HARDCLOCK);
5927			max_delay = 0;
5928		}
5929		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5930	}
5931
5932	return (true);
5933}
5934
5935/*
5936 * Determine the maximum Completion Timeout in microseconds.
5937 *
5938 * For non-PCI-express functions this returns 0.
5939 */
5940int
5941pcie_get_max_completion_timeout(device_t dev)
5942{
5943	struct pci_devinfo *dinfo = device_get_ivars(dev);
5944	int cap;
5945
5946	cap = dinfo->cfg.pcie.pcie_location;
5947	if (cap == 0)
5948		return (0);
5949
5950	/*
5951	 * Functions using the 1.x spec use the default timeout range of
5952	 * 50 microseconds to 50 milliseconds.  Functions that do not
5953	 * support programmable timeouts also use this range.
5954	 */
5955	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
5956	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
5957	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
5958		return (50 * 1000);
5959
5960	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
5961	    PCIEM_CTL2_COMP_TIMO_VAL) {
5962	case PCIEM_CTL2_COMP_TIMO_100US:
5963		return (100);
5964	case PCIEM_CTL2_COMP_TIMO_10MS:
5965		return (10 * 1000);
5966	case PCIEM_CTL2_COMP_TIMO_55MS:
5967		return (55 * 1000);
5968	case PCIEM_CTL2_COMP_TIMO_210MS:
5969		return (210 * 1000);
5970	case PCIEM_CTL2_COMP_TIMO_900MS:
5971		return (900 * 1000);
5972	case PCIEM_CTL2_COMP_TIMO_3500MS:
5973		return (3500 * 1000);
5974	case PCIEM_CTL2_COMP_TIMO_13S:
5975		return (13 * 1000 * 1000);
5976	case PCIEM_CTL2_COMP_TIMO_64S:
5977		return (64 * 1000 * 1000);
5978	default:
5979		return (50 * 1000);
5980	}
5981}
5982
5983/*
5984 * Perform a Function Level Reset (FLR) on a device.
5985 *
5986 * This function first waits for any pending transactions to complete
5987 * within the timeout specified by max_delay.  If transactions are
5988 * still pending, the function will return false without attempting a
5989 * reset.
5990 *
5991 * If dev is not a PCI-express function or does not support FLR, this
5992 * function returns false.
5993 *
5994 * Note that no registers are saved or restored.  The caller is
5995 * responsible for saving and restoring any registers including
5996 * PCI-standard registers via pci_save_state() and
5997 * pci_restore_state().
5998 */
bool
pcie_flr(device_t dev, u_int max_delay, bool force)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	uint16_t cmd, ctl;
	int compl_delay;
	int cap;

	/* Not a PCI-express function: nothing to reset. */
	cap = dinfo->cfg.pcie.pcie_location;
	if (cap == 0)
		return (false);

	/* The function must advertise FLR support in its device caps. */
	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
		return (false);

	/*
	 * Disable busmastering to prevent generation of new
	 * transactions while waiting for the device to go idle.  If
	 * the idle timeout fails, the command register is restored
	 * which will re-enable busmastering.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
		if (!force) {
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
			return (false);
		}
		pci_printf(&dinfo->cfg,
		    "Resetting with transactions pending after %d ms\n",
		    max_delay);

		/*
		 * Extend the post-FLR delay to cover the maximum
		 * Completion Timeout delay of anything in flight
		 * during the FLR delay.  Enforce a minimum delay of
		 * at least 10ms.
		 */
		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
		if (compl_delay < 10)
			compl_delay = 10;
	} else
		compl_delay = 0;

	/* Initiate the reset. */
	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
	    PCIEM_CTL_INITIATE_FLR, 2);

	/* Wait for 100ms. */
	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);

	/* Best effort: warn if the device still reports pending work. */
	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
	    PCIEM_STA_TRANSACTION_PND)
		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
	return (true);
}
6056