1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
5 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
6 * Copyright (c) 2000, BSDi
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include "opt_bus.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/malloc.h>
39#include <sys/module.h>
40#include <sys/limits.h>
41#include <sys/linker.h>
42#include <sys/fcntl.h>
43#include <sys/conf.h>
44#include <sys/kernel.h>
45#include <sys/queue.h>
46#include <sys/sysctl.h>
47#include <sys/endian.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51#include <vm/vm_extern.h>
52
53#include <sys/bus.h>
54#include <machine/bus.h>
55#include <sys/rman.h>
56#include <machine/resource.h>
57#include <machine/stdarg.h>
58
59#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
60#include <machine/intr_machdep.h>
61#endif
62
63#include <sys/pciio.h>
64#include <dev/pci/pcireg.h>
65#include <dev/pci/pcivar.h>
66#include <dev/pci/pci_private.h>
67
68#ifdef PCI_IOV
69#include <sys/nv.h>
70#include <dev/pci/pci_iov_private.h>
71#endif
72
73#include <dev/usb/controller/xhcireg.h>
74#include <dev/usb/controller/ehcireg.h>
75#include <dev/usb/controller/ohcireg.h>
76#include <dev/usb/controller/uhcireg.h>
77
78#include "pcib_if.h"
79#include "pci_if.h"
80
/*
 * True if config register 'reg' is the expansion-ROM BAR for this header
 * type (offset PCIR_BIOS for type 0 devices, PCIR_BIOS_1 for bridges).
 * 'reg' is parenthesized so expression arguments expand safely.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
84
85static int		pci_has_quirk(uint32_t devid, int quirk);
86static pci_addr_t	pci_mapbase(uint64_t mapreg);
87static const char	*pci_maptype(uint64_t mapreg);
88static int		pci_maprange(uint64_t mapreg);
89static pci_addr_t	pci_rombase(uint64_t mapreg);
90static int		pci_romsize(uint64_t testval);
91static void		pci_fixancient(pcicfgregs *cfg);
92static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
93
94static int		pci_porten(device_t dev);
95static int		pci_memen(device_t dev);
96static void		pci_assign_interrupt(device_t bus, device_t dev,
97			    int force_route);
98static int		pci_add_map(device_t bus, device_t dev, int reg,
99			    struct resource_list *rl, int force, int prefetch);
100static int		pci_probe(device_t dev);
101static void		pci_load_vendor_data(void);
102static int		pci_describe_parse_line(char **ptr, int *vendor,
103			    int *device, char **desc);
104static char		*pci_describe_device(device_t dev);
105static int		pci_modevent(module_t mod, int what, void *arg);
106static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
107			    pcicfgregs *cfg);
108static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
109static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
110			    int reg, uint32_t *data);
111#if 0
112static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
113			    int reg, uint32_t data);
114#endif
115static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
116static void		pci_mask_msix(device_t dev, u_int index);
117static void		pci_unmask_msix(device_t dev, u_int index);
118static int		pci_msi_blacklisted(void);
119static int		pci_msix_blacklisted(void);
120static void		pci_resume_msi(device_t dev);
121static void		pci_resume_msix(device_t dev);
122static int		pci_remap_intr_method(device_t bus, device_t dev,
123			    u_int irq);
124static void		pci_hint_device_unit(device_t acdev, device_t child,
125			    const char *name, int *unitp);
126static int		pci_reset_post(device_t dev, device_t child);
127static int		pci_reset_prepare(device_t dev, device_t child);
128static int		pci_reset_child(device_t dev, device_t child,
129			    int flags);
130
131static int		pci_get_id_method(device_t dev, device_t child,
132			    enum pci_id_type type, uintptr_t *rid);
133
134static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
135    int b, int s, int f, uint16_t vid, uint16_t did);
136
/*
 * Method table wiring the pci driver into newbus: generic device
 * lifecycle entry points, the bus interface its children use for
 * resources/interrupts, and the PCI-specific kobj interface.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
	DEVMETHOD(bus_reset_prepare,	pci_reset_prepare),
	DEVMETHOD(bus_reset_post,	pci_reset_post),
	DEVMETHOD(bus_reset_child,	pci_reset_child),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_hint_device_unit,	pci_hint_device_unit),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_next_cap,	pci_find_next_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_next_extcap,	pci_find_next_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_find_next_htcap,	pci_find_next_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
218
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
/* Attach the pci driver beneath pcib (PCI bridge) devices. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Raw vendor/device description data and its size (see pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
227
/*
 * One device-specific workaround.  Entries are matched on the full
 * 32-bit devid (device ID in the upper 16 bits, vendor ID in the lower
 * 16) by pci_has_quirk().
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
#define	PCI_QUIRK_REALLOC_BAR	7 /* Can't allocate memory at the default address */
	int	arg1;	/* quirk-type-specific argument (e.g. register offset) */
	int	arg2;	/* quirk-type-specific argument */
};
241
/*
 * Static quirk table, terminated by an all-zero entry.  Looked up by
 * pci_has_quirk().
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
	 * a bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit
	 * of the command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/*
	 * HPE Gen 10 VGA has a memory range that can't be allocated in the
	 * expected place.
	 */
	{ 0x98741002, PCI_QUIRK_REALLOC_BAR,	0, 	0 },

	{ 0 }	/* terminator: devid 0 ends the scan in pci_has_quirk() */
};
327
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions, plus a generation counter. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set once any PCIe / PCI-X capability is seen during capability parsing. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not"
    " enable these bits correctly.  We'd like to do this all the time, but"
    " there are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
    "Place a function into D3 state when no driver attaches to it.  0 means"
    " disable.  1 means conservatively place devices into D3 state.  2 means"
    " aggressively place devices into D3 state.  3 means put absolutely"
    " everything in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_msix_rewrite_table = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, msix_rewrite_table, CTLFLAG_RWTUN,
    &pci_msix_rewrite_table, 0,
    "Rewrite entire MSI-X table when updating MSI-X entries");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover defaults on only where legacy BIOS USB emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1,
    "Enable early takeover of USB controllers. Disable this if you depend on"
    " BIOS emulation of USB devices, that is you use USB devices (like"
    " keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");

int pci_enable_aspm = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_aspm, CTLFLAG_RDTUN, &pci_enable_aspm,
    0, "Enable support for PCIe Active State Power Management");

static int pci_clear_aer_on_attach = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_aer_on_attach, CTLFLAG_RWTUN,
    &pci_clear_aer_on_attach, 0,
    "Clear port and device AER state on driver attach");
422
423static int
424pci_has_quirk(uint32_t devid, int quirk)
425{
426	const struct pci_quirk *q;
427
428	for (q = &pci_quirks[0]; q->devid; q++) {
429		if (q->devid == devid && q->type == quirk)
430			return (1);
431	}
432	return (0);
433}
434
435/* Find a device_t by bus/slot/function in domain 0 */
436
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: assumes PCI domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
443
444/* Find a device_t by domain/bus/slot/function */
445
446device_t
447pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
448{
449	struct pci_devinfo *dinfo;
450
451	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
452		if ((dinfo->cfg.domain == domain) &&
453		    (dinfo->cfg.bus == bus) &&
454		    (dinfo->cfg.slot == slot) &&
455		    (dinfo->cfg.func == func)) {
456			return (dinfo->cfg.dev);
457		}
458	}
459
460	return (NULL);
461}
462
463/* Find a device_t by vendor/device ID */
464
465device_t
466pci_find_device(uint16_t vendor, uint16_t device)
467{
468	struct pci_devinfo *dinfo;
469
470	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
471		if ((dinfo->cfg.vendor == vendor) &&
472		    (dinfo->cfg.device == device)) {
473			return (dinfo->cfg.dev);
474		}
475	}
476
477	return (NULL);
478}
479
480device_t
481pci_find_class(uint8_t class, uint8_t subclass)
482{
483	struct pci_devinfo *dinfo;
484
485	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
486		if (dinfo->cfg.baseclass == class &&
487		    dinfo->cfg.subclass == subclass) {
488			return (dinfo->cfg.dev);
489		}
490	}
491
492	return (NULL);
493}
494
495static int
496pci_printf(pcicfgregs *cfg, const char *fmt, ...)
497{
498	va_list ap;
499	int retval;
500
501	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
502	    cfg->func);
503	va_start(ap, fmt);
504	retval += vprintf(fmt, ap);
505	va_end(ap);
506	return (retval);
507}
508
509/* return base address of memory or port map */
510
511static pci_addr_t
512pci_mapbase(uint64_t mapreg)
513{
514
515	if (PCI_BAR_MEM(mapreg))
516		return (mapreg & PCIM_BAR_MEM_BASE);
517	else
518		return (mapreg & PCIM_BAR_IO_BASE);
519}
520
521/* return map type of memory or port map */
522
523static const char *
524pci_maptype(uint64_t mapreg)
525{
526
527	if (PCI_BAR_IO(mapreg))
528		return ("I/O Port");
529	if (mapreg & PCIM_BAR_MEM_PREFETCH)
530		return ("Prefetchable Memory");
531	return ("Memory");
532}
533
534/* return log2 of map size decoded for memory or port map */
535
536int
537pci_mapsize(uint64_t testval)
538{
539	int ln2size;
540
541	testval = pci_mapbase(testval);
542	ln2size = 0;
543	if (testval != 0) {
544		while ((testval & 1) == 0)
545		{
546			ln2size++;
547			testval >>= 1;
548		}
549	}
550	return (ln2size);
551}
552
553/* return base address of device ROM */
554
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Strip the enable bit and reserved low bits from the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
561
/* return log2 of map size decoded for device ROM */
563
564static int
565pci_romsize(uint64_t testval)
566{
567	int ln2size;
568
569	testval = pci_rombase(testval);
570	ln2size = 0;
571	if (testval != 0) {
572		while ((testval & 1) == 0)
573		{
574			ln2size++;
575			testval >>= 1;
576		}
577	}
578	return (ln2size);
579}
580
581/* return log2 of address range supported by map register */
582
583static int
584pci_maprange(uint64_t mapreg)
585{
586	int ln2range = 0;
587
588	if (PCI_BAR_IO(mapreg))
589		ln2range = 32;
590	else
591		switch (mapreg & PCIM_BAR_MEM_TYPE) {
592		case PCIM_BAR_MEM_32:
593			ln2range = 32;
594			break;
595		case PCIM_BAR_MEM_1MB:
596			ln2range = 20;
597			break;
598		case PCIM_BAR_MEM_64:
599			ln2range = 64;
600			break;
601		}
602	return (ln2range);
603}
604
605/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
606
607static void
608pci_fixancient(pcicfgregs *cfg)
609{
610	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
611		return;
612
613	/* PCI to PCI bridges use header type 1 */
614	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
615		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
616}
617
618/* extract header type specific config data */
619
/*
 * Read the header-type-specific portion of a function's config header
 * into 'cfg': subsystem IDs and latency values for type 0, bridge bus
 * numbers and control bits for types 1 and 2.  Also sets nummaps to the
 * number of BARs the header type defines.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
653
654/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
/* NB: this REG definition carries through pci_fill_devinfo() below. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	/*
	 * A vendor ID of 0xffff is what an empty slot reads back; only
	 * build a devinfo for functions that are actually present.
	 */
	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}
668
669struct pci_devinfo *
670pci_alloc_devinfo_method(device_t dev)
671{
672
673	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
674	    M_WAITOK | M_ZERO));
675}
676
/*
 * Allocate a pci_devinfo (via the bus's PCI_ALLOC_DEVINFO method) for the
 * function at d:b:s:f, populate it from the config-space header, parse
 * capabilities if present, and link it onto the global pci_devq list.
 * Uses the REG() macro defined in pci_read_device() above.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Split the multi-function bit out of the stored header type. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* Walk the capability list only if the status register says so. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the parsed header into the pciio(4) conf structure. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
741#undef REG
742
743static void
744pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
745{
746#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
747    cfg->ea.ea_location + (n), w)
748	int num_ent;
749	int ptr;
750	int a, b;
751	uint32_t val;
752	int ent_size;
753	uint32_t dw[4];
754	uint64_t base, max_offset;
755	struct pci_ea_entry *eae;
756
757	if (cfg->ea.ea_location == 0)
758		return;
759
760	STAILQ_INIT(&cfg->ea.ea_entries);
761
762	/* Determine the number of entries */
763	num_ent = REG(PCIR_EA_NUM_ENT, 2);
764	num_ent &= PCIM_EA_NUM_ENT_MASK;
765
766	/* Find the first entry to care of */
767	ptr = PCIR_EA_FIRST_ENT;
768
769	/* Skip DWORD 2 for type 1 functions */
770	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
771		ptr += 4;
772
773	for (a = 0; a < num_ent; a++) {
774
775		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
776		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
777
778		/* Read a number of dwords in the entry */
779		val = REG(ptr, 4);
780		ptr += 4;
781		ent_size = (val & PCIM_EA_ES);
782
783		for (b = 0; b < ent_size; b++) {
784			dw[b] = REG(ptr, 4);
785			ptr += 4;
786		}
787
788		eae->eae_flags = val;
789		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
790
791		base = dw[0] & PCIM_EA_FIELD_MASK;
792		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
793		b = 2;
794		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
795			base |= (uint64_t)dw[b] << 32UL;
796			b++;
797		}
798		if (((dw[1] & PCIM_EA_IS_64) != 0)
799		    && (b < ent_size)) {
800			max_offset |= (uint64_t)dw[b] << 32UL;
801			b++;
802		}
803
804		eae->eae_base = base;
805		eae->eae_max_offset = max_offset;
806
807		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
808
809		if (bootverbose) {
810			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
811			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
812			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
813		}
814	}
815}
816#undef REG
817
/*
 * Walk the classic (PCI 2.2) capability list and record the location
 * and pre-parsed contents of the capabilities pci(4) itself cares about
 * (power management, HyperTransport, MSI, MSI-X, VPD, subvendor,
 * PCI-X, PCIe, Enhanced Allocation).
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability-pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Record only the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA registers each hold a BIR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
982
983/*
984 * PCI Vital Product Data
985 */
986
987#define	PCI_VPD_TIMEOUT		1000000
988
989static int
990pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
991{
992	int count = PCI_VPD_TIMEOUT;
993
994	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
995
996	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
997
998	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
999		if (--count < 0)
1000			return (ENXIO);
1001		DELAY(1);	/* limit looping */
1002	}
1003	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
1004
1005	return (0);
1006}
1007
#if 0
/*
 * Write one 4-byte aligned dword of VPD data at offset 'reg'.
 * Currently unused, kept under #if 0 for reference.
 *
 * Writing the offset with the flag bit (15) set starts a write cycle;
 * the hardware clears the bit when the write has completed.  Returns 0
 * on success or ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
1027
1028#undef PCI_VPD_TIMEOUT
1029
/* Cursor state for streaming VPD data one byte at a time. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for VPD register access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read, shifted as consumed */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch (dword aligned) */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
1038
1039static int
1040vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1041{
1042	uint32_t reg;
1043	uint8_t byte;
1044
1045	if (vrs->bytesinval == 0) {
1046		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1047			return (ENXIO);
1048		vrs->val = le32toh(reg);
1049		vrs->off += 4;
1050		byte = vrs->val & 0xff;
1051		vrs->bytesinval = 3;
1052	} else {
1053		vrs->val = vrs->val >> 8;
1054		byte = vrs->val & 0xff;
1055		vrs->bytesinval--;
1056	}
1057
1058	vrs->cksum += byte;
1059	*data = byte;
1060	return (0);
1061}
1062
/*
 * Read and parse a device's VPD (Vital Product Data) into cfg->vpd:
 * the identifier string (vpd_ident) plus the read-only (vpd_ros) and
 * read/write (vpd_w) keyword arrays.  Implemented as a state machine
 * over the byte stream produced by vpd_nextbyte():
 *
 *   state 0:   resource tag (item name + length)
 *   state 1:   identifier string bytes
 *   state 2/3: VPD-R keyword header / value bytes
 *   state 4:   skip bytes of an unhandled item
 *   state 5/6: VPD-W keyword header / value bytes
 *   state -1:  normal termination;  state -2: I/O error
 *
 * On checksum failure or I/O error the partially built results are
 * freed again.  vpd_cached is set unconditionally so the (slow) read
 * is attempted at most once per device.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit len, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			/* VPD is capped at 32K; anything larger is corrupt. */
			if (vrs.off + remain - vrs.bytesinval > 0x8000) {
				pci_printf(cfg,
				    "VPD data overflow, remain %#x\n", remain);
				state = -1;
				break;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				/* Array full: double its size. */
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store empty string. */
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				/* Sum of all bytes through "RV" must be 0. */
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the entries used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:		/* skip remaining bytes of unhandled item */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				/* Array full: double its size. */
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember the VPD offset of this writable field. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the entries used. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we only try once. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1334
1335int
1336pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1337{
1338	struct pci_devinfo *dinfo = device_get_ivars(child);
1339	pcicfgregs *cfg = &dinfo->cfg;
1340
1341	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1342		pci_read_vpd(device_get_parent(dev), cfg);
1343
1344	*identptr = cfg->vpd.vpd_ident;
1345
1346	if (*identptr == NULL)
1347		return (ENXIO);
1348
1349	return (0);
1350}
1351
1352int
1353pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1354	const char **vptr)
1355{
1356	struct pci_devinfo *dinfo = device_get_ivars(child);
1357	pcicfgregs *cfg = &dinfo->cfg;
1358	int i;
1359
1360	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1361		pci_read_vpd(device_get_parent(dev), cfg);
1362
1363	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1364		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1365		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1366			*vptr = cfg->vpd.vpd_ros[i].value;
1367			return (0);
1368		}
1369
1370	*vptr = NULL;
1371	return (ENXIO);
1372}
1373
1374struct pcicfg_vpd *
1375pci_fetch_vpd_list(device_t dev)
1376{
1377	struct pci_devinfo *dinfo = device_get_ivars(dev);
1378	pcicfgregs *cfg = &dinfo->cfg;
1379
1380	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1381		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1382	return (&cfg->vpd);
1383}
1384
1385/*
1386 * Find the requested HyperTransport capability and return the offset
1387 * in configuration space via the pointer provided.  The function
1388 * returns 0 on success and an error code otherwise.
1389 */
1390int
1391pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1392{
1393	int ptr, error;
1394	uint16_t val;
1395
1396	error = pci_find_cap(child, PCIY_HT, &ptr);
1397	if (error)
1398		return (error);
1399
1400	/*
1401	 * Traverse the capabilities list checking each HT capability
1402	 * to see if it matches the requested HT capability.
1403	 */
1404	for (;;) {
1405		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1406		if (capability == PCIM_HTCAP_SLAVE ||
1407		    capability == PCIM_HTCAP_HOST)
1408			val &= 0xe000;
1409		else
1410			val &= PCIM_HTCMD_CAP_MASK;
1411		if (val == capability) {
1412			if (capreg != NULL)
1413				*capreg = ptr;
1414			return (0);
1415		}
1416
1417		/* Skip to the next HT capability. */
1418		if (pci_find_next_cap(child, PCIY_HT, ptr, &ptr) != 0)
1419			break;
1420	}
1421
1422	return (ENOENT);
1423}
1424
1425/*
1426 * Find the next requested HyperTransport capability after start and return
1427 * the offset in configuration space via the pointer provided.  The function
1428 * returns 0 on success and an error code otherwise.
1429 */
1430int
1431pci_find_next_htcap_method(device_t dev, device_t child, int capability,
1432    int start, int *capreg)
1433{
1434	int ptr;
1435	uint16_t val;
1436
1437	KASSERT(pci_read_config(child, start + PCICAP_ID, 1) == PCIY_HT,
1438	    ("start capability is not HyperTransport capability"));
1439	ptr = start;
1440
1441	/*
1442	 * Traverse the capabilities list checking each HT capability
1443	 * to see if it matches the requested HT capability.
1444	 */
1445	for (;;) {
1446		/* Skip to the next HT capability. */
1447		if (pci_find_next_cap(child, PCIY_HT, ptr, &ptr) != 0)
1448			break;
1449
1450		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1451		if (capability == PCIM_HTCAP_SLAVE ||
1452		    capability == PCIM_HTCAP_HOST)
1453			val &= 0xe000;
1454		else
1455			val &= PCIM_HTCMD_CAP_MASK;
1456		if (val == capability) {
1457			if (capreg != NULL)
1458				*capreg = ptr;
1459			return (0);
1460		}
1461	}
1462
1463	return (ENOENT);
1464}
1465
1466/*
1467 * Find the requested capability and return the offset in
1468 * configuration space via the pointer provided.  The function returns
1469 * 0 on success and an error code otherwise.
1470 */
1471int
1472pci_find_cap_method(device_t dev, device_t child, int capability,
1473    int *capreg)
1474{
1475	struct pci_devinfo *dinfo = device_get_ivars(child);
1476	pcicfgregs *cfg = &dinfo->cfg;
1477	uint32_t status;
1478	uint8_t ptr;
1479
1480	/*
1481	 * Check the CAP_LIST bit of the PCI status register first.
1482	 */
1483	status = pci_read_config(child, PCIR_STATUS, 2);
1484	if (!(status & PCIM_STATUS_CAPPRESENT))
1485		return (ENXIO);
1486
1487	/*
1488	 * Determine the start pointer of the capabilities list.
1489	 */
1490	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1491	case PCIM_HDRTYPE_NORMAL:
1492	case PCIM_HDRTYPE_BRIDGE:
1493		ptr = PCIR_CAP_PTR;
1494		break;
1495	case PCIM_HDRTYPE_CARDBUS:
1496		ptr = PCIR_CAP_PTR_2;
1497		break;
1498	default:
1499		/* XXX: panic? */
1500		return (ENXIO);		/* no extended capabilities support */
1501	}
1502	ptr = pci_read_config(child, ptr, 1);
1503
1504	/*
1505	 * Traverse the capabilities list.
1506	 */
1507	while (ptr != 0) {
1508		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1509			if (capreg != NULL)
1510				*capreg = ptr;
1511			return (0);
1512		}
1513		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1514	}
1515
1516	return (ENOENT);
1517}
1518
1519/*
1520 * Find the next requested capability after start and return the offset in
1521 * configuration space via the pointer provided.  The function returns
1522 * 0 on success and an error code otherwise.
1523 */
1524int
1525pci_find_next_cap_method(device_t dev, device_t child, int capability,
1526    int start, int *capreg)
1527{
1528	uint8_t ptr;
1529
1530	KASSERT(pci_read_config(child, start + PCICAP_ID, 1) == capability,
1531	    ("start capability is not expected capability"));
1532
1533	ptr = pci_read_config(child, start + PCICAP_NEXTPTR, 1);
1534	while (ptr != 0) {
1535		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1536			if (capreg != NULL)
1537				*capreg = ptr;
1538			return (0);
1539		}
1540		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1541	}
1542
1543	return (ENOENT);
1544}
1545
1546/*
1547 * Find the requested extended capability and return the offset in
1548 * configuration space via the pointer provided.  The function returns
1549 * 0 on success and an error code otherwise.
1550 */
1551int
1552pci_find_extcap_method(device_t dev, device_t child, int capability,
1553    int *capreg)
1554{
1555	struct pci_devinfo *dinfo = device_get_ivars(child);
1556	pcicfgregs *cfg = &dinfo->cfg;
1557	uint32_t ecap;
1558	uint16_t ptr;
1559
1560	/* Only supported for PCI-express devices. */
1561	if (cfg->pcie.pcie_location == 0)
1562		return (ENXIO);
1563
1564	ptr = PCIR_EXTCAP;
1565	ecap = pci_read_config(child, ptr, 4);
1566	if (ecap == 0xffffffff || ecap == 0)
1567		return (ENOENT);
1568	for (;;) {
1569		if (PCI_EXTCAP_ID(ecap) == capability) {
1570			if (capreg != NULL)
1571				*capreg = ptr;
1572			return (0);
1573		}
1574		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1575		if (ptr == 0)
1576			break;
1577		ecap = pci_read_config(child, ptr, 4);
1578	}
1579
1580	return (ENOENT);
1581}
1582
1583/*
1584 * Find the next requested extended capability after start and return the
1585 * offset in configuration space via the pointer provided.  The function
1586 * returns 0 on success and an error code otherwise.
1587 */
1588int
1589pci_find_next_extcap_method(device_t dev, device_t child, int capability,
1590    int start, int *capreg)
1591{
1592	struct pci_devinfo *dinfo = device_get_ivars(child);
1593	pcicfgregs *cfg = &dinfo->cfg;
1594	uint32_t ecap;
1595	uint16_t ptr;
1596
1597	/* Only supported for PCI-express devices. */
1598	if (cfg->pcie.pcie_location == 0)
1599		return (ENXIO);
1600
1601	ecap = pci_read_config(child, start, 4);
1602	KASSERT(PCI_EXTCAP_ID(ecap) == capability,
1603	    ("start extended capability is not expected capability"));
1604	ptr = PCI_EXTCAP_NEXTPTR(ecap);
1605	while (ptr != 0) {
1606		ecap = pci_read_config(child, ptr, 4);
1607		if (PCI_EXTCAP_ID(ecap) == capability) {
1608			if (capreg != NULL)
1609				*capreg = ptr;
1610			return (0);
1611		}
1612		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1613	}
1614
1615	return (ENOENT);
1616}
1617
1618/*
1619 * Support for MSI-X message interrupts.
1620 */
1621static void
1622pci_write_msix_entry(device_t dev, u_int index, uint64_t address, uint32_t data)
1623{
1624	struct pci_devinfo *dinfo = device_get_ivars(dev);
1625	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1626	uint32_t offset;
1627
1628	KASSERT(msix->msix_table_len > index, ("bogus index"));
1629	offset = msix->msix_table_offset + index * 16;
1630	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1631	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1632	bus_write_4(msix->msix_table_res, offset + 8, data);
1633}
1634
1635void
1636pci_enable_msix_method(device_t dev, device_t child, u_int index,
1637    uint64_t address, uint32_t data)
1638{
1639
1640	if (pci_msix_rewrite_table) {
1641		struct pci_devinfo *dinfo = device_get_ivars(child);
1642		struct pcicfg_msix *msix = &dinfo->cfg.msix;
1643
1644		/*
1645		 * Some VM hosts require MSIX to be disabled in the
1646		 * control register before updating the MSIX table
1647		 * entries are allowed. It is not enough to only
1648		 * disable MSIX while updating a single entry. MSIX
1649		 * must be disabled while updating all entries in the
1650		 * table.
1651		 */
1652		pci_write_config(child,
1653		    msix->msix_location + PCIR_MSIX_CTRL,
1654		    msix->msix_ctrl & ~PCIM_MSIXCTRL_MSIX_ENABLE, 2);
1655		pci_resume_msix(child);
1656	} else
1657		pci_write_msix_entry(child, index, address, data);
1658
1659	/* Enable MSI -> HT mapping. */
1660	pci_ht_map_msi(child, address);
1661}
1662
1663void
1664pci_mask_msix(device_t dev, u_int index)
1665{
1666	struct pci_devinfo *dinfo = device_get_ivars(dev);
1667	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1668	uint32_t offset, val;
1669
1670	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1671	offset = msix->msix_table_offset + index * 16 + 12;
1672	val = bus_read_4(msix->msix_table_res, offset);
1673	val |= PCIM_MSIX_VCTRL_MASK;
1674
1675	/*
1676	 * Some devices (e.g. Samsung PM961) do not support reads of this
1677	 * register, so always write the new value.
1678	 */
1679	bus_write_4(msix->msix_table_res, offset, val);
1680}
1681
1682void
1683pci_unmask_msix(device_t dev, u_int index)
1684{
1685	struct pci_devinfo *dinfo = device_get_ivars(dev);
1686	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1687	uint32_t offset, val;
1688
1689	KASSERT(msix->msix_table_len > index, ("bogus index"));
1690	offset = msix->msix_table_offset + index * 16 + 12;
1691	val = bus_read_4(msix->msix_table_res, offset);
1692	val &= ~PCIM_MSIX_VCTRL_MASK;
1693
1694	/*
1695	 * Some devices (e.g. Samsung PM961) do not support reads of this
1696	 * register, so always write the new value.
1697	 */
1698	bus_write_4(msix->msix_table_res, offset, val);
1699}
1700
1701int
1702pci_pending_msix(device_t dev, u_int index)
1703{
1704	struct pci_devinfo *dinfo = device_get_ivars(dev);
1705	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1706	uint32_t offset, bit;
1707
1708	KASSERT(msix->msix_table_len > index, ("bogus index"));
1709	offset = msix->msix_pba_offset + (index / 32) * 4;
1710	bit = 1 << index % 32;
1711	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1712}
1713
1714/*
1715 * Restore MSI-X registers and table during resume.  If MSI-X is
1716 * enabled then walk the virtual table to restore the actual MSI-X
1717 * table.
1718 */
1719static void
1720pci_resume_msix(device_t dev)
1721{
1722	struct pci_devinfo *dinfo = device_get_ivars(dev);
1723	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1724	struct msix_table_entry *mte;
1725	struct msix_vector *mv;
1726	int i;
1727
1728	if (msix->msix_alloc > 0) {
1729		/* First, mask all vectors. */
1730		for (i = 0; i < msix->msix_msgnum; i++)
1731			pci_mask_msix(dev, i);
1732
1733		/* Second, program any messages with at least one handler. */
1734		for (i = 0; i < msix->msix_table_len; i++) {
1735			mte = &msix->msix_table[i];
1736			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1737				continue;
1738			mv = &msix->msix_vectors[mte->mte_vector - 1];
1739			pci_write_msix_entry(dev, i, mv->mv_address,
1740			    mv->mv_data);
1741			pci_unmask_msix(dev, i);
1742		}
1743	}
1744	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1745	    msix->msix_ctrl, 2);
1746}
1747
1748/*
1749 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1750 * returned in *count.  After this function returns, each message will be
1751 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1752 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	/* The PBA may live in the same BAR as the table or its own. */
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Ask the parent bridge for up to 'max' IRQs, one at a time. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Partial allocations are fine; none at all is not. */
			if (i == 0)
				return (error);
			break;
		}
		/* Messages are exposed as SYS_RES_IRQ rids starting at 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	/* Initially map message i to vector i + 1 (0 means unassigned). */
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1887
1888/*
1889 * By default, pci_alloc_msix() will assign the allocated IRQ
1890 * resources consecutively to the first N messages in the MSI-X table.
1891 * However, device drivers may want to use different layouts if they
1892 * either receive fewer messages than they asked for, or they wish to
1893 * populate the MSI-X table sparsely.  This method allows the driver
1894 * to specify what layout it wants.  It must be called after a
1895 * successful pci_alloc_msix() but before any of the associated
1896 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1897 *
1898 * The 'vectors' array contains 'count' message vectors.  The array
1899 * maps directly to the MSI-X table in that index 0 in the array
1900 * specifies the vector for the first message in the MSI-X table, etc.
1901 * The vector value in each array index can either be 0 to indicate
1902 * that no vector should be assigned to a message slot, or it can be a
1903 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1905 * vector (IRQ) to be used for the corresponding message.
1906 *
1907 * On successful return, each message with a non-zero vector will have
1908 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1909 * 1.  Additionally, if any of the IRQs allocated via the previous
1910 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1911 * will be freed back to the system automatically.
1912 *
1913 * For example, suppose a driver has a MSI-X table with 6 messages and
1914 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1915 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1916 * C.  After the call to pci_alloc_msix(), the device will be setup to
1917 * have an MSI-X table of ABC--- (where - means no vector assigned).
1918 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1919 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1920 * be freed back to the system.  This device will also have valid
1921 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1922 *
1923 * In any case, the SYS_RES_IRQ rid X will always map to the message
1924 * at MSI-X table index X - 1 and will only be valid if a vector is
1925 * assigned to that table entry.
1926 */
int
pci_remap_msix_method(device_t dev, device_t child, int count,
    const u_int *vectors)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i, irq, j, *used;

	/*
	 * Have to have at least one message in the table but the
	 * table can't be bigger than the actual MSI-X table in the
	 * device.
	 */
	if (count == 0 || count > msix->msix_msgnum)
		return (EINVAL);

	/* Sanity check the vectors. */
	for (i = 0; i < count; i++)
		if (vectors[i] > msix->msix_alloc)
			return (EINVAL);

	/*
	 * Make sure there aren't any holes in the vectors to be used.
	 * It's a big pain to support it, and it doesn't really make
	 * sense anyway.  Also, at least one vector must be used.
	 */
	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++)
		if (vectors[i] != 0)
			used[vectors[i] - 1] = 1;
	/* An unused vector followed by a used one is a hole. */
	for (i = 0; i < msix->msix_alloc - 1; i++)
		if (used[i] == 0 && used[i + 1] == 1) {
			free(used, M_DEVBUF);
			return (EINVAL);
		}
	if (used[0] != 1) {
		free(used, M_DEVBUF);
		return (EINVAL);
	}

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
	}

	/* Free the existing resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}

	/*
	 * Build the new virtual table keeping track of which vectors are
	 * used.
	 */
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < count; i++)
		msix->msix_table[i].mte_vector = vectors[i];
	msix->msix_table_len = count;

	/* Free any unused IRQs and resize the vectors array if necessary. */
	j = msix->msix_alloc - 1;
	if (used[j] == 0) {
		struct msix_vector *vec;

		/* Release the trailing unused IRQs back to the bridge. */
		while (used[j] == 0) {
			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
			    msix->msix_vectors[j].mv_irq);
			j--;
		}
		/* Shrink msix_vectors to the j + 1 vectors still in use. */
		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
		    M_WAITOK);
		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
		    (j + 1));
		free(msix->msix_vectors, M_DEVBUF);
		msix->msix_vectors = vec;
		msix->msix_alloc = j + 1;
	}
	free(used, M_DEVBUF);

	/* Map the IRQs onto the rids. */
	for (i = 0; i < count; i++) {
		if (vectors[i] == 0)
			continue;
		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}

	if (bootverbose) {
		device_printf(child, "Remapped MSI-X IRQs as: ");
		for (i = 0; i < count; i++) {
			if (i != 0)
				printf(", ");
			if (vectors[i] == 0)
				printf("---");
			else
				printf("%d",
				    msix->msix_vectors[vectors[i] - 1].mv_irq);
		}
		printf("\n");
	}

	return (0);
}
2048
2049static int
2050pci_release_msix(device_t dev, device_t child)
2051{
2052	struct pci_devinfo *dinfo = device_get_ivars(child);
2053	struct pcicfg_msix *msix = &dinfo->cfg.msix;
2054	struct resource_list_entry *rle;
2055	int i;
2056
2057	/* Do we have any messages to release? */
2058	if (msix->msix_alloc == 0)
2059		return (ENODEV);
2060
2061	/* Make sure none of the resources are allocated. */
2062	for (i = 0; i < msix->msix_table_len; i++) {
2063		if (msix->msix_table[i].mte_vector == 0)
2064			continue;
2065		if (msix->msix_table[i].mte_handlers > 0)
2066			return (EBUSY);
2067		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2068		KASSERT(rle != NULL, ("missing resource"));
2069		if (rle->res != NULL)
2070			return (EBUSY);
2071	}
2072
2073	/* Update control register to disable MSI-X. */
2074	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
2075	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
2076	    msix->msix_ctrl, 2);
2077
2078	/* Free the resource list entries. */
2079	for (i = 0; i < msix->msix_table_len; i++) {
2080		if (msix->msix_table[i].mte_vector == 0)
2081			continue;
2082		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2083	}
2084	free(msix->msix_table, M_DEVBUF);
2085	msix->msix_table_len = 0;
2086
2087	/* Release the IRQs. */
2088	for (i = 0; i < msix->msix_alloc; i++)
2089		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
2090		    msix->msix_vectors[i].mv_irq);
2091	free(msix->msix_vectors, M_DEVBUF);
2092	msix->msix_alloc = 0;
2093	return (0);
2094}
2095
2096/*
2097 * Return the max supported MSI-X messages this device supports.
2098 * Basically, assuming the MD code can alloc messages, this function
2099 * should return the maximum value that pci_alloc_msix() can return.
2100 * Thus, it is subject to the tunables, etc.
2101 */
2102int
2103pci_msix_count_method(device_t dev, device_t child)
2104{
2105	struct pci_devinfo *dinfo = device_get_ivars(child);
2106	struct pcicfg_msix *msix = &dinfo->cfg.msix;
2107
2108	if (pci_do_msix && msix->msix_location != 0)
2109		return (msix->msix_msgnum);
2110	return (0);
2111}
2112
2113int
2114pci_msix_pba_bar_method(device_t dev, device_t child)
2115{
2116	struct pci_devinfo *dinfo = device_get_ivars(child);
2117	struct pcicfg_msix *msix = &dinfo->cfg.msix;
2118
2119	if (pci_do_msix && msix->msix_location != 0)
2120		return (msix->msix_pba_bar);
2121	return (-1);
2122}
2123
2124int
2125pci_msix_table_bar_method(device_t dev, device_t child)
2126{
2127	struct pci_devinfo *dinfo = device_get_ivars(child);
2128	struct pcicfg_msix *msix = &dinfo->cfg.msix;
2129
2130	if (pci_do_msix && msix->msix_location != 0)
2131		return (msix->msix_table_bar);
2132	return (-1);
2133}
2134
2135/*
2136 * HyperTransport MSI mapping control
2137 */
2138void
2139pci_ht_map_msi(device_t dev, uint64_t addr)
2140{
2141	struct pci_devinfo *dinfo = device_get_ivars(dev);
2142	struct pcicfg_ht *ht = &dinfo->cfg.ht;
2143
2144	if (!ht->ht_msimap)
2145		return;
2146
2147	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
2148	    ht->ht_msiaddr >> 20 == addr >> 20) {
2149		/* Enable MSI -> HT mapping. */
2150		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
2151		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2152		    ht->ht_msictrl, 2);
2153	}
2154
2155	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
2156		/* Disable MSI -> HT mapping. */
2157		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
2158		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
2159		    ht->ht_msictrl, 2);
2160	}
2161}
2162
2163int
2164pci_get_relaxed_ordering_enabled(device_t dev)
2165{
2166	struct pci_devinfo *dinfo = device_get_ivars(dev);
2167	int cap;
2168	uint16_t val;
2169
2170	cap = dinfo->cfg.pcie.pcie_location;
2171	if (cap == 0)
2172		return (0);
2173	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2174	val &= PCIEM_CTL_RELAXED_ORD_ENABLE;
2175	return (val != 0);
2176}
2177
2178int
2179pci_get_max_payload(device_t dev)
2180{
2181	struct pci_devinfo *dinfo = device_get_ivars(dev);
2182	int cap;
2183	uint16_t val;
2184
2185	cap = dinfo->cfg.pcie.pcie_location;
2186	if (cap == 0)
2187		return (0);
2188	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2189	val &= PCIEM_CTL_MAX_PAYLOAD;
2190	val >>= 5;
2191	return (1 << (val + 7));
2192}
2193
2194int
2195pci_get_max_read_req(device_t dev)
2196{
2197	struct pci_devinfo *dinfo = device_get_ivars(dev);
2198	int cap;
2199	uint16_t val;
2200
2201	cap = dinfo->cfg.pcie.pcie_location;
2202	if (cap == 0)
2203		return (0);
2204	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2205	val &= PCIEM_CTL_MAX_READ_REQUEST;
2206	val >>= 12;
2207	return (1 << (val + 7));
2208}
2209
2210int
2211pci_set_max_read_req(device_t dev, int size)
2212{
2213	struct pci_devinfo *dinfo = device_get_ivars(dev);
2214	int cap;
2215	uint16_t val;
2216
2217	cap = dinfo->cfg.pcie.pcie_location;
2218	if (cap == 0)
2219		return (0);
2220	if (size < 128)
2221		size = 128;
2222	if (size > 4096)
2223		size = 4096;
2224	size = (1 << (fls(size) - 1));
2225	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2226	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2227	val |= (fls(size) - 8) << 12;
2228	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2229	return (size);
2230}
2231
2232uint32_t
2233pcie_read_config(device_t dev, int reg, int width)
2234{
2235	struct pci_devinfo *dinfo = device_get_ivars(dev);
2236	int cap;
2237
2238	cap = dinfo->cfg.pcie.pcie_location;
2239	if (cap == 0) {
2240		if (width == 2)
2241			return (0xffff);
2242		return (0xffffffff);
2243	}
2244
2245	return (pci_read_config(dev, cap + reg, width));
2246}
2247
2248void
2249pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2250{
2251	struct pci_devinfo *dinfo = device_get_ivars(dev);
2252	int cap;
2253
2254	cap = dinfo->cfg.pcie.pcie_location;
2255	if (cap == 0)
2256		return;
2257	pci_write_config(dev, cap + reg, value, width);
2258}
2259
2260/*
2261 * Adjusts a PCI-e capability register by clearing the bits in mask
2262 * and setting the bits in (value & mask).  Bits not set in mask are
2263 * not adjusted.
2264 *
2265 * Returns the old value on success or all ones on failure.
2266 */
2267uint32_t
2268pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2269    int width)
2270{
2271	struct pci_devinfo *dinfo = device_get_ivars(dev);
2272	uint32_t old, new;
2273	int cap;
2274
2275	cap = dinfo->cfg.pcie.pcie_location;
2276	if (cap == 0) {
2277		if (width == 2)
2278			return (0xffff);
2279		return (0xffffffff);
2280	}
2281
2282	old = pci_read_config(dev, cap + reg, width);
2283	new = old & ~mask;
2284	new |= (value & mask);
2285	pci_write_config(dev, cap + reg, new, width);
2286	return (old);
2287}
2288
2289/*
2290 * Support for MSI message signalled interrupts.
2291 */
2292void
2293pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
2294    uint16_t data)
2295{
2296	struct pci_devinfo *dinfo = device_get_ivars(child);
2297	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2298
2299	/* Write data and address values. */
2300	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
2301	    address & 0xffffffff, 4);
2302	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2303		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
2304		    address >> 32, 4);
2305		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
2306		    data, 2);
2307	} else
2308		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2309		    2);
2310
2311	/* Enable MSI in the control register. */
2312	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
2313	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2314	    msi->msi_ctrl, 2);
2315
2316	/* Enable MSI -> HT mapping. */
2317	pci_ht_map_msi(child, address);
2318}
2319
2320void
2321pci_disable_msi_method(device_t dev, device_t child)
2322{
2323	struct pci_devinfo *dinfo = device_get_ivars(child);
2324	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2325
2326	/* Disable MSI -> HT mapping. */
2327	pci_ht_map_msi(child, 0);
2328
2329	/* Disable MSI in the control register. */
2330	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
2331	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2332	    msi->msi_ctrl, 2);
2333}
2334
2335/*
2336 * Restore MSI registers during resume.  If MSI is enabled then
2337 * restore the data and address registers in addition to the control
2338 * register.
2339 */
2340static void
2341pci_resume_msi(device_t dev)
2342{
2343	struct pci_devinfo *dinfo = device_get_ivars(dev);
2344	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2345	uint64_t address;
2346	uint16_t data;
2347
2348	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
2349		address = msi->msi_addr;
2350		data = msi->msi_data;
2351		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
2352		    address & 0xffffffff, 4);
2353		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
2354			pci_write_config(dev, msi->msi_location +
2355			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
2356			pci_write_config(dev, msi->msi_location +
2357			    PCIR_MSI_DATA_64BIT, data, 2);
2358		} else
2359			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
2360			    data, 2);
2361	}
2362	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
2363	    2);
2364}
2365
2366static int
2367pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2368{
2369	struct pci_devinfo *dinfo = device_get_ivars(dev);
2370	pcicfgregs *cfg = &dinfo->cfg;
2371	struct resource_list_entry *rle;
2372	struct msix_table_entry *mte;
2373	struct msix_vector *mv;
2374	uint64_t addr;
2375	uint32_t data;
2376	int error, i, j;
2377
2378	/*
2379	 * Handle MSI first.  We try to find this IRQ among our list
2380	 * of MSI IRQs.  If we find it, we request updated address and
2381	 * data registers and apply the results.
2382	 */
2383	if (cfg->msi.msi_alloc > 0) {
2384
2385		/* If we don't have any active handlers, nothing to do. */
2386		if (cfg->msi.msi_handlers == 0)
2387			return (0);
2388		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2389			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2390			    i + 1);
2391			if (rle->start == irq) {
2392				error = PCIB_MAP_MSI(device_get_parent(bus),
2393				    dev, irq, &addr, &data);
2394				if (error)
2395					return (error);
2396				pci_disable_msi(dev);
2397				dinfo->cfg.msi.msi_addr = addr;
2398				dinfo->cfg.msi.msi_data = data;
2399				pci_enable_msi(dev, addr, data);
2400				return (0);
2401			}
2402		}
2403		return (ENOENT);
2404	}
2405
2406	/*
2407	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2408	 * we request the updated mapping info.  If that works, we go
2409	 * through all the slots that use this IRQ and update them.
2410	 */
2411	if (cfg->msix.msix_alloc > 0) {
2412		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2413			mv = &cfg->msix.msix_vectors[i];
2414			if (mv->mv_irq == irq) {
2415				error = PCIB_MAP_MSI(device_get_parent(bus),
2416				    dev, irq, &addr, &data);
2417				if (error)
2418					return (error);
2419				mv->mv_address = addr;
2420				mv->mv_data = data;
2421				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2422					mte = &cfg->msix.msix_table[j];
2423					if (mte->mte_vector != i + 1)
2424						continue;
2425					if (mte->mte_handlers == 0)
2426						continue;
2427					pci_mask_msix(dev, j);
2428					pci_enable_msix(dev, j, addr, data);
2429					pci_unmask_msix(dev, j);
2430				}
2431			}
2432		}
2433		return (ENOENT);
2434	}
2435
2436	return (ENOENT);
2437}
2438
2439/*
2440 * Returns true if the specified device is blacklisted because MSI
2441 * doesn't work.
2442 */
2443int
2444pci_msi_device_blacklisted(device_t dev)
2445{
2446
2447	if (!pci_honor_msi_blacklist)
2448		return (0);
2449
2450	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2451}
2452
2453/*
2454 * Determine if MSI is blacklisted globally on this system.  Currently,
2455 * we just check for blacklisted chipsets as represented by the
2456 * host-PCI bridge at device 0:0:0.  In the future, it may become
2457 * necessary to check other system attributes, such as the kenv values
2458 * that give the motherboard manufacturer and model number.
2459 */
2460static int
2461pci_msi_blacklisted(void)
2462{
2463	device_t dev;
2464
2465	if (!pci_honor_msi_blacklist)
2466		return (0);
2467
2468	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2469	if (!(pcie_chipset || pcix_chipset)) {
2470		if (vm_guest != VM_GUEST_NO) {
2471			/*
2472			 * Whitelist older chipsets in virtual
2473			 * machines known to support MSI.
2474			 */
2475			dev = pci_find_bsf(0, 0, 0);
2476			if (dev != NULL)
2477				return (!pci_has_quirk(pci_get_devid(dev),
2478					PCI_QUIRK_ENABLE_MSI_VM));
2479		}
2480		return (1);
2481	}
2482
2483	dev = pci_find_bsf(0, 0, 0);
2484	if (dev != NULL)
2485		return (pci_msi_device_blacklisted(dev));
2486	return (0);
2487}
2488
2489/*
2490 * Returns true if the specified device is blacklisted because MSI-X
2491 * doesn't work.  Note that this assumes that if MSI doesn't work,
2492 * MSI-X doesn't either.
2493 */
2494int
2495pci_msix_device_blacklisted(device_t dev)
2496{
2497
2498	if (!pci_honor_msi_blacklist)
2499		return (0);
2500
2501	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2502		return (1);
2503
2504	return (pci_msi_device_blacklisted(dev));
2505}
2506
2507/*
2508 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2509 * is blacklisted, assume that MSI-X is as well.  Check for additional
2510 * chipsets where MSI works but MSI-X does not.
2511 */
2512static int
2513pci_msix_blacklisted(void)
2514{
2515	device_t dev;
2516
2517	if (!pci_honor_msi_blacklist)
2518		return (0);
2519
2520	dev = pci_find_bsf(0, 0, 0);
2521	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2522	    PCI_QUIRK_DISABLE_MSIX))
2523		return (1);
2524
2525	return (pci_msi_blacklisted());
2526}
2527
2528/*
2529 * Attempt to allocate *count MSI messages.  The actual number allocated is
2530 * returned in *count.  After this function returns, each message will be
2531 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2532 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/*
	 * If rid 0 is allocated, then fail: the legacy INTx resource
	 * is still held, so MSI cannot take over.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages (irqs[] is sized for 32). */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' contiguous messages,
	 * halving the request on each failure until a single message
	 * also fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * (bits 6:4) holds log2 of the number of enabled messages.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2651
2652/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means MSI-X was not in use. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated: no handlers
	 * installed and no IRQ resources held.  Capture the IRQ
	 * numbers while walking the list so they can be handed back
	 * to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/*
	 * Update control register with 0 count.  MSI must already be
	 * disabled at this point.
	 */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count and clear the cached address/data pair. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2700
2701/*
2702 * Return the max supported MSI messages this device supports.
2703 * Basically, assuming the MD code can alloc messages, this function
2704 * should return the maximum value that pci_alloc_msi() can return.
2705 * Thus, it is subject to the tunables, etc.
2706 */
2707int
2708pci_msi_count_method(device_t dev, device_t child)
2709{
2710	struct pci_devinfo *dinfo = device_get_ivars(child);
2711	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2712
2713	if (pci_do_msi && msi->msi_location != 0)
2714		return (msi->msi_msgnum);
2715	return (0);
2716}
2717
2718/* free pcicfgregs structure and all depending data structures */
2719
2720int
2721pci_freecfg(struct pci_devinfo *dinfo)
2722{
2723	struct devlist *devlist_head;
2724	struct pci_map *pm, *next;
2725	int i;
2726
2727	devlist_head = &pci_devq;
2728
2729	if (dinfo->cfg.vpd.vpd_reg) {
2730		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2731		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2732			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2733		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2734		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2735			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2736		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2737	}
2738	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2739		free(pm, M_DEVBUF);
2740	}
2741	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2742	free(dinfo, M_DEVBUF);
2743
2744	/* increment the generation count */
2745	pci_generation++;
2746
2747	/* we're losing one device */
2748	pci_numdevs--;
2749	return (0);
2750}
2751
2752/*
2753 * PCI power manangement
2754 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	/* No power management capability: state changes are unsupported. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the rest of the status register; replace only the state. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; the capability must advertise it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is optional; the capability must advertise it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2826
2827int
2828pci_get_powerstate_method(device_t dev, device_t child)
2829{
2830	struct pci_devinfo *dinfo = device_get_ivars(child);
2831	pcicfgregs *cfg = &dinfo->cfg;
2832	uint16_t status;
2833	int result;
2834
2835	if (cfg->pp.pp_cap != 0) {
2836		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2837		switch (status & PCIM_PSTAT_DMASK) {
2838		case PCIM_PSTAT_D0:
2839			result = PCI_POWERSTATE_D0;
2840			break;
2841		case PCIM_PSTAT_D1:
2842			result = PCI_POWERSTATE_D1;
2843			break;
2844		case PCIM_PSTAT_D2:
2845			result = PCI_POWERSTATE_D2;
2846			break;
2847		case PCIM_PSTAT_D3:
2848			result = PCI_POWERSTATE_D3;
2849			break;
2850		default:
2851			result = PCI_POWERSTATE_UNKNOWN;
2852			break;
2853		}
2854	} else {
2855		/* No support, device is always at D0 */
2856		result = PCI_POWERSTATE_D0;
2857	}
2858	return (result);
2859}
2860
2861/*
2862 * Some convenience functions for PCI device drivers.
2863 */
2864
2865static __inline void
2866pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2867{
2868	uint16_t	command;
2869
2870	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2871	command |= bit;
2872	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2873}
2874
2875static __inline void
2876pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2877{
2878	uint16_t	command;
2879
2880	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2881	command &= ~bit;
2882	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2883}
2884
2885int
2886pci_enable_busmaster_method(device_t dev, device_t child)
2887{
2888	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2889	return (0);
2890}
2891
2892int
2893pci_disable_busmaster_method(device_t dev, device_t child)
2894{
2895	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2896	return (0);
2897}
2898
2899int
2900pci_enable_io_method(device_t dev, device_t child, int space)
2901{
2902	uint16_t bit;
2903
2904	switch(space) {
2905	case SYS_RES_IOPORT:
2906		bit = PCIM_CMD_PORTEN;
2907		break;
2908	case SYS_RES_MEMORY:
2909		bit = PCIM_CMD_MEMEN;
2910		break;
2911	default:
2912		return (EINVAL);
2913	}
2914	pci_set_command_bit(dev, child, bit);
2915	return (0);
2916}
2917
2918int
2919pci_disable_io_method(device_t dev, device_t child, int space)
2920{
2921	uint16_t bit;
2922
2923	switch(space) {
2924	case SYS_RES_IOPORT:
2925		bit = PCIM_CMD_PORTEN;
2926		break;
2927	case SYS_RES_MEMORY:
2928		bit = PCIM_CMD_MEMEN;
2929		break;
2930	default:
2931		return (EINVAL);
2932	}
2933	pci_clear_command_bit(dev, child, bit);
2934	return (0);
2935}
2936
2937/*
2938 * New style pci driver.  Parent device is either a pci-host-bridge or a
2939 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2940 */
2941
/*
 * Dump a device's config-space summary to the console when booting
 * verbose: IDs, location, class, command/status, timing parameters,
 * and the power management, MSI, and MSI-X capabilities if present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* lattimer is in units of 30 ns; mingnt/maxlat in 250 ns. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based (1 = INTA#). */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2998
2999static int
3000pci_porten(device_t dev)
3001{
3002	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
3003}
3004
3005static int
3006pci_memen(device_t dev)
3007{
3008	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
3009}
3010
/*
 * Read the current value of a BAR and probe its size mask by writing
 * all 1's ("BAR sizing").  On return, *mapp holds the BAR's original
 * value, *testvalp the value read back after the all-1's write, and
 * *bar64 (if non-NULL) is non-zero when the BAR is a 64-bit memory
 * BAR.  The original value is restored before returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 *
	 * NB: according to the PCI Local Bus Specification, rev. 3.0:
	 * "Software writes 0FFFFFFFFh to both registers, reads them back,
	 * and combines the result into a 64-bit value." (section 6.2.5.1)
	 *
	 * Writes to both registers must be performed before attempting to
	 * read back the size value.
	 */
	testval = 0;
	pci_write_config(dev, reg, 0xffffffff, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}
	testval |= pci_read_config(dev, reg, 4);

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
3087
3088static void
3089pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
3090{
3091	struct pci_devinfo *dinfo;
3092	int ln2range;
3093
3094	/* The device ROM BAR is always a 32-bit memory BAR. */
3095	dinfo = device_get_ivars(dev);
3096	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
3097		ln2range = 32;
3098	else
3099		ln2range = pci_maprange(pm->pm_value);
3100	pci_write_config(dev, pm->pm_reg, base, 4);
3101	if (ln2range == 64)
3102		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
3103	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
3104	if (ln2range == 64)
3105		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
3106		    pm->pm_reg + 4, 4) << 32;
3107}
3108
3109struct pci_map *
3110pci_find_bar(device_t dev, int reg)
3111{
3112	struct pci_devinfo *dinfo;
3113	struct pci_map *pm;
3114
3115	dinfo = device_get_ivars(dev);
3116	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
3117		if (pm->pm_reg == reg)
3118			return (pm);
3119	}
3120	return (NULL);
3121}
3122
3123int
3124pci_bar_enabled(device_t dev, struct pci_map *pm)
3125{
3126	struct pci_devinfo *dinfo;
3127	uint16_t cmd;
3128
3129	dinfo = device_get_ivars(dev);
3130	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
3131	    !(pm->pm_value & PCIM_BIOS_ENABLE))
3132		return (0);
3133	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3134	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
3135		return ((cmd & PCIM_CMD_MEMEN) != 0);
3136	else
3137		return ((cmd & PCIM_CMD_PORTEN) != 0);
3138}
3139
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	/*
	 * Allocate a record for the BAR at config register 'reg' with
	 * raw value 'value' and log2 size 'size', and link it into the
	 * device's list of maps.  Duplicate registers are a caller bug
	 * (asserted below).
	 */
	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Find the insertion point: stop at the entry whose successor
	 * sorts above the new register (or at the last entry).  On an
	 * empty list the loop body never runs and 'prev' ends up NULL.
	 * NOTE(review): if the new register sorts below the very first
	 * entry it is still inserted after it; BARs are normally added
	 * in ascending register order so this case should not arise —
	 * confirm before relying on strict ordering.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
3164
3165static void
3166pci_restore_bars(device_t dev)
3167{
3168	struct pci_devinfo *dinfo;
3169	struct pci_map *pm;
3170	int ln2range;
3171
3172	dinfo = device_get_ivars(dev);
3173	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
3174		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
3175			ln2range = 32;
3176		else
3177			ln2range = pci_maprange(pm->pm_value);
3178		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
3179		if (ln2range == 64)
3180			pci_write_config(dev, pm->pm_reg + 4,
3181			    pm->pm_value >> 32, 4);
3182	}
3183}
3184
3185/*
3186 * Add a resource based on a pci map register. Return 1 if the map
3187 * register is a 32bit map register or 2 if it is a 64bit register.
3188 */
3189static int
3190pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3191    int force, int prefetch)
3192{
3193	struct pci_map *pm;
3194	pci_addr_t base, map, testval;
3195	pci_addr_t start, end, count;
3196	int barlen, basezero, flags, maprange, mapsize, type;
3197	uint16_t cmd;
3198	struct resource *res;
3199
3200	/*
3201	 * The BAR may already exist if the device is a CardBus card
3202	 * whose CIS is stored in this BAR.
3203	 */
3204	pm = pci_find_bar(dev, reg);
3205	if (pm != NULL) {
3206		maprange = pci_maprange(pm->pm_value);
3207		barlen = maprange == 64 ? 2 : 1;
3208		return (barlen);
3209	}
3210
3211	pci_read_bar(dev, reg, &map, &testval, NULL);
3212	if (PCI_BAR_MEM(map)) {
3213		type = SYS_RES_MEMORY;
3214		if (map & PCIM_BAR_MEM_PREFETCH)
3215			prefetch = 1;
3216	} else
3217		type = SYS_RES_IOPORT;
3218	mapsize = pci_mapsize(testval);
3219	base = pci_mapbase(map);
3220#ifdef __PCI_BAR_ZERO_VALID
3221	basezero = 0;
3222#else
3223	basezero = base == 0;
3224#endif
3225	maprange = pci_maprange(map);
3226	barlen = maprange == 64 ? 2 : 1;
3227
3228	/*
3229	 * For I/O registers, if bottom bit is set, and the next bit up
3230	 * isn't clear, we know we have a BAR that doesn't conform to the
3231	 * spec, so ignore it.  Also, sanity check the size of the data
3232	 * areas to the type of memory involved.  Memory must be at least
3233	 * 16 bytes in size, while I/O ranges must be at least 4.
3234	 */
3235	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3236		return (barlen);
3237	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3238	    (type == SYS_RES_IOPORT && mapsize < 2))
3239		return (barlen);
3240
3241	/* Save a record of this BAR. */
3242	pm = pci_add_bar(dev, reg, map, mapsize);
3243	if (bootverbose) {
3244		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3245		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3246		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3247			printf(", port disabled\n");
3248		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3249			printf(", memory disabled\n");
3250		else
3251			printf(", enabled\n");
3252	}
3253
3254	/*
3255	 * If base is 0, then we have problems if this architecture does
3256	 * not allow that.  It is best to ignore such entries for the
3257	 * moment.  These will be allocated later if the driver specifically
3258	 * requests them.  However, some removable buses look better when
3259	 * all resources are allocated, so allow '0' to be overriden.
3260	 *
3261	 * Similarly treat maps whose values is the same as the test value
3262	 * read back.  These maps have had all f's written to them by the
3263	 * BIOS in an attempt to disable the resources.
3264	 */
3265	if (!force && (basezero || map == testval))
3266		return (barlen);
3267	if ((u_long)base != base) {
3268		device_printf(bus,
3269		    "pci%d:%d:%d:%d bar %#x too many address bits",
3270		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3271		    pci_get_function(dev), reg);
3272		return (barlen);
3273	}
3274
3275	/*
3276	 * This code theoretically does the right thing, but has
3277	 * undesirable side effects in some cases where peripherals
3278	 * respond oddly to having these bits enabled.  Let the user
3279	 * be able to turn them off (since pci_enable_io_modes is 1 by
3280	 * default).
3281	 */
3282	if (pci_enable_io_modes) {
3283		/* Turn on resources that have been left off by a lazy BIOS */
3284		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3285			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3286			cmd |= PCIM_CMD_PORTEN;
3287			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3288		}
3289		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3290			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3291			cmd |= PCIM_CMD_MEMEN;
3292			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3293		}
3294	} else {
3295		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3296			return (barlen);
3297		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3298			return (barlen);
3299	}
3300
3301	count = (pci_addr_t)1 << mapsize;
3302	flags = RF_ALIGNMENT_LOG2(mapsize);
3303	if (prefetch)
3304		flags |= RF_PREFETCHABLE;
3305	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3306		start = 0;	/* Let the parent decide. */
3307		end = ~0;
3308	} else {
3309		start = base;
3310		end = base + count - 1;
3311	}
3312	resource_list_add(rl, type, reg, start, end, count);
3313
3314	/*
3315	 * Try to allocate the resource for this BAR from our parent
3316	 * so that this resource range is already reserved.  The
3317	 * driver for this device will later inherit this resource in
3318	 * pci_alloc_resource().
3319	 */
3320	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3321	    flags);
3322	if ((pci_do_realloc_bars
3323		|| pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_REALLOC_BAR))
3324	    && res == NULL && (start != 0 || end != ~0)) {
3325		/*
3326		 * If the allocation fails, try to allocate a resource for
3327		 * this BAR using any available range.  The firmware felt
3328		 * it was important enough to assign a resource, so don't
3329		 * disable decoding if we can help it.
3330		 */
3331		resource_list_delete(rl, type, reg);
3332		resource_list_add(rl, type, reg, 0, ~0, count);
3333		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3334		    count, flags);
3335	}
3336	if (res == NULL) {
3337		/*
3338		 * If the allocation fails, delete the resource list entry
3339		 * and disable decoding for this device.
3340		 *
3341		 * If the driver requests this resource in the future,
3342		 * pci_reserve_map() will try to allocate a fresh
3343		 * resource range.
3344		 */
3345		resource_list_delete(rl, type, reg);
3346		pci_disable_io(dev, type);
3347		if (bootverbose)
3348			device_printf(bus,
3349			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3350			    pci_get_domain(dev), pci_get_bus(dev),
3351			    pci_get_slot(dev), pci_get_function(dev), reg);
3352	} else {
3353		start = rman_get_start(res);
3354		pci_write_bar(dev, pm, start);
3355	}
3356	return (barlen);
3357}
3358
3359/*
3360 * For ATA devices we need to decide early what addressing mode to use.
3361 * Legacy demands that the primary and secondary ATA ports sits on the
3362 * same addresses that old ISA hardware did. This dictates that we use
3363 * those addresses and ignore the BAR's if we cannot set PCI native
3364 * addressing mode.
3365 */
3366static void
3367pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3368    uint32_t prefetchmask)
3369{
3370	int rid, type, progif;
3371#if 0
3372	/* if this device supports PCI native addressing use it */
3373	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3374	if ((progif & 0x8a) == 0x8a) {
3375		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3376		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3377			printf("Trying ATA native PCI addressing mode\n");
3378			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3379		}
3380	}
3381#endif
3382	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3383	type = SYS_RES_IOPORT;
3384	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3385		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3386		    prefetchmask & (1 << 0));
3387		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3388		    prefetchmask & (1 << 1));
3389	} else {
3390		rid = PCIR_BAR(0);
3391		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3392		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3393		    0x1f7, 8, 0);
3394		rid = PCIR_BAR(1);
3395		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3396		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3397		    0x3f6, 1, 0);
3398	}
3399	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3400		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3401		    prefetchmask & (1 << 2));
3402		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3403		    prefetchmask & (1 << 3));
3404	} else {
3405		rid = PCIR_BAR(2);
3406		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3407		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3408		    0x177, 8, 0);
3409		rid = PCIR_BAR(3);
3410		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3411		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3412		    0x376, 1, 0);
3413	}
3414	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3415	    prefetchmask & (1 << 4));
3416	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3417	    prefetchmask & (1 << 5));
3418}
3419
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq.  Only values in the
	 * range 1..254 are accepted; anything else is discarded.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3467
3468/* Perform early OHCI takeover from SMM. */
3469static void
3470ohci_early_takeover(device_t self)
3471{
3472	struct resource *res;
3473	uint32_t ctl;
3474	int rid;
3475	int i;
3476
3477	rid = PCIR_BAR(0);
3478	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3479	if (res == NULL)
3480		return;
3481
3482	ctl = bus_read_4(res, OHCI_CONTROL);
3483	if (ctl & OHCI_IR) {
3484		if (bootverbose)
3485			printf("ohci early: "
3486			    "SMM active, request owner change\n");
3487		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3488		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3489			DELAY(1000);
3490			ctl = bus_read_4(res, OHCI_CONTROL);
3491		}
3492		if (ctl & OHCI_IR) {
3493			if (bootverbose)
3494				printf("ohci early: "
3495				    "SMM does not respond, resetting\n");
3496			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3497		}
3498		/* Disable interrupts */
3499		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3500	}
3501
3502	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3503}
3504
3505/* Perform early UHCI takeover from SMM. */
3506static void
3507uhci_early_takeover(device_t self)
3508{
3509	struct resource *res;
3510	int rid;
3511
3512	/*
3513	 * Set the PIRQD enable bit and switch off all the others. We don't
3514	 * want legacy support to interfere with us XXX Does this also mean
3515	 * that the BIOS won't touch the keyboard anymore if it is connected
3516	 * to the ports of the root hub?
3517	 */
3518	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3519
3520	/* Disable interrupts */
3521	rid = PCI_UHCI_BASE_REG;
3522	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3523	if (res != NULL) {
3524		bus_write_2(res, UHCI_INTR, 0);
3525		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3526	}
3527}
3528
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the extended capability list, which for EHCI lives in PCI
	 * config space; HCCPARAMS gives the offset of the first entry.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the Legacy Support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* A zero BIOS semaphore means the BIOS does not own it. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 times, 1ms apart, for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3584
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	/*
	 * NOTE(review): eecp is only 8 bits wide, but xHCI extended
	 * capability offsets (dword offset << 2) can exceed 255 —
	 * confirm whether offsets are guaranteed small enough here.
	 */
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones so the first loop test of XHCI_XECP_NEXT(eec) passes. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Unlike EHCI, the xHCI extended capability list lives in MMIO
	 * space; HCSPARAMS0 gives the dword offset of the first entry.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		/* Only the USB Legacy Support capability is of interest. */
		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		/* Zero BIOS semaphore: the BIOS does not own the controller. */
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		/* NOTE(review): writing 0 to USBCMD also clears the run bit. */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to post the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3646
3647#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/*
	 * Reserve the secondary bus number range programmed into a
	 * PCI-PCI or CardBus bridge so later bus-number allocation does
	 * not reuse it.  Devices with any other header type have no
	 * bus-number registers and are ignored.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/*
		 * Derive the bus range from the undocumented
		 * "supported bus" register at 0x41 when it is valid.
		 */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/*
		 * Only apply the fixup on the affected planar
		 * (maker "Compal", product "08A0" per SMBIOS).
		 */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/*
	 * Reached by falling through when the programmed range is
	 * invalid or the reservation failed, or via the explicit goto
	 * above: zero the registers to force renumbering later.
	 */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3752
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	/*
	 * Allocate a bus-number range (PCI_RES_BUS) for a bridge child,
	 * lazily reserving it from our parent and programming the
	 * bridge's secondary/subordinate bus registers on first use.
	 * Only rid 0 is supported, and only for bridge/CardBus headers.
	 */
	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve unactivated; activation happens in the alloc below. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge with the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3803#endif
3804
static int
pci_ea_bei_to_rid(device_t dev, int bei)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
	int iov_pos;
	struct pcicfg_iov *iov;

	/*
	 * Map an Enhanced Allocation BAR Equivalent Indicator (BEI) to
	 * the config-space register offset used as the resource rid.
	 * Returns -1 when the BEI has no corresponding register.
	 */
	dinfo = device_get_ivars(dev);
	iov = dinfo->cfg.iov;
	if (iov != NULL)
		iov_pos = iov->iov_pos;
	else
		iov_pos = 0;
#endif

	/* Check if matches BAR */
	if ((bei >= PCIM_EA_BEI_BAR_0) &&
	    (bei <= PCIM_EA_BEI_BAR_5))
		return (PCIR_BAR(bei));

	/* Check ROM */
	if (bei == PCIM_EA_BEI_ROM)
		return (PCIR_BIOS);

#ifdef PCI_IOV
	/* Check if matches VF_BAR */
	/* VF BARs live inside the SR-IOV capability at iov_pos. */
	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
	    (bei <= PCIM_EA_BEI_VF_BAR_5))
		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
		    iov_pos);
#endif

	return (-1);
}
3840
3841int
3842pci_ea_is_enabled(device_t dev, int rid)
3843{
3844	struct pci_ea_entry *ea;
3845	struct pci_devinfo *dinfo;
3846
3847	dinfo = device_get_ivars(dev);
3848
3849	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3850		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3851			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3852	}
3853
3854	return (0);
3855}
3856
3857void
3858pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
3859{
3860	struct pci_ea_entry *ea;
3861	struct pci_devinfo *dinfo;
3862	pci_addr_t start, end, count;
3863	struct resource_list *rl;
3864	int type, flags, rid;
3865	struct resource *res;
3866	uint32_t tmp;
3867#ifdef PCI_IOV
3868	struct pcicfg_iov *iov;
3869#endif
3870
3871	dinfo = device_get_ivars(dev);
3872	rl = &dinfo->resources;
3873	flags = 0;
3874
3875#ifdef PCI_IOV
3876	iov = dinfo->cfg.iov;
3877#endif
3878
3879	if (dinfo->cfg.ea.ea_location == 0)
3880		return;
3881
3882	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3883
3884		/*
3885		 * TODO: Ignore EA-BAR if is not enabled.
3886		 *   Currently the EA implementation supports
3887		 *   only situation, where EA structure contains
3888		 *   predefined entries. In case they are not enabled
3889		 *   leave them unallocated and proceed with
3890		 *   a legacy-BAR mechanism.
3891		 */
3892		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
3893			continue;
3894
3895		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
3896		case PCIM_EA_P_MEM_PREFETCH:
3897		case PCIM_EA_P_VF_MEM_PREFETCH:
3898			flags = RF_PREFETCHABLE;
3899			/* FALLTHROUGH */
3900		case PCIM_EA_P_VF_MEM:
3901		case PCIM_EA_P_MEM:
3902			type = SYS_RES_MEMORY;
3903			break;
3904		case PCIM_EA_P_IO:
3905			type = SYS_RES_IOPORT;
3906			break;
3907		default:
3908			continue;
3909		}
3910
3911		if (alloc_iov != 0) {
3912#ifdef PCI_IOV
3913			/* Allocating IOV, confirm BEI matches */
3914			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
3915			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
3916				continue;
3917#else
3918			continue;
3919#endif
3920		} else {
3921			/* Allocating BAR, confirm BEI matches */
3922			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
3923			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
3924			    (ea->eae_bei != PCIM_EA_BEI_ROM))
3925				continue;
3926		}
3927
3928		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
3929		if (rid < 0)
3930			continue;
3931
3932		/* Skip resources already allocated by EA */
3933		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
3934		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
3935			continue;
3936
3937		start = ea->eae_base;
3938		count = ea->eae_max_offset + 1;
3939#ifdef PCI_IOV
3940		if (iov != NULL)
3941			count = count * iov->iov_num_vfs;
3942#endif
3943		end = start + count - 1;
3944		if (count == 0)
3945			continue;
3946
3947		resource_list_add(rl, type, rid, start, end, count);
3948		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
3949		    flags);
3950		if (res == NULL) {
3951			resource_list_delete(rl, type, rid);
3952
3953			/*
3954			 * Failed to allocate using EA, disable entry.
3955			 * Another attempt to allocation will be performed
3956			 * further, but this time using legacy BAR registers
3957			 */
3958			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
3959			tmp &= ~PCIM_EA_ENABLE;
3960			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
3961
3962			/*
3963			 * Disabling entry might fail in case it is hardwired.
3964			 * Read flags again to match current status.
3965			 */
3966			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
3967
3968			continue;
3969		}
3970
3971		/* As per specification, fill BAR with zeros */
3972		pci_write_config(dev, rid, 0, 4);
3973	}
3974}
3975
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	/*
	 * Populate a new child device's resource list: EA entries
	 * first, then BARs (with ATA legacy special-casing and quirk
	 * filtering), the interrupt, USB SMM takeover, and finally the
	 * secondary bus range for bridges.
	 */
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices needs special map treatment */
	/*
	 * Use the legacy ATA path when the device is a master-mode IDE
	 * controller or when both channel BARs read back as zero.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* pci_add_map() returns 2 for a 64-bit BAR. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from SMM BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
4068
4069static struct pci_devinfo *
4070pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
4071    int slot, int func)
4072{
4073	struct pci_devinfo *dinfo;
4074
4075	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
4076	if (dinfo != NULL)
4077		pci_add_child(dev, dinfo);
4078
4079	return (dinfo);
4080}
4081
void
pci_add_children(device_t dev, int domain, int busno)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func drops back to 0 after the first slot iteration. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		/* Brief settle delay before reading the slot's header. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan extra functions only on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f);
	}
#undef REG
}
4124
/*
 * Re-enumerate the bus: add device_t children for newly present
 * functions, delete children whose functions have disappeared, and
 * probe/attach only the devices that were just added.
 */
int
pci_rescan_method(device_t dev)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	device_t child, *devlist, *unchanged;
	int devcount, error, i, j, maxslots, oldcount;
	int busno, domain, s, f, pcifunchigh;
	uint8_t hdrtype;

	/* No need to check for ARI on a rescan. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error)
		return (error);
	/*
	 * "unchanged" records, per devlist index, the children that are
	 * still present so they are neither deleted nor re-attached.
	 */
	if (devcount != 0) {
		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
		    M_NOWAIT | M_ZERO);
		if (unchanged == NULL) {
			free(devlist, M_TEMP);
			return (ENOMEM);
		}
	} else
		unchanged = NULL;

	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* If function 0 is not present, skip to the next slot. */
		f = 0;
		if (REG(PCIR_VENDOR, 2) == 0xffff)
			continue;
		pcifunchigh = 0;
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = 0; f <= pcifunchigh; f++) {
			if (REG(PCIR_VENDOR, 2) == 0xffff)
				continue;

			/*
			 * Found a valid function.  Check if a
			 * device_t for this device already exists.
			 */
			for (i = 0; i < devcount; i++) {
				child = devlist[i];
				if (child == NULL)
					continue;
				if (pci_get_slot(child) == s &&
				    pci_get_function(child) == f) {
					unchanged[i] = child;
					goto next_func;
				}
			}

			/* New function: create a child device for it. */
			pci_identify_function(pcib, dev, domain, busno, s, f);
		next_func:;
		}
	}

	/* Remove devices that are no longer present. */
	for (i = 0; i < devcount; i++) {
		if (unchanged[i] != NULL)
			continue;
		device_delete_child(dev, devlist[i]);
	}

	free(devlist, M_TEMP);
	oldcount = devcount;

	/* Try to attach the devices just added. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error) {
		free(unchanged, M_TEMP);
		return (error);
	}

	/* Skip children that survived the rescan; attach only new ones. */
	for (i = 0; i < devcount; i++) {
		for (j = 0; j < oldcount; j++) {
			if (devlist[i] == unchanged[j])
				goto next_device;
		}

		device_probe_and_attach(devlist[i]);
	next_device:;
	}

	free(unchanged, M_TEMP);
	free(devlist, M_TEMP);
	return (0);
#undef REG
}
4219
4220#ifdef PCI_IOV
/*
 * Create and add a child device for a newly created SR-IOV Virtual
 * Function.  The VF's bus/slot/function are derived from its routing
 * ID (rid) by the parent bridge.
 */
device_t
pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
    uint16_t did)
{
	struct pci_devinfo *vf_dinfo;
	device_t pcib;
	int busno, slot, func;

	pcib = device_get_parent(bus);

	/* Translate the routing ID into bus/slot/function numbers. */
	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);

	/* NOTE(review): assumes pci_fill_devinfo() cannot fail here; a
	 * NULL return would be dereferenced below — confirm. */
	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
	    slot, func, vid, did);

	/* Flag the device as a VF so other PCI code can distinguish it. */
	vf_dinfo->cfg.flags |= PCICFG_VF;
	pci_add_child(bus, vf_dinfo);

	return (vf_dinfo->cfg.dev);
}
4241
4242device_t
4243pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
4244    uint16_t vid, uint16_t did)
4245{
4246
4247	return (pci_add_iov_child(bus, pf, rid, vid, did));
4248}
4249#endif
4250
4251/*
4252 * For PCIe device set Max_Payload_Size to match PCIe root's.
4253 */
/*
 * For PCIe device set Max_Payload_Size to match PCIe root's.
 */
static void
pcie_setup_mps(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	device_t root;
	uint16_t rmps, mmps, mps;

	/* Only PCIe devices carry a Max_Payload_Size setting. */
	if (dinfo->cfg.pcie.pcie_location == 0)
		return;
	root = pci_find_pcie_root_port(dev);
	if (root == NULL)
		return;
	/* Check whether the MPS is already configured. */
	rmps = pcie_read_config(root, PCIER_DEVICE_CTL, 2) &
	    PCIEM_CTL_MAX_PAYLOAD;
	mps = pcie_read_config(dev, PCIER_DEVICE_CTL, 2) &
	    PCIEM_CTL_MAX_PAYLOAD;
	if (mps == rmps)
		return;
	/* Check whether the device is capable of the root's MPS. */
	/* NOTE(review): the << 5 presumably aligns the capability's MPS
	 * field with the control register's MPS field so the two are
	 * comparable — confirm against the PCIEM_* definitions. */
	mmps = (pcie_read_config(dev, PCIER_DEVICE_CAP, 2) &
	    PCIEM_CAP_MAX_PAYLOAD) << 5;
	if (rmps > mmps) {
		/*
		 * The device is unable to handle root's MPS.  Limit root.
		 * XXX: We should traverse through all the tree, applying
		 * it to all the devices.
		 */
		pcie_adjust_config(root, PCIER_DEVICE_CTL,
		    PCIEM_CTL_MAX_PAYLOAD, mmps, 2);
	} else {
		pcie_adjust_config(dev, PCIER_DEVICE_CTL,
		    PCIEM_CTL_MAX_PAYLOAD, rmps, 2);
	}
}
4289
/*
 * Quiesce PCIe error signalling on a newly added device: silence SERR
 * generation on root ports, clear any latched AER status bits, unmask
 * the standard AER error classes, and enable error reporting in the
 * PCIe device control register.
 */
static void
pci_add_child_clear_aer(device_t dev, struct pci_devinfo *dinfo)
{
	int aer;
	uint32_t r;
	uint16_t r2;

	/* On root ports, stop correctable/non-fatal/fatal errors from
	 * raising system errors. */
	if (dinfo->cfg.pcie.pcie_location != 0 &&
	    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT) {
		r2 = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
		    PCIER_ROOT_CTL, 2);
		r2 &= ~(PCIEM_ROOT_CTL_SERR_CORR |
		    PCIEM_ROOT_CTL_SERR_NONFATAL | PCIEM_ROOT_CTL_SERR_FATAL);
		pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
		    PCIER_ROOT_CTL, r2, 2);
	}
	if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
		/* Clear latched uncorrectable-error status (write-1-to-clear). */
		r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
		pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
		if (r != 0 && bootverbose) {
			pci_printf(&dinfo->cfg,
			    "clearing AER UC 0x%08x -> 0x%08x\n",
			    r, pci_read_config(dev, aer + PCIR_AER_UC_STATUS,
			    4));
		}

		/* Unmask (clear the mask bits for) these uncorrectable errors. */
		r = pci_read_config(dev, aer + PCIR_AER_UC_MASK, 4);
		r &= ~(PCIM_AER_UC_TRAINING_ERROR |
		    PCIM_AER_UC_DL_PROTOCOL_ERROR |
		    PCIM_AER_UC_SURPRISE_LINK_DOWN |
		    PCIM_AER_UC_POISONED_TLP |
		    PCIM_AER_UC_FC_PROTOCOL_ERROR |
		    PCIM_AER_UC_COMPLETION_TIMEOUT |
		    PCIM_AER_UC_COMPLETER_ABORT |
		    PCIM_AER_UC_UNEXPECTED_COMPLETION |
		    PCIM_AER_UC_RECEIVER_OVERFLOW |
		    PCIM_AER_UC_MALFORMED_TLP |
		    PCIM_AER_UC_ECRC_ERROR |
		    PCIM_AER_UC_UNSUPPORTED_REQUEST |
		    PCIM_AER_UC_ACS_VIOLATION |
		    PCIM_AER_UC_INTERNAL_ERROR |
		    PCIM_AER_UC_MC_BLOCKED_TLP |
		    PCIM_AER_UC_ATOMIC_EGRESS_BLK |
		    PCIM_AER_UC_TLP_PREFIX_BLOCKED);
		pci_write_config(dev, aer + PCIR_AER_UC_MASK, r, 4);

		/* Clear latched correctable-error status. */
		r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
		pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
		if (r != 0 && bootverbose) {
			pci_printf(&dinfo->cfg,
			    "clearing AER COR 0x%08x -> 0x%08x\n",
			    r, pci_read_config(dev, aer + PCIR_AER_COR_STATUS,
			    4));
		}

		/* Unmask these correctable errors. */
		r = pci_read_config(dev, aer + PCIR_AER_COR_MASK, 4);
		r &= ~(PCIM_AER_COR_RECEIVER_ERROR |
		    PCIM_AER_COR_BAD_TLP |
		    PCIM_AER_COR_BAD_DLLP |
		    PCIM_AER_COR_REPLAY_ROLLOVER |
		    PCIM_AER_COR_REPLAY_TIMEOUT |
		    PCIM_AER_COR_ADVISORY_NF_ERROR |
		    PCIM_AER_COR_INTERNAL_ERROR |
		    PCIM_AER_COR_HEADER_LOG_OVFLOW);
		pci_write_config(dev, aer + PCIR_AER_COR_MASK, r, 4);

		/* Enable error reporting in PCIe device control.
		 * NOTE(review): pcie_location is used here without the
		 * != 0 check done above; AER being a PCIe extended
		 * capability presumably implies it is nonzero — confirm. */
		r = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
		    PCIER_DEVICE_CTL, 2);
		r |=  PCIEM_CTL_COR_ENABLE | PCIEM_CTL_NFER_ENABLE |
		    PCIEM_CTL_FER_ENABLE | PCIEM_CTL_URR_ENABLE;
		pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
		    PCIER_DEVICE_CTL, r, 2);
	}
}
4364
/*
 * Bind a new device_t to the given pci_devinfo and finish setting it
 * up as a child of the bus: save/restore its config space, add its
 * resources, configure MPS, and notify interested parties.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t dev;

	dinfo->cfg.dev = dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot the device's config space, then write it back. */
	pci_cfg_save(dev, dinfo, 0);
	pci_cfg_restore(dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dev, 0, 0);
	pcie_setup_mps(dev);
	pci_child_added(dinfo->cfg.dev);

	if (pci_clear_aer_on_attach)
		pci_add_child_clear_aer(dev, dinfo);

	/* Let registered event handlers know about the new device. */
	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
}
4385
/*
 * Default implementation of the child-added bus method: the generic
 * PCI bus takes no extra action; subclasses may override this hook.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4391
4392static int
4393pci_probe(device_t dev)
4394{
4395
4396	device_set_desc(dev, "PCI bus");
4397
4398	/* Allow other subclasses to override this driver. */
4399	return (BUS_PROBE_GENERIC);
4400}
4401
/*
 * Common attach logic shared by the generic PCI bus driver and its
 * subclasses: record the domain/bus numbers, optionally claim our bus
 * number from the parent bridge, and cache the DMA tag.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	rid = 0;
	/* Claim our own bus number from the parent's bus number space. */
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4429
4430int
4431pci_attach(device_t dev)
4432{
4433	int busno, domain, error;
4434
4435	error = pci_attach_common(dev);
4436	if (error)
4437		return (error);
4438
4439	/*
4440	 * Since there can be multiple independently numbered PCI
4441	 * buses on systems with multiple PCI domains, we can't use
4442	 * the unit number to decide which bus we are probing. We ask
4443	 * the parent pcib what our domain and bus numbers are.
4444	 */
4445	domain = pcib_get_domain(dev);
4446	busno = pcib_get_bus(dev);
4447	pci_add_children(dev, domain, busno);
4448	return (bus_generic_attach(dev));
4449}
4450
/*
 * Detach the PCI bus: detach all children, release our reserved bus
 * number (when bus number resources are in use), and delete the child
 * device_t nodes.
 */
int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
	struct pci_softc *sc;
#endif
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
#ifdef PCI_RES_BUS
	sc = device_get_softc(dev);
	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
	if (error)
		return (error);
#endif
	return (device_delete_children(dev));
}
4470
4471static void
4472pci_hint_device_unit(device_t dev, device_t child, const char *name, int *unitp)
4473{
4474	int line, unit;
4475	const char *at;
4476	char me1[24], me2[32];
4477	uint8_t b, s, f;
4478	uint32_t d;
4479
4480	d = pci_get_domain(child);
4481	b = pci_get_bus(child);
4482	s = pci_get_slot(child);
4483	f = pci_get_function(child);
4484	snprintf(me1, sizeof(me1), "pci%u:%u:%u", b, s, f);
4485	snprintf(me2, sizeof(me2), "pci%u:%u:%u:%u", d, b, s, f);
4486	line = 0;
4487	while (resource_find_dev(&line, name, &unit, "at", NULL) == 0) {
4488		resource_string_value(name, unit, "at", &at);
4489		if (strcmp(at, me1) != 0 && strcmp(at, me2) != 0)
4490			continue; /* No match, try next candidate */
4491		*unitp = unit;
4492		return;
4493	}
4494}
4495
4496static void
4497pci_set_power_child(device_t dev, device_t child, int state)
4498{
4499	device_t pcib;
4500	int dstate;
4501
4502	/*
4503	 * Set the device to the given state.  If the firmware suggests
4504	 * a different power state, use it instead.  If power management
4505	 * is not present, the firmware is responsible for managing
4506	 * device power.  Skip children who aren't attached since they
4507	 * are handled separately.
4508	 */
4509	pcib = device_get_parent(dev);
4510	dstate = state;
4511	if (device_is_attached(child) &&
4512	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4513		pci_set_powerstate(child, dstate);
4514}
4515
/*
 * Suspend a single child: save its config space, run the driver's
 * suspend method, and then optionally power it down to D3 with its
 * legacy INTx handler suspended first.
 */
int
pci_suspend_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;
	int error;

	dinfo = device_get_ivars(child);

	/*
	 * Save the PCI configuration space for the child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	pci_cfg_save(child, dinfo, 0);

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend_child(dev, child);

	if (error)
		return (error);

	if (pci_do_power_suspend) {
		/*
		 * Make sure this device's interrupt handler is not invoked
		 * in the case the device uses a shared interrupt that can
		 * be raised by some other device.
		 * This is applicable only to regular (legacy) PCI interrupts
		 * as MSI/MSI-X interrupts are never shared.
		 */
		rle = resource_list_find(&dinfo->resources,
		    SYS_RES_IRQ, 0);
		if (rle != NULL && rle->res != NULL)
			(void)bus_suspend_intr(child, rle->res);
		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
	}

	return (0);
}
4554
/*
 * Resume a single child: restore power (optionally), restore its
 * config space, run the driver's resume method, and re-enable the
 * legacy INTx handler suspended in pci_suspend_child().
 */
int
pci_resume_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;

	if (pci_do_power_resume)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);

	dinfo = device_get_ivars(child);
	pci_cfg_restore(child, dinfo);
	/* For unattached children, refresh the saved config snapshot. */
	if (!device_is_attached(child))
		pci_cfg_save(child, dinfo, 1);

	bus_generic_resume_child(dev, child);

	/*
	 * Allow interrupts only after fully resuming the driver and hardware.
	 */
	if (pci_do_power_suspend) {
		/* See pci_suspend_child for details. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
		if (rle != NULL && rle->res != NULL)
			(void)bus_resume_intr(child, rle->res);
	}

	return (0);
}
4583
4584int
4585pci_resume(device_t dev)
4586{
4587	device_t child, *devlist;
4588	int error, i, numdevs;
4589
4590	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4591		return (error);
4592
4593	/*
4594	 * Resume critical devices first, then everything else later.
4595	 */
4596	for (i = 0; i < numdevs; i++) {
4597		child = devlist[i];
4598		switch (pci_get_class(child)) {
4599		case PCIC_DISPLAY:
4600		case PCIC_MEMORY:
4601		case PCIC_BRIDGE:
4602		case PCIC_BASEPERIPH:
4603			BUS_RESUME_CHILD(dev, child);
4604			break;
4605		}
4606	}
4607	for (i = 0; i < numdevs; i++) {
4608		child = devlist[i];
4609		switch (pci_get_class(child)) {
4610		case PCIC_DISPLAY:
4611		case PCIC_MEMORY:
4612		case PCIC_BRIDGE:
4613		case PCIC_BASEPERIPH:
4614			break;
4615		default:
4616			BUS_RESUME_CHILD(dev, child);
4617		}
4618	}
4619	free(devlist, M_TEMP);
4620	return (0);
4621}
4622
4623static void
4624pci_load_vendor_data(void)
4625{
4626	caddr_t data;
4627	void *ptr;
4628	size_t sz;
4629
4630	data = preload_search_by_type("pci_vendor_data");
4631	if (data != NULL) {
4632		ptr = preload_fetch_addr(data);
4633		sz = preload_fetch_size(data);
4634		if (ptr != NULL && sz != 0) {
4635			pci_vendordata = ptr;
4636			pci_vendordata_size = sz;
4637			/* terminate the database */
4638			pci_vendordata[pci_vendordata_size] = '\n';
4639		}
4640	}
4641}
4642
4643void
4644pci_driver_added(device_t dev, driver_t *driver)
4645{
4646	int numdevs;
4647	device_t *devlist;
4648	device_t child;
4649	struct pci_devinfo *dinfo;
4650	int i;
4651
4652	if (bootverbose)
4653		device_printf(dev, "driver added\n");
4654	DEVICE_IDENTIFY(driver, dev);
4655	if (device_get_children(dev, &devlist, &numdevs) != 0)
4656		return;
4657	for (i = 0; i < numdevs; i++) {
4658		child = devlist[i];
4659		if (device_get_state(child) != DS_NOTPRESENT)
4660			continue;
4661		dinfo = device_get_ivars(child);
4662		pci_print_verbose(dinfo);
4663		if (bootverbose)
4664			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4665		pci_cfg_restore(child, dinfo);
4666		if (device_probe_and_attach(child) != 0)
4667			pci_child_detached(dev, child);
4668	}
4669	free(devlist, M_TEMP);
4670}
4671
/*
 * Bus method to establish an interrupt handler on a child.  After the
 * generic setup, direct children get interrupt-type specific
 * programming: rid 0 is legacy INTx; a non-zero rid selects an
 * allocated MSI or MSI-X vector, which is mapped through the parent
 * bridge and enabled on first use.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Program the MSI capability on first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}

			/*
			 * The MSIX table entry must be made valid by
			 * incrementing the mte_handlers before
			 * calling pci_enable_msix() and
			 * pci_resume_msix(). Else the MSIX rewrite
			 * table quirk will not work as expected.
			 */
			mte->mte_handlers++;
			if (mte->mte_handlers == 1) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4779
4780int
4781pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4782    void *cookie)
4783{
4784	struct msix_table_entry *mte;
4785	struct resource_list_entry *rle;
4786	struct pci_devinfo *dinfo;
4787	int error, rid;
4788
4789	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4790		return (EINVAL);
4791
4792	/* If this isn't a direct child, just bail out */
4793	if (device_get_parent(child) != dev)
4794		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4795
4796	rid = rman_get_rid(irq);
4797	if (rid == 0) {
4798		/* Mask INTx */
4799		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4800	} else {
4801		/*
4802		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4803		 * decrement the appropriate handlers count and mask the
4804		 * MSI-X message, or disable MSI messages if the count
4805		 * drops to 0.
4806		 */
4807		dinfo = device_get_ivars(child);
4808		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4809		if (rle->res != irq)
4810			return (EINVAL);
4811		if (dinfo->cfg.msi.msi_alloc > 0) {
4812			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4813			    ("MSI-X index too high"));
4814			if (dinfo->cfg.msi.msi_handlers == 0)
4815				return (EINVAL);
4816			dinfo->cfg.msi.msi_handlers--;
4817			if (dinfo->cfg.msi.msi_handlers == 0)
4818				pci_disable_msi(child);
4819		} else {
4820			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4821			    ("No MSI or MSI-X interrupts allocated"));
4822			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4823			    ("MSI-X index too high"));
4824			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4825			if (mte->mte_handlers == 0)
4826				return (EINVAL);
4827			mte->mte_handlers--;
4828			if (mte->mte_handlers == 0)
4829				pci_mask_msix(child, rid - 1);
4830		}
4831	}
4832	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4833	if (rid > 0)
4834		KASSERT(error == 0,
4835		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4836	return (error);
4837}
4838
4839int
4840pci_print_child(device_t dev, device_t child)
4841{
4842	struct pci_devinfo *dinfo;
4843	struct resource_list *rl;
4844	int retval = 0;
4845
4846	dinfo = device_get_ivars(child);
4847	rl = &dinfo->resources;
4848
4849	retval += bus_print_child_header(dev, child);
4850
4851	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4852	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4853	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4854	if (device_get_flags(dev))
4855		retval += printf(" flags %#x", device_get_flags(dev));
4856
4857	retval += printf(" at device %d.%d", pci_get_slot(child),
4858	    pci_get_function(child));
4859
4860	retval += bus_print_child_domain(dev, child);
4861	retval += bus_print_child_footer(dev, child);
4862
4863	return (retval);
4864}
4865
/*
 * Class/subclass descriptions used by pci_probe_nomatch() to name
 * devices no driver claimed.  A subclass of -1 is the fallback entry
 * for the whole class; "report" selects whether the entry is printed
 * always (1) or only when booting verbose (0).
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{PCIC_DASP,		PCIS_DASP_PERFCNTRS,	1, "performance counters"},
	{PCIC_DASP,		PCIS_DASP_COMM_SYNC,	1, "communication synchronizer"},
	{PCIC_DASP,		PCIS_DASP_MGMT_CARD,	1, "signal processing management"},
	{0, 0, 0,		NULL}
};
4963
4964void
4965pci_probe_nomatch(device_t dev, device_t child)
4966{
4967	int i, report;
4968	const char *cp, *scp;
4969	char *device;
4970
4971	/*
4972	 * Look for a listing for this device in a loaded device database.
4973	 */
4974	report = 1;
4975	if ((device = pci_describe_device(child)) != NULL) {
4976		device_printf(dev, "<%s>", device);
4977		free(device, M_DEVBUF);
4978	} else {
4979		/*
4980		 * Scan the class/subclass descriptions for a general
4981		 * description.
4982		 */
4983		cp = "unknown";
4984		scp = NULL;
4985		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4986			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4987				if (pci_nomatch_tab[i].subclass == -1) {
4988					cp = pci_nomatch_tab[i].desc;
4989					report = pci_nomatch_tab[i].report;
4990				} else if (pci_nomatch_tab[i].subclass ==
4991				    pci_get_subclass(child)) {
4992					scp = pci_nomatch_tab[i].desc;
4993					report = pci_nomatch_tab[i].report;
4994				}
4995			}
4996		}
4997		if (report || bootverbose) {
4998			device_printf(dev, "<%s%s%s>",
4999			    cp ? cp : "",
5000			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
5001			    scp ? scp : "");
5002		}
5003	}
5004	if (report || bootverbose) {
5005		printf(" at device %d.%d (no driver attached)\n",
5006		    pci_get_slot(child), pci_get_function(child));
5007	}
5008	pci_cfg_save(child, device_get_ivars(child), 1);
5009}
5010
/*
 * Bus method invoked after a child's driver detaches.  Reclaim any
 * resources the driver leaked (warning about each kind) and snapshot
 * the device's config space.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
5042
5043/*
5044 * Parse the PCI device database, if loaded, and return a pointer to a
5045 * description of the device.
5046 *
5047 * The database is flat text formatted as follows:
5048 *
5049 * Any line not in a valid format is ignored.
5050 * Lines are terminated with newline '\n' characters.
5051 *
5052 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
5053 * the vendor name.
5054 *
5055 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
5056 * - devices cannot be listed without a corresponding VENDOR line.
5057 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
5058 * another TAB, then the device name.
5059 */
5060
5061/*
5062 * Assuming (ptr) points to the beginning of a line in the database,
5063 * return the vendor or device and description of the next entry.
5064 * The value of (vendor) or (device) inappropriate for the entry type
5065 * is set to -1.  Returns nonzero at the end of the database.
5066 *
 * Note that this is somewhat fragile in the face of corrupt data;
 * we guard against running past the end of the database by appending
 * a terminating newline when we initialise it.
5070 */
5071static int
5072pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
5073{
5074	char	*cp = *ptr;
5075	int	left;
5076
5077	*device = -1;
5078	*vendor = -1;
5079	**desc = '\0';
5080	for (;;) {
5081		left = pci_vendordata_size - (cp - pci_vendordata);
5082		if (left <= 0) {
5083			*ptr = cp;
5084			return(1);
5085		}
5086
5087		/* vendor entry? */
5088		if (*cp != '\t' &&
5089		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
5090			break;
5091		/* device entry? */
5092		if (*cp == '\t' &&
5093		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
5094			break;
5095
5096		/* skip to next line */
5097		while (*cp != '\n' && left > 0) {
5098			cp++;
5099			left--;
5100		}
5101		if (*cp == '\n') {
5102			cp++;
5103			left--;
5104		}
5105	}
5106	/* skip to next line */
5107	while (*cp != '\n' && left > 0) {
5108		cp++;
5109		left--;
5110	}
5111	if (*cp == '\n' && left > 0)
5112		cp++;
5113	*ptr = cp;
5114	return(0);
5115}
5116
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	/* NULL until we have a complete description to hand back. */
	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor description. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* Nonzero return means end of database: no vendor match. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* 80-byte scratch buffer for the device description. */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			/* End of database reached before a device match. */
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Ran into the next vendor's section: no match. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device entry found; fall back to the raw device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	/* Caller owns the returned buffer (M_DEVBUF); NULL on failure. */
	return (desc);
}
5169
5170int
5171pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
5172{
5173	struct pci_devinfo *dinfo;
5174	pcicfgregs *cfg;
5175
5176	dinfo = device_get_ivars(child);
5177	cfg = &dinfo->cfg;
5178
5179	switch (which) {
5180	case PCI_IVAR_ETHADDR:
5181		/*
5182		 * The generic accessor doesn't deal with failure, so
5183		 * we set the return value, then return an error.
5184		 */
5185		*((uint8_t **) result) = NULL;
5186		return (EINVAL);
5187	case PCI_IVAR_SUBVENDOR:
5188		*result = cfg->subvendor;
5189		break;
5190	case PCI_IVAR_SUBDEVICE:
5191		*result = cfg->subdevice;
5192		break;
5193	case PCI_IVAR_VENDOR:
5194		*result = cfg->vendor;
5195		break;
5196	case PCI_IVAR_DEVICE:
5197		*result = cfg->device;
5198		break;
5199	case PCI_IVAR_DEVID:
5200		*result = (cfg->device << 16) | cfg->vendor;
5201		break;
5202	case PCI_IVAR_CLASS:
5203		*result = cfg->baseclass;
5204		break;
5205	case PCI_IVAR_SUBCLASS:
5206		*result = cfg->subclass;
5207		break;
5208	case PCI_IVAR_PROGIF:
5209		*result = cfg->progif;
5210		break;
5211	case PCI_IVAR_REVID:
5212		*result = cfg->revid;
5213		break;
5214	case PCI_IVAR_INTPIN:
5215		*result = cfg->intpin;
5216		break;
5217	case PCI_IVAR_IRQ:
5218		*result = cfg->intline;
5219		break;
5220	case PCI_IVAR_DOMAIN:
5221		*result = cfg->domain;
5222		break;
5223	case PCI_IVAR_BUS:
5224		*result = cfg->bus;
5225		break;
5226	case PCI_IVAR_SLOT:
5227		*result = cfg->slot;
5228		break;
5229	case PCI_IVAR_FUNCTION:
5230		*result = cfg->func;
5231		break;
5232	case PCI_IVAR_CMDREG:
5233		*result = cfg->cmdreg;
5234		break;
5235	case PCI_IVAR_CACHELNSZ:
5236		*result = cfg->cachelnsz;
5237		break;
5238	case PCI_IVAR_MINGNT:
5239		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
5240			*result = -1;
5241			return (EINVAL);
5242		}
5243		*result = cfg->mingnt;
5244		break;
5245	case PCI_IVAR_MAXLAT:
5246		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
5247			*result = -1;
5248			return (EINVAL);
5249		}
5250		*result = cfg->maxlat;
5251		break;
5252	case PCI_IVAR_LATTIMER:
5253		*result = cfg->lattimer;
5254		break;
5255	default:
5256		return (ENOENT);
5257	}
5258	return (0);
5259}
5260
5261int
5262pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
5263{
5264	struct pci_devinfo *dinfo;
5265
5266	dinfo = device_get_ivars(child);
5267
5268	switch (which) {
5269	case PCI_IVAR_INTPIN:
5270		dinfo->cfg.intpin = value;
5271		return (0);
5272	case PCI_IVAR_ETHADDR:
5273	case PCI_IVAR_SUBVENDOR:
5274	case PCI_IVAR_SUBDEVICE:
5275	case PCI_IVAR_VENDOR:
5276	case PCI_IVAR_DEVICE:
5277	case PCI_IVAR_DEVID:
5278	case PCI_IVAR_CLASS:
5279	case PCI_IVAR_SUBCLASS:
5280	case PCI_IVAR_PROGIF:
5281	case PCI_IVAR_REVID:
5282	case PCI_IVAR_IRQ:
5283	case PCI_IVAR_DOMAIN:
5284	case PCI_IVAR_BUS:
5285	case PCI_IVAR_SLOT:
5286	case PCI_IVAR_FUNCTION:
5287		return (EINVAL);	/* disallow for now */
5288
5289	default:
5290		return (ENOENT);
5291	}
5292}
5293
5294#include "opt_ddb.h"
5295#ifdef DDB
5296#include <ddb/ddb.h>
5297#include <sys/cons.h>
5298
5299/*
5300 * List resources based on pci map registers, used for within ddb
5301 */
5302
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print a
 * one-line identification summary for every PCI device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counts devices without an attached driver for unique labels. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
5342#endif /* DDB */
5343
/*
 * Lazily reserve the resource backing a BAR on first allocation: size
 * the BAR (or reuse an earlier failed attempt), check that the caller's
 * resource type matches what the BAR decodes, widen the request to the
 * BAR's true size/alignment, reserve it on the child's resource list,
 * and program the BAR with the assigned address while decode is off.
 * Returns the reserved (inactive) resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	uint16_t cmd;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so later attempts take the fast path. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type disagrees with the BAR's decode. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Back out the list entry so a retry starts clean. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));

	/* Disable decoding via the CMD register before updating the BAR */
	cmd = pci_read_config(child, PCIR_COMMAND, 2);
	pci_write_config(child, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	map = rman_get_start(res);
	pci_write_bar(child, pm, map);

	/* Restore the original value of the CMD register */
	pci_write_config(child, PCIR_COMMAND, cmd, 2);
out:
	return (res);
}
5457
/*
 * Common allocation path for all resource types.  Performs per-type
 * fixups (secondary-bus numbers, legacy interrupt routing, lazy BAR
 * reservation, bridge-window passthrough) before handing the request
 * to the child's resource list.  'num' is the count of contiguous
 * BAR-sized units to reserve (used by SR-IOV; normal callers pass 1).
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All paths end in an allocation from the child's resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5533
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed straight up the tree; SR-IOV virtual
 * functions get special memory handling; everything else goes through
 * the common multi-resource path with a unit count of 1.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	/* Not our immediate child: delegate to our own parent. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5565
/*
 * Bus method: release a child's resource.  Mirrors the special cases
 * in pci_alloc_resource: grandchild requests go up the tree, SR-IOV VF
 * memory goes through the VF path, bridge window registers bypass the
 * resource list, and everything else is released from the list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	/* Not our immediate child: delegate to our own parent. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5616
5617int
5618pci_activate_resource(device_t dev, device_t child, int type, int rid,
5619    struct resource *r)
5620{
5621	struct pci_devinfo *dinfo;
5622	int error;
5623
5624	error = bus_generic_activate_resource(dev, child, type, rid, r);
5625	if (error)
5626		return (error);
5627
5628	/* Enable decoding in the command register when activating BARs. */
5629	if (device_get_parent(child) == dev) {
5630		/* Device ROMs need their decoding explicitly enabled. */
5631		dinfo = device_get_ivars(child);
5632		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5633			pci_write_bar(child, pci_find_bar(child, rid),
5634			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5635		switch (type) {
5636		case SYS_RES_IOPORT:
5637		case SYS_RES_MEMORY:
5638			error = PCI_ENABLE_IO(dev, child, type);
5639			break;
5640		}
5641	}
5642	return (error);
5643}
5644
5645int
5646pci_deactivate_resource(device_t dev, device_t child, int type,
5647    int rid, struct resource *r)
5648{
5649	struct pci_devinfo *dinfo;
5650	int error;
5651
5652	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5653	if (error)
5654		return (error);
5655
5656	/* Disable decoding for device ROMs. */
5657	if (device_get_parent(child) == dev) {
5658		dinfo = device_get_ivars(child);
5659		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5660			pci_write_bar(child, pci_find_bar(child, rid),
5661			    rman_get_start(r));
5662	}
5663	return (0);
5664}
5665
5666void
5667pci_child_deleted(device_t dev, device_t child)
5668{
5669	struct resource_list_entry *rle;
5670	struct resource_list *rl;
5671	struct pci_devinfo *dinfo;
5672
5673	dinfo = device_get_ivars(child);
5674	rl = &dinfo->resources;
5675
5676	EVENTHANDLER_INVOKE(pci_delete_device, child);
5677
5678	/* Turn off access to resources we're about to free */
5679	if (bus_child_present(child) != 0) {
5680		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5681		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5682
5683		pci_disable_busmaster(child);
5684	}
5685
5686	/* Free all allocated resources */
5687	STAILQ_FOREACH(rle, rl, link) {
5688		if (rle->res) {
5689			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5690			    resource_list_busy(rl, rle->type, rle->rid)) {
5691				pci_printf(&dinfo->cfg,
5692				    "Resource still owned, oops. "
5693				    "(type=%d, rid=%d, addr=%lx)\n",
5694				    rle->type, rle->rid,
5695				    rman_get_start(rle->res));
5696				bus_release_resource(child, rle->type, rle->rid,
5697				    rle->res);
5698			}
5699			resource_list_unreserve(rl, dev, child, rle->type,
5700			    rle->rid);
5701		}
5702	}
5703	resource_list_free(rl);
5704
5705	pci_freecfg(dinfo);
5706}
5707
5708void
5709pci_delete_resource(device_t dev, device_t child, int type, int rid)
5710{
5711	struct pci_devinfo *dinfo;
5712	struct resource_list *rl;
5713	struct resource_list_entry *rle;
5714
5715	if (device_get_parent(child) != dev)
5716		return;
5717
5718	dinfo = device_get_ivars(child);
5719	rl = &dinfo->resources;
5720	rle = resource_list_find(rl, type, rid);
5721	if (rle == NULL)
5722		return;
5723
5724	if (rle->res) {
5725		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5726		    resource_list_busy(rl, type, rid)) {
5727			device_printf(dev, "delete_resource: "
5728			    "Resource still owned by child, oops. "
5729			    "(type=%d, rid=%d, addr=%jx)\n",
5730			    type, rid, rman_get_start(rle->res));
5731			return;
5732		}
5733		resource_list_unreserve(rl, dev, child, type, rid);
5734	}
5735	resource_list_delete(rl, type, rid);
5736}
5737
5738struct resource_list *
5739pci_get_resource_list (device_t dev, device_t child)
5740{
5741	struct pci_devinfo *dinfo = device_get_ivars(child);
5742
5743	return (&dinfo->resources);
5744}
5745
5746bus_dma_tag_t
5747pci_get_dma_tag(device_t bus, device_t dev)
5748{
5749	struct pci_softc *sc = device_get_softc(bus);
5750
5751	return (sc->sc_dma_tag);
5752}
5753
/*
 * Bus method: read a child's config-space register by forwarding the
 * request to the parent bridge, emulating the ID registers for SR-IOV
 * virtual functions.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* Device ID in the high half, vendor low. */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5794
5795void
5796pci_write_config_method(device_t dev, device_t child, int reg,
5797    uint32_t val, int width)
5798{
5799	struct pci_devinfo *dinfo = device_get_ivars(child);
5800	pcicfgregs *cfg = &dinfo->cfg;
5801
5802	PCIB_WRITE_CONFIG(device_get_parent(dev),
5803	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5804}
5805
5806int
5807pci_child_location_str_method(device_t dev, device_t child, char *buf,
5808    size_t buflen)
5809{
5810
5811	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5812	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5813	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5814	return (0);
5815}
5816
5817int
5818pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5819    size_t buflen)
5820{
5821	struct pci_devinfo *dinfo;
5822	pcicfgregs *cfg;
5823
5824	dinfo = device_get_ivars(child);
5825	cfg = &dinfo->cfg;
5826	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5827	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5828	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5829	    cfg->progif);
5830	return (0);
5831}
5832
5833int
5834pci_assign_interrupt_method(device_t dev, device_t child)
5835{
5836	struct pci_devinfo *dinfo = device_get_ivars(child);
5837	pcicfgregs *cfg = &dinfo->cfg;
5838
5839	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5840	    cfg->intpin));
5841}
5842
/*
 * dev_lookup event handler: translate a "pciD:B:S:F" or "pciB:S:F"
 * device-name selector into the matching PCI device, if any.  Leaves
 * *dev untouched when the name does not parse or no device matches.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler already resolved the name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/*
		 * Only three components (pciB:S:F): shift the parsed
		 * values down one position and default the domain to 0.
		 */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* Slot 0 may use the extended ARI function range. */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5891
5892static int
5893pci_modevent(module_t mod, int what, void *arg)
5894{
5895	static struct cdev *pci_cdev;
5896	static eventhandler_tag tag;
5897
5898	switch (what) {
5899	case MOD_LOAD:
5900		STAILQ_INIT(&pci_devq);
5901		pci_generation = 0;
5902		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
5903		    "pci");
5904		pci_load_vendor_data();
5905		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
5906		    1000);
5907		break;
5908
5909	case MOD_UNLOAD:
5910		if (tag != NULL)
5911			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
5912		destroy_dev(pci_cdev);
5913		break;
5914	}
5915
5916	return (0);
5917}
5918
/*
 * Restore the saved PCI Express capability control registers.  Which
 * registers are written is gated on the capability version and port
 * type recorded in the saved flags, mirroring the conditions in
 * pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control exists on root/downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist in capability v2+. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5954
5955static void
5956pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5957{
5958	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5959	    dinfo->cfg.pcix.pcix_command,  2);
5960}
5961
/*
 * Restore a device's saved configuration state: power it up first,
 * then rewrite the header registers, BARs, command register, and any
 * PCIe/PCI-X/MSI/MSI-X/SR-IOV state that was captured by
 * pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* Header-type specific registers (type 0/1/2 layouts differ). */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	pci_restore_bars(dev);

	/* Bridges get their command register restored elsewhere. */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_BRIDGE)
		pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_restore(dev, dinfo);
#endif
}
6036
/*
 * Save the PCI Express capability control registers.  The set of
 * registers read is gated on the capability version and port type,
 * matching the conditions in pci_cfg_restore_pcie().
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control exists on root/downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist in capability v2+. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
6074
6075static void
6076pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
6077{
6078	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
6079	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
6080}
6081
/*
 * Snapshot a device's configuration state into its pci_devinfo so it
 * can later be restored by pci_cfg_restore().  When 'setstate' is
 * nonzero, the device may additionally be powered down to D3
 * according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0/1/2 headers.
	 */
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
	/* Header-type specific registers (type 0/1/2 layouts differ). */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_1, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_1, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_1, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_1, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_1, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
		    PCIR_SECLAT_2, 1);
		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
		    PCIR_SUBBUS_2, 1);
		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
		    PCIR_SECBUS_2, 1);
		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
		    PCIR_PRIBUS_2, 1);
		dinfo->cfg.bridge.br_control = pci_read_config(dev,
		    PCIR_BRIDGECTL_2, 2);
		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
		break;
	}

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_save(dev, dinfo);
#endif

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each case falls through to the next, more aggressive one. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
6189
6190/* Wrapper APIs suitable for device driver use. */
6191void
6192pci_save_state(device_t dev)
6193{
6194	struct pci_devinfo *dinfo;
6195
6196	dinfo = device_get_ivars(dev);
6197	pci_cfg_save(dev, dinfo, 0);
6198}
6199
6200void
6201pci_restore_state(device_t dev)
6202{
6203	struct pci_devinfo *dinfo;
6204
6205	dinfo = device_get_ivars(dev);
6206	pci_cfg_restore(dev, dinfo);
6207}
6208
6209static int
6210pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
6211    uintptr_t *id)
6212{
6213
6214	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
6215}
6216
6217/* Find the upstream port of a given PCI device in a root complex. */
6218device_t
6219pci_find_pcie_root_port(device_t dev)
6220{
6221	struct pci_devinfo *dinfo;
6222	devclass_t pci_class;
6223	device_t pcib, bus;
6224
6225	pci_class = devclass_find("pci");
6226	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
6227	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
6228
6229	/*
6230	 * Walk the bridge hierarchy until we find a PCI-e root
6231	 * port or a non-PCI device.
6232	 */
6233	for (;;) {
6234		bus = device_get_parent(dev);
6235		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
6236		    device_get_nameunit(dev)));
6237
6238		pcib = device_get_parent(bus);
6239		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
6240		    device_get_nameunit(bus)));
6241
6242		/*
6243		 * pcib's parent must be a PCI bus for this to be a
6244		 * PCI-PCI bridge.
6245		 */
6246		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
6247			return (NULL);
6248
6249		dinfo = device_get_ivars(pcib);
6250		if (dinfo->cfg.pcie.pcie_location != 0 &&
6251		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
6252			return (pcib);
6253
6254		dev = pcib;
6255	}
6256}
6257
6258/*
6259 * Wait for pending transactions to complete on a PCI-express function.
6260 *
6261 * The maximum delay is specified in milliseconds in max_delay.  Note
6262 * that this function may sleep.
6263 *
6264 * Returns true if the function is idle and false if the timeout is
6265 * exceeded.  If dev is not a PCI-express function, this returns true.
6266 */
6267bool
6268pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
6269{
6270	struct pci_devinfo *dinfo = device_get_ivars(dev);
6271	uint16_t sta;
6272	int cap;
6273
6274	cap = dinfo->cfg.pcie.pcie_location;
6275	if (cap == 0)
6276		return (true);
6277
6278	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
6279	while (sta & PCIEM_STA_TRANSACTION_PND) {
6280		if (max_delay == 0)
6281			return (false);
6282
6283		/* Poll once every 100 milliseconds up to the timeout. */
6284		if (max_delay > 100) {
6285			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
6286			max_delay -= 100;
6287		} else {
6288			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
6289			    C_HARDCLOCK);
6290			max_delay = 0;
6291		}
6292		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
6293	}
6294
6295	return (true);
6296}
6297
6298/*
6299 * Determine the maximum Completion Timeout in microseconds.
6300 *
6301 * For non-PCI-express functions this returns 0.
6302 */
6303int
6304pcie_get_max_completion_timeout(device_t dev)
6305{
6306	struct pci_devinfo *dinfo = device_get_ivars(dev);
6307	int cap;
6308
6309	cap = dinfo->cfg.pcie.pcie_location;
6310	if (cap == 0)
6311		return (0);
6312
6313	/*
6314	 * Functions using the 1.x spec use the default timeout range of
6315	 * 50 microseconds to 50 milliseconds.  Functions that do not
6316	 * support programmable timeouts also use this range.
6317	 */
6318	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
6319	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
6320	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
6321		return (50 * 1000);
6322
6323	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
6324	    PCIEM_CTL2_COMP_TIMO_VAL) {
6325	case PCIEM_CTL2_COMP_TIMO_100US:
6326		return (100);
6327	case PCIEM_CTL2_COMP_TIMO_10MS:
6328		return (10 * 1000);
6329	case PCIEM_CTL2_COMP_TIMO_55MS:
6330		return (55 * 1000);
6331	case PCIEM_CTL2_COMP_TIMO_210MS:
6332		return (210 * 1000);
6333	case PCIEM_CTL2_COMP_TIMO_900MS:
6334		return (900 * 1000);
6335	case PCIEM_CTL2_COMP_TIMO_3500MS:
6336		return (3500 * 1000);
6337	case PCIEM_CTL2_COMP_TIMO_13S:
6338		return (13 * 1000 * 1000);
6339	case PCIEM_CTL2_COMP_TIMO_64S:
6340		return (64 * 1000 * 1000);
6341	default:
6342		return (50 * 1000);
6343	}
6344}
6345
/*
 * Report a PCIe error delivered via ACPI APEI and attempt a crude
 * recovery by clearing the latched error status bits.
 *
 * 'sev' is one of the PCIEM_STA_*_ERROR severity values; 'aerp', if
 * non-NULL, points to the firmware-captured AER register block
 * (little-endian, laid out with the standard PCIR_AER_* offsets).
 */
void
pcie_apei_error(device_t dev, int sev, uint8_t *aerp)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	const char *s;
	int aer;
	uint32_t r, r1;
	uint16_t rs;

	if (sev == PCIEM_STA_CORRECTABLE_ERROR)
		s = "Correctable";
	else if (sev == PCIEM_STA_NON_FATAL_ERROR)
		s = "Uncorrectable (Non-Fatal)";
	else
		s = "Uncorrectable (Fatal)";
	device_printf(dev, "%s PCIe error reported by APEI\n", s);
	if (aerp) {
		/* Decode the status/mask pair matching the severity. */
		if (sev == PCIEM_STA_CORRECTABLE_ERROR) {
			r = le32dec(aerp + PCIR_AER_COR_STATUS);
			r1 = le32dec(aerp + PCIR_AER_COR_MASK);
		} else {
			r = le32dec(aerp + PCIR_AER_UC_STATUS);
			r1 = le32dec(aerp + PCIR_AER_UC_MASK);
		}
		device_printf(dev, "status 0x%08x mask 0x%08x", r, r1);
		/*
		 * Uncorrectable errors additionally carry a severity
		 * mask and the index of the first logged error (low 5
		 * bits of the AER capabilities and control register).
		 */
		if (sev != PCIEM_STA_CORRECTABLE_ERROR) {
			r = le32dec(aerp + PCIR_AER_UC_SEVERITY);
			rs = le16dec(aerp + PCIR_AER_CAP_CONTROL);
			printf(" severity 0x%08x first %d\n",
			    r, rs & 0x1f);
		} else
			printf("\n");
	}

	/* As kind of recovery just report and clear the error statuses. */
	if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
		/* AER status registers are write-1-to-clear. */
		r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
		if (r != 0) {
			pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
			device_printf(dev, "Clearing UC AER errors 0x%08x\n", r);
		}

		r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
		if (r != 0) {
			pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
			device_printf(dev, "Clearing COR AER errors 0x%08x\n", r);
		}
	}
	/* Also clear the summary error bits in the PCIe device status. */
	if (dinfo->cfg.pcie.pcie_location != 0) {
		rs = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
		    PCIER_DEVICE_STA, 2);
		if ((rs & (PCIEM_STA_CORRECTABLE_ERROR |
		    PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR |
		    PCIEM_STA_UNSUPPORTED_REQ)) != 0) {
			pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
			    PCIER_DEVICE_STA, rs, 2);
			device_printf(dev, "Clearing PCIe errors 0x%04x\n", rs);
		}
	}
}
6406
6407/*
6408 * Perform a Function Level Reset (FLR) on a device.
6409 *
6410 * This function first waits for any pending transactions to complete
6411 * within the timeout specified by max_delay.  If transactions are
6412 * still pending, the function will return false without attempting a
6413 * reset.
6414 *
6415 * If dev is not a PCI-express function or does not support FLR, this
6416 * function returns false.
6417 *
6418 * Note that no registers are saved or restored.  The caller is
6419 * responsible for saving and restoring any registers including
6420 * PCI-standard registers via pci_save_state() and
6421 * pci_restore_state().
6422 */
bool
pcie_flr(device_t dev, u_int max_delay, bool force)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	uint16_t cmd, ctl;
	int compl_delay;
	int cap;

	/* Not a PCI-express function. */
	cap = dinfo->cfg.pcie.pcie_location;
	if (cap == 0)
		return (false);

	/* The function must advertise FLR support in its device caps. */
	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
		return (false);

	/*
	 * Disable busmastering to prevent generation of new
	 * transactions while waiting for the device to go idle.  If
	 * the idle timeout fails, the command register is restored
	 * which will re-enable busmastering.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
		if (!force) {
			/* Still busy: undo the busmaster disable and bail. */
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
			return (false);
		}
		pci_printf(&dinfo->cfg,
		    "Resetting with transactions pending after %d ms\n",
		    max_delay);

		/*
		 * Extend the post-FLR delay to cover the maximum
		 * Completion Timeout delay of anything in flight
		 * during the FLR delay.  Enforce a minimum delay of
		 * at least 10ms.
		 */
		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
		if (compl_delay < 10)
			compl_delay = 10;
	} else
		compl_delay = 0;

	/* Initiate the reset. */
	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
	    PCIEM_CTL_INITIATE_FLR, 2);

	/* Wait for 100ms. */
	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);

	/* Best-effort sanity check: the function should now be idle. */
	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
	    PCIEM_STA_TRANSACTION_PND)
		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
	return (true);
}
6480
6481/*
6482 * Attempt a power-management reset by cycling the device in/out of D3
6483 * state.  PCI spec says we can only go into D3 state from D0 state.
6484 * Transition from D[12] into D0 before going to D3 state.
6485 */
6486int
6487pci_power_reset(device_t dev)
6488{
6489	int ps;
6490
6491	ps = pci_get_powerstate(dev);
6492	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
6493		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
6494	pci_set_powerstate(dev, PCI_POWERSTATE_D3);
6495	pci_set_powerstate(dev, ps);
6496	return (0);
6497}
6498
6499/*
6500 * Try link drop and retrain of the downstream port of upstream
6501 * switch, for PCIe.  According to the PCIe 3.0 spec 6.6.1, this must
6502 * cause Conventional Hot reset of the device in the slot.
6503 * Alternative, for PCIe, could be the secondary bus reset initiatied
6504 * on the upstream switch PCIR_BRIDGECTL_1, bit 6.
6505 */
6506int
6507pcie_link_reset(device_t port, int pcie_location)
6508{
6509	uint16_t v;
6510
6511	v = pci_read_config(port, pcie_location + PCIER_LINK_CTL, 2);
6512	v |= PCIEM_LINK_CTL_LINK_DIS;
6513	pci_write_config(port, pcie_location + PCIER_LINK_CTL, v, 2);
6514	pause_sbt("pcier1", mstosbt(20), 0, 0);
6515	v &= ~PCIEM_LINK_CTL_LINK_DIS;
6516	v |= PCIEM_LINK_CTL_RETRAIN_LINK;
6517	pci_write_config(port, pcie_location + PCIER_LINK_CTL, v, 2);
6518	pause_sbt("pcier2", mstosbt(100), 0, 0); /* 100 ms */
6519	v = pci_read_config(port, pcie_location + PCIER_LINK_STA, 2);
6520	return ((v & PCIEM_LINK_STA_TRAINING) != 0 ? ETIMEDOUT : 0);
6521}
6522
6523static int
6524pci_reset_post(device_t dev, device_t child)
6525{
6526
6527	if (dev == device_get_parent(child))
6528		pci_restore_state(child);
6529	return (0);
6530}
6531
6532static int
6533pci_reset_prepare(device_t dev, device_t child)
6534{
6535
6536	if (dev == device_get_parent(child))
6537		pci_save_state(child);
6538	return (0);
6539}
6540
/*
 * BUS_RESET_CHILD method: reset a direct child by quiescing it
 * (detach or suspend, selected by DEVF_RESET_DETACH in 'flags'),
 * performing an FLR — falling back to a D3 power cycle if FLR is
 * unavailable — and then reattaching or resuming it.
 */
static int
pci_reset_child(device_t dev, device_t child, int flags)
{
	int error;

	if (dev == NULL || device_get_parent(child) != dev)
		return (0);
	if ((flags & DEVF_RESET_DETACH) != 0) {
		error = device_get_state(child) == DS_ATTACHED ?
		    device_detach(child) : 0;
	} else {
		error = BUS_SUSPEND_CHILD(dev, child);
	}
	if (error == 0) {
		if (!pcie_flr(child, 1000, false)) {
			/*
			 * FLR failed or unsupported: save state, power
			 * cycle through D3, and restore state.
			 */
			error = BUS_RESET_PREPARE(dev, child);
			if (error == 0)
				pci_power_reset(child);
			BUS_RESET_POST(dev, child);
		}
		if ((flags & DEVF_RESET_DETACH) != 0)
			device_probe_and_attach(child);
		else
			BUS_RESUME_CHILD(dev, child);
	}
	return (error);
}
6568
6569const struct pci_device_table *
6570pci_match_device(device_t child, const struct pci_device_table *id, size_t nelt)
6571{
6572	bool match;
6573	uint16_t vendor, device, subvendor, subdevice, class, subclass, revid;
6574
6575	vendor = pci_get_vendor(child);
6576	device = pci_get_device(child);
6577	subvendor = pci_get_subvendor(child);
6578	subdevice = pci_get_subdevice(child);
6579	class = pci_get_class(child);
6580	subclass = pci_get_subclass(child);
6581	revid = pci_get_revid(child);
6582	while (nelt-- > 0) {
6583		match = true;
6584		if (id->match_flag_vendor)
6585			match &= vendor == id->vendor;
6586		if (id->match_flag_device)
6587			match &= device == id->device;
6588		if (id->match_flag_subvendor)
6589			match &= subvendor == id->subvendor;
6590		if (id->match_flag_subdevice)
6591			match &= subdevice == id->subdevice;
6592		if (id->match_flag_class)
6593			match &= class == id->class_id;
6594		if (id->match_flag_subclass)
6595			match &= subclass == id->subclass;
6596		if (id->match_flag_revid)
6597			match &= revid == id->revid;
6598		if (match)
6599			return (id);
6600		id++;
6601	}
6602	return (NULL);
6603}
6604
6605static void
6606pci_print_faulted_dev_name(const struct pci_devinfo *dinfo)
6607{
6608	const char *dev_name;
6609	device_t dev;
6610
6611	dev = dinfo->cfg.dev;
6612	printf("pci%d:%d:%d:%d", dinfo->cfg.domain, dinfo->cfg.bus,
6613	    dinfo->cfg.slot, dinfo->cfg.func);
6614	dev_name = device_get_name(dev);
6615	if (dev_name != NULL)
6616		printf(" (%s%d)", dev_name, device_get_unit(dev));
6617}
6618
/*
 * Walk every known PCI device and print any latched error state:
 * classic PCI status bits, the PCIe device status error summary, and
 * (where present) the AER status registers including the header log.
 */
void
pci_print_faulted_dev(void)
{
	struct pci_devinfo *dinfo;
	device_t dev;
	int aer, i;
	uint32_t r1, r2;
	uint16_t status;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		dev = dinfo->cfg.dev;
		/* Classic PCI status: keep only the error bits. */
		status = pci_read_config(dev, PCIR_STATUS, 2);
		status &= PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
		    PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
		    PCIM_STATUS_SERR | PCIM_STATUS_PERR;
		if (status != 0) {
			pci_print_faulted_dev_name(dinfo);
			printf(" error 0x%04x\n", status);
		}
		/* PCIe device status error summary bits. */
		if (dinfo->cfg.pcie.pcie_location != 0) {
			status = pci_read_config(dev,
			    dinfo->cfg.pcie.pcie_location +
			    PCIER_DEVICE_STA, 2);
			if ((status & (PCIEM_STA_CORRECTABLE_ERROR |
			    PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR |
			    PCIEM_STA_UNSUPPORTED_REQ)) != 0) {
				pci_print_faulted_dev_name(dinfo);
				printf(" PCIe DEVCTL 0x%04x DEVSTA 0x%04x\n",
				    pci_read_config(dev,
				    dinfo->cfg.pcie.pcie_location +
				    PCIER_DEVICE_CTL, 2),
				    status);
			}
		}
		/* AER: dump status/mask/severity and the header log. */
		if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
			r1 = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
			r2 = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
			if (r1 != 0 || r2 != 0) {
				pci_print_faulted_dev_name(dinfo);
				printf(" AER UC 0x%08x Mask 0x%08x Svr 0x%08x\n"
				    "  COR 0x%08x Mask 0x%08x Ctl 0x%08x\n",
				    r1, pci_read_config(dev, aer +
				    PCIR_AER_UC_MASK, 4),
				    pci_read_config(dev, aer +
				    PCIR_AER_UC_SEVERITY, 4),
				    r2, pci_read_config(dev, aer +
				    PCIR_AER_COR_MASK, 4),
				    pci_read_config(dev, aer +
				    PCIR_AER_CAP_CONTROL, 4));
				/* Four dwords of captured TLP header. */
				for (i = 0; i < 4; i++) {
					r1 = pci_read_config(dev, aer +
					    PCIR_AER_HEADER_LOG + i * 4, 4);
					printf("    HL%d: 0x%08x\n", i, r1);
				}
			}
		}
	}
}
6677
6678#ifdef DDB
/* DDB "show pcierr": dump latched PCI/PCIe/AER error state. */
DB_SHOW_COMMAND(pcierr, pci_print_faulted_dev_db)
{

	pci_print_faulted_dev();
}
6684
6685static void
6686db_clear_pcie_errors(const struct pci_devinfo *dinfo)
6687{
6688	device_t dev;
6689	int aer;
6690	uint32_t r;
6691
6692	dev = dinfo->cfg.dev;
6693	r = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
6694	    PCIER_DEVICE_STA, 2);
6695	pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
6696	    PCIER_DEVICE_STA, r, 2);
6697
6698	if (pci_find_extcap(dev, PCIZ_AER, &aer) != 0)
6699		return;
6700	r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
6701	if (r != 0)
6702		pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
6703	r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
6704	if (r != 0)
6705		pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
6706}
6707
/*
 * DDB "pci_clearerr": clear latched error bits on every PCI device —
 * the classic status-register error bits (write-1-to-clear, so only
 * the error bits are written back) plus PCIe/AER state.
 */
DB_COMMAND(pci_clearerr, db_pci_clearerr)
{
	struct pci_devinfo *dinfo;
	device_t dev;
	uint16_t status, status1;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		dev = dinfo->cfg.dev;
		status1 = status = pci_read_config(dev, PCIR_STATUS, 2);
		/* Isolate just the error bits. */
		status1 &= PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
		    PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
		    PCIM_STATUS_SERR | PCIM_STATUS_PERR;
		if (status1 != 0) {
			/* Write with error bits set to clear them. */
			status &= ~status1;
			pci_write_config(dev, PCIR_STATUS, status, 2);
		}
		if (dinfo->cfg.pcie.pcie_location != 0)
			db_clear_pcie_errors(dinfo);
	}
}
6728#endif
6729