1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: releng/11.0/sys/dev/pci/pci.c 302175 2016-06-24 17:26:42Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/limits.h>
39#include <sys/linker.h>
40#include <sys/fcntl.h>
41#include <sys/conf.h>
42#include <sys/kernel.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/endian.h>
46
47#include <vm/vm.h>
48#include <vm/pmap.h>
49#include <vm/vm_extern.h>
50
51#include <sys/bus.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#include <machine/resource.h>
55#include <machine/stdarg.h>
56
57#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58#include <machine/intr_machdep.h>
59#endif
60
61#include <sys/pciio.h>
62#include <dev/pci/pcireg.h>
63#include <dev/pci/pcivar.h>
64#include <dev/pci/pci_private.h>
65
66#ifdef PCI_IOV
67#include <sys/nv.h>
68#include <dev/pci/pci_iov_private.h>
69#endif
70
71#include <dev/usb/controller/xhcireg.h>
72#include <dev/usb/controller/ehcireg.h>
73#include <dev/usb/controller/ohcireg.h>
74#include <dev/usb/controller/uhcireg.h>
75
76#include "pcib_if.h"
77#include "pci_if.h"
78
79#define	PCIR_IS_BIOS(cfg, reg)						\
80	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
81	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
82
83static int		pci_has_quirk(uint32_t devid, int quirk);
84static pci_addr_t	pci_mapbase(uint64_t mapreg);
85static const char	*pci_maptype(uint64_t mapreg);
86static int		pci_maprange(uint64_t mapreg);
87static pci_addr_t	pci_rombase(uint64_t mapreg);
88static int		pci_romsize(uint64_t testval);
89static void		pci_fixancient(pcicfgregs *cfg);
90static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
91
92static int		pci_porten(device_t dev);
93static int		pci_memen(device_t dev);
94static void		pci_assign_interrupt(device_t bus, device_t dev,
95			    int force_route);
96static int		pci_add_map(device_t bus, device_t dev, int reg,
97			    struct resource_list *rl, int force, int prefetch);
98static int		pci_probe(device_t dev);
99static int		pci_attach(device_t dev);
100static int		pci_detach(device_t dev);
101static void		pci_load_vendor_data(void);
102static int		pci_describe_parse_line(char **ptr, int *vendor,
103			    int *device, char **desc);
104static char		*pci_describe_device(device_t dev);
105static int		pci_modevent(module_t mod, int what, void *arg);
106static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
107			    pcicfgregs *cfg);
108static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
109static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
110			    int reg, uint32_t *data);
111#if 0
112static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
113			    int reg, uint32_t data);
114#endif
115static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
116static void		pci_mask_msix(device_t dev, u_int index);
117static void		pci_unmask_msix(device_t dev, u_int index);
118static int		pci_msi_blacklisted(void);
119static int		pci_msix_blacklisted(void);
120static void		pci_resume_msi(device_t dev);
121static void		pci_resume_msix(device_t dev);
122static int		pci_remap_intr_method(device_t bus, device_t dev,
123			    u_int irq);
124
125static int		pci_get_id_method(device_t dev, device_t child,
126			    enum pci_id_type type, uintptr_t *rid);
127
128static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
129    int b, int s, int f, uint16_t vid, uint16_t did);
130
/*
 * kobj method table for the PCI bus driver.  Three interfaces are
 * implemented: the device interface (probe/attach/detach of the bus
 * itself), the bus interface (resource and interrupt management on
 * behalf of child devices), and the PCI-specific interface (config
 * space access, power states, MSI/MSI-X, VPD, capabilities).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
	DEVMETHOD(bus_suspend_child,	pci_suspend_child),
	DEVMETHOD(bus_resume_child,	pci_resume_child),
	DEVMETHOD(bus_rescan,		pci_rescan_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_id,		pci_get_id_method),
	DEVMETHOD(pci_alloc_devinfo,	pci_alloc_devinfo_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),
#ifdef PCI_IOV
	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
#endif

	DEVMETHOD_END
};
205
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
/* The pci driver attaches below pcib (PCI bridge) devices. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Raw vendor data and its size (see pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
214
/*
 * Table-driven quirk handling: each entry matches on the combined
 * vendor/device ID and carries a quirk type plus two type-specific
 * arguments.  The pci_quirks table below is scanned by pci_has_quirk()
 * and is terminated by an all-zero entry.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;		/* quirk-type specific (e.g. register offset) */
	int	arg2;		/* quirk-type specific */
};

static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
305
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped when devices are added (pci_fill_devinfo()) */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set when at least one PCIe / PCI-X capability is discovered (pci_read_cap()). */
static int pcie_chipset, pcix_chipset;
315
/* sysctl vars */
/* Tunables/sysctls live under hw.pci.*; descriptions below are user-facing. */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover defaults on only for x86, where legacy BIOS USB exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
385
386static int
387pci_has_quirk(uint32_t devid, int quirk)
388{
389	const struct pci_quirk *q;
390
391	for (q = &pci_quirks[0]; q->devid; q++) {
392		if (q->devid == devid && q->type == quirk)
393			return (1);
394	}
395	return (0);
396}
397
398/* Find a device_t by bus/slot/function in domain 0 */
399
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper that searches domain 0 only. */
	return (pci_find_dbsf(0, bus, slot, func));
}
406
407/* Find a device_t by domain/bus/slot/function */
408
409device_t
410pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
411{
412	struct pci_devinfo *dinfo;
413
414	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
415		if ((dinfo->cfg.domain == domain) &&
416		    (dinfo->cfg.bus == bus) &&
417		    (dinfo->cfg.slot == slot) &&
418		    (dinfo->cfg.func == func)) {
419			return (dinfo->cfg.dev);
420		}
421	}
422
423	return (NULL);
424}
425
426/* Find a device_t by vendor/device ID */
427
428device_t
429pci_find_device(uint16_t vendor, uint16_t device)
430{
431	struct pci_devinfo *dinfo;
432
433	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
434		if ((dinfo->cfg.vendor == vendor) &&
435		    (dinfo->cfg.device == device)) {
436			return (dinfo->cfg.dev);
437		}
438	}
439
440	return (NULL);
441}
442
443device_t
444pci_find_class(uint8_t class, uint8_t subclass)
445{
446	struct pci_devinfo *dinfo;
447
448	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
449		if (dinfo->cfg.baseclass == class &&
450		    dinfo->cfg.subclass == subclass) {
451			return (dinfo->cfg.dev);
452		}
453	}
454
455	return (NULL);
456}
457
/*
 * printf() prefixed with the pci<domain>:<bus>:<slot>:<func> location
 * of the device described by cfg.  Returns the total character count
 * printed (prefix plus formatted message).
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
471
472/* return base address of memory or port map */
473
474static pci_addr_t
475pci_mapbase(uint64_t mapreg)
476{
477
478	if (PCI_BAR_MEM(mapreg))
479		return (mapreg & PCIM_BAR_MEM_BASE);
480	else
481		return (mapreg & PCIM_BAR_IO_BASE);
482}
483
484/* return map type of memory or port map */
485
486static const char *
487pci_maptype(uint64_t mapreg)
488{
489
490	if (PCI_BAR_IO(mapreg))
491		return ("I/O Port");
492	if (mapreg & PCIM_BAR_MEM_PREFETCH)
493		return ("Prefetchable Memory");
494	return ("Memory");
495}
496
497/* return log2 of map size decoded for memory or port map */
498
499int
500pci_mapsize(uint64_t testval)
501{
502	int ln2size;
503
504	testval = pci_mapbase(testval);
505	ln2size = 0;
506	if (testval != 0) {
507		while ((testval & 1) == 0)
508		{
509			ln2size++;
510			testval >>= 1;
511		}
512	}
513	return (ln2size);
514}
515
516/* return base address of device ROM */
517
/* Mask off the enable/reserved low bits of a ROM BAR to get its base. */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
524
525/* return log2 of map size decided for device ROM */
526
527static int
528pci_romsize(uint64_t testval)
529{
530	int ln2size;
531
532	testval = pci_rombase(testval);
533	ln2size = 0;
534	if (testval != 0) {
535		while ((testval & 1) == 0)
536		{
537			ln2size++;
538			testval >>= 1;
539		}
540	}
541	return (ln2size);
542}
543
544/* return log2 of address range supported by map register */
545
546static int
547pci_maprange(uint64_t mapreg)
548{
549	int ln2range = 0;
550
551	if (PCI_BAR_IO(mapreg))
552		ln2range = 32;
553	else
554		switch (mapreg & PCIM_BAR_MEM_TYPE) {
555		case PCIM_BAR_MEM_32:
556			ln2range = 32;
557			break;
558		case PCIM_BAR_MEM_1MB:
559			ln2range = 20;
560			break;
561		case PCIM_BAR_MEM_64:
562			ln2range = 64;
563			break;
564		}
565	return (ln2range);
566}
567
568/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
569
570static void
571pci_fixancient(pcicfgregs *cfg)
572{
573	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
574		return;
575
576	/* PCI to PCI bridges use header type 1 */
577	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
578		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
579}
580
581/* extract header type specific config data */
582
/*
 * Read the header-type specific registers (subsystem IDs, bridge
 * latency/bus numbers, and the number of BARs) into *cfg based on
 * cfg->hdrtype.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: plain device with up to PCI_MAXMAPS_0 BARs. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge; record secondary-bus registers. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge; has both bridge and subsystem IDs. */
		cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
		cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
		cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
		cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
		cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
616
617/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
/* NB: this REG macro stays defined for pci_fill_devinfo() below. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	uint16_t vid, did;

	vid = REG(PCIR_VENDOR, 2);
	did = REG(PCIR_DEVICE, 2);
	/* A vendor ID of 0xffff means no function present at this address. */
	if (vid != 0xffff)
		return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));

	return (NULL);
}
631
/*
 * Default pci_alloc_devinfo bus method: return a zeroed pci_devinfo.
 * M_WAITOK means this cannot fail (it sleeps for memory if needed).
 */
struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

	return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
	    M_WAITOK | M_ZERO));
}
639
/*
 * Allocate a pci_devinfo for the function at domain d, bus b, slot s,
 * function f, populate it from the device's configuration header
 * (including capabilities when present), link it onto the global
 * pci_devq list, and return it.  Uses the REG macro defined in
 * pci_read_device() above.
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
	struct pci_devinfo *devlist_entry;
	pcicfgregs *cfg;

	devlist_entry = PCI_ALLOC_DEVINFO(bus);

	cfg = &devlist_entry->cfg;

	/* Standard header fields common to all header types. */
	cfg->domain		= d;
	cfg->bus		= b;
	cfg->slot		= s;
	cfg->func		= f;
	cfg->vendor		= vid;
	cfg->device		= did;
	cfg->cmdreg		= REG(PCIR_COMMAND, 2);
	cfg->statreg		= REG(PCIR_STATUS, 2);
	cfg->baseclass		= REG(PCIR_CLASS, 1);
	cfg->subclass		= REG(PCIR_SUBCLASS, 1);
	cfg->progif		= REG(PCIR_PROGIF, 1);
	cfg->revid		= REG(PCIR_REVID, 1);
	cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
	cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
	cfg->lattimer		= REG(PCIR_LATTIMER, 1);
	cfg->intpin		= REG(PCIR_INTPIN, 1);
	cfg->intline		= REG(PCIR_INTLINE, 1);

	/* Split the multi-function bit out of the header type. */
	cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
	cfg->hdrtype		&= ~PCIM_MFDEV;
	STAILQ_INIT(&cfg->maps);

	cfg->iov		= NULL;

	pci_fixancient(cfg);
	pci_hdrtypedata(pcib, b, s, f, cfg);

	/* Walk the capability list only if the status register says so. */
	if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
		pci_read_cap(pcib, cfg);

	STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

	/* Mirror the selector and IDs into the pciio conf structure. */
	devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
	devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
	devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
	devlist_entry->conf.pc_sel.pc_func = cfg->func;
	devlist_entry->conf.pc_hdr = cfg->hdrtype;

	devlist_entry->conf.pc_subvendor = cfg->subvendor;
	devlist_entry->conf.pc_subdevice = cfg->subdevice;
	devlist_entry->conf.pc_vendor = cfg->vendor;
	devlist_entry->conf.pc_device = cfg->device;

	devlist_entry->conf.pc_class = cfg->baseclass;
	devlist_entry->conf.pc_subclass = cfg->subclass;
	devlist_entry->conf.pc_progif = cfg->progif;
	devlist_entry->conf.pc_revid = cfg->revid;

	pci_numdevs++;
	pci_generation++;

	return (devlist_entry);
}
#undef REG
705
706static void
707pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
708{
709#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
710    cfg->ea.ea_location + (n), w)
711	int num_ent;
712	int ptr;
713	int a, b;
714	uint32_t val;
715	int ent_size;
716	uint32_t dw[4];
717	uint64_t base, max_offset;
718	struct pci_ea_entry *eae;
719
720	if (cfg->ea.ea_location == 0)
721		return;
722
723	STAILQ_INIT(&cfg->ea.ea_entries);
724
725	/* Determine the number of entries */
726	num_ent = REG(PCIR_EA_NUM_ENT, 2);
727	num_ent &= PCIM_EA_NUM_ENT_MASK;
728
729	/* Find the first entry to care of */
730	ptr = PCIR_EA_FIRST_ENT;
731
732	/* Skip DWORD 2 for type 1 functions */
733	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
734		ptr += 4;
735
736	for (a = 0; a < num_ent; a++) {
737
738		eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
739		eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
740
741		/* Read a number of dwords in the entry */
742		val = REG(ptr, 4);
743		ptr += 4;
744		ent_size = (val & PCIM_EA_ES);
745
746		for (b = 0; b < ent_size; b++) {
747			dw[b] = REG(ptr, 4);
748			ptr += 4;
749		}
750
751		eae->eae_flags = val;
752		eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
753
754		base = dw[0] & PCIM_EA_FIELD_MASK;
755		max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
756		b = 2;
757		if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
758			base |= (uint64_t)dw[b] << 32UL;
759			b++;
760		}
761		if (((dw[1] & PCIM_EA_IS_64) != 0)
762		    && (b < ent_size)) {
763			max_offset |= (uint64_t)dw[b] << 32UL;
764			b++;
765		}
766
767		eae->eae_base = base;
768		eae->eae_max_offset = max_offset;
769
770		STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
771
772		if (bootverbose) {
773			printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
774			    cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
775			    (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
776		}
777	}
778}
779#undef REG
780
/*
 * Walk the classic PCI capability list of the device described by cfg
 * and record the location/contents of the capabilities this driver
 * cares about (power management, HyperTransport, MSI, MSI-X, VPD,
 * subvendor, PCI-X, PCI Express, Enhanced Allocation) into *cfg.
 * Also sets the global pcix_chipset/pcie_chipset hints.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index plus offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
945
946/*
947 * PCI Vital Product Data
948 */
949
950#define	PCI_VPD_TIMEOUT		1000000
951
/*
 * Read a 32-bit word of Vital Product Data at 4-byte-aligned offset
 * 'reg' into *data.  Busy-waits (bounded by PCI_VPD_TIMEOUT
 * iterations) for the hardware completion flag (bit 15 of the VPD
 * address register) to be set.  Returns 0 on success, ENXIO on
 * timeout.  Uses the REG/WREG macros defined in pci_read_cap() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
970
#if 0
/*
 * Write a 32-bit word of Vital Product Data at 4-byte-aligned offset
 * 'reg'.  Busy-waits (bounded by PCI_VPD_TIMEOUT iterations) for the
 * hardware to clear the write flag (bit 15 of the VPD address
 * register).  Returns 0 on success, ENXIO on timeout.  Currently
 * compiled out (see the matching prototype, also under #if 0).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
990
991#undef PCI_VPD_TIMEOUT
992
/*
 * Cursor used by vpd_nextbyte() to stream VPD bytes one at a time out
 * of the 32-bit words returned by pci_read_vpd_reg().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched 32-bit word */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* VPD offset of the next word to fetch */
	uint8_t		cksum;		/* running byte sum for the RV check */
};
1001
1002static int
1003vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
1004{
1005	uint32_t reg;
1006	uint8_t byte;
1007
1008	if (vrs->bytesinval == 0) {
1009		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
1010			return (ENXIO);
1011		vrs->val = le32toh(reg);
1012		vrs->off += 4;
1013		byte = vrs->val & 0xff;
1014		vrs->bytesinval = 3;
1015	} else {
1016		vrs->val = vrs->val >> 8;
1017		byte = vrs->val & 0xff;
1018		vrs->bytesinval--;
1019	}
1020
1021	vrs->cksum += byte;
1022	*data = byte;
1023	return (0);
1024}
1025
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword list, and the read/write
 * (VPD-W) keyword list.  Implemented as a byte-at-a-time state
 * machine; state < 0 terminates the loop (-1 = normal end or parse
 * abort, -2 = I/O error).  On checksum failure the RO data is
 * discarded; on I/O error everything is discarded.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length may not run past the 508-byte VPD space. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the RO array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* header consumed 3 bytes of the item */
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the sum of all
			 * VPD bytes up to and including it equal zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the RO array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * NOTE(review): no transition in this function ever
			 * sets state = 4; this skip state appears vestigial.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the RW array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset where this value starts. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* header consumed 3 bytes of the item */
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the RW array to its final size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the (possibly empty) results as cached either way. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1297
1298int
1299pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1300{
1301	struct pci_devinfo *dinfo = device_get_ivars(child);
1302	pcicfgregs *cfg = &dinfo->cfg;
1303
1304	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1305		pci_read_vpd(device_get_parent(dev), cfg);
1306
1307	*identptr = cfg->vpd.vpd_ident;
1308
1309	if (*identptr == NULL)
1310		return (ENXIO);
1311
1312	return (0);
1313}
1314
1315int
1316pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1317	const char **vptr)
1318{
1319	struct pci_devinfo *dinfo = device_get_ivars(child);
1320	pcicfgregs *cfg = &dinfo->cfg;
1321	int i;
1322
1323	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1324		pci_read_vpd(device_get_parent(dev), cfg);
1325
1326	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1327		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1328		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1329			*vptr = cfg->vpd.vpd_ros[i].value;
1330			return (0);
1331		}
1332
1333	*vptr = NULL;
1334	return (ENXIO);
1335}
1336
1337struct pcicfg_vpd *
1338pci_fetch_vpd_list(device_t dev)
1339{
1340	struct pci_devinfo *dinfo = device_get_ivars(dev);
1341	pcicfgregs *cfg = &dinfo->cfg;
1342
1343	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1344		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1345	return (&cfg->vpd);
1346}
1347
1348/*
1349 * Find the requested HyperTransport capability and return the offset
1350 * in configuration space via the pointer provided.  The function
1351 * returns 0 on success and an error code otherwise.
1352 */
1353int
1354pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1355{
1356	int ptr, error;
1357	uint16_t val;
1358
1359	error = pci_find_cap(child, PCIY_HT, &ptr);
1360	if (error)
1361		return (error);
1362
1363	/*
1364	 * Traverse the capabilities list checking each HT capability
1365	 * to see if it matches the requested HT capability.
1366	 */
1367	while (ptr != 0) {
1368		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1369		if (capability == PCIM_HTCAP_SLAVE ||
1370		    capability == PCIM_HTCAP_HOST)
1371			val &= 0xe000;
1372		else
1373			val &= PCIM_HTCMD_CAP_MASK;
1374		if (val == capability) {
1375			if (capreg != NULL)
1376				*capreg = ptr;
1377			return (0);
1378		}
1379
1380		/* Skip to the next HT capability. */
1381		while (ptr != 0) {
1382			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1383			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1384			    PCIY_HT)
1385				break;
1386		}
1387	}
1388	return (ENOENT);
1389}
1390
1391/*
1392 * Find the requested capability and return the offset in
1393 * configuration space via the pointer provided.  The function returns
1394 * 0 on success and an error code otherwise.
1395 */
1396int
1397pci_find_cap_method(device_t dev, device_t child, int capability,
1398    int *capreg)
1399{
1400	struct pci_devinfo *dinfo = device_get_ivars(child);
1401	pcicfgregs *cfg = &dinfo->cfg;
1402	u_int32_t status;
1403	u_int8_t ptr;
1404
1405	/*
1406	 * Check the CAP_LIST bit of the PCI status register first.
1407	 */
1408	status = pci_read_config(child, PCIR_STATUS, 2);
1409	if (!(status & PCIM_STATUS_CAPPRESENT))
1410		return (ENXIO);
1411
1412	/*
1413	 * Determine the start pointer of the capabilities list.
1414	 */
1415	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1416	case PCIM_HDRTYPE_NORMAL:
1417	case PCIM_HDRTYPE_BRIDGE:
1418		ptr = PCIR_CAP_PTR;
1419		break;
1420	case PCIM_HDRTYPE_CARDBUS:
1421		ptr = PCIR_CAP_PTR_2;
1422		break;
1423	default:
1424		/* XXX: panic? */
1425		return (ENXIO);		/* no extended capabilities support */
1426	}
1427	ptr = pci_read_config(child, ptr, 1);
1428
1429	/*
1430	 * Traverse the capabilities list.
1431	 */
1432	while (ptr != 0) {
1433		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1434			if (capreg != NULL)
1435				*capreg = ptr;
1436			return (0);
1437		}
1438		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1439	}
1440
1441	return (ENOENT);
1442}
1443
1444/*
1445 * Find the requested extended capability and return the offset in
1446 * configuration space via the pointer provided.  The function returns
1447 * 0 on success and an error code otherwise.
1448 */
1449int
1450pci_find_extcap_method(device_t dev, device_t child, int capability,
1451    int *capreg)
1452{
1453	struct pci_devinfo *dinfo = device_get_ivars(child);
1454	pcicfgregs *cfg = &dinfo->cfg;
1455	uint32_t ecap;
1456	uint16_t ptr;
1457
1458	/* Only supported for PCI-express devices. */
1459	if (cfg->pcie.pcie_location == 0)
1460		return (ENXIO);
1461
1462	ptr = PCIR_EXTCAP;
1463	ecap = pci_read_config(child, ptr, 4);
1464	if (ecap == 0xffffffff || ecap == 0)
1465		return (ENOENT);
1466	for (;;) {
1467		if (PCI_EXTCAP_ID(ecap) == capability) {
1468			if (capreg != NULL)
1469				*capreg = ptr;
1470			return (0);
1471		}
1472		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1473		if (ptr == 0)
1474			break;
1475		ecap = pci_read_config(child, ptr, 4);
1476	}
1477
1478	return (ENOENT);
1479}
1480
1481/*
1482 * Support for MSI-X message interrupts.
1483 */
1484void
1485pci_enable_msix_method(device_t dev, device_t child, u_int index,
1486    uint64_t address, uint32_t data)
1487{
1488	struct pci_devinfo *dinfo = device_get_ivars(child);
1489	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1490	uint32_t offset;
1491
1492	KASSERT(msix->msix_table_len > index, ("bogus index"));
1493	offset = msix->msix_table_offset + index * 16;
1494	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1495	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1496	bus_write_4(msix->msix_table_res, offset + 8, data);
1497
1498	/* Enable MSI -> HT mapping. */
1499	pci_ht_map_msi(child, address);
1500}
1501
1502void
1503pci_mask_msix(device_t dev, u_int index)
1504{
1505	struct pci_devinfo *dinfo = device_get_ivars(dev);
1506	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1507	uint32_t offset, val;
1508
1509	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1510	offset = msix->msix_table_offset + index * 16 + 12;
1511	val = bus_read_4(msix->msix_table_res, offset);
1512	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1513		val |= PCIM_MSIX_VCTRL_MASK;
1514		bus_write_4(msix->msix_table_res, offset, val);
1515	}
1516}
1517
1518void
1519pci_unmask_msix(device_t dev, u_int index)
1520{
1521	struct pci_devinfo *dinfo = device_get_ivars(dev);
1522	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1523	uint32_t offset, val;
1524
1525	KASSERT(msix->msix_table_len > index, ("bogus index"));
1526	offset = msix->msix_table_offset + index * 16 + 12;
1527	val = bus_read_4(msix->msix_table_res, offset);
1528	if (val & PCIM_MSIX_VCTRL_MASK) {
1529		val &= ~PCIM_MSIX_VCTRL_MASK;
1530		bus_write_4(msix->msix_table_res, offset, val);
1531	}
1532}
1533
1534int
1535pci_pending_msix(device_t dev, u_int index)
1536{
1537	struct pci_devinfo *dinfo = device_get_ivars(dev);
1538	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1539	uint32_t offset, bit;
1540
1541	KASSERT(msix->msix_table_len > index, ("bogus index"));
1542	offset = msix->msix_pba_offset + (index / 32) * 4;
1543	bit = 1 << index % 32;
1544	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1545}
1546
1547/*
1548 * Restore MSI-X registers and table during resume.  If MSI-X is
1549 * enabled then walk the virtual table to restore the actual MSI-X
1550 * table.
1551 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved control register (including the enable bit). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1579
1580/*
1581 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1582 * returned in *count.  After this function returns, each message will be
1583 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1584 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still points at it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	/* Ask the parent bridge for up to 'max' IRQs, one rid each. */
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages the bridge actually granted. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %ju for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %ju", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%ju", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1719
1720/*
1721 * By default, pci_alloc_msix() will assign the allocated IRQ
1722 * resources consecutively to the first N messages in the MSI-X table.
1723 * However, device drivers may want to use different layouts if they
1724 * either receive fewer messages than they asked for, or they wish to
1725 * populate the MSI-X table sparsely.  This method allows the driver
1726 * to specify what layout it wants.  It must be called after a
1727 * successful pci_alloc_msix() but before any of the associated
1728 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1729 *
1730 * The 'vectors' array contains 'count' message vectors.  The array
1731 * maps directly to the MSI-X table in that index 0 in the array
1732 * specifies the vector for the first message in the MSI-X table, etc.
1733 * The vector value in each array index can either be 0 to indicate
1734 * that no vector should be assigned to a message slot, or it can be a
1735 * number from 1 to N (where N is the count returned from a
1736 * succcessful call to pci_alloc_msix()) to indicate which message
1737 * vector (IRQ) to be used for the corresponding message.
1738 *
1739 * On successful return, each message with a non-zero vector will have
1740 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1741 * 1.  Additionally, if any of the IRQs allocated via the previous
1742 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1743 * will be freed back to the system automatically.
1744 *
1745 * For example, suppose a driver has a MSI-X table with 6 messages and
1746 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1747 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1748 * C.  After the call to pci_alloc_msix(), the device will be setup to
1749 * have an MSI-X table of ABC--- (where - means no vector assigned).
1750 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1751 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1752 * be freed back to the system.  This device will also have valid
1753 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1754 *
1755 * In any case, the SYS_RES_IRQ rid X will always map to the message
1756 * at MSI-X table index X - 1 and will only be valid if a vector is
1757 * assigned to that table entry.
1758 */
int
pci_remap_msix_method(device_t dev, device_t child, int count,
    const u_int *vectors)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i, irq, j, *used;

	/*
	 * Have to have at least one message in the table but the
	 * table can't be bigger than the actual MSI-X table in the
	 * device.
	 */
	if (count == 0 || count > msix->msix_msgnum)
		return (EINVAL);

	/* Sanity check the vectors. */
	for (i = 0; i < count; i++)
		if (vectors[i] > msix->msix_alloc)
			return (EINVAL);

	/*
	 * Make sure there aren't any holes in the vectors to be used.
	 * It's a big pain to support it, and it doesn't really make
	 * sense anyway.  Also, at least one vector must be used.
	 */
	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
	    M_ZERO);
	for (i = 0; i < count; i++)
		if (vectors[i] != 0)
			used[vectors[i] - 1] = 1;
	/* A hole is an unused vector followed by a used one. */
	for (i = 0; i < msix->msix_alloc - 1; i++)
		if (used[i] == 0 && used[i + 1] == 1) {
			free(used, M_DEVBUF);
			return (EINVAL);
		}
	if (used[0] != 1) {
		free(used, M_DEVBUF);
		return (EINVAL);
	}

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL) {
			free(used, M_DEVBUF);
			return (EBUSY);
		}
	}

	/* Free the existing resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}

	/*
	 * Build the new virtual table keeping track of which vectors are
	 * used.
	 */
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < count; i++)
		msix->msix_table[i].mte_vector = vectors[i];
	msix->msix_table_len = count;

	/* Free any unused IRQs and resize the vectors array if necessary. */
	j = msix->msix_alloc - 1;
	if (used[j] == 0) {
		struct msix_vector *vec;

		/*
		 * Unused vectors are contiguous at the tail (the hole
		 * check above guarantees this), so free from the end.
		 */
		while (used[j] == 0) {
			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
			    msix->msix_vectors[j].mv_irq);
			j--;
		}
		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
		    M_WAITOK);
		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
		    (j + 1));
		free(msix->msix_vectors, M_DEVBUF);
		msix->msix_vectors = vec;
		msix->msix_alloc = j + 1;
	}
	free(used, M_DEVBUF);

	/* Map the IRQs onto the rids. */
	for (i = 0; i < count; i++) {
		if (vectors[i] == 0)
			continue;
		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}

	if (bootverbose) {
		device_printf(child, "Remapped MSI-X IRQs as: ");
		for (i = 0; i < count; i++) {
			if (i != 0)
				printf(", ");
			if (vectors[i] == 0)
				printf("---");
			else
				printf("%d",
				    msix->msix_vectors[vectors[i] - 1].mv_irq);
		}
		printf("\n");
	}

	return (0);
}
1880
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if
 * any message still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1927
1928/*
1929 * Return the max supported MSI-X messages this device supports.
1930 * Basically, assuming the MD code can alloc messages, this function
1931 * should return the maximum value that pci_alloc_msix() can return.
1932 * Thus, it is subject to the tunables, etc.
1933 */
1934int
1935pci_msix_count_method(device_t dev, device_t child)
1936{
1937	struct pci_devinfo *dinfo = device_get_ivars(child);
1938	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1939
1940	if (pci_do_msix && msix->msix_location != 0)
1941		return (msix->msix_msgnum);
1942	return (0);
1943}
1944
1945int
1946pci_msix_pba_bar_method(device_t dev, device_t child)
1947{
1948	struct pci_devinfo *dinfo = device_get_ivars(child);
1949	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1950
1951	if (pci_do_msix && msix->msix_location != 0)
1952		return (msix->msix_pba_bar);
1953	return (-1);
1954}
1955
1956int
1957pci_msix_table_bar_method(device_t dev, device_t child)
1958{
1959	struct pci_devinfo *dinfo = device_get_ivars(child);
1960	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1961
1962	if (pci_do_msix && msix->msix_location != 0)
1963		return (msix->msix_table_bar);
1964	return (-1);
1965}
1966
1967/*
1968 * HyperTransport MSI mapping control
1969 */
1970void
1971pci_ht_map_msi(device_t dev, uint64_t addr)
1972{
1973	struct pci_devinfo *dinfo = device_get_ivars(dev);
1974	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1975
1976	if (!ht->ht_msimap)
1977		return;
1978
1979	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1980	    ht->ht_msiaddr >> 20 == addr >> 20) {
1981		/* Enable MSI -> HT mapping. */
1982		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1983		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1984		    ht->ht_msictrl, 2);
1985	}
1986
1987	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1988		/* Disable MSI -> HT mapping. */
1989		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1990		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1991		    ht->ht_msictrl, 2);
1992	}
1993}
1994
1995int
1996pci_get_max_payload(device_t dev)
1997{
1998	struct pci_devinfo *dinfo = device_get_ivars(dev);
1999	int cap;
2000	uint16_t val;
2001
2002	cap = dinfo->cfg.pcie.pcie_location;
2003	if (cap == 0)
2004		return (0);
2005	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2006	val &= PCIEM_CTL_MAX_PAYLOAD;
2007	val >>= 5;
2008	return (1 << (val + 7));
2009}
2010
2011int
2012pci_get_max_read_req(device_t dev)
2013{
2014	struct pci_devinfo *dinfo = device_get_ivars(dev);
2015	int cap;
2016	uint16_t val;
2017
2018	cap = dinfo->cfg.pcie.pcie_location;
2019	if (cap == 0)
2020		return (0);
2021	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2022	val &= PCIEM_CTL_MAX_READ_REQUEST;
2023	val >>= 12;
2024	return (1 << (val + 7));
2025}
2026
2027int
2028pci_set_max_read_req(device_t dev, int size)
2029{
2030	struct pci_devinfo *dinfo = device_get_ivars(dev);
2031	int cap;
2032	uint16_t val;
2033
2034	cap = dinfo->cfg.pcie.pcie_location;
2035	if (cap == 0)
2036		return (0);
2037	if (size < 128)
2038		size = 128;
2039	if (size > 4096)
2040		size = 4096;
2041	size = (1 << (fls(size) - 1));
2042	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
2043	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
2044	val |= (fls(size) - 8) << 12;
2045	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
2046	return (size);
2047}
2048
2049uint32_t
2050pcie_read_config(device_t dev, int reg, int width)
2051{
2052	struct pci_devinfo *dinfo = device_get_ivars(dev);
2053	int cap;
2054
2055	cap = dinfo->cfg.pcie.pcie_location;
2056	if (cap == 0) {
2057		if (width == 2)
2058			return (0xffff);
2059		return (0xffffffff);
2060	}
2061
2062	return (pci_read_config(dev, cap + reg, width));
2063}
2064
2065void
2066pcie_write_config(device_t dev, int reg, uint32_t value, int width)
2067{
2068	struct pci_devinfo *dinfo = device_get_ivars(dev);
2069	int cap;
2070
2071	cap = dinfo->cfg.pcie.pcie_location;
2072	if (cap == 0)
2073		return;
2074	pci_write_config(dev, cap + reg, value, width);
2075}
2076
2077/*
2078 * Adjusts a PCI-e capability register by clearing the bits in mask
2079 * and setting the bits in (value & mask).  Bits not set in mask are
2080 * not adjusted.
2081 *
2082 * Returns the old value on success or all ones on failure.
2083 */
2084uint32_t
2085pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2086    int width)
2087{
2088	struct pci_devinfo *dinfo = device_get_ivars(dev);
2089	uint32_t old, new;
2090	int cap;
2091
2092	cap = dinfo->cfg.pcie.pcie_location;
2093	if (cap == 0) {
2094		if (width == 2)
2095			return (0xffff);
2096		return (0xffffffff);
2097	}
2098
2099	old = pci_read_config(dev, cap + reg, width);
2100	new = old & ~mask;
2101	new |= (value & mask);
2102	pci_write_config(dev, cap + reg, new, width);
2103	return (old);
2104}
2105
2106/*
2107 * Support for MSI message signalled interrupts.
2108 */
/*
 * Program the MSI capability of 'child' with the given message
 * address/data pair and turn MSI on.  The pair is typically obtained
 * from the parent bridge via PCIB_MAP_MSI().
 */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/*
		 * With a 64-bit capability the data register sits after
		 * the upper address dword, so it is at a different offset.
		 */
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
2136
/*
 * Turn MSI off for 'child': tear down any HyperTransport interrupt
 * mapping first, then clear the enable bit in the MSI control register.
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
2151
2152/*
2153 * Restore MSI registers during resume.  If MSI is enabled then
2154 * restore the data and address registers in addition to the control
2155 * register.
2156 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* MSI was enabled: rewrite the saved address/data pair. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
2182
2183static int
2184pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
2185{
2186	struct pci_devinfo *dinfo = device_get_ivars(dev);
2187	pcicfgregs *cfg = &dinfo->cfg;
2188	struct resource_list_entry *rle;
2189	struct msix_table_entry *mte;
2190	struct msix_vector *mv;
2191	uint64_t addr;
2192	uint32_t data;
2193	int error, i, j;
2194
2195	/*
2196	 * Handle MSI first.  We try to find this IRQ among our list
2197	 * of MSI IRQs.  If we find it, we request updated address and
2198	 * data registers and apply the results.
2199	 */
2200	if (cfg->msi.msi_alloc > 0) {
2201
2202		/* If we don't have any active handlers, nothing to do. */
2203		if (cfg->msi.msi_handlers == 0)
2204			return (0);
2205		for (i = 0; i < cfg->msi.msi_alloc; i++) {
2206			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
2207			    i + 1);
2208			if (rle->start == irq) {
2209				error = PCIB_MAP_MSI(device_get_parent(bus),
2210				    dev, irq, &addr, &data);
2211				if (error)
2212					return (error);
2213				pci_disable_msi(dev);
2214				dinfo->cfg.msi.msi_addr = addr;
2215				dinfo->cfg.msi.msi_data = data;
2216				pci_enable_msi(dev, addr, data);
2217				return (0);
2218			}
2219		}
2220		return (ENOENT);
2221	}
2222
2223	/*
2224	 * For MSI-X, we check to see if we have this IRQ.  If we do,
2225	 * we request the updated mapping info.  If that works, we go
2226	 * through all the slots that use this IRQ and update them.
2227	 */
2228	if (cfg->msix.msix_alloc > 0) {
2229		for (i = 0; i < cfg->msix.msix_alloc; i++) {
2230			mv = &cfg->msix.msix_vectors[i];
2231			if (mv->mv_irq == irq) {
2232				error = PCIB_MAP_MSI(device_get_parent(bus),
2233				    dev, irq, &addr, &data);
2234				if (error)
2235					return (error);
2236				mv->mv_address = addr;
2237				mv->mv_data = data;
2238				for (j = 0; j < cfg->msix.msix_table_len; j++) {
2239					mte = &cfg->msix.msix_table[j];
2240					if (mte->mte_vector != i + 1)
2241						continue;
2242					if (mte->mte_handlers == 0)
2243						continue;
2244					pci_mask_msix(dev, j);
2245					pci_enable_msix(dev, j, addr, data);
2246					pci_unmask_msix(dev, j);
2247				}
2248			}
2249		}
2250		return (ENOENT);
2251	}
2252
2253	return (ENOENT);
2254}
2255
2256/*
2257 * Returns true if the specified device is blacklisted because MSI
2258 * doesn't work.
2259 */
2260int
2261pci_msi_device_blacklisted(device_t dev)
2262{
2263
2264	if (!pci_honor_msi_blacklist)
2265		return (0);
2266
2267	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2268}
2269
2270/*
2271 * Determine if MSI is blacklisted globally on this system.  Currently,
2272 * we just check for blacklisted chipsets as represented by the
2273 * host-PCI bridge at device 0:0:0.  In the future, it may become
2274 * necessary to check other system attributes, such as the kenv values
2275 * that give the motherboard manufacturer and model number.
2276 */
2277static int
2278pci_msi_blacklisted(void)
2279{
2280	device_t dev;
2281
2282	if (!pci_honor_msi_blacklist)
2283		return (0);
2284
2285	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2286	if (!(pcie_chipset || pcix_chipset)) {
2287		if (vm_guest != VM_GUEST_NO) {
2288			/*
2289			 * Whitelist older chipsets in virtual
2290			 * machines known to support MSI.
2291			 */
2292			dev = pci_find_bsf(0, 0, 0);
2293			if (dev != NULL)
2294				return (!pci_has_quirk(pci_get_devid(dev),
2295					PCI_QUIRK_ENABLE_MSI_VM));
2296		}
2297		return (1);
2298	}
2299
2300	dev = pci_find_bsf(0, 0, 0);
2301	if (dev != NULL)
2302		return (pci_msi_device_blacklisted(dev));
2303	return (0);
2304}
2305
2306/*
2307 * Returns true if the specified device is blacklisted because MSI-X
2308 * doesn't work.  Note that this assumes that if MSI doesn't work,
2309 * MSI-X doesn't either.
2310 */
2311int
2312pci_msix_device_blacklisted(device_t dev)
2313{
2314
2315	if (!pci_honor_msi_blacklist)
2316		return (0);
2317
2318	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2319		return (1);
2320
2321	return (pci_msi_device_blacklisted(dev));
2322}
2323
2324/*
2325 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2326 * is blacklisted, assume that MSI-X is as well.  Check for additional
2327 * chipsets where MSI works but MSI-X does not.
2328 */
2329static int
2330pci_msix_blacklisted(void)
2331{
2332	device_t dev;
2333
2334	if (!pci_honor_msi_blacklist)
2335		return (0);
2336
2337	dev = pci_find_bsf(0, 0, 0);
2338	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2339	    PCI_QUIRK_DISABLE_MSIX))
2340		return (1);
2341
2342	return (pci_msi_blacklisted());
2343}
2344
2345/*
2346 * Attempt to allocate *count MSI messages.  The actual number allocated is
2347 * returned in *count.  After this function returns, each message will be
2348 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2349 */
/*
 * Returns 0 on success with *count updated to the number actually
 * allocated; EINVAL for a zero or non-power-of-2 request, ENXIO if
 * messages are already allocated or MSI is blacklisted, ENODEV if the
 * device lacks an MSI capability, or the PCIB_ALLOC_MSI() error.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request until the parent can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME field encodes log2 of the message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2468
2469/* Release the MSI messages associated with this device. */
/*
 * Returns 0 on success, ENODEV if no MSI/MSI-X messages are
 * allocated, or EBUSY if any message still has an active handler or
 * an allocated IRQ resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ numbers to hand back to the parent. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2517
2518/*
2519 * Return the max supported MSI messages this device supports.
2520 * Basically, assuming the MD code can alloc messages, this function
2521 * should return the maximum value that pci_alloc_msi() can return.
2522 * Thus, it is subject to the tunables, etc.
2523 */
2524int
2525pci_msi_count_method(device_t dev, device_t child)
2526{
2527	struct pci_devinfo *dinfo = device_get_ivars(child);
2528	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2529
2530	if (pci_do_msi && msi->msi_location != 0)
2531		return (msi->msi_msgnum);
2532	return (0);
2533}
2534
2535/* free pcicfgregs structure and all depending data structures */
2536
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	if (dinfo->cfg.vpd.vpd_reg) {
		/* Free the VPD ident string and keyword value arrays. */
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink from the global device list, then free the devinfo. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2568
2569/*
2570 * PCI power manangement
2571 */
/*
 * Move 'child' to the requested PCI power state (PCI_POWERSTATE_D0..D3).
 * Returns 0 on success, EOPNOTSUPP if the device has no PM capability
 * or does not support the requested D1/D2 state, or EINVAL for an
 * unknown state.
 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the other status bits; replace only the state field. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2643
2644int
2645pci_get_powerstate_method(device_t dev, device_t child)
2646{
2647	struct pci_devinfo *dinfo = device_get_ivars(child);
2648	pcicfgregs *cfg = &dinfo->cfg;
2649	uint16_t status;
2650	int result;
2651
2652	if (cfg->pp.pp_cap != 0) {
2653		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2654		switch (status & PCIM_PSTAT_DMASK) {
2655		case PCIM_PSTAT_D0:
2656			result = PCI_POWERSTATE_D0;
2657			break;
2658		case PCIM_PSTAT_D1:
2659			result = PCI_POWERSTATE_D1;
2660			break;
2661		case PCIM_PSTAT_D2:
2662			result = PCI_POWERSTATE_D2;
2663			break;
2664		case PCIM_PSTAT_D3:
2665			result = PCI_POWERSTATE_D3;
2666			break;
2667		default:
2668			result = PCI_POWERSTATE_UNKNOWN;
2669			break;
2670		}
2671	} else {
2672		/* No support, device is always at D0 */
2673		result = PCI_POWERSTATE_D0;
2674	}
2675	return (result);
2676}
2677
2678/*
2679 * Some convenience functions for PCI device drivers.
2680 */
2681
2682static __inline void
2683pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2684{
2685	uint16_t	command;
2686
2687	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2688	command |= bit;
2689	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2690}
2691
2692static __inline void
2693pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2694{
2695	uint16_t	command;
2696
2697	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2698	command &= ~bit;
2699	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2700}
2701
2702int
2703pci_enable_busmaster_method(device_t dev, device_t child)
2704{
2705	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2706	return (0);
2707}
2708
2709int
2710pci_disable_busmaster_method(device_t dev, device_t child)
2711{
2712	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2713	return (0);
2714}
2715
2716int
2717pci_enable_io_method(device_t dev, device_t child, int space)
2718{
2719	uint16_t bit;
2720
2721	switch(space) {
2722	case SYS_RES_IOPORT:
2723		bit = PCIM_CMD_PORTEN;
2724		break;
2725	case SYS_RES_MEMORY:
2726		bit = PCIM_CMD_MEMEN;
2727		break;
2728	default:
2729		return (EINVAL);
2730	}
2731	pci_set_command_bit(dev, child, bit);
2732	return (0);
2733}
2734
2735int
2736pci_disable_io_method(device_t dev, device_t child, int space)
2737{
2738	uint16_t bit;
2739
2740	switch(space) {
2741	case SYS_RES_IOPORT:
2742		bit = PCIM_CMD_PORTEN;
2743		break;
2744	case SYS_RES_MEMORY:
2745		bit = PCIM_CMD_MEMEN;
2746		break;
2747	default:
2748		return (EINVAL);
2749	}
2750	pci_clear_command_bit(dev, child, bit);
2751	return (0);
2752}
2753
2754/*
2755 * New style pci driver.  Parent device is either a pci-host-bridge or a
2756 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2757 */
2758
/*
 * When booting verbose, dump the parsed config header of a newly
 * found device: IDs, bus location, class codes, command/status,
 * timing registers, interrupt routing, and any power-management,
 * MSI, or MSI-X capability information stored in 'cfg'.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state for the report. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2815
2816static int
2817pci_porten(device_t dev)
2818{
2819	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2820}
2821
2822static int
2823pci_memen(device_t dev)
2824{
2825	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2826}
2827
/*
 * Probe the BAR at config offset 'reg': return its current value in
 * *mapp and the sizing value (read back after writing all ones) in
 * *testvalp.  If bar64 is non-NULL it is set to 1 when the BAR is a
 * 64-bit memory BAR.  The BAR contents and command-register decode
 * bits are restored before returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		if (bar64 != NULL)
			*bar64 = 0;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
	if (bar64 != NULL)
		*bar64 = (ln2range == 64);
}
2896
/*
 * Program the BAR described by 'pm' with 'base' and refresh
 * pm->pm_value by reading the register(s) back from the device.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the device actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2917
2918struct pci_map *
2919pci_find_bar(device_t dev, int reg)
2920{
2921	struct pci_devinfo *dinfo;
2922	struct pci_map *pm;
2923
2924	dinfo = device_get_ivars(dev);
2925	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2926		if (pm->pm_reg == reg)
2927			return (pm);
2928	}
2929	return (NULL);
2930}
2931
2932int
2933pci_bar_enabled(device_t dev, struct pci_map *pm)
2934{
2935	struct pci_devinfo *dinfo;
2936	uint16_t cmd;
2937
2938	dinfo = device_get_ivars(dev);
2939	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2940	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2941		return (0);
2942	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2943	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2944		return ((cmd & PCIM_CMD_MEMEN) != 0);
2945	else
2946		return ((cmd & PCIM_CMD_PORTEN) != 0);
2947}
2948
/*
 * Record a BAR (register offset, raw value, log2 size) for this
 * device.  The record is inserted into the per-device list, which is
 * kept sorted by register offset.  Duplicate offsets are a bug
 * (asserted).  Returns the new record.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2973
/*
 * Write each saved BAR value back to the device's config space,
 * handling the upper dword of 64-bit memory BARs.  ROM BARs are
 * always treated as 32-bit.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2993
2994/*
2995 * Add a resource based on a pci map register. Return 1 if the map
2996 * register is a 32bit map register or 2 if it is a 64bit register.
2997 */
2998static int
2999pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
3000    int force, int prefetch)
3001{
3002	struct pci_map *pm;
3003	pci_addr_t base, map, testval;
3004	pci_addr_t start, end, count;
3005	int barlen, basezero, flags, maprange, mapsize, type;
3006	uint16_t cmd;
3007	struct resource *res;
3008
3009	/*
3010	 * The BAR may already exist if the device is a CardBus card
3011	 * whose CIS is stored in this BAR.
3012	 */
3013	pm = pci_find_bar(dev, reg);
3014	if (pm != NULL) {
3015		maprange = pci_maprange(pm->pm_value);
3016		barlen = maprange == 64 ? 2 : 1;
3017		return (barlen);
3018	}
3019
3020	pci_read_bar(dev, reg, &map, &testval, NULL);
3021	if (PCI_BAR_MEM(map)) {
3022		type = SYS_RES_MEMORY;
3023		if (map & PCIM_BAR_MEM_PREFETCH)
3024			prefetch = 1;
3025	} else
3026		type = SYS_RES_IOPORT;
3027	mapsize = pci_mapsize(testval);
3028	base = pci_mapbase(map);
3029#ifdef __PCI_BAR_ZERO_VALID
3030	basezero = 0;
3031#else
3032	basezero = base == 0;
3033#endif
3034	maprange = pci_maprange(map);
3035	barlen = maprange == 64 ? 2 : 1;
3036
3037	/*
3038	 * For I/O registers, if bottom bit is set, and the next bit up
3039	 * isn't clear, we know we have a BAR that doesn't conform to the
3040	 * spec, so ignore it.  Also, sanity check the size of the data
3041	 * areas to the type of memory involved.  Memory must be at least
3042	 * 16 bytes in size, while I/O ranges must be at least 4.
3043	 */
3044	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
3045		return (barlen);
3046	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
3047	    (type == SYS_RES_IOPORT && mapsize < 2))
3048		return (barlen);
3049
3050	/* Save a record of this BAR. */
3051	pm = pci_add_bar(dev, reg, map, mapsize);
3052	if (bootverbose) {
3053		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
3054		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
3055		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3056			printf(", port disabled\n");
3057		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
3058			printf(", memory disabled\n");
3059		else
3060			printf(", enabled\n");
3061	}
3062
3063	/*
3064	 * If base is 0, then we have problems if this architecture does
3065	 * not allow that.  It is best to ignore such entries for the
3066	 * moment.  These will be allocated later if the driver specifically
3067	 * requests them.  However, some removable busses look better when
3068	 * all resources are allocated, so allow '0' to be overriden.
3069	 *
3070	 * Similarly treat maps whose values is the same as the test value
3071	 * read back.  These maps have had all f's written to them by the
3072	 * BIOS in an attempt to disable the resources.
3073	 */
3074	if (!force && (basezero || map == testval))
3075		return (barlen);
3076	if ((u_long)base != base) {
3077		device_printf(bus,
3078		    "pci%d:%d:%d:%d bar %#x too many address bits",
3079		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
3080		    pci_get_function(dev), reg);
3081		return (barlen);
3082	}
3083
3084	/*
3085	 * This code theoretically does the right thing, but has
3086	 * undesirable side effects in some cases where peripherals
3087	 * respond oddly to having these bits enabled.  Let the user
3088	 * be able to turn them off (since pci_enable_io_modes is 1 by
3089	 * default).
3090	 */
3091	if (pci_enable_io_modes) {
3092		/* Turn on resources that have been left off by a lazy BIOS */
3093		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3094			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3095			cmd |= PCIM_CMD_PORTEN;
3096			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3097		}
3098		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3099			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3100			cmd |= PCIM_CMD_MEMEN;
3101			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3102		}
3103	} else {
3104		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3105			return (barlen);
3106		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3107			return (barlen);
3108	}
3109
3110	count = (pci_addr_t)1 << mapsize;
3111	flags = RF_ALIGNMENT_LOG2(mapsize);
3112	if (prefetch)
3113		flags |= RF_PREFETCHABLE;
3114	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3115		start = 0;	/* Let the parent decide. */
3116		end = ~0;
3117	} else {
3118		start = base;
3119		end = base + count - 1;
3120	}
3121	resource_list_add(rl, type, reg, start, end, count);
3122
3123	/*
3124	 * Try to allocate the resource for this BAR from our parent
3125	 * so that this resource range is already reserved.  The
3126	 * driver for this device will later inherit this resource in
3127	 * pci_alloc_resource().
3128	 */
3129	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3130	    flags);
3131	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
3132		/*
3133		 * If the allocation fails, try to allocate a resource for
3134		 * this BAR using any available range.  The firmware felt
3135		 * it was important enough to assign a resource, so don't
3136		 * disable decoding if we can help it.
3137		 */
3138		resource_list_delete(rl, type, reg);
3139		resource_list_add(rl, type, reg, 0, ~0, count);
3140		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
3141		    count, flags);
3142	}
3143	if (res == NULL) {
3144		/*
3145		 * If the allocation fails, delete the resource list entry
3146		 * and disable decoding for this device.
3147		 *
3148		 * If the driver requests this resource in the future,
3149		 * pci_reserve_map() will try to allocate a fresh
3150		 * resource range.
3151		 */
3152		resource_list_delete(rl, type, reg);
3153		pci_disable_io(dev, type);
3154		if (bootverbose)
3155			device_printf(bus,
3156			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3157			    pci_get_domain(dev), pci_get_bus(dev),
3158			    pci_get_slot(dev), pci_get_function(dev), reg);
3159	} else {
3160		start = rman_get_start(res);
3161		pci_write_bar(dev, pm, start);
3162	}
3163	return (barlen);
3164}
3165
3166/*
3167 * For ATA devices we need to decide early what addressing mode to use.
3168 * Legacy demands that the primary and secondary ATA ports sits on the
3169 * same addresses that old ISA hardware did. This dictates that we use
3170 * those addresses and ignore the BAR's if we cannot set PCI native
3171 * addressing mode.
3172 */
3173static void
3174pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
3175    uint32_t prefetchmask)
3176{
3177	int rid, type, progif;
3178#if 0
3179	/* if this device supports PCI native addressing use it */
3180	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3181	if ((progif & 0x8a) == 0x8a) {
3182		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
3183		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
3184			printf("Trying ATA native PCI addressing mode\n");
3185			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
3186		}
3187	}
3188#endif
3189	progif = pci_read_config(dev, PCIR_PROGIF, 1);
3190	type = SYS_RES_IOPORT;
3191	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
3192		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
3193		    prefetchmask & (1 << 0));
3194		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
3195		    prefetchmask & (1 << 1));
3196	} else {
3197		rid = PCIR_BAR(0);
3198		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
3199		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
3200		    0x1f7, 8, 0);
3201		rid = PCIR_BAR(1);
3202		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
3203		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
3204		    0x3f6, 1, 0);
3205	}
3206	if (progif & PCIP_STORAGE_IDE_MODESEC) {
3207		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
3208		    prefetchmask & (1 << 2));
3209		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
3210		    prefetchmask & (1 << 3));
3211	} else {
3212		rid = PCIR_BAR(2);
3213		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3214		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3215		    0x177, 8, 0);
3216		rid = PCIR_BAR(3);
3217		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3218		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3219		    0x376, 1, 0);
3220	}
3221	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3222	    prefetchmask & (1 << 4));
3223	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3224	    prefetchmask & (1 << 5));
3225}
3226
/*
 * Determine the IRQ for a device's legacy INTx interrupt and record it
 * as the rid 0 SYS_RES_IRQ entry on the device's resource list.  The
 * IRQ can come from a loader tunable, from the parent bus's interrupt
 * router, or from the intline config register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values; only 1..254 are accepted. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3274
3275/* Perform early OHCI takeover from SMM. */
3276static void
3277ohci_early_takeover(device_t self)
3278{
3279	struct resource *res;
3280	uint32_t ctl;
3281	int rid;
3282	int i;
3283
3284	rid = PCIR_BAR(0);
3285	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3286	if (res == NULL)
3287		return;
3288
3289	ctl = bus_read_4(res, OHCI_CONTROL);
3290	if (ctl & OHCI_IR) {
3291		if (bootverbose)
3292			printf("ohci early: "
3293			    "SMM active, request owner change\n");
3294		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3295		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3296			DELAY(1000);
3297			ctl = bus_read_4(res, OHCI_CONTROL);
3298		}
3299		if (ctl & OHCI_IR) {
3300			if (bootverbose)
3301				printf("ohci early: "
3302				    "SMM does not respond, resetting\n");
3303			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3304		}
3305		/* Disable interrupts */
3306		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3307	}
3308
3309	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3310}
3311
3312/* Perform early UHCI takeover from SMM. */
3313static void
3314uhci_early_takeover(device_t self)
3315{
3316	struct resource *res;
3317	int rid;
3318
3319	/*
3320	 * Set the PIRQD enable bit and switch off all the others. We don't
3321	 * want legacy support to interfere with us XXX Does this also mean
3322	 * that the BIOS won't touch the keyboard anymore if it is connected
3323	 * to the ports of the root hub?
3324	 */
3325	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3326
3327	/* Disable interrupts */
3328	rid = PCI_UHCI_BASE_REG;
3329	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3330	if (res != NULL) {
3331		bus_write_2(res, UHCI_INTR, 0);
3332		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3333	}
3334}
3335
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Walk the extended capability list looking for LEGSUP. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A clear BIOS semaphore means the BIOS does not own it. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to release its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3391
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones start value makes XHCI_XECP_NEXT(eec) non-zero initially. */
	eec = -1;

	/*
	 * Synchronise with the BIOS if it owns the controller.
	 *
	 * NOTE(review): eecp is only 8 bits wide, so an extended
	 * capability located past byte offset 0xff would be truncated --
	 * confirm against the xHCI register layout.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		/* Only the USB Legacy Support capability is of interest. */
		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* A clear BIOS semaphore means the BIOS does not own it. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* Wait a maximum of 5 seconds for the BIOS to yield. */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS, presumably to post the write -- confirm. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3453
3454#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range decoded by a PCI-PCI or
 * CardBus bridge from our parent so child busses keep their
 * firmware-assigned numbering.  If the range is invalid or cannot be
 * reserved, the secbus/subbus registers are cleared so a fresh range
 * is allocated later via pci_alloc_secbus().
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	rman_res_t start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge-type headers carry secondary/subordinate registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the range from register 0x41 when it is not 0xff. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Apply only on Compal-built "08A0" planars (via SMBIOS). */
		if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = kern_getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3559
/*
 * Allocate the rid 0 PCI_RES_BUS resource describing the bus numbers
 * behind a bridge.  If no range was reserved at probe time, lazily
 * reserve one here and program the bridge's secbus/subbus registers
 * to decode it.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only bridge-type headers carry secondary/subordinate registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* A bridge's bus range is always rid 0. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve a fresh range and program the bridge to decode it. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %ju bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %ju bus%s at %ju\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3610#endif
3611
3612static int
3613pci_ea_bei_to_rid(device_t dev, int bei)
3614{
3615#ifdef PCI_IOV
3616	struct pci_devinfo *dinfo;
3617	int iov_pos;
3618	struct pcicfg_iov *iov;
3619
3620	dinfo = device_get_ivars(dev);
3621	iov = dinfo->cfg.iov;
3622	if (iov != NULL)
3623		iov_pos = iov->iov_pos;
3624	else
3625		iov_pos = 0;
3626#endif
3627
3628	/* Check if matches BAR */
3629	if ((bei >= PCIM_EA_BEI_BAR_0) &&
3630	    (bei <= PCIM_EA_BEI_BAR_5))
3631		return (PCIR_BAR(bei));
3632
3633	/* Check ROM */
3634	if (bei == PCIM_EA_BEI_ROM)
3635		return (PCIR_BIOS);
3636
3637#ifdef PCI_IOV
3638	/* Check if matches VF_BAR */
3639	if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
3640	    (bei <= PCIM_EA_BEI_VF_BAR_5))
3641		return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
3642		    iov_pos);
3643#endif
3644
3645	return (-1);
3646}
3647
3648int
3649pci_ea_is_enabled(device_t dev, int rid)
3650{
3651	struct pci_ea_entry *ea;
3652	struct pci_devinfo *dinfo;
3653
3654	dinfo = device_get_ivars(dev);
3655
3656	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
3657		if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
3658			return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
3659	}
3660
3661	return (0);
3662}
3663
/*
 * Reserve resources described by the device's Enhanced Allocation (EA)
 * capability.  When alloc_iov is non-zero only SR-IOV VF BAR entries
 * are considered; otherwise only regular BAR/ROM entries are.  Entries
 * that cannot be reserved are disabled in config space so the legacy
 * BAR path can try to allocate them instead.
 */
void
pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
{
	struct pci_ea_entry *ea;
	struct pci_devinfo *dinfo;
	pci_addr_t start, end, count;
	struct resource_list *rl;
	int type, flags, rid;
	struct resource *res;
	uint32_t tmp;
#ifdef PCI_IOV
	struct pcicfg_iov *iov;
#endif

	dinfo = device_get_ivars(dev);
	rl = &dinfo->resources;
	flags = 0;

#ifdef PCI_IOV
	iov = dinfo->cfg.iov;
#endif

	/* No EA capability was found for this device. */
	if (dinfo->cfg.ea.ea_location == 0)
		return;

	STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {

		/*
		 * TODO: Ignore EA-BAR if is not enabled.
		 *   Currently the EA implementation supports
		 *   only situation, where EA structure contains
		 *   predefined entries. In case they are not enabled
		 *   leave them unallocated and proceed with
		 *   a legacy-BAR mechanism.
		 */
		if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
			continue;

		/* Map the entry's properties field to a resource type. */
		switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
		case PCIM_EA_P_MEM_PREFETCH:
		case PCIM_EA_P_VF_MEM_PREFETCH:
			flags = RF_PREFETCHABLE;
			/* FALLTHROUGH */
		case PCIM_EA_P_VF_MEM:
		case PCIM_EA_P_MEM:
			type = SYS_RES_MEMORY;
			break;
		case PCIM_EA_P_IO:
			type = SYS_RES_IOPORT;
			break;
		default:
			continue;
		}

		if (alloc_iov != 0) {
#ifdef PCI_IOV
			/* Allocating IOV, confirm BEI matches */
			if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
				continue;
#else
			continue;
#endif
		} else {
			/* Allocating BAR, confirm BEI matches */
			if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
			    (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
			    (ea->eae_bei != PCIM_EA_BEI_ROM))
				continue;
		}

		rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
		if (rid < 0)
			continue;

		/* Skip resources already allocated by EA */
		if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
		    (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
			continue;

		start = ea->eae_base;
		count = ea->eae_max_offset + 1;
#ifdef PCI_IOV
		/* VF BARs span one window per virtual function. */
		if (iov != NULL)
			count = count * iov->iov_num_vfs;
#endif
		end = start + count - 1;
		if (count == 0)
			continue;

		resource_list_add(rl, type, rid, start, end, count);
		res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
		    flags);
		if (res == NULL) {
			resource_list_delete(rl, type, rid);

			/*
			 * Failed to allocate using EA, disable entry.
			 * Another attempt to allocation will be performed
			 * further, but this time using legacy BAR registers
			 */
			tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
			tmp &= ~PCIM_EA_ENABLE;
			pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);

			/*
			 * Disabling entry might fail in case it is hardwired.
			 * Read flags again to match current status.
			 */
			ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);

			continue;
		}

		/* As per specification, fill BAR with zeros */
		pci_write_config(dev, rid, 0, 4);
	}
}
3782
/*
 * Populate a device's resource list: EA-described resources first,
 * then BARs (with ATA legacy handling and quirks), the legacy INTx
 * interrupt, USB SMM takeover, and bridge secondary bus ranges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* Allocate resources using Enhanced Allocation */
	pci_add_resources_ea(bus, dev, 0);

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/* Skip resources already managed by EA */
			if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
			    (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
			    pci_ea_is_enabled(dev, PCIR_BAR(i))) {
				i++;
				continue;
			}

			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * Advance by the number of BAR registers consumed
			 * (presumably two for a 64-bit BAR -- see
			 * pci_add_map()'s barlen return).
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from SMM firmware early on. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3875
3876static struct pci_devinfo *
3877pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3878    int slot, int func)
3879{
3880	struct pci_devinfo *dinfo;
3881
3882	dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
3883	if (dinfo != NULL)
3884		pci_add_child(dev, dinfo);
3885
3886	return (dinfo);
3887}
3888
3889void
3890pci_add_children(device_t dev, int domain, int busno)
3891{
3892#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3893	device_t pcib = device_get_parent(dev);
3894	struct pci_devinfo *dinfo;
3895	int maxslots;
3896	int s, f, pcifunchigh;
3897	uint8_t hdrtype;
3898	int first_func;
3899
3900	/*
3901	 * Try to detect a device at slot 0, function 0.  If it exists, try to
3902	 * enable ARI.  We must enable ARI before detecting the rest of the
3903	 * functions on this bus as ARI changes the set of slots and functions
3904	 * that are legal on this bus.
3905	 */
3906	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
3907	if (dinfo != NULL && pci_enable_ari)
3908		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
3909
3910	/*
3911	 * Start looking for new devices on slot 0 at function 1 because we
3912	 * just identified the device at slot 0, function 0.
3913	 */
3914	first_func = 1;
3915
3916	maxslots = PCIB_MAXSLOTS(pcib);
3917	for (s = 0; s <= maxslots; s++, first_func = 0) {
3918		pcifunchigh = 0;
3919		f = 0;
3920		DELAY(1);
3921		hdrtype = REG(PCIR_HDRTYPE, 1);
3922		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3923			continue;
3924		if (hdrtype & PCIM_MFDEV)
3925			pcifunchigh = PCIB_MAXFUNCS(pcib);
3926		for (f = first_func; f <= pcifunchigh; f++)
3927			pci_identify_function(pcib, dev, domain, busno, s, f);
3928	}
3929#undef REG
3930}
3931
3932int
3933pci_rescan_method(device_t dev)
3934{
3935#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3936	device_t pcib = device_get_parent(dev);
3937	struct pci_softc *sc;
3938	device_t child, *devlist, *unchanged;
3939	int devcount, error, i, j, maxslots, oldcount;
3940	int busno, domain, s, f, pcifunchigh;
3941	uint8_t hdrtype;
3942
3943	/* No need to check for ARI on a rescan. */
3944	error = device_get_children(dev, &devlist, &devcount);
3945	if (error)
3946		return (error);
3947	if (devcount != 0) {
3948		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
3949		    M_NOWAIT | M_ZERO);
3950		if (unchanged == NULL) {
3951			free(devlist, M_TEMP);
3952			return (ENOMEM);
3953		}
3954	} else
3955		unchanged = NULL;
3956
3957	sc = device_get_softc(dev);
3958	domain = pcib_get_domain(dev);
3959	busno = pcib_get_bus(dev);
3960	maxslots = PCIB_MAXSLOTS(pcib);
3961	for (s = 0; s <= maxslots; s++) {
3962		/* If function 0 is not present, skip to the next slot. */
3963		f = 0;
3964		if (REG(PCIR_VENDOR, 2) == 0xffff)
3965			continue;
3966		pcifunchigh = 0;
3967		hdrtype = REG(PCIR_HDRTYPE, 1);
3968		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3969			continue;
3970		if (hdrtype & PCIM_MFDEV)
3971			pcifunchigh = PCIB_MAXFUNCS(pcib);
3972		for (f = 0; f <= pcifunchigh; f++) {
3973			if (REG(PCIR_VENDOR, 2) == 0xfff)
3974				continue;
3975
3976			/*
3977			 * Found a valid function.  Check if a
3978			 * device_t for this device already exists.
3979			 */
3980			for (i = 0; i < devcount; i++) {
3981				child = devlist[i];
3982				if (child == NULL)
3983					continue;
3984				if (pci_get_slot(child) == s &&
3985				    pci_get_function(child) == f) {
3986					unchanged[i] = child;
3987					goto next_func;
3988				}
3989			}
3990
3991			pci_identify_function(pcib, dev, domain, busno, s, f);
3992		next_func:;
3993		}
3994	}
3995
3996	/* Remove devices that are no longer present. */
3997	for (i = 0; i < devcount; i++) {
3998		if (unchanged[i] != NULL)
3999			continue;
4000		device_delete_child(dev, devlist[i]);
4001	}
4002
4003	free(devlist, M_TEMP);
4004	oldcount = devcount;
4005
4006	/* Try to attach the devices just added. */
4007	error = device_get_children(dev, &devlist, &devcount);
4008	if (error) {
4009		free(unchanged, M_TEMP);
4010		return (error);
4011	}
4012
4013	for (i = 0; i < devcount; i++) {
4014		for (j = 0; j < oldcount; j++) {
4015			if (devlist[i] == unchanged[j])
4016				goto next_device;
4017		}
4018
4019		device_probe_and_attach(devlist[i]);
4020	next_device:;
4021	}
4022
4023	free(unchanged, M_TEMP);
4024	free(devlist, M_TEMP);
4025	return (0);
4026#undef REG
4027}
4028
4029#ifdef PCI_IOV
4030device_t
4031pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
4032    uint16_t did)
4033{
4034	struct pci_devinfo *pf_dinfo, *vf_dinfo;
4035	device_t pcib;
4036	int busno, slot, func;
4037
4038	pf_dinfo = device_get_ivars(pf);
4039
4040	pcib = device_get_parent(bus);
4041
4042	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
4043
4044	vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
4045	    slot, func, vid, did);
4046
4047	vf_dinfo->cfg.flags |= PCICFG_VF;
4048	pci_add_child(bus, vf_dinfo);
4049
4050	return (vf_dinfo->cfg.dev);
4051}
4052
/*
 * Default bus method for creating an SR-IOV virtual function child;
 * simply delegates to pci_add_iov_child().
 */
device_t
pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
    uint16_t vid, uint16_t did)
{

	return (pci_add_iov_child(bus, pf, rid, vid, did));
}
4060#endif
4061
/*
 * Create a device_t for a newly enumerated PCI function: attach the
 * devinfo as ivars, initialize its resource list, save/restore config
 * state, print verbose probe info, reserve the resources its BARs
 * decode, and notify the bus that a child was added.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Save config state, then immediately restore it -- presumably
	 * this normalizes the saved snapshot and the device's state;
	 * TODO confirm against pci_cfg_save()/pci_cfg_restore().
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
4074
/*
 * Default PCI_CHILD_ADDED bus method: intentionally empty.  Subclassed
 * bus drivers may override this to react to newly added children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
4080
4081static int
4082pci_probe(device_t dev)
4083{
4084
4085	device_set_desc(dev, "PCI bus");
4086
4087	/* Allow other subclasses to override this driver. */
4088	return (BUS_PROBE_GENERIC);
4089}
4090
/*
 * Core attach work shared by pci_attach() and subclassed bus drivers:
 * optionally claims this bus's bus number from the parent bridge and
 * selects the DMA tag handed out to child devices.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Claim our own bus number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a DMA tag enforcing PCI_DMA_BOUNDARY, but only when
	 * the grandparent device is not itself a PCI bus (i.e. this is
	 * a top-level bus); nested busses fall through and inherit the
	 * parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
4137
/*
 * Attach the generic PCI bus: perform the common setup, enumerate all
 * devices present on the bus, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno);
	return (bus_generic_attach(dev));
}
4158
/*
 * Detach the PCI bus: detach and delete all children, releasing the
 * bus number resource when bus-number management is compiled in.
 */
static int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
	struct pci_softc *sc;
#endif
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
#ifdef PCI_RES_BUS
	/* Release the bus number claimed in pci_attach_common(). */
	sc = device_get_softc(dev);
	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
	if (error)
		return (error);
#endif
	return (device_delete_children(dev));
}
4178
4179static void
4180pci_set_power_child(device_t dev, device_t child, int state)
4181{
4182	device_t pcib;
4183	int dstate;
4184
4185	/*
4186	 * Set the device to the given state.  If the firmware suggests
4187	 * a different power state, use it instead.  If power management
4188	 * is not present, the firmware is responsible for managing
4189	 * device power.  Skip children who aren't attached since they
4190	 * are handled separately.
4191	 */
4192	pcib = device_get_parent(dev);
4193	dstate = state;
4194	if (device_is_attached(child) &&
4195	    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
4196		pci_set_powerstate(child, dstate);
4197}
4198
4199int
4200pci_suspend_child(device_t dev, device_t child)
4201{
4202	struct pci_devinfo *dinfo;
4203	int error;
4204
4205	dinfo = device_get_ivars(child);
4206
4207	/*
4208	 * Save the PCI configuration space for the child and set the
4209	 * device in the appropriate power state for this sleep state.
4210	 */
4211	pci_cfg_save(child, dinfo, 0);
4212
4213	/* Suspend devices before potentially powering them down. */
4214	error = bus_generic_suspend_child(dev, child);
4215
4216	if (error)
4217		return (error);
4218
4219	if (pci_do_power_suspend)
4220		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
4221
4222	return (0);
4223}
4224
4225int
4226pci_resume_child(device_t dev, device_t child)
4227{
4228	struct pci_devinfo *dinfo;
4229
4230	if (pci_do_power_resume)
4231		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
4232
4233	dinfo = device_get_ivars(child);
4234	pci_cfg_restore(child, dinfo);
4235	if (!device_is_attached(child))
4236		pci_cfg_save(child, dinfo, 1);
4237
4238	bus_generic_resume_child(dev, child);
4239
4240	return (0);
4241}
4242
4243int
4244pci_resume(device_t dev)
4245{
4246	device_t child, *devlist;
4247	int error, i, numdevs;
4248
4249	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
4250		return (error);
4251
4252	/*
4253	 * Resume critical devices first, then everything else later.
4254	 */
4255	for (i = 0; i < numdevs; i++) {
4256		child = devlist[i];
4257		switch (pci_get_class(child)) {
4258		case PCIC_DISPLAY:
4259		case PCIC_MEMORY:
4260		case PCIC_BRIDGE:
4261		case PCIC_BASEPERIPH:
4262			BUS_RESUME_CHILD(dev, child);
4263			break;
4264		}
4265	}
4266	for (i = 0; i < numdevs; i++) {
4267		child = devlist[i];
4268		switch (pci_get_class(child)) {
4269		case PCIC_DISPLAY:
4270		case PCIC_MEMORY:
4271		case PCIC_BRIDGE:
4272		case PCIC_BASEPERIPH:
4273			break;
4274		default:
4275			BUS_RESUME_CHILD(dev, child);
4276		}
4277	}
4278	free(devlist, M_TEMP);
4279	return (0);
4280}
4281
/*
 * Locate the PCI vendor/device description database, if the loader
 * preloaded one (type "pci_vendor_data"), and publish its address and
 * size in pci_vendordata/pci_vendordata_size for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database: guarantee the parser
			 * always finds a trailing newline.
			 * NOTE(review): this stores at offset 'size',
			 * i.e. one byte past the reported length --
			 * assumes the preload area has a spare byte;
			 * confirm against the loader's allocation.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
4301
4302void
4303pci_driver_added(device_t dev, driver_t *driver)
4304{
4305	int numdevs;
4306	device_t *devlist;
4307	device_t child;
4308	struct pci_devinfo *dinfo;
4309	int i;
4310
4311	if (bootverbose)
4312		device_printf(dev, "driver added\n");
4313	DEVICE_IDENTIFY(driver, dev);
4314	if (device_get_children(dev, &devlist, &numdevs) != 0)
4315		return;
4316	for (i = 0; i < numdevs; i++) {
4317		child = devlist[i];
4318		if (device_get_state(child) != DS_NOTPRESENT)
4319			continue;
4320		dinfo = device_get_ivars(child);
4321		pci_print_verbose(dinfo);
4322		if (bootverbose)
4323			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
4324		pci_cfg_restore(child, dinfo);
4325		if (device_probe_and_attach(child) != 0)
4326			pci_child_detached(dev, child);
4327	}
4328	free(devlist, M_TEMP);
4329}
4330
/*
 * Bus method: install an interrupt handler for a child device.
 *
 * After the generic setup succeeds, program the hardware side for a
 * direct child: enable INTx for a legacy interrupt (rid 0), or have the
 * parent bridge map the MSI/MSI-X vector and program the message
 * address/data the first time a handler is attached to that vector.
 * If the mapping fails, the freshly installed handler is torn down
 * again and the error is returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* rid 0 is the legacy INTx line; higher rids are MSI/MSI-X. */
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector lazily on first handler attach. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* error is still 0 on the fall-through (success) path. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4430
4431int
4432pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
4433    void *cookie)
4434{
4435	struct msix_table_entry *mte;
4436	struct resource_list_entry *rle;
4437	struct pci_devinfo *dinfo;
4438	int error, rid;
4439
4440	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
4441		return (EINVAL);
4442
4443	/* If this isn't a direct child, just bail out */
4444	if (device_get_parent(child) != dev)
4445		return(bus_generic_teardown_intr(dev, child, irq, cookie));
4446
4447	rid = rman_get_rid(irq);
4448	if (rid == 0) {
4449		/* Mask INTx */
4450		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
4451	} else {
4452		/*
4453		 * Check to see if the interrupt is MSI or MSI-X.  If so,
4454		 * decrement the appropriate handlers count and mask the
4455		 * MSI-X message, or disable MSI messages if the count
4456		 * drops to 0.
4457		 */
4458		dinfo = device_get_ivars(child);
4459		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
4460		if (rle->res != irq)
4461			return (EINVAL);
4462		if (dinfo->cfg.msi.msi_alloc > 0) {
4463			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
4464			    ("MSI-X index too high"));
4465			if (dinfo->cfg.msi.msi_handlers == 0)
4466				return (EINVAL);
4467			dinfo->cfg.msi.msi_handlers--;
4468			if (dinfo->cfg.msi.msi_handlers == 0)
4469				pci_disable_msi(child);
4470		} else {
4471			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
4472			    ("No MSI or MSI-X interrupts allocated"));
4473			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
4474			    ("MSI-X index too high"));
4475			mte = &dinfo->cfg.msix.msix_table[rid - 1];
4476			if (mte->mte_handlers == 0)
4477				return (EINVAL);
4478			mte->mte_handlers--;
4479			if (mte->mte_handlers == 0)
4480				pci_mask_msix(child, rid - 1);
4481		}
4482	}
4483	error = bus_generic_teardown_intr(dev, child, irq, cookie);
4484	if (rid > 0)
4485		KASSERT(error == 0,
4486		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
4487	return (error);
4488}
4489
4490int
4491pci_print_child(device_t dev, device_t child)
4492{
4493	struct pci_devinfo *dinfo;
4494	struct resource_list *rl;
4495	int retval = 0;
4496
4497	dinfo = device_get_ivars(child);
4498	rl = &dinfo->resources;
4499
4500	retval += bus_print_child_header(dev, child);
4501
4502	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
4503	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
4504	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
4505	if (device_get_flags(dev))
4506		retval += printf(" flags %#x", device_get_flags(dev));
4507
4508	retval += printf(" at device %d.%d", pci_get_slot(child),
4509	    pci_get_function(child));
4510
4511	retval += bus_print_child_domain(dev, child);
4512	retval += bus_print_child_footer(dev, child);
4513
4514	return (retval);
4515}
4516
/*
 * Class/subclass description table consulted by pci_probe_nomatch()
 * when no driver attaches and the vendor database has no entry for the
 * device.  A subclass of -1 is the fallback description for the entire
 * class.  The table is terminated by an entry with a NULL desc.
 */
static const struct
{
	int		class;		/* PCI base class (PCIC_*) */
	int		subclass;	/* PCI subclass (PCIS_*); -1 = any */
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4611
4612void
4613pci_probe_nomatch(device_t dev, device_t child)
4614{
4615	int i, report;
4616	const char *cp, *scp;
4617	char *device;
4618
4619	/*
4620	 * Look for a listing for this device in a loaded device database.
4621	 */
4622	report = 1;
4623	if ((device = pci_describe_device(child)) != NULL) {
4624		device_printf(dev, "<%s>", device);
4625		free(device, M_DEVBUF);
4626	} else {
4627		/*
4628		 * Scan the class/subclass descriptions for a general
4629		 * description.
4630		 */
4631		cp = "unknown";
4632		scp = NULL;
4633		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4634			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4635				if (pci_nomatch_tab[i].subclass == -1) {
4636					cp = pci_nomatch_tab[i].desc;
4637					report = pci_nomatch_tab[i].report;
4638				} else if (pci_nomatch_tab[i].subclass ==
4639				    pci_get_subclass(child)) {
4640					scp = pci_nomatch_tab[i].desc;
4641					report = pci_nomatch_tab[i].report;
4642				}
4643			}
4644		}
4645		if (report || bootverbose) {
4646			device_printf(dev, "<%s%s%s>",
4647			    cp ? cp : "",
4648			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4649			    scp ? scp : "");
4650		}
4651	}
4652	if (report || bootverbose) {
4653		printf(" at device %d.%d (no driver attached)\n",
4654		    pci_get_slot(child), pci_get_function(child));
4655	}
4656	pci_cfg_save(child, device_get_ivars(child), 1);
4657}
4658
4659void
4660pci_child_detached(device_t dev, device_t child)
4661{
4662	struct pci_devinfo *dinfo;
4663	struct resource_list *rl;
4664
4665	dinfo = device_get_ivars(child);
4666	rl = &dinfo->resources;
4667
4668	/*
4669	 * Have to deallocate IRQs before releasing any MSI messages and
4670	 * have to release MSI messages before deallocating any memory
4671	 * BARs.
4672	 */
4673	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
4674		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
4675	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
4676		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
4677		(void)pci_release_msi(child);
4678	}
4679	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
4680		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
4681	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
4682		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
4683#ifdef PCI_RES_BUS
4684	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
4685		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
4686#endif
4687
4688	pci_cfg_save(child, dinfo, 1);
4689}
4690
4691/*
4692 * Parse the PCI device database, if loaded, and return a pointer to a
4693 * description of the device.
4694 *
4695 * The database is flat text formatted as follows:
4696 *
4697 * Any line not in a valid format is ignored.
4698 * Lines are terminated with newline '\n' characters.
4699 *
4700 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4701 * the vendor name.
4702 *
4703 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4704 * - devices cannot be listed without a corresponding VENDOR line.
4705 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4706 * another TAB, then the device name.
4707 */
4708
4709/*
4710 * Assuming (ptr) points to the beginning of a line in the database,
4711 * return the vendor or device and description of the next entry.
4712 * The value of (vendor) or (device) inappropriate for the entry type
4713 * is set to -1.  Returns nonzero at the end of the database.
4714 *
 * Note that this parser is not fully robust in the face of corrupt
 * data; we guard against that by appending a newline to the end of
 * the database when we initialise it.
4718 */
4719static int
4720pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4721{
4722	char	*cp = *ptr;
4723	int	left;
4724
4725	*device = -1;
4726	*vendor = -1;
4727	**desc = '\0';
4728	for (;;) {
4729		left = pci_vendordata_size - (cp - pci_vendordata);
4730		if (left <= 0) {
4731			*ptr = cp;
4732			return(1);
4733		}
4734
4735		/* vendor entry? */
4736		if (*cp != '\t' &&
4737		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4738			break;
4739		/* device entry? */
4740		if (*cp == '\t' &&
4741		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4742			break;
4743
4744		/* skip to next line */
4745		while (*cp != '\n' && left > 0) {
4746			cp++;
4747			left--;
4748		}
4749		if (*cp == '\n') {
4750			cp++;
4751			left--;
4752		}
4753	}
4754	/* skip to next line */
4755	while (*cp != '\n' && left > 0) {
4756		cp++;
4757		left--;
4758	}
4759	if (*cp == '\n' && left > 0)
4760		cp++;
4761	*ptr = cp;
4762	return(0);
4763}
4764
/*
 * Build a malloc'ed (M_DEVBUF) "vendor, device" description string for
 * 'dev' from the preloaded vendor database.  Returns NULL if no
 * database is loaded, the vendor is not listed, or allocation fails.
 * The caller is responsible for freeing the returned string.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor name. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* 80-byte scratch buffer for the device name. */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			/* End of database with no device match. */
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Hit the next vendor's section: stop looking. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the numeric device ID if no name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+3" covers the ", " separator plus the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4817
/*
 * Bus method: read an instance variable of a PCI child device.
 *
 * Copies the requested field from the child's cached config registers
 * into *result.  Returns 0 on success, EINVAL when the ivar does not
 * apply to this device (ETHADDR always; MINGNT/MAXLAT on non-type-0
 * headers), or ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* MIN_GNT only exists in type-0 (normal) headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* MAX_LAT only exists in type-0 (normal) headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4908
4909int
4910pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4911{
4912	struct pci_devinfo *dinfo;
4913
4914	dinfo = device_get_ivars(child);
4915
4916	switch (which) {
4917	case PCI_IVAR_INTPIN:
4918		dinfo->cfg.intpin = value;
4919		return (0);
4920	case PCI_IVAR_ETHADDR:
4921	case PCI_IVAR_SUBVENDOR:
4922	case PCI_IVAR_SUBDEVICE:
4923	case PCI_IVAR_VENDOR:
4924	case PCI_IVAR_DEVICE:
4925	case PCI_IVAR_DEVID:
4926	case PCI_IVAR_CLASS:
4927	case PCI_IVAR_SUBCLASS:
4928	case PCI_IVAR_PROGIF:
4929	case PCI_IVAR_REVID:
4930	case PCI_IVAR_IRQ:
4931	case PCI_IVAR_DOMAIN:
4932	case PCI_IVAR_BUS:
4933	case PCI_IVAR_SLOT:
4934	case PCI_IVAR_FUNCTION:
4935		return (EINVAL);	/* disallow for now */
4936
4937	default:
4938		return (ENOENT);
4939	}
4940}
4941
4942#include "opt_ddb.h"
4943#ifdef DDB
4944#include <ddb/ddb.h>
4945#include <sys/cons.h>
4946
4947/*
4948 * List resources based on pci map registers, used for within ddb
4949 */
4950
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (driver name/unit or a "none"
 * counter, selector, class, subsystem, chip ID, revision, header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 * (note: 'error' is never set nonzero below, so only the
	 * list end, pci_numdevs, or the pager stop the loop)
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* none_count only advances for devices with no driver */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4990#endif /* DDB */
4991
/*
 * Lazily reserve the resource behind a BAR the first time a child asks
 * for it: size the BAR (or reuse a previously recorded failed one),
 * sanity-check the requested type against what the BAR decodes, add a
 * resource list entry sized to the real BAR size, reserve it, and
 * program the BAR with the address that was allocated.  Returns the
 * reserved (inactive) resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so a failed reserve can be retried. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts the BAR's decoding. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually obtained. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
5095
/*
 * Common back end for PCI resource allocation: perform any lazy setup
 * required by the resource type (routing a legacy interrupt, passing
 * bridge-window requests up the tree, or reserving a BAR the first
 * time it is requested), then satisfy the request from the child's
 * resource list.  'num' is the number of contiguous BAR-sized units to
 * reserve.  Returns the allocated resource or NULL on failure.
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out the (possibly just reserved) resource list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
5171
/*
 * Bus method: allocate a resource for a child device.
 *
 * Requests from grandchildren are passed straight up the tree.  With
 * PCI_IOV, SR-IOV virtual functions get special handling: they have no
 * I/O BARs at all, and their memory BARs come out of the VF memory
 * window.  Everything else goes through pci_alloc_multi_resource()
 * with a unit count of 1.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef PCI_IOV
	struct pci_devinfo *dinfo;
#endif

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

#ifdef PCI_IOV
	dinfo = device_get_ivars(child);
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (NULL);
		case SYS_RES_MEMORY:
			return (pci_vf_alloc_mem_resource(dev, child, rid,
			    start, end, count, flags));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
	    count, 1, flags));
}
5203
/*
 * Bus method: release a resource previously allocated to 'child'.
 * Mirrors the special cases in pci_alloc_resource(): SR-IOV VF
 * memory and PCI-PCI bridge window registers are not ordinary BARs.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	/* Requests for grandchildren are passed up to our own parent. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (dinfo->cfg.flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			/* VF memory comes from the PF's VF BARs. */
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	/* Ordinary BAR and IRQ resources live on the child's list. */
	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
5254
5255int
5256pci_activate_resource(device_t dev, device_t child, int type, int rid,
5257    struct resource *r)
5258{
5259	struct pci_devinfo *dinfo;
5260	int error;
5261
5262	error = bus_generic_activate_resource(dev, child, type, rid, r);
5263	if (error)
5264		return (error);
5265
5266	/* Enable decoding in the command register when activating BARs. */
5267	if (device_get_parent(child) == dev) {
5268		/* Device ROMs need their decoding explicitly enabled. */
5269		dinfo = device_get_ivars(child);
5270		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5271			pci_write_bar(child, pci_find_bar(child, rid),
5272			    rman_get_start(r) | PCIM_BIOS_ENABLE);
5273		switch (type) {
5274		case SYS_RES_IOPORT:
5275		case SYS_RES_MEMORY:
5276			error = PCI_ENABLE_IO(dev, child, type);
5277			break;
5278		}
5279	}
5280	return (error);
5281}
5282
5283int
5284pci_deactivate_resource(device_t dev, device_t child, int type,
5285    int rid, struct resource *r)
5286{
5287	struct pci_devinfo *dinfo;
5288	int error;
5289
5290	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
5291	if (error)
5292		return (error);
5293
5294	/* Disable decoding for device ROMs. */
5295	if (device_get_parent(child) == dev) {
5296		dinfo = device_get_ivars(child);
5297		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
5298			pci_write_bar(child, pci_find_bar(child, rid),
5299			    rman_get_start(r));
5300	}
5301	return (0);
5302}
5303
5304void
5305pci_child_deleted(device_t dev, device_t child)
5306{
5307	struct resource_list_entry *rle;
5308	struct resource_list *rl;
5309	struct pci_devinfo *dinfo;
5310
5311	dinfo = device_get_ivars(child);
5312	rl = &dinfo->resources;
5313
5314	/* Turn off access to resources we're about to free */
5315	if (bus_child_present(child) != 0) {
5316		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
5317		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
5318
5319		pci_disable_busmaster(child);
5320	}
5321
5322	/* Free all allocated resources */
5323	STAILQ_FOREACH(rle, rl, link) {
5324		if (rle->res) {
5325			if (rman_get_flags(rle->res) & RF_ACTIVE ||
5326			    resource_list_busy(rl, rle->type, rle->rid)) {
5327				pci_printf(&dinfo->cfg,
5328				    "Resource still owned, oops. "
5329				    "(type=%d, rid=%d, addr=%lx)\n",
5330				    rle->type, rle->rid,
5331				    rman_get_start(rle->res));
5332				bus_release_resource(child, rle->type, rle->rid,
5333				    rle->res);
5334			}
5335			resource_list_unreserve(rl, dev, child, rle->type,
5336			    rle->rid);
5337		}
5338	}
5339	resource_list_free(rl);
5340
5341	pci_freecfg(dinfo);
5342}
5343
5344void
5345pci_delete_resource(device_t dev, device_t child, int type, int rid)
5346{
5347	struct pci_devinfo *dinfo;
5348	struct resource_list *rl;
5349	struct resource_list_entry *rle;
5350
5351	if (device_get_parent(child) != dev)
5352		return;
5353
5354	dinfo = device_get_ivars(child);
5355	rl = &dinfo->resources;
5356	rle = resource_list_find(rl, type, rid);
5357	if (rle == NULL)
5358		return;
5359
5360	if (rle->res) {
5361		if (rman_get_flags(rle->res) & RF_ACTIVE ||
5362		    resource_list_busy(rl, type, rid)) {
5363			device_printf(dev, "delete_resource: "
5364			    "Resource still owned by child, oops. "
5365			    "(type=%d, rid=%d, addr=%jx)\n",
5366			    type, rid, rman_get_start(rle->res));
5367			return;
5368		}
5369		resource_list_unreserve(rl, dev, child, type, rid);
5370	}
5371	resource_list_delete(rl, type, rid);
5372}
5373
5374struct resource_list *
5375pci_get_resource_list (device_t dev, device_t child)
5376{
5377	struct pci_devinfo *dinfo = device_get_ivars(child);
5378
5379	return (&dinfo->resources);
5380}
5381
5382bus_dma_tag_t
5383pci_get_dma_tag(device_t bus, device_t dev)
5384{
5385	struct pci_softc *sc = device_get_softc(bus);
5386
5387	return (sc->sc_dma_tag);
5388}
5389
/*
 * Bus method: read 'width' bytes from the child's config space at
 * 'reg', forwarding the access to the parent bridge driver.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* Device ID in the upper 16 bits. */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
5430
5431void
5432pci_write_config_method(device_t dev, device_t child, int reg,
5433    uint32_t val, int width)
5434{
5435	struct pci_devinfo *dinfo = device_get_ivars(child);
5436	pcicfgregs *cfg = &dinfo->cfg;
5437
5438	PCIB_WRITE_CONFIG(device_get_parent(dev),
5439	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5440}
5441
5442int
5443pci_child_location_str_method(device_t dev, device_t child, char *buf,
5444    size_t buflen)
5445{
5446
5447	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5448	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5449	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5450	return (0);
5451}
5452
5453int
5454pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5455    size_t buflen)
5456{
5457	struct pci_devinfo *dinfo;
5458	pcicfgregs *cfg;
5459
5460	dinfo = device_get_ivars(child);
5461	cfg = &dinfo->cfg;
5462	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5463	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5464	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5465	    cfg->progif);
5466	return (0);
5467}
5468
5469int
5470pci_assign_interrupt_method(device_t dev, device_t child)
5471{
5472	struct pci_devinfo *dinfo = device_get_ivars(child);
5473	pcicfgregs *cfg = &dinfo->cfg;
5474
5475	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5476	    cfg->intpin));
5477}
5478
/*
 * dev_lookup eventhandler: translate a pciconf-style selector string
 * ("pciD:B:S:F" or "pciB:S:F") into the matching device_t, if any.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	/* Another handler has already resolved the name. */
	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: D, B and S were parsed above. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift down and default the domain. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/*
	 * Range-check each component.  Functions above PCI_FUNCMAX
	 * are only accepted in slot 0 (ARI).
	 */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5527
/*
 * Module event handler: on load, initialize the global device queue,
 * create /dev/pci, load vendor data and register the dev_lookup hook;
 * on unload, tear those down again.  The cdev and eventhandler tag
 * are function-local statics so they survive between events.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		/* Registration may have failed; only deregister if set. */
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5554
/*
 * Write back the PCI Express capability registers saved by
 * pci_cfg_save_pcie().  Which registers exist depends on the
 * capability version and the port type, so the gating conditions
 * here mirror those used when saving.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every version and port type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	/* Link control: v2+, or v1 for ports and endpoints. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: v2+, or v1 root port / slot-bearing downstream. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register block only exists in capability version 2+. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5590
5591static void
5592pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5593{
5594	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5595	    dinfo->cfg.pcix.pcix_command,  2);
5596}
5597
/*
 * Restore the configuration registers previously cached by
 * pci_cfg_save(): power state first, then the writable header
 * registers, BARs, and the PCIe/PCI-X/MSI/MSI-X/SR-IOV state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Replay the cached writable header registers. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
	/* The remaining header registers depend on the header type. */
	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
		pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
		break;
	case PCIM_HDRTYPE_BRIDGE:
		pci_write_config(dev, PCIR_SECLAT_1,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_1,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_1,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_1,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_1,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	case PCIM_HDRTYPE_CARDBUS:
		pci_write_config(dev, PCIR_SECLAT_2,
		    dinfo->cfg.bridge.br_seclat, 1);
		pci_write_config(dev, PCIR_SUBBUS_2,
		    dinfo->cfg.bridge.br_subbus, 1);
		pci_write_config(dev, PCIR_SECBUS_2,
		    dinfo->cfg.bridge.br_secbus, 1);
		pci_write_config(dev, PCIR_PRIBUS_2,
		    dinfo->cfg.bridge.br_pribus, 1);
		pci_write_config(dev, PCIR_BRIDGECTL_2,
		    dinfo->cfg.bridge.br_control, 2);
		break;
	}
	/* Rewrite the BARs from their cached values. */
	pci_restore_bars(dev);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);

#ifdef PCI_IOV
	if (dinfo->cfg.iov != NULL)
		pci_iov_cfg_restore(dev, dinfo);
#endif
}
5670
/*
 * Snapshot the PCI Express capability control registers.  The set of
 * registers that exists depends on the capability version and the
 * port type recorded in the PCIe flags register; the same conditions
 * gate the write-back in pci_cfg_restore_pcie().
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control is present for every version and port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	/* Link control: v2+, or v1 for ports and endpoints. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: v2+, or v1 root port / slot-bearing downstream. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	/* Root control: root ports and root complex event collectors. */
	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register block only exists in capability version 2+. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5708
5709static void
5710pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5711{
5712	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5713	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5714}
5715
5716void
5717pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5718{
5719	uint32_t cls;
5720	int ps;
5721
5722	/*
5723	 * Some drivers apparently write to these registers w/o updating our
5724	 * cached copy.  No harm happens if we update the copy, so do so here
5725	 * so we can restore them.  The COMMAND register is modified by the
5726	 * bus w/o updating the cache.  This should represent the normally
5727	 * writable portion of the 'defined' part of type 0/1/2 headers.
5728	 */
5729	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5730	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5731	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5732	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5733	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5734	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5735	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5736	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5737	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5738	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5739	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5740	switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
5741	case PCIM_HDRTYPE_NORMAL:
5742		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5743		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5744		dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5745		dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5746		break;
5747	case PCIM_HDRTYPE_BRIDGE:
5748		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5749		    PCIR_SECLAT_1, 1);
5750		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5751		    PCIR_SUBBUS_1, 1);
5752		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5753		    PCIR_SECBUS_1, 1);
5754		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5755		    PCIR_PRIBUS_1, 1);
5756		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5757		    PCIR_BRIDGECTL_1, 2);
5758		break;
5759	case PCIM_HDRTYPE_CARDBUS:
5760		dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
5761		    PCIR_SECLAT_2, 1);
5762		dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
5763		    PCIR_SUBBUS_2, 1);
5764		dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
5765		    PCIR_SECBUS_2, 1);
5766		dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
5767		    PCIR_PRIBUS_2, 1);
5768		dinfo->cfg.bridge.br_control = pci_read_config(dev,
5769		    PCIR_BRIDGECTL_2, 2);
5770		dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
5771		dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
5772		break;
5773	}
5774
5775	if (dinfo->cfg.pcie.pcie_location != 0)
5776		pci_cfg_save_pcie(dev, dinfo);
5777
5778	if (dinfo->cfg.pcix.pcix_location != 0)
5779		pci_cfg_save_pcix(dev, dinfo);
5780
5781#ifdef PCI_IOV
5782	if (dinfo->cfg.iov != NULL)
5783		pci_iov_cfg_save(dev, dinfo);
5784#endif
5785
5786	/*
5787	 * don't set the state for display devices, base peripherals and
5788	 * memory devices since bad things happen when they are powered down.
5789	 * We should (a) have drivers that can easily detach and (b) use
5790	 * generic drivers for these devices so that some device actually
5791	 * attaches.  We need to make sure that when we implement (a) we don't
5792	 * power the device down on a reattach.
5793	 */
5794	cls = pci_get_class(dev);
5795	if (!setstate)
5796		return;
5797	switch (pci_do_power_nodriver)
5798	{
5799		case 0:		/* NO powerdown at all */
5800			return;
5801		case 1:		/* Conservative about what to power down */
5802			if (cls == PCIC_STORAGE)
5803				return;
5804			/*FALLTHROUGH*/
5805		case 2:		/* Aggressive about what to power down */
5806			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5807			    cls == PCIC_BASEPERIPH)
5808				return;
5809			/*FALLTHROUGH*/
5810		case 3:		/* Power down everything */
5811			break;
5812	}
5813	/*
5814	 * PCI spec says we can only go into D3 state from D0 state.
5815	 * Transition from D[12] into D0 before going to D3 state.
5816	 */
5817	ps = pci_get_powerstate(dev);
5818	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5819		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5820	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5821		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5822}
5823
5824/* Wrapper APIs suitable for device driver use. */
5825void
5826pci_save_state(device_t dev)
5827{
5828	struct pci_devinfo *dinfo;
5829
5830	dinfo = device_get_ivars(dev);
5831	pci_cfg_save(dev, dinfo, 0);
5832}
5833
5834void
5835pci_restore_state(device_t dev)
5836{
5837	struct pci_devinfo *dinfo;
5838
5839	dinfo = device_get_ivars(dev);
5840	pci_cfg_restore(dev, dinfo);
5841}
5842
5843static int
5844pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
5845    uintptr_t *id)
5846{
5847
5848	return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
5849}
5850
5851/* Find the upstream port of a given PCI device in a root complex. */
5852device_t
5853pci_find_pcie_root_port(device_t dev)
5854{
5855	struct pci_devinfo *dinfo;
5856	devclass_t pci_class;
5857	device_t pcib, bus;
5858
5859	pci_class = devclass_find("pci");
5860	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5861	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5862
5863	/*
5864	 * Walk the bridge hierarchy until we find a PCI-e root
5865	 * port or a non-PCI device.
5866	 */
5867	for (;;) {
5868		bus = device_get_parent(dev);
5869		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5870		    device_get_nameunit(dev)));
5871
5872		pcib = device_get_parent(bus);
5873		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5874		    device_get_nameunit(bus)));
5875
5876		/*
5877		 * pcib's parent must be a PCI bus for this to be a
5878		 * PCI-PCI bridge.
5879		 */
5880		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5881			return (NULL);
5882
5883		dinfo = device_get_ivars(pcib);
5884		if (dinfo->cfg.pcie.pcie_location != 0 &&
5885		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5886			return (pcib);
5887
5888		dev = pcib;
5889	}
5890}
5891