pci.c revision 247632
1139826Simp/*-
253541Sshin * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
353541Sshin * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
453541Sshin * Copyright (c) 2000, BSDi
553541Sshin * All rights reserved.
653541Sshin *
753541Sshin * Redistribution and use in source and binary forms, with or without
853541Sshin * modification, are permitted provided that the following conditions
953541Sshin * are met:
1053541Sshin * 1. Redistributions of source code must retain the above copyright
1153541Sshin *    notice unmodified, this list of conditions, and the following
1253541Sshin *    disclaimer.
1353541Sshin * 2. Redistributions in binary form must reproduce the above copyright
1453541Sshin *    notice, this list of conditions and the following disclaimer in the
1553541Sshin *    documentation and/or other materials provided with the distribution.
1653541Sshin *
1753541Sshin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1853541Sshin * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1953541Sshin * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2053541Sshin * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2153541Sshin * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2253541Sshin * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2353541Sshin * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2453541Sshin * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2553541Sshin * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2653541Sshin * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2753541Sshin */
2853541Sshin
29174510Sobrien#include <sys/cdefs.h>
3053541Sshin__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 247632 2013-03-02 15:54:02Z marius $");
3153541Sshin
32139826Simp#include "opt_bus.h"
3353541Sshin
3453541Sshin#include <sys/param.h>
3553541Sshin#include <sys/systm.h>
3653541Sshin#include <sys/malloc.h>
3753541Sshin#include <sys/module.h>
3853541Sshin#include <sys/linker.h>
3953541Sshin#include <sys/fcntl.h>
4053541Sshin#include <sys/conf.h>
4153541Sshin#include <sys/kernel.h>
4253541Sshin#include <sys/queue.h>
4353541Sshin#include <sys/sysctl.h>
4453541Sshin#include <sys/endian.h>
4553541Sshin
4653541Sshin#include <vm/vm.h>
4753541Sshin#include <vm/pmap.h>
4853541Sshin#include <vm/vm_extern.h>
4953541Sshin
5053541Sshin#include <sys/bus.h>
5153541Sshin#include <machine/bus.h>
5253541Sshin#include <sys/rman.h>
5353541Sshin#include <machine/resource.h>
5453541Sshin#include <machine/stdarg.h>
5553541Sshin
5653541Sshin#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
5753541Sshin#include <machine/intr_machdep.h>
5853541Sshin#endif
5953541Sshin
6053541Sshin#include <sys/pciio.h>
6153541Sshin#include <dev/pci/pcireg.h>
6253541Sshin#include <dev/pci/pcivar.h>
63174510Sobrien#include <dev/pci/pci_private.h>
64174510Sobrien
65174510Sobrien#include <dev/usb/controller/xhcireg.h>
6678064Sume#include <dev/usb/controller/ehcireg.h>
6778064Sume#include <dev/usb/controller/ohcireg.h>
6855009Sshin#include <dev/usb/controller/uhcireg.h>
6953541Sshin
7053541Sshin#include "pcib_if.h"
7153541Sshin#include "pci_if.h"
7253541Sshin
7353541Sshin#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
7455679Sshin#define	PCI_DMA_BOUNDARY	0x100000000
7553541Sshin#endif
7653541Sshin
/*
 * True iff config register 'reg' is the expansion-ROM BAR for the header
 * type recorded in 'cfg': PCIR_BIOS on type-0 devices, PCIR_BIOS_1 on
 * PCI-PCI bridges (type 1).
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
8053541Sshin
81164033Srwatsonstatic pci_addr_t	pci_mapbase(uint64_t mapreg);
8253541Sshinstatic const char	*pci_maptype(uint64_t mapreg);
8353541Sshinstatic int		pci_mapsize(uint64_t testval);
8453541Sshinstatic int		pci_maprange(uint64_t mapreg);
8592767Sjeffstatic pci_addr_t	pci_rombase(uint64_t mapreg);
8653541Sshinstatic int		pci_romsize(uint64_t testval);
8753541Sshinstatic void		pci_fixancient(pcicfgregs *cfg);
8853541Sshinstatic int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
8953541Sshin
9053541Sshinstatic int		pci_porten(device_t dev);
9153541Sshinstatic int		pci_memen(device_t dev);
9253541Sshinstatic void		pci_assign_interrupt(device_t bus, device_t dev,
9353541Sshin			    int force_route);
9498102Shsustatic int		pci_add_map(device_t bus, device_t dev, int reg,
9562587Sitojun			    struct resource_list *rl, int force, int prefetch);
9655679Sshinstatic int		pci_probe(device_t dev);
97181887Sjulianstatic int		pci_attach(device_t dev);
9853541Sshinstatic void		pci_load_vendor_data(void);
9953541Sshinstatic int		pci_describe_parse_line(char **ptr, int *vendor,
10053541Sshin			    int *device, char **desc);
10153541Sshinstatic char		*pci_describe_device(device_t dev);
102148385Sumestatic bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
10353541Sshinstatic int		pci_modevent(module_t mod, int what, void *arg);
10453541Sshinstatic void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
10553541Sshin			    pcicfgregs *cfg);
10653541Sshinstatic void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107171259Sdelphijstatic int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108171259Sdelphij			    int reg, uint32_t *data);
10953541Sshin#if 0
11053541Sshinstatic int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
11153541Sshin			    int reg, uint32_t data);
11253541Sshin#endif
11353541Sshinstatic void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114188144Sjamiestatic void		pci_disable_msi(device_t dev);
11553541Sshinstatic void		pci_enable_msi(device_t dev, uint64_t address,
116132714Srwatson			    uint16_t data);
117178285Srwatsonstatic void		pci_enable_msix(device_t dev, u_int index,
118132714Srwatson			    uint64_t address, uint32_t data);
119194907Srwatsonstatic void		pci_mask_msix(device_t dev, u_int index);
12053541Sshinstatic void		pci_unmask_msix(device_t dev, u_int index);
12153541Sshinstatic int		pci_msi_blacklisted(void);
122120856Sumestatic void		pci_resume_msi(device_t dev);
12353541Sshinstatic void		pci_resume_msix(device_t dev);
124160024Sbzstatic int		pci_remap_intr_method(device_t bus, device_t dev,
125188148Sjamie			    u_int irq);
126188148Sjamie
/*
 * Newbus method table for the PCI bus driver: device-level lifecycle
 * methods, bus methods exported to child devices (resource and interrupt
 * management), and the PCI-specific kobj interface from pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* Register the driver so it attaches to every pcib (PCI bridge) instance. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
189170587Srwatson
/* In-memory copy of the pciids vendor database, loaded on demand. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;

/*
 * Per-device workaround descriptor, matched against a device's combined
 * vendor/device ID.  The meaning of arg1/arg2 depends on the quirk type
 * (e.g. arg1 is the config register offset for MAP_REG/UNMAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;
	int	arg2;
};
203171260Sdelphij
/* Quirk table; terminated by the all-zero sentinel entry. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }
};
26153541Sshin
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices (see pci_read_device()). */
struct devlist pci_devq;
/* Bumped on every list change so userland snapshots can detect staleness. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set while parsing capabilities if a PCIe/PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Early USB takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
329194777Sbz
330194777Sbz/* Find a device_t by bus/slot/function in domain 0 */
331148385Sume
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Legacy wrapper: delegate to the domain-aware lookup, domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
338194777Sbz
339194777Sbz/* Find a device_t by domain/bus/slot/function */
340202915Sbz
341194777Sbzdevice_t
342194777Sbzpci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
343194777Sbz{
344194777Sbz	struct pci_devinfo *dinfo;
345194777Sbz
346148385Sume	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
347148385Sume		if ((dinfo->cfg.domain == domain) &&
348148385Sume		    (dinfo->cfg.bus == bus) &&
349148385Sume		    (dinfo->cfg.slot == slot) &&
350148385Sume		    (dinfo->cfg.func == func)) {
351120856Sume			return (dinfo->cfg.dev);
35253541Sshin		}
35353541Sshin	}
35453541Sshin
35553541Sshin	return (NULL);
35653541Sshin}
35753541Sshin
35853541Sshin/* Find a device_t by vendor/device ID */
35953541Sshin
36053541Sshindevice_t
36153541Sshinpci_find_device(uint16_t vendor, uint16_t device)
362171259Sdelphij{
363171259Sdelphij	struct pci_devinfo *dinfo;
36453541Sshin
36553541Sshin	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
366194777Sbz		if ((dinfo->cfg.vendor == vendor) &&
36753541Sshin		    (dinfo->cfg.device == device)) {
36853541Sshin			return (dinfo->cfg.dev);
369132714Srwatson		}
370178285Srwatson	}
371132714Srwatson
37253541Sshin	return (NULL);
37395023Ssuz}
37495023Ssuz
37553541Sshindevice_t
37653541Sshinpci_find_class(uint8_t class, uint8_t subclass)
377120856Sume{
37853541Sshin	struct pci_devinfo *dinfo;
37953541Sshin
38053541Sshin	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
38153541Sshin		if (dinfo->cfg.baseclass == class &&
382194777Sbz		    dinfo->cfg.subclass == subclass) {
38353541Sshin			return (dinfo->cfg.dev);
38453541Sshin		}
38553541Sshin	}
38653541Sshin
38753541Sshin	return (NULL);
388127505Spjd}
38953541Sshin
39053541Sshinstatic int
39153541Sshinpci_printf(pcicfgregs *cfg, const char *fmt, ...)
392194777Sbz{
39353541Sshin	va_list ap;
39453541Sshin	int retval;
39553541Sshin
39678064Sume	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
397186141Sbz	    cfg->func);
398186141Sbz	va_start(ap, fmt);
399186141Sbz	retval += vprintf(fmt, ap);
400120649Sume	va_end(ap);
40153541Sshin	return (retval);
40253541Sshin}
403171133Sgnn
40453541Sshin/* return base address of memory or port map */
40553541Sshin
40653541Sshinstatic pci_addr_t
40753541Sshinpci_mapbase(uint64_t mapreg)
408171259Sdelphij{
40953541Sshin
410132714Srwatson	if (PCI_BAR_MEM(mapreg))
411132714Srwatson		return (mapreg & PCIM_BAR_MEM_BASE);
412178285Srwatson	else
413132714Srwatson		return (mapreg & PCIM_BAR_IO_BASE);
41453541Sshin}
41553541Sshin
41678064Sume/* return map type of memory or port map */
417186141Sbz
41853541Sshinstatic const char *
41953541Sshinpci_maptype(uint64_t mapreg)
42053541Sshin{
421102218Struckman
422171259Sdelphij	if (PCI_BAR_IO(mapreg))
423102218Struckman		return ("I/O Port");
424102218Struckman	if (mapreg & PCIM_BAR_MEM_PREFETCH)
425102218Struckman		return ("Prefetchable Memory");
426184205Sdes	return ("Memory");
427102218Struckman}
428102218Struckman
429102218Struckman/* return log2 of map size decoded for memory or port map */
430102218Struckman
431102218Struckmanstatic int
432148385Sumepci_mapsize(uint64_t testval)
433102218Struckman{
434102218Struckman	int ln2size;
435102218Struckman
436102218Struckman	testval = pci_mapbase(testval);
437102218Struckman	ln2size = 0;
438171259Sdelphij	if (testval != 0) {
439102218Struckman		while ((testval & 1) == 0)
440102218Struckman		{
441102218Struckman			ln2size++;
442102218Struckman			testval >>= 1;
443102218Struckman		}
444102218Struckman	}
445102218Struckman	return (ln2size);
446102218Struckman}
447102218Struckman
448102218Struckman/* return base address of device ROM */
449184205Sdes
450111119Simpstatic pci_addr_t
451102218Struckmanpci_rombase(uint64_t mapreg)
452102218Struckman{
453102218Struckman
454102218Struckman	return (mapreg & PCIM_BIOS_ADDR_MASK);
455102218Struckman}
45653541Sshin
457171259Sdelphij/* return log2 of map size decided for device ROM */
45853541Sshin
45953541Sshinstatic int
460102218Struckmanpci_romsize(uint64_t testval)
461102218Struckman{
46253541Sshin	int ln2size;
46353541Sshin
464169462Srwatson	testval = pci_rombase(testval);
465157673Srwatson	ln2size = 0;
466178320Srwatson	if (testval != 0) {
467102218Struckman		while ((testval & 1) == 0)
468102218Struckman		{
469178320Srwatson			ln2size++;
47053541Sshin			testval >>= 1;
471102218Struckman		}
47253541Sshin	}
47353541Sshin	return (ln2size);
47453541Sshin}
47553541Sshin
476171259Sdelphij/* return log2 of address range supported by map register */
47753541Sshin
47853541Sshinstatic int
479102218Struckmanpci_maprange(uint64_t mapreg)
480102218Struckman{
48153541Sshin	int ln2range = 0;
48253541Sshin
483169462Srwatson	if (PCI_BAR_IO(mapreg))
484157673Srwatson		ln2range = 32;
485178320Srwatson	else
486102218Struckman		switch (mapreg & PCIM_BAR_MEM_TYPE) {
487102218Struckman		case PCIM_BAR_MEM_32:
488178320Srwatson			ln2range = 32;
48953541Sshin			break;
490102218Struckman		case PCIM_BAR_MEM_1MB:
49153541Sshin			ln2range = 20;
49253541Sshin			break;
49353541Sshin		case PCIM_BAR_MEM_64:
49453541Sshin			ln2range = 64;
49553541Sshin			break;
49653541Sshin		}
497157673Srwatson	return (ln2range);
49853541Sshin}
49953541Sshin
500157673Srwatson/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
501157673Srwatson
502157673Srwatsonstatic void
503221247Sbzpci_fixancient(pcicfgregs *cfg)
504124332Sume{
505169462Srwatson	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
50654952Seivind		return;
50753541Sshin
508221247Sbz	/* PCI to PCI bridges use header type 1 */
509221247Sbz	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
510221247Sbz		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
511169462Srwatson}
512169462Srwatson
513120913Sume/* extract header type specific config data */
51453541Sshin
/*
 * Fill in the header-type-specific fields of 'cfg' (subsystem IDs and
 * BAR count) by reading config space at bus/slot/func via 'pcib'.
 * Header types with no subsystem registers leave subvendor/subdevice
 * untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0 device: read subsystem IDs, full BAR set. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1 (PCI-PCI bridge): reduced BAR set only. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2 (CardBus bridge): its own subsystem registers. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
53653541Sshin
53753541Sshin/* read configuration header into pcicfgregs structure */
/*
 * Probe the function at domain d, bus b, slot s, func f through bridge
 * 'pcib'.  If a device responds, allocate a pci_devinfo of 'size' bytes
 * (callers may ask for a larger, derived structure), populate its config
 * snapshot and pciconf record, link it onto the global pci_devq list,
 * and return it.  Returns NULL when no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device word means nothing is there. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): M_WAITOK should never return NULL. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the type-independent part of the header. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit off the raw header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the snapshot into the pciconf record (pciio). */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
612125776Sume
613125776Sumestatic void
614125776Sumepci_read_cap(device_t pcib, pcicfgregs *cfg)
615125776Sume{
61678064Sume#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
61778064Sume#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
61878064Sume#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
61978064Sume	uint64_t addr;
62078064Sume#endif
62178064Sume	uint32_t val;
62278064Sume	int	ptr, nextptr, ptrptr;
62378064Sume
62478064Sume	switch (cfg->hdrtype & PCIM_HDRTYPE) {
625186141Sbz	case PCIM_HDRTYPE_NORMAL:
62678064Sume	case PCIM_HDRTYPE_BRIDGE:
62778064Sume		ptrptr = PCIR_CAP_PTR;
62878064Sume		break;
62978064Sume	case PCIM_HDRTYPE_CARDBUS:
63078064Sume		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
63178064Sume		break;
63278064Sume	default:
63378064Sume		return;		/* no extended capabilities support */
63478064Sume	}
635133192Srwatson	nextptr = REG(ptrptr, 1);	/* sanity check? */
636178285Srwatson
63753541Sshin	/*
638133192Srwatson	 * Read capability entries.
63962587Sitojun	 */
64078064Sume	while (nextptr != 0) {
641134121Srwatson		/* Sanity check */
642134121Srwatson		if (nextptr > 255) {
643178285Srwatson			printf("illegal PCI extended capability offset %d\n",
644134121Srwatson			    nextptr);
645178285Srwatson			return;
64653541Sshin		}
647133192Srwatson		/* Find the next entry */
64853541Sshin		ptr = nextptr;
64953541Sshin		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
65053541Sshin
65153541Sshin		/* Process this entry */
65253541Sshin		switch (REG(ptr + PCICAP_ID, 1)) {
65353541Sshin		case PCIY_PMG:		/* PCI power management */
654171259Sdelphij			if (cfg->pp.pp_cap == 0) {
655180427Sbz				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
65653541Sshin				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
65753541Sshin				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
65853541Sshin				if ((nextptr - ptr) > PCIR_POWER_DATA)
65953541Sshin					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
660158011Srwatson			}
661158011Srwatson			break;
66253541Sshin		case PCIY_HT:		/* HyperTransport */
66353541Sshin			/* Determine HT-specific capability type. */
66453541Sshin			val = REG(ptr + PCIR_HT_COMMAND, 2);
66553541Sshin
66653541Sshin			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
66753541Sshin				cfg->ht.ht_slave = ptr;
668169154Srwatson
669169154Srwatson#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
67053541Sshin			switch (val & PCIM_HTCMD_CAP_MASK) {
671185435Sbz			case PCIM_HTCAP_MSI_MAPPING:
67254952Seivind				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
67353541Sshin					/* Sanity check the mapping window. */
67453541Sshin					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
67553541Sshin					    4);
67653541Sshin					addr <<= 32;
677185435Sbz					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
678185435Sbz					    4);
679192895Sjamie					if (addr != MSI_INTEL_ADDR_BASE)
680192895Sjamie						device_printf(pcib,
681185435Sbz	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
68253541Sshin						    cfg->domain, cfg->bus,
68353541Sshin						    cfg->slot, cfg->func,
68453541Sshin						    (long long)addr);
68553541Sshin				} else
68653541Sshin					addr = MSI_INTEL_ADDR_BASE;
68753541Sshin
68853541Sshin				cfg->ht.ht_msimap = ptr;
68953541Sshin				cfg->ht.ht_msictrl = val;
69053541Sshin				cfg->ht.ht_msiaddr = addr;
69153541Sshin				break;
69253541Sshin			}
69353541Sshin#endif
69453541Sshin			break;
69553541Sshin		case PCIY_MSI:		/* PCI MSI */
69653541Sshin			cfg->msi.msi_location = ptr;
69753541Sshin			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
698169154Srwatson			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
699169154Srwatson						     PCIM_MSICTRL_MMC_MASK)>>1);
70053541Sshin			break;
70153541Sshin		case PCIY_MSIX:		/* PCI MSI-X */
70253541Sshin			cfg->msix.msix_location = ptr;
70353541Sshin			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
70453541Sshin			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
70553541Sshin			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
70653541Sshin			val = REG(ptr + PCIR_MSIX_TABLE, 4);
70753541Sshin			cfg->msix.msix_table_bar = PCIR_BAR(val &
70853541Sshin			    PCIM_MSIX_BIR_MASK);
70953541Sshin			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
71053541Sshin			val = REG(ptr + PCIR_MSIX_PBA, 4);
711185435Sbz			cfg->msix.msix_pba_bar = PCIR_BAR(val &
712192895Sjamie			    PCIM_MSIX_BIR_MASK);
713192895Sjamie			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
714185435Sbz			break;
715185435Sbz		case PCIY_VPD:		/* PCI Vital Product Data */
71654952Seivind			cfg->vpd.vpd_reg = ptr;
71753541Sshin			break;
71853541Sshin		case PCIY_SUBVENDOR:
71953541Sshin			/* Should always be true. */
72053541Sshin			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
72153541Sshin			    PCIM_HDRTYPE_BRIDGE) {
72253541Sshin				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
72353541Sshin				cfg->subvendor = val & 0xffff;
72453541Sshin				cfg->subdevice = val >> 16;
725185435Sbz			}
72653541Sshin			break;
72753541Sshin		case PCIY_PCIX:		/* PCI-X */
72853541Sshin			/*
72953541Sshin			 * Assume we have a PCI-X chipset if we have
73053541Sshin			 * at least one PCI-PCI bridge with a PCI-X
73153541Sshin			 * capability.  Note that some systems with
73253541Sshin			 * PCI-express or HT chipsets might match on
73353541Sshin			 * this check as well.
734185435Sbz			 */
73553541Sshin			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
73653541Sshin			    PCIM_HDRTYPE_BRIDGE)
73753541Sshin				pcix_chipset = 1;
73853541Sshin			cfg->pcix.pcix_location = ptr;
73953541Sshin			break;
74053541Sshin		case PCIY_EXPRESS:	/* PCI-express */
74153541Sshin			/*
74253541Sshin			 * Assume we have a PCI-express chipset if we have
74381127Sume			 * at least one PCI-express device.
744171259Sdelphij			 */
74581127Sume			pcie_chipset = 1;
746186141Sbz			cfg->pcie.pcie_location = ptr;
74781127Sume			val = REG(ptr + PCIER_FLAGS, 2);
748191672Sbms			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
74981127Sume			break;
750157978Srwatson		default:
751169154Srwatson			break;
752178285Srwatson		}
75381127Sume	}
754191672Sbms
75581127Sume#if defined(__powerpc__)
756191672Sbms	/*
757191672Sbms	 * Enable the MSI mapping window for all HyperTransport
75881127Sume	 * slaves.  PCI-PCI bridges have their windows enabled via
75981127Sume	 * PCIB_MAP_MSI().
76081127Sume	 */
76181127Sume	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
76281127Sume	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
76381127Sume		device_printf(pcib,
76481127Sume	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
765191672Sbms		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
766191672Sbms		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
767191672Sbms		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
768191672Sbms		     2);
769191672Sbms	}
770191672Sbms#endif
771191672Sbms/* REG and WREG use carry through to next functions */
772191672Sbms}
773191672Sbms
774191672Sbms/*
77581127Sume * PCI Vital Product Data
77681127Sume */
777191672Sbms
77881127Sume#define	PCI_VPD_TIMEOUT		1000000
779178285Srwatson
78081127Sumestatic int
781157978Srwatsonpci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
78281127Sume{
78381127Sume	int count = PCI_VPD_TIMEOUT;
78453541Sshin
78553541Sshin	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
78653541Sshin
78753541Sshin	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
78853541Sshin
78953541Sshin	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
79053541Sshin		if (--count < 0)
791171259Sdelphij			return (ENXIO);
79253541Sshin		DELAY(1);	/* limit looping */
793171259Sdelphij	}
794122922Sandre	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
795122922Sandre
796122922Sandre	return (0);
797122922Sandre}
79853541Sshin
79953541Sshin#if 0
80053541Sshinstatic int
80153541Sshinpci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
80253541Sshin{
80353541Sshin	int count = PCI_VPD_TIMEOUT;
80498211Shsu
805171259Sdelphij	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
80653541Sshin
807122922Sandre	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
808122922Sandre	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
809122922Sandre	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
81098211Shsu		if (--count < 0)
81153541Sshin			return (ENXIO);
81253541Sshin		DELAY(1);	/* limit looping */
81353541Sshin	}
81453541Sshin
81553541Sshin	return (0);
81653541Sshin}
817171259Sdelphij#endif
818185435Sbz
819185435Sbz#undef PCI_VPD_TIMEOUT
82053541Sshin
82153541Sshinstruct vpd_readstate {
822185435Sbz	device_t	pcib;
82353541Sshin	pcicfgregs	*cfg;
82478064Sume	uint32_t	val;
82553541Sshin	int		bytesinval;
826178285Srwatson	int		off;
827158011Srwatson	uint8_t		cksum;
82883934Sbrooks};
82983934Sbrooks
83083934Sbrooksstatic int
83183934Sbrooksvpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
83278064Sume{
83353541Sshin	uint32_t reg;
83453541Sshin	uint8_t byte;
83553541Sshin
836185435Sbz	if (vrs->bytesinval == 0) {
837169154Srwatson		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
838169154Srwatson			return (ENXIO);
839169154Srwatson		vrs->val = le32toh(reg);
84054263Sshin		vrs->off += 4;
841185435Sbz		byte = vrs->val & 0xff;
84254952Seivind		vrs->bytesinval = 3;
84353541Sshin	} else {
84453541Sshin		vrs->val = vrs->val >> 8;
84553541Sshin		byte = vrs->val & 0xff;
84653541Sshin		vrs->bytesinval--;
84753541Sshin	}
84853541Sshin
849185435Sbz	vrs->cksum += byte;
850185435Sbz	*data = byte;
851185435Sbz	return (0);
85253541Sshin}
853192895Sjamie
854185435Sbzstatic void
855185435Sbzpci_read_vpd(device_t pcib, pcicfgregs *cfg)
856185435Sbz{
85753541Sshin	struct vpd_readstate vrs;
85853541Sshin	int state;
859185435Sbz	int name;
860185435Sbz	int remain;
86153541Sshin	int i;
862185435Sbz	int alloc, off;		/* alloc/off for RO/W arrays */
863185435Sbz	int cksumvalid;
864185435Sbz	int dflen;
865185435Sbz	uint8_t byte;
866185435Sbz	uint8_t byte2;
867185435Sbz
868185435Sbz	/* init vpd reader */
869185435Sbz	vrs.bytesinval = 0;
870185435Sbz	vrs.off = 0;
871185435Sbz	vrs.pcib = pcib;
872185435Sbz	vrs.cfg = cfg;
873185435Sbz	vrs.cksum = 0;
874185435Sbz
875185435Sbz	state = 0;
876185435Sbz	name = remain = i = 0;	/* shut up stupid gcc */
877169154Srwatson	alloc = off = 0;	/* shut up stupid gcc */
878169154Srwatson	dflen = 0;		/* shut up stupid gcc */
87954263Sshin	cksumvalid = -1;
880185435Sbz	while (state >= 0) {
88154952Seivind		if (vpd_nextbyte(&vrs, &byte)) {
88253541Sshin			state = -2;
883185435Sbz			break;
884185435Sbz		}
885185435Sbz#if 0
886185435Sbz		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
887185435Sbz		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
888185435Sbz		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
889185435Sbz#endif
890185435Sbz		switch (state) {
891185435Sbz		case 0:		/* item name */
892185435Sbz			if (byte & 0x80) {
893192895Sjamie				if (vpd_nextbyte(&vrs, &byte2)) {
894185435Sbz					state = -2;
895188144Sjamie					break;
896188144Sjamie				}
89753541Sshin				remain = byte2;
898185435Sbz				if (vpd_nextbyte(&vrs, &byte2)) {
899185435Sbz					state = -2;
900185435Sbz					break;
901185435Sbz				}
902185435Sbz				remain |= byte2 << 8;
903185435Sbz				if (remain > (0x7f*4 - vrs.off)) {
904185435Sbz					state = -1;
90553541Sshin					pci_printf(cfg,
906185435Sbz					    "invalid VPD data, remain %#x\n",
907185435Sbz					    remain);
908185435Sbz				}
909185435Sbz				name = byte & 0x7f;
910185435Sbz			} else {
911185435Sbz				remain = byte & 0x7;
91253541Sshin				name = (byte >> 3) & 0xf;
91353541Sshin			}
914185435Sbz			switch (name) {
91553541Sshin			case 0x2:	/* String */
916185435Sbz				cfg->vpd.vpd_ident = malloc(remain + 1,
917185435Sbz				    M_DEVBUF, M_WAITOK);
918185435Sbz				i = 0;
919185435Sbz				state = 1;
920185435Sbz				break;
921185435Sbz			case 0xf:	/* End */
922185435Sbz				state = -1;
923185435Sbz				break;
92453541Sshin			case 0x10:	/* VPD-R */
92553541Sshin				alloc = 8;
92653541Sshin				off = 0;
92753541Sshin				cfg->vpd.vpd_ros = malloc(alloc *
92853541Sshin				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
92953541Sshin				    M_WAITOK | M_ZERO);
93053541Sshin				state = 2;
93153541Sshin				break;
93253541Sshin			case 0x11:	/* VPD-W */
93353541Sshin				alloc = 8;
93453541Sshin				off = 0;
93553541Sshin				cfg->vpd.vpd_w = malloc(alloc *
93653541Sshin				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
93753541Sshin				    M_WAITOK | M_ZERO);
93853541Sshin				state = 5;
93953541Sshin				break;
94053541Sshin			default:	/* Invalid data, abort */
941148385Sume				state = -1;
942148385Sume				break;
94353541Sshin			}
94453541Sshin			break;
945
946		case 1:	/* Identifier String */
947			cfg->vpd.vpd_ident[i++] = byte;
948			remain--;
949			if (remain == 0)  {
950				cfg->vpd.vpd_ident[i] = '\0';
951				state = 0;
952			}
953			break;
954
955		case 2:	/* VPD-R Keyword Header */
956			if (off == alloc) {
957				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
958				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
959				    M_DEVBUF, M_WAITOK | M_ZERO);
960			}
961			cfg->vpd.vpd_ros[off].keyword[0] = byte;
962			if (vpd_nextbyte(&vrs, &byte2)) {
963				state = -2;
964				break;
965			}
966			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
967			if (vpd_nextbyte(&vrs, &byte2)) {
968				state = -2;
969				break;
970			}
971			dflen = byte2;
972			if (dflen == 0 &&
973			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
974			    2) == 0) {
975				/*
976				 * if this happens, we can't trust the rest
977				 * of the VPD.
978				 */
979				pci_printf(cfg, "bad keyword length: %d\n",
980				    dflen);
981				cksumvalid = 0;
982				state = -1;
983				break;
984			} else if (dflen == 0) {
985				cfg->vpd.vpd_ros[off].value = malloc(1 *
986				    sizeof(*cfg->vpd.vpd_ros[off].value),
987				    M_DEVBUF, M_WAITOK);
988				cfg->vpd.vpd_ros[off].value[0] = '\x00';
989			} else
990				cfg->vpd.vpd_ros[off].value = malloc(
991				    (dflen + 1) *
992				    sizeof(*cfg->vpd.vpd_ros[off].value),
993				    M_DEVBUF, M_WAITOK);
994			remain -= 3;
995			i = 0;
996			/* keep in sync w/ state 3's transistions */
997			if (dflen == 0 && remain == 0)
998				state = 0;
999			else if (dflen == 0)
1000				state = 2;
1001			else
1002				state = 3;
1003			break;
1004
1005		case 3:	/* VPD-R Keyword Value */
1006			cfg->vpd.vpd_ros[off].value[i++] = byte;
1007			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1008			    "RV", 2) == 0 && cksumvalid == -1) {
1009				if (vrs.cksum == 0)
1010					cksumvalid = 1;
1011				else {
1012					if (bootverbose)
1013						pci_printf(cfg,
1014					    "bad VPD cksum, remain %hhu\n",
1015						    vrs.cksum);
1016					cksumvalid = 0;
1017					state = -1;
1018					break;
1019				}
1020			}
1021			dflen--;
1022			remain--;
1023			/* keep in sync w/ state 2's transistions */
1024			if (dflen == 0)
1025				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1026			if (dflen == 0 && remain == 0) {
1027				cfg->vpd.vpd_rocnt = off;
1028				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1029				    off * sizeof(*cfg->vpd.vpd_ros),
1030				    M_DEVBUF, M_WAITOK | M_ZERO);
1031				state = 0;
1032			} else if (dflen == 0)
1033				state = 2;
1034			break;
1035
1036		case 4:
1037			remain--;
1038			if (remain == 0)
1039				state = 0;
1040			break;
1041
1042		case 5:	/* VPD-W Keyword Header */
1043			if (off == alloc) {
1044				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1045				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1046				    M_DEVBUF, M_WAITOK | M_ZERO);
1047			}
1048			cfg->vpd.vpd_w[off].keyword[0] = byte;
1049			if (vpd_nextbyte(&vrs, &byte2)) {
1050				state = -2;
1051				break;
1052			}
1053			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1054			if (vpd_nextbyte(&vrs, &byte2)) {
1055				state = -2;
1056				break;
1057			}
1058			cfg->vpd.vpd_w[off].len = dflen = byte2;
1059			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1060			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1061			    sizeof(*cfg->vpd.vpd_w[off].value),
1062			    M_DEVBUF, M_WAITOK);
1063			remain -= 3;
1064			i = 0;
1065			/* keep in sync w/ state 6's transistions */
1066			if (dflen == 0 && remain == 0)
1067				state = 0;
1068			else if (dflen == 0)
1069				state = 5;
1070			else
1071				state = 6;
1072			break;
1073
1074		case 6:	/* VPD-W Keyword Value */
1075			cfg->vpd.vpd_w[off].value[i++] = byte;
1076			dflen--;
1077			remain--;
1078			/* keep in sync w/ state 5's transistions */
1079			if (dflen == 0)
1080				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1081			if (dflen == 0 && remain == 0) {
1082				cfg->vpd.vpd_wcnt = off;
1083				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1084				    off * sizeof(*cfg->vpd.vpd_w),
1085				    M_DEVBUF, M_WAITOK | M_ZERO);
1086				state = 0;
1087			} else if (dflen == 0)
1088				state = 5;
1089			break;
1090
1091		default:
1092			pci_printf(cfg, "invalid state: %d\n", state);
1093			state = -1;
1094			break;
1095		}
1096	}
1097
1098	if (cksumvalid == 0 || state < -1) {
1099		/* read-only data bad, clean up */
1100		if (cfg->vpd.vpd_ros != NULL) {
1101			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1102				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1103			free(cfg->vpd.vpd_ros, M_DEVBUF);
1104			cfg->vpd.vpd_ros = NULL;
1105		}
1106	}
1107	if (state < -1) {
1108		/* I/O error, clean up */
1109		pci_printf(cfg, "failed to read VPD data.\n");
1110		if (cfg->vpd.vpd_ident != NULL) {
1111			free(cfg->vpd.vpd_ident, M_DEVBUF);
1112			cfg->vpd.vpd_ident = NULL;
1113		}
1114		if (cfg->vpd.vpd_w != NULL) {
1115			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1116				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1117			free(cfg->vpd.vpd_w, M_DEVBUF);
1118			cfg->vpd.vpd_w = NULL;
1119		}
1120	}
1121	cfg->vpd.vpd_cached = 1;
1122#undef REG
1123#undef WREG
1124}
1125
1126int
1127pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1128{
1129	struct pci_devinfo *dinfo = device_get_ivars(child);
1130	pcicfgregs *cfg = &dinfo->cfg;
1131
1132	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1133		pci_read_vpd(device_get_parent(dev), cfg);
1134
1135	*identptr = cfg->vpd.vpd_ident;
1136
1137	if (*identptr == NULL)
1138		return (ENXIO);
1139
1140	return (0);
1141}
1142
1143int
1144pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1145	const char **vptr)
1146{
1147	struct pci_devinfo *dinfo = device_get_ivars(child);
1148	pcicfgregs *cfg = &dinfo->cfg;
1149	int i;
1150
1151	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1152		pci_read_vpd(device_get_parent(dev), cfg);
1153
1154	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1155		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1156		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1157			*vptr = cfg->vpd.vpd_ros[i].value;
1158			return (0);
1159		}
1160
1161	*vptr = NULL;
1162	return (ENXIO);
1163}
1164
1165/*
1166 * Find the requested HyperTransport capability and return the offset
1167 * in configuration space via the pointer provided.  The function
1168 * returns 0 on success and an error code otherwise.
1169 */
1170int
1171pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1172{
1173	int ptr, error;
1174	uint16_t val;
1175
1176	error = pci_find_cap(child, PCIY_HT, &ptr);
1177	if (error)
1178		return (error);
1179
1180	/*
1181	 * Traverse the capabilities list checking each HT capability
1182	 * to see if it matches the requested HT capability.
1183	 */
1184	while (ptr != 0) {
1185		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1186		if (capability == PCIM_HTCAP_SLAVE ||
1187		    capability == PCIM_HTCAP_HOST)
1188			val &= 0xe000;
1189		else
1190			val &= PCIM_HTCMD_CAP_MASK;
1191		if (val == capability) {
1192			if (capreg != NULL)
1193				*capreg = ptr;
1194			return (0);
1195		}
1196
1197		/* Skip to the next HT capability. */
1198		while (ptr != 0) {
1199			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1200			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1201			    PCIY_HT)
1202				break;
1203		}
1204	}
1205	return (ENOENT);
1206}
1207
1208/*
1209 * Find the requested capability and return the offset in
1210 * configuration space via the pointer provided.  The function returns
1211 * 0 on success and an error code otherwise.
1212 */
1213int
1214pci_find_cap_method(device_t dev, device_t child, int capability,
1215    int *capreg)
1216{
1217	struct pci_devinfo *dinfo = device_get_ivars(child);
1218	pcicfgregs *cfg = &dinfo->cfg;
1219	u_int32_t status;
1220	u_int8_t ptr;
1221
1222	/*
1223	 * Check the CAP_LIST bit of the PCI status register first.
1224	 */
1225	status = pci_read_config(child, PCIR_STATUS, 2);
1226	if (!(status & PCIM_STATUS_CAPPRESENT))
1227		return (ENXIO);
1228
1229	/*
1230	 * Determine the start pointer of the capabilities list.
1231	 */
1232	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1233	case PCIM_HDRTYPE_NORMAL:
1234	case PCIM_HDRTYPE_BRIDGE:
1235		ptr = PCIR_CAP_PTR;
1236		break;
1237	case PCIM_HDRTYPE_CARDBUS:
1238		ptr = PCIR_CAP_PTR_2;
1239		break;
1240	default:
1241		/* XXX: panic? */
1242		return (ENXIO);		/* no extended capabilities support */
1243	}
1244	ptr = pci_read_config(child, ptr, 1);
1245
1246	/*
1247	 * Traverse the capabilities list.
1248	 */
1249	while (ptr != 0) {
1250		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1251			if (capreg != NULL)
1252				*capreg = ptr;
1253			return (0);
1254		}
1255		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1256	}
1257
1258	return (ENOENT);
1259}
1260
1261/*
1262 * Find the requested extended capability and return the offset in
1263 * configuration space via the pointer provided.  The function returns
1264 * 0 on success and an error code otherwise.
1265 */
1266int
1267pci_find_extcap_method(device_t dev, device_t child, int capability,
1268    int *capreg)
1269{
1270	struct pci_devinfo *dinfo = device_get_ivars(child);
1271	pcicfgregs *cfg = &dinfo->cfg;
1272	uint32_t ecap;
1273	uint16_t ptr;
1274
1275	/* Only supported for PCI-express devices. */
1276	if (cfg->pcie.pcie_location == 0)
1277		return (ENXIO);
1278
1279	ptr = PCIR_EXTCAP;
1280	ecap = pci_read_config(child, ptr, 4);
1281	if (ecap == 0xffffffff || ecap == 0)
1282		return (ENOENT);
1283	for (;;) {
1284		if (PCI_EXTCAP_ID(ecap) == capability) {
1285			if (capreg != NULL)
1286				*capreg = ptr;
1287			return (0);
1288		}
1289		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1290		if (ptr == 0)
1291			break;
1292		ecap = pci_read_config(child, ptr, 4);
1293	}
1294
1295	return (ENOENT);
1296}
1297
1298/*
1299 * Support for MSI-X message interrupts.
1300 */
/*
 * Program MSI-X table entry 'index' of 'dev' with the given message
 * address/data pair.  The entry's mask state is not touched here; the
 * caller unmasks via pci_unmask_msix() when ready.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each table entry is 16 bytes: addr lo, addr hi, data, control. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1317
1318void
1319pci_mask_msix(device_t dev, u_int index)
1320{
1321	struct pci_devinfo *dinfo = device_get_ivars(dev);
1322	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1323	uint32_t offset, val;
1324
1325	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1326	offset = msix->msix_table_offset + index * 16 + 12;
1327	val = bus_read_4(msix->msix_table_res, offset);
1328	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1329		val |= PCIM_MSIX_VCTRL_MASK;
1330		bus_write_4(msix->msix_table_res, offset, val);
1331	}
1332}
1333
1334void
1335pci_unmask_msix(device_t dev, u_int index)
1336{
1337	struct pci_devinfo *dinfo = device_get_ivars(dev);
1338	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1339	uint32_t offset, val;
1340
1341	KASSERT(msix->msix_table_len > index, ("bogus index"));
1342	offset = msix->msix_table_offset + index * 16 + 12;
1343	val = bus_read_4(msix->msix_table_res, offset);
1344	if (val & PCIM_MSIX_VCTRL_MASK) {
1345		val &= ~PCIM_MSIX_VCTRL_MASK;
1346		bus_write_4(msix->msix_table_res, offset, val);
1347	}
1348}
1349
1350int
1351pci_pending_msix(device_t dev, u_int index)
1352{
1353	struct pci_devinfo *dinfo = device_get_ivars(dev);
1354	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1355	uint32_t offset, bit;
1356
1357	KASSERT(msix->msix_table_len > index, ("bogus index"));
1358	offset = msix->msix_pba_offset + (index / 32) * 4;
1359	bit = 1 << index % 32;
1360	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1361}
1362
1363/*
1364 * Restore MSI-X registers and table during resume.  If MSI-X is
1365 * enabled then walk the virtual table to restore the actual MSI-X
1366 * table.
1367 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1395
1396/*
1397 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1398 * returned in *count.  After this function returns, each message will be
1399 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1400 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still refers to it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if no message was obtained. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'i' is how many messages the bridge actually granted. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity map: message i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1535
1536/*
1537 * By default, pci_alloc_msix() will assign the allocated IRQ
1538 * resources consecutively to the first N messages in the MSI-X table.
1539 * However, device drivers may want to use different layouts if they
1540 * either receive fewer messages than they asked for, or they wish to
1541 * populate the MSI-X table sparsely.  This method allows the driver
1542 * to specify what layout it wants.  It must be called after a
1543 * successful pci_alloc_msix() but before any of the associated
1544 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1545 *
1546 * The 'vectors' array contains 'count' message vectors.  The array
1547 * maps directly to the MSI-X table in that index 0 in the array
1548 * specifies the vector for the first message in the MSI-X table, etc.
1549 * The vector value in each array index can either be 0 to indicate
1550 * that no vector should be assigned to a message slot, or it can be a
1551 * number from 1 to N (where N is the count returned from a
1552 * succcessful call to pci_alloc_msix()) to indicate which message
1553 * vector (IRQ) to be used for the corresponding message.
1554 *
1555 * On successful return, each message with a non-zero vector will have
1556 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1557 * 1.  Additionally, if any of the IRQs allocated via the previous
1558 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1559 * will be freed back to the system automatically.
1560 *
1561 * For example, suppose a driver has a MSI-X table with 6 messages and
1562 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1563 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1564 * C.  After the call to pci_alloc_msix(), the device will be setup to
1565 * have an MSI-X table of ABC--- (where - means no vector assigned).
1566 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1567 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1568 * be freed back to the system.  This device will also have valid
1569 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1570 *
1571 * In any case, the SYS_RES_IRQ rid X will always map to the message
1572 * at MSI-X table index X - 1 and will only be valid if a vector is
1573 * assigned to that table entry.
1574 */
1575int
1576pci_remap_msix_method(device_t dev, device_t child, int count,
1577    const u_int *vectors)
1578{
1579	struct pci_devinfo *dinfo = device_get_ivars(child);
1580	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1581	struct resource_list_entry *rle;
1582	int i, irq, j, *used;
1583
1584	/*
1585	 * Have to have at least one message in the table but the
1586	 * table can't be bigger than the actual MSI-X table in the
1587	 * device.
1588	 */
1589	if (count == 0 || count > msix->msix_msgnum)
1590		return (EINVAL);
1591
1592	/* Sanity check the vectors. */
1593	for (i = 0; i < count; i++)
1594		if (vectors[i] > msix->msix_alloc)
1595			return (EINVAL);
1596
1597	/*
1598	 * Make sure there aren't any holes in the vectors to be used.
1599	 * It's a big pain to support it, and it doesn't really make
1600	 * sense anyway.  Also, at least one vector must be used.
1601	 */
1602	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1603	    M_ZERO);
1604	for (i = 0; i < count; i++)
1605		if (vectors[i] != 0)
1606			used[vectors[i] - 1] = 1;
1607	for (i = 0; i < msix->msix_alloc - 1; i++)
1608		if (used[i] == 0 && used[i + 1] == 1) {
1609			free(used, M_DEVBUF);
1610			return (EINVAL);
1611		}
1612	if (used[0] != 1) {
1613		free(used, M_DEVBUF);
1614		return (EINVAL);
1615	}
1616
1617	/* Make sure none of the resources are allocated. */
1618	for (i = 0; i < msix->msix_table_len; i++) {
1619		if (msix->msix_table[i].mte_vector == 0)
1620			continue;
1621		if (msix->msix_table[i].mte_handlers > 0)
1622			return (EBUSY);
1623		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1624		KASSERT(rle != NULL, ("missing resource"));
1625		if (rle->res != NULL)
1626			return (EBUSY);
1627	}
1628
1629	/* Free the existing resource list entries. */
1630	for (i = 0; i < msix->msix_table_len; i++) {
1631		if (msix->msix_table[i].mte_vector == 0)
1632			continue;
1633		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1634	}
1635
1636	/*
1637	 * Build the new virtual table keeping track of which vectors are
1638	 * used.
1639	 */
1640	free(msix->msix_table, M_DEVBUF);
1641	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1642	    M_DEVBUF, M_WAITOK | M_ZERO);
1643	for (i = 0; i < count; i++)
1644		msix->msix_table[i].mte_vector = vectors[i];
1645	msix->msix_table_len = count;
1646
1647	/* Free any unused IRQs and resize the vectors array if necessary. */
1648	j = msix->msix_alloc - 1;
1649	if (used[j] == 0) {
1650		struct msix_vector *vec;
1651
1652		while (used[j] == 0) {
1653			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1654			    msix->msix_vectors[j].mv_irq);
1655			j--;
1656		}
1657		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1658		    M_WAITOK);
1659		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1660		    (j + 1));
1661		free(msix->msix_vectors, M_DEVBUF);
1662		msix->msix_vectors = vec;
1663		msix->msix_alloc = j + 1;
1664	}
1665	free(used, M_DEVBUF);
1666
1667	/* Map the IRQs onto the rids. */
1668	for (i = 0; i < count; i++) {
1669		if (vectors[i] == 0)
1670			continue;
1671		irq = msix->msix_vectors[vectors[i]].mv_irq;
1672		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1673		    irq, 1);
1674	}
1675
1676	if (bootverbose) {
1677		device_printf(child, "Remapped MSI-X IRQs as: ");
1678		for (i = 0; i < count; i++) {
1679			if (i != 0)
1680				printf(", ");
1681			if (vectors[i] == 0)
1682				printf("---");
1683			else
1684				printf("%d",
1685				    msix->msix_vectors[vectors[i]].mv_irq);
1686		}
1687		printf("\n");
1688	}
1689
1690	return (0);
1691}
1692
/*
 * Release all MSI-X state for 'child': disable MSI-X in the control
 * register, delete the SYS_RES_IRQ resource list entries, and free the
 * table and vector bookkeeping.  Returns ENODEV if no MSI-X messages
 * are currently allocated and EBUSY if any message still has a handler
 * established or its IRQ resource is still held by a driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		/* mte_vector == 0 means this table slot is unused. */
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1739
1740/*
1741 * Return the max supported MSI-X messages this device supports.
1742 * Basically, assuming the MD code can alloc messages, this function
1743 * should return the maximum value that pci_alloc_msix() can return.
1744 * Thus, it is subject to the tunables, etc.
1745 */
1746int
1747pci_msix_count_method(device_t dev, device_t child)
1748{
1749	struct pci_devinfo *dinfo = device_get_ivars(child);
1750	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1751
1752	if (pci_do_msix && msix->msix_location != 0)
1753		return (msix->msix_msgnum);
1754	return (0);
1755}
1756
1757/*
1758 * HyperTransport MSI mapping control
1759 */
1760void
1761pci_ht_map_msi(device_t dev, uint64_t addr)
1762{
1763	struct pci_devinfo *dinfo = device_get_ivars(dev);
1764	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1765
1766	if (!ht->ht_msimap)
1767		return;
1768
1769	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1770	    ht->ht_msiaddr >> 20 == addr >> 20) {
1771		/* Enable MSI -> HT mapping. */
1772		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1773		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1774		    ht->ht_msictrl, 2);
1775	}
1776
1777	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1778		/* Disable MSI -> HT mapping. */
1779		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1780		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1781		    ht->ht_msictrl, 2);
1782	}
1783}
1784
1785int
1786pci_get_max_read_req(device_t dev)
1787{
1788	struct pci_devinfo *dinfo = device_get_ivars(dev);
1789	int cap;
1790	uint16_t val;
1791
1792	cap = dinfo->cfg.pcie.pcie_location;
1793	if (cap == 0)
1794		return (0);
1795	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1796	val &= PCIEM_CTL_MAX_READ_REQUEST;
1797	val >>= 12;
1798	return (1 << (val + 7));
1799}
1800
1801int
1802pci_set_max_read_req(device_t dev, int size)
1803{
1804	struct pci_devinfo *dinfo = device_get_ivars(dev);
1805	int cap;
1806	uint16_t val;
1807
1808	cap = dinfo->cfg.pcie.pcie_location;
1809	if (cap == 0)
1810		return (0);
1811	if (size < 128)
1812		size = 128;
1813	if (size > 4096)
1814		size = 4096;
1815	size = (1 << (fls(size) - 1));
1816	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1817	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1818	val |= (fls(size) - 8) << 12;
1819	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1820	return (size);
1821}
1822
1823/*
1824 * Support for MSI message signalled interrupts.
1825 */
1826void
1827pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1828{
1829	struct pci_devinfo *dinfo = device_get_ivars(dev);
1830	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1831
1832	/* Write data and address values. */
1833	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1834	    address & 0xffffffff, 4);
1835	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1836		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1837		    address >> 32, 4);
1838		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1839		    data, 2);
1840	} else
1841		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1842		    2);
1843
1844	/* Enable MSI in the control register. */
1845	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1846	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1847	    2);
1848
1849	/* Enable MSI -> HT mapping. */
1850	pci_ht_map_msi(dev, address);
1851}
1852
1853void
1854pci_disable_msi(device_t dev)
1855{
1856	struct pci_devinfo *dinfo = device_get_ivars(dev);
1857	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1858
1859	/* Disable MSI -> HT mapping. */
1860	pci_ht_map_msi(dev, 0);
1861
1862	/* Disable MSI in the control register. */
1863	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1864	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1865	    2);
1866}
1867
1868/*
1869 * Restore MSI registers during resume.  If MSI is enabled then
1870 * restore the data and address registers in addition to the control
1871 * register.
1872 */
1873static void
1874pci_resume_msi(device_t dev)
1875{
1876	struct pci_devinfo *dinfo = device_get_ivars(dev);
1877	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1878	uint64_t address;
1879	uint16_t data;
1880
1881	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1882		address = msi->msi_addr;
1883		data = msi->msi_data;
1884		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1885		    address & 0xffffffff, 4);
1886		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1887			pci_write_config(dev, msi->msi_location +
1888			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1889			pci_write_config(dev, msi->msi_location +
1890			    PCIR_MSI_DATA_64BIT, data, 2);
1891		} else
1892			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1893			    data, 2);
1894	}
1895	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1896	    2);
1897}
1898
/*
 * Re-program the address/data pair for one of this device's MSI or
 * MSI-X IRQs after asking the parent bridge for an updated mapping.
 * Returns ENOENT when 'irq' is not one of the device's
 * message-signalled interrupts.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* MSI must be disabled while reprogramming. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask the entry while rewriting it. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1971
1972/*
1973 * Returns true if the specified device is blacklisted because MSI
1974 * doesn't work.
1975 */
1976int
1977pci_msi_device_blacklisted(device_t dev)
1978{
1979	const struct pci_quirk *q;
1980
1981	if (!pci_honor_msi_blacklist)
1982		return (0);
1983
1984	for (q = &pci_quirks[0]; q->devid; q++) {
1985		if (q->devid == pci_get_devid(dev) &&
1986		    q->type == PCI_QUIRK_DISABLE_MSI)
1987			return (1);
1988	}
1989	return (0);
1990}
1991
1992/*
1993 * Returns true if a specified chipset supports MSI when it is
1994 * emulated hardware in a virtual machine.
1995 */
1996static int
1997pci_msi_vm_chipset(device_t dev)
1998{
1999	const struct pci_quirk *q;
2000
2001	for (q = &pci_quirks[0]; q->devid; q++) {
2002		if (q->devid == pci_get_devid(dev) &&
2003		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2004			return (1);
2005	}
2006	return (0);
2007}
2008
2009/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2011 * we just check for blacklisted chipsets as represented by the
2012 * host-PCI bridge at device 0:0:0.  In the future, it may become
2013 * necessary to check other system attributes, such as the kenv values
2014 * that give the motherboard manufacturer and model number.
2015 */
2016static int
2017pci_msi_blacklisted(void)
2018{
2019	device_t dev;
2020
2021	if (!pci_honor_msi_blacklist)
2022		return (0);
2023
2024	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2025	if (!(pcie_chipset || pcix_chipset)) {
2026		if (vm_guest != VM_GUEST_NO) {
2027			dev = pci_find_bsf(0, 0, 0);
2028			if (dev != NULL)
2029				return (pci_msi_vm_chipset(dev) == 0);
2030		}
2031		return (1);
2032	}
2033
2034	dev = pci_find_bsf(0, 0, 0);
2035	if (dev != NULL)
2036		return (pci_msi_device_blacklisted(dev));
2037	return (0);
2038}
2039
2040/*
2041 * Attempt to allocate *count MSI messages.  The actual number allocated is
2042 * returned in *count.  After this function returns, each message will be
2043 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2044 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Halving a power of two keeps it a power of two, so each retry
	 * below still satisfies the MSI message-count constraint.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * holds log2 of the number of enabled messages.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2163
2164/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  ENODEV means no MSI-X messages were
	 * allocated, so fall through to plain MSI; any other result
	 * (success or a real error) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember each IRQ so we can hand them back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2212
2213/*
2214 * Return the max supported MSI messages this device supports.
2215 * Basically, assuming the MD code can alloc messages, this function
2216 * should return the maximum value that pci_alloc_msi() can return.
2217 * Thus, it is subject to the tunables, etc.
2218 */
2219int
2220pci_msi_count_method(device_t dev, device_t child)
2221{
2222	struct pci_devinfo *dinfo = device_get_ivars(child);
2223	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2224
2225	if (pci_do_msi && msi->msi_location != 0)
2226		return (msi->msi_msgnum);
2227	return (0);
2228}
2229
2230/* free pcicfgregs structure and all depending data structures */
2231
2232int
2233pci_freecfg(struct pci_devinfo *dinfo)
2234{
2235	struct devlist *devlist_head;
2236	struct pci_map *pm, *next;
2237	int i;
2238
2239	devlist_head = &pci_devq;
2240
2241	if (dinfo->cfg.vpd.vpd_reg) {
2242		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2243		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2244			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2245		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2246		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2247			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2248		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2249	}
2250	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2251		free(pm, M_DEVBUF);
2252	}
2253	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2254	free(dinfo, M_DEVBUF);
2255
2256	/* increment the generation count */
2257	pci_generation++;
2258
2259	/* we're losing one device */
2260	pci_numdevs--;
2261	return (0);
2262}
2263
2264/*
 * PCI power management
2266 */
2267int
2268pci_set_powerstate_method(device_t dev, device_t child, int state)
2269{
2270	struct pci_devinfo *dinfo = device_get_ivars(child);
2271	pcicfgregs *cfg = &dinfo->cfg;
2272	uint16_t status;
2273	int result, oldstate, highest, delay;
2274
2275	if (cfg->pp.pp_cap == 0)
2276		return (EOPNOTSUPP);
2277
2278	/*
2279	 * Optimize a no state change request away.  While it would be OK to
2280	 * write to the hardware in theory, some devices have shown odd
2281	 * behavior when going from D3 -> D3.
2282	 */
2283	oldstate = pci_get_powerstate(child);
2284	if (oldstate == state)
2285		return (0);
2286
2287	/*
2288	 * The PCI power management specification states that after a state
2289	 * transition between PCI power states, system software must
2290	 * guarantee a minimal delay before the function accesses the device.
2291	 * Compute the worst case delay that we need to guarantee before we
2292	 * access the device.  Many devices will be responsive much more
2293	 * quickly than this delay, but there are some that don't respond
2294	 * instantly to state changes.  Transitions to/from D3 state require
2295	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2296	 * is done below with DELAY rather than a sleeper function because
2297	 * this function can be called from contexts where we cannot sleep.
2298	 */
2299	highest = (oldstate > state) ? oldstate : state;
2300	if (highest == PCI_POWERSTATE_D3)
2301	    delay = 10000;
2302	else if (highest == PCI_POWERSTATE_D2)
2303	    delay = 200;
2304	else
2305	    delay = 0;
2306	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2307	    & ~PCIM_PSTAT_DMASK;
2308	result = 0;
2309	switch (state) {
2310	case PCI_POWERSTATE_D0:
2311		status |= PCIM_PSTAT_D0;
2312		break;
2313	case PCI_POWERSTATE_D1:
2314		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2315			return (EOPNOTSUPP);
2316		status |= PCIM_PSTAT_D1;
2317		break;
2318	case PCI_POWERSTATE_D2:
2319		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2320			return (EOPNOTSUPP);
2321		status |= PCIM_PSTAT_D2;
2322		break;
2323	case PCI_POWERSTATE_D3:
2324		status |= PCIM_PSTAT_D3;
2325		break;
2326	default:
2327		return (EINVAL);
2328	}
2329
2330	if (bootverbose)
2331		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2332		    state);
2333
2334	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2335	if (delay)
2336		DELAY(delay);
2337	return (0);
2338}
2339
2340int
2341pci_get_powerstate_method(device_t dev, device_t child)
2342{
2343	struct pci_devinfo *dinfo = device_get_ivars(child);
2344	pcicfgregs *cfg = &dinfo->cfg;
2345	uint16_t status;
2346	int result;
2347
2348	if (cfg->pp.pp_cap != 0) {
2349		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2350		switch (status & PCIM_PSTAT_DMASK) {
2351		case PCIM_PSTAT_D0:
2352			result = PCI_POWERSTATE_D0;
2353			break;
2354		case PCIM_PSTAT_D1:
2355			result = PCI_POWERSTATE_D1;
2356			break;
2357		case PCIM_PSTAT_D2:
2358			result = PCI_POWERSTATE_D2;
2359			break;
2360		case PCIM_PSTAT_D3:
2361			result = PCI_POWERSTATE_D3;
2362			break;
2363		default:
2364			result = PCI_POWERSTATE_UNKNOWN;
2365			break;
2366		}
2367	} else {
2368		/* No support, device is always at D0 */
2369		result = PCI_POWERSTATE_D0;
2370	}
2371	return (result);
2372}
2373
2374/*
2375 * Some convenience functions for PCI device drivers.
2376 */
2377
2378static __inline void
2379pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2380{
2381	uint16_t	command;
2382
2383	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2384	command |= bit;
2385	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2386}
2387
2388static __inline void
2389pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2390{
2391	uint16_t	command;
2392
2393	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2394	command &= ~bit;
2395	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2396}
2397
/* Enable bus mastering for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2404
/* Disable bus mastering for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2411
2412int
2413pci_enable_io_method(device_t dev, device_t child, int space)
2414{
2415	uint16_t bit;
2416
2417	switch(space) {
2418	case SYS_RES_IOPORT:
2419		bit = PCIM_CMD_PORTEN;
2420		break;
2421	case SYS_RES_MEMORY:
2422		bit = PCIM_CMD_MEMEN;
2423		break;
2424	default:
2425		return (EINVAL);
2426	}
2427	pci_set_command_bit(dev, child, bit);
2428	return (0);
2429}
2430
2431int
2432pci_disable_io_method(device_t dev, device_t child, int space)
2433{
2434	uint16_t bit;
2435
2436	switch(space) {
2437	case SYS_RES_IOPORT:
2438		bit = PCIM_CMD_PORTEN;
2439		break;
2440	case SYS_RES_MEMORY:
2441		bit = PCIM_CMD_MEMEN;
2442		break;
2443	default:
2444		return (EINVAL);
2445	}
2446	pci_clear_command_bit(dev, child, bit);
2447	return (0);
2448}
2449
2450/*
2451 * New style pci driver.  Parent device is either a pci-host-bridge or a
2452 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2453 */
2454
/*
 * Dump the interesting configuration-header fields of a newly found
 * device to the console.  Only runs when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if any. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if any. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if any. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2511
2512static int
2513pci_porten(device_t dev)
2514{
2515	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2516}
2517
2518static int
2519pci_memen(device_t dev)
2520{
2521	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2522}
2523
/*
 * Read a BAR and size it.  On return *mapp holds the (restored) BAR
 * value and *testvalp holds the value read back after writing all 1's,
 * from which pci_mapsize() can derive the decoded range.  Decoding is
 * temporarily disabled while the BAR holds the sizing pattern.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2587
/*
 * Program a BAR with 'base' and refresh the cached pm_value from the
 * device (the hardware may hard-wire some bits).  64-bit BARs also
 * write/read the upper dword at reg + 4.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2608
2609struct pci_map *
2610pci_find_bar(device_t dev, int reg)
2611{
2612	struct pci_devinfo *dinfo;
2613	struct pci_map *pm;
2614
2615	dinfo = device_get_ivars(dev);
2616	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2617		if (pm->pm_reg == reg)
2618			return (pm);
2619	}
2620	return (NULL);
2621}
2622
2623int
2624pci_bar_enabled(device_t dev, struct pci_map *pm)
2625{
2626	struct pci_devinfo *dinfo;
2627	uint16_t cmd;
2628
2629	dinfo = device_get_ivars(dev);
2630	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2631	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2632		return (0);
2633	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2634	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2635		return ((cmd & PCIM_CMD_MEMEN) != 0);
2636	else
2637		return ((cmd & PCIM_CMD_PORTEN) != 0);
2638}
2639
/*
 * Record a new BAR for this device, keeping the per-device list sorted
 * by config register offset.  'value' is the raw BAR contents and
 * 'size' is log2 of the decoded range.  Returns the new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record belongs. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2664
2665static void
2666pci_restore_bars(device_t dev)
2667{
2668	struct pci_devinfo *dinfo;
2669	struct pci_map *pm;
2670	int ln2range;
2671
2672	dinfo = device_get_ivars(dev);
2673	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2674		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2675			ln2range = 32;
2676		else
2677			ln2range = pci_maprange(pm->pm_value);
2678		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2679		if (ln2range == 64)
2680			pci_write_config(dev, pm->pm_reg + 4,
2681			    pm->pm_value >> 32, 4);
2682	}
2683}
2684
2685/*
2686 * Add a resource based on a pci map register. Return 1 if the map
2687 * register is a 32bit map register or 2 if it is a 64bit register.
2688 */
2689static int
2690pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2691    int force, int prefetch)
2692{
2693	struct pci_map *pm;
2694	pci_addr_t base, map, testval;
2695	pci_addr_t start, end, count;
2696	int barlen, basezero, maprange, mapsize, type;
2697	uint16_t cmd;
2698	struct resource *res;
2699
2700	/*
2701	 * The BAR may already exist if the device is a CardBus card
2702	 * whose CIS is stored in this BAR.
2703	 */
2704	pm = pci_find_bar(dev, reg);
2705	if (pm != NULL) {
2706		maprange = pci_maprange(pm->pm_value);
2707		barlen = maprange == 64 ? 2 : 1;
2708		return (barlen);
2709	}
2710
2711	pci_read_bar(dev, reg, &map, &testval);
2712	if (PCI_BAR_MEM(map)) {
2713		type = SYS_RES_MEMORY;
2714		if (map & PCIM_BAR_MEM_PREFETCH)
2715			prefetch = 1;
2716	} else
2717		type = SYS_RES_IOPORT;
2718	mapsize = pci_mapsize(testval);
2719	base = pci_mapbase(map);
2720#ifdef __PCI_BAR_ZERO_VALID
2721	basezero = 0;
2722#else
2723	basezero = base == 0;
2724#endif
2725	maprange = pci_maprange(map);
2726	barlen = maprange == 64 ? 2 : 1;
2727
2728	/*
2729	 * For I/O registers, if bottom bit is set, and the next bit up
2730	 * isn't clear, we know we have a BAR that doesn't conform to the
2731	 * spec, so ignore it.  Also, sanity check the size of the data
2732	 * areas to the type of memory involved.  Memory must be at least
2733	 * 16 bytes in size, while I/O ranges must be at least 4.
2734	 */
2735	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2736		return (barlen);
2737	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2738	    (type == SYS_RES_IOPORT && mapsize < 2))
2739		return (barlen);
2740
2741	/* Save a record of this BAR. */
2742	pm = pci_add_bar(dev, reg, map, mapsize);
2743	if (bootverbose) {
2744		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2745		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2746		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2747			printf(", port disabled\n");
2748		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2749			printf(", memory disabled\n");
2750		else
2751			printf(", enabled\n");
2752	}
2753
2754	/*
2755	 * If base is 0, then we have problems if this architecture does
2756	 * not allow that.  It is best to ignore such entries for the
2757	 * moment.  These will be allocated later if the driver specifically
2758	 * requests them.  However, some removable busses look better when
 * all resources are allocated, so allow '0' to be overridden.
2760	 *
 * Similarly treat maps whose value is the same as the test value
2762	 * read back.  These maps have had all f's written to them by the
2763	 * BIOS in an attempt to disable the resources.
2764	 */
2765	if (!force && (basezero || map == testval))
2766		return (barlen);
2767	if ((u_long)base != base) {
2768		device_printf(bus,
2769		    "pci%d:%d:%d:%d bar %#x too many address bits",
2770		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2771		    pci_get_function(dev), reg);
2772		return (barlen);
2773	}
2774
2775	/*
2776	 * This code theoretically does the right thing, but has
2777	 * undesirable side effects in some cases where peripherals
2778	 * respond oddly to having these bits enabled.  Let the user
2779	 * be able to turn them off (since pci_enable_io_modes is 1 by
2780	 * default).
2781	 */
2782	if (pci_enable_io_modes) {
2783		/* Turn on resources that have been left off by a lazy BIOS */
2784		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2785			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2786			cmd |= PCIM_CMD_PORTEN;
2787			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2788		}
2789		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2790			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2791			cmd |= PCIM_CMD_MEMEN;
2792			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2793		}
2794	} else {
2795		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2796			return (barlen);
2797		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2798			return (barlen);
2799	}
2800
2801	count = (pci_addr_t)1 << mapsize;
2802	if (basezero || base == pci_mapbase(testval)) {
2803		start = 0;	/* Let the parent decide. */
2804		end = ~0ul;
2805	} else {
2806		start = base;
2807		end = base + count - 1;
2808	}
2809	resource_list_add(rl, type, reg, start, end, count);
2810
2811	/*
2812	 * Try to allocate the resource for this BAR from our parent
2813	 * so that this resource range is already reserved.  The
2814	 * driver for this device will later inherit this resource in
2815	 * pci_alloc_resource().
2816	 */
2817	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2818	    prefetch ? RF_PREFETCHABLE : 0);
2819	if (res == NULL) {
2820		/*
2821		 * If the allocation fails, delete the resource list entry
2822		 * to force pci_alloc_resource() to allocate resources
2823		 * from the parent.
2824		 */
2825		resource_list_delete(rl, type, reg);
2826	} else {
2827		start = rman_get_start(res);
2828		pci_write_bar(dev, pm, start);
2829	}
2830	return (barlen);
2831}
2832
2833/*
2834 * For ATA devices we need to decide early what addressing mode to use.
2835 * Legacy demands that the primary and secondary ATA ports sits on the
2836 * same addresses that old ISA hardware did. This dictates that we use
2837 * those addresses and ignore the BAR's if we cannot set PCI native
2838 * addressing mode.
2839 */
2840static void
2841pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2842    uint32_t prefetchmask)
2843{
2844	struct resource *r;
2845	int rid, type, progif;
2846#if 0
2847	/* if this device supports PCI native addressing use it */
2848	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2849	if ((progif & 0x8a) == 0x8a) {
2850		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2851		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2852			printf("Trying ATA native PCI addressing mode\n");
2853			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2854		}
2855	}
2856#endif
2857	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2858	type = SYS_RES_IOPORT;
2859	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2860		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2861		    prefetchmask & (1 << 0));
2862		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2863		    prefetchmask & (1 << 1));
2864	} else {
2865		rid = PCIR_BAR(0);
2866		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2867		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2868		    0x1f7, 8, 0);
2869		rid = PCIR_BAR(1);
2870		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2871		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2872		    0x3f6, 1, 0);
2873	}
2874	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2875		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2876		    prefetchmask & (1 << 2));
2877		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2878		    prefetchmask & (1 << 3));
2879	} else {
2880		rid = PCIR_BAR(2);
2881		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2882		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2883		    0x177, 8, 0);
2884		rid = PCIR_BAR(3);
2885		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2886		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2887		    0x376, 1, 0);
2888	}
2889	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2890	    prefetchmask & (1 << 4));
2891	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2892	    prefetchmask & (1 << 5));
2893}
2894
/*
 * Determine the INTx IRQ for a device and record it: a user tunable is
 * consulted first, then either the intline config register or an IRQ
 * routed by the parent bridge.  The result is written back to intline
 * (if changed) and added to the device's resource list as IRQ rid 0.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * "hw.pci<dom>.<bus>.<slot>.INT<pin>.irq".  Values outside
	 * 1..254 are rejected.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2942
/*
 * Perform early OHCI takeover from SMM.  If the InterruptRouting bit
 * indicates SMM firmware owns the controller, request an ownership
 * change and poll for up to ~100ms; reset the controller if the SMM
 * never relinquishes it, then mask all controller interrupts.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via its first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM currently owns the controller; ask for it back. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll for the handoff: 100 iterations of 1ms each. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never released the controller; reset it. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2979
2980/* Perform early UHCI takeover from SMM. */
2981static void
2982uhci_early_takeover(device_t self)
2983{
2984	struct resource *res;
2985	int rid;
2986
2987	/*
2988	 * Set the PIRQD enable bit and switch off all the others. We don't
2989	 * want legacy support to interfere with us XXX Does this also mean
2990	 * that the BIOS won't touch the keyboard anymore if it is connected
2991	 * to the ports of the root hub?
2992	 */
2993	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2994
2995	/* Disable interrupts */
2996	rid = PCI_UHCI_BASE_REG;
2997	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2998	if (res != NULL) {
2999		bus_write_2(res, UHCI_INTR, 0);
3000		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3001	}
3002}
3003
/*
 * Perform early EHCI takeover from SMM.  Walk the extended capability
 * list in config space looking for the USB legacy support capability;
 * if the BIOS semaphore is set, claim the OS semaphore and poll for up
 * to ~100ms for the BIOS to release ownership, then mask the
 * controller's interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via its first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	/* HCCPARAMS holds the config-space offset of the capability list. */
	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not claim ownership; nothing to do. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Set the OS semaphore to request the handoff. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll for the BIOS to drop its semaphore: 100 x 1ms. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3059
/*
 * Perform early XHCI takeover from SMM.  Walk the extended capability
 * list in MMIO space looking for the USB legacy support capability; if
 * the BIOS semaphore is set, claim the OS semaphore and poll up to ~5
 * seconds for the BIOS to release ownership, then stop the controller
 * (clearing USBCMD masks its interrupts).
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via its first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	/* HCSPARAMS0 holds the offset of the extended capability list. */
	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones sentinel so the first loop iteration is entered. */
	eec = -1;

	/*
	 * Synchronise with the BIOS if it owns the controller.  Offsets
	 * in the capability list are in 32-bit dwords, hence the << 2.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			/* BIOS does not claim ownership; nothing to do. */
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Set the OS semaphore to request the handoff. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds (5000 x 1ms) */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS to post the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3121
/*
 * Populate a device's resource list: probe its BARs (with special
 * handling for legacy ATA controllers and quirked registers), assign
 * its INTx interrupt, and perform early takeover of USB controllers
 * from SMM firmware where enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * pci_add_map() returns the number of BAR slots
			 * consumed (2 for a 64-bit BAR), so advance i by
			 * its return value.
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM firmware early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3195
/*
 * Enumerate a PCI bus: scan every slot (and every function of
 * multi-function devices), read each present device's config space,
 * and attach it as a child of this bus.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* Brief delay before touching config space for this slot. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with no device or an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices get all 8 functions scanned. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3228
3229void
3230pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3231{
3232	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3233	device_set_ivars(dinfo->cfg.dev, dinfo);
3234	resource_list_init(&dinfo->resources);
3235	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3236	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3237	pci_print_verbose(dinfo);
3238	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3239}
3240
/*
 * Probe method for the generic PCI bus driver.  Always succeeds with a
 * generic priority so more specific subclasses can win the device.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3250
/*
 * Common attach work shared by pci(4) and its subclasses: report the
 * domain/bus pair and set up the softc's DMA tag.  When a platform
 * defines PCI_DMA_BOUNDARY, a boundary-constrained tag is created for
 * buses whose grandparent is not itself a PCI bus (i.e. top-level
 * buses); otherwise the parent's tag is inherited.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/* Only top-level PCI buses get their own boundary-limited tag. */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or default to) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3285
/*
 * Attach method for the generic PCI bus driver: perform common setup,
 * then enumerate and attach all children.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3306
3307static void
3308pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3309    int state)
3310{
3311	device_t child, pcib;
3312	struct pci_devinfo *dinfo;
3313	int dstate, i;
3314
3315	/*
3316	 * Set the device to the given state.  If the firmware suggests
3317	 * a different power state, use it instead.  If power management
3318	 * is not present, the firmware is responsible for managing
3319	 * device power.  Skip children who aren't attached since they
3320	 * are handled separately.
3321	 */
3322	pcib = device_get_parent(dev);
3323	for (i = 0; i < numdevs; i++) {
3324		child = devlist[i];
3325		dinfo = device_get_ivars(child);
3326		dstate = state;
3327		if (device_is_attached(child) &&
3328		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3329			pci_set_powerstate(child, dstate);
3330	}
3331}
3332
/*
 * Suspend method: save each child's config space, suspend the children
 * via the generic bus method, and then (if enabled by the
 * pci_do_power_suspend knob) power everything down to D3.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3364
/*
 * Resume method: power children back to D0 (if enabled), restore their
 * saved config space, and resume them — critical device classes
 * (display, memory, bridge, base peripherals) first, everything else
 * in a second pass.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Unattached devices are powered down again after saving. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3420
3421static void
3422pci_load_vendor_data(void)
3423{
3424	caddr_t data;
3425	void *ptr;
3426	size_t sz;
3427
3428	data = preload_search_by_type("pci_vendor_data");
3429	if (data != NULL) {
3430		ptr = preload_fetch_addr(data);
3431		sz = preload_fetch_size(data);
3432		if (ptr != NULL && sz != 0) {
3433			pci_vendordata = ptr;
3434			pci_vendordata_size = sz;
3435			/* terminate the database */
3436			pci_vendordata[pci_vendordata_size] = '\n';
3437		}
3438	}
3439}
3440
/*
 * Bus callback invoked when a new driver is registered: let the driver
 * identify devices, then re-probe every child that currently has no
 * driver, restoring its config space first and saving it again (and
 * powering it down) if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reconsider children that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3469
/*
 * Set up an interrupt handler for a child device.  Beyond the generic
 * bus work, this manages the PCI specifics: for INTx (rid 0) the
 * command-register interrupt disable bit is cleared; for MSI/MSI-X the
 * message is mapped via the parent bridge, enabled on first use, its
 * handler count incremented, and INTx disabled.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI address/data on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is installed. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Enable and unmask on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3561
3562int
3563pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3564    void *cookie)
3565{
3566	struct msix_table_entry *mte;
3567	struct resource_list_entry *rle;
3568	struct pci_devinfo *dinfo;
3569	int error, rid;
3570
3571	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3572		return (EINVAL);
3573
3574	/* If this isn't a direct child, just bail out */
3575	if (device_get_parent(child) != dev)
3576		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3577
3578	rid = rman_get_rid(irq);
3579	if (rid == 0) {
3580		/* Mask INTx */
3581		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3582	} else {
3583		/*
3584		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3585		 * decrement the appropriate handlers count and mask the
3586		 * MSI-X message, or disable MSI messages if the count
3587		 * drops to 0.
3588		 */
3589		dinfo = device_get_ivars(child);
3590		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3591		if (rle->res != irq)
3592			return (EINVAL);
3593		if (dinfo->cfg.msi.msi_alloc > 0) {
3594			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3595			    ("MSI-X index too high"));
3596			if (dinfo->cfg.msi.msi_handlers == 0)
3597				return (EINVAL);
3598			dinfo->cfg.msi.msi_handlers--;
3599			if (dinfo->cfg.msi.msi_handlers == 0)
3600				pci_disable_msi(child);
3601		} else {
3602			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3603			    ("No MSI or MSI-X interrupts allocated"));
3604			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3605			    ("MSI-X index too high"));
3606			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3607			if (mte->mte_handlers == 0)
3608				return (EINVAL);
3609			mte->mte_handlers--;
3610			if (mte->mte_handlers == 0)
3611				pci_mask_msix(child, rid - 1);
3612		}
3613	}
3614	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3615	if (rid > 0)
3616		KASSERT(error == 0,
3617		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3618	return (error);
3619}
3620
/*
 * Print a one-line description of a child device: its resources (I/O
 * ports, memory, IRQs), flags, and slot/function location.  Returns
 * the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3646
/*
 * Table of human-readable class/subclass descriptions used by
 * pci_probe_nomatch() for devices with no driver and no database
 * entry.  A subclass of -1 names the class as a whole; the table is
 * terminated by an all-zero entry.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3739
/*
 * Bus callback for children with no matching driver: print a
 * description from the loaded vendor database if available, otherwise
 * a generic class/subclass description, then save the device's config
 * space and power it down.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		/* Print "class, subclass" (subclass omitted if unknown). */
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	pci_cfg_save(child, device_get_ivars(child), 1);
}
3779
3780/*
3781 * Parse the PCI device database, if loaded, and return a pointer to a
3782 * description of the device.
3783 *
3784 * The database is flat text formatted as follows:
3785 *
3786 * Any line not in a valid format is ignored.
3787 * Lines are terminated with newline '\n' characters.
3788 *
3789 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3790 * the vendor name.
3791 *
3792 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3793 * - devices cannot be listed without a corresponding VENDOR line.
3794 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3795 * another TAB, then the device name.
3796 */
3797
3798/*
3799 * Assuming (ptr) points to the beginning of a line in the database,
3800 * return the vendor or device and description of the next entry.
3801 * The value of (vendor) or (device) inappropriate for the entry type
3802 * is set to -1.  Returns nonzero at the end of the database.
3803 *
3804 * Note that this is slightly unrobust in the face of corrupt data;
3805 * we attempt to safeguard against this by spamming the end of the
3806 * database with a newline when we initialise.
3807 */
3808static int
3809pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3810{
3811	char	*cp = *ptr;
3812	int	left;
3813
3814	*device = -1;
3815	*vendor = -1;
3816	**desc = '\0';
3817	for (;;) {
3818		left = pci_vendordata_size - (cp - pci_vendordata);
3819		if (left <= 0) {
3820			*ptr = cp;
3821			return(1);
3822		}
3823
3824		/* vendor entry? */
3825		if (*cp != '\t' &&
3826		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3827			break;
3828		/* device entry? */
3829		if (*cp == '\t' &&
3830		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3831			break;
3832
3833		/* skip to next line */
3834		while (*cp != '\n' && left > 0) {
3835			cp++;
3836			left--;
3837		}
3838		if (*cp == '\n') {
3839			cp++;
3840			left--;
3841		}
3842	}
3843	/* skip to next line */
3844	while (*cp != '\n' && left > 0) {
3845		cp++;
3846		left--;
3847	}
3848	if (*cp == '\n' && left > 0)
3849		cp++;
3850	*ptr = cp;
3851	return(0);
3852}
3853
/*
 * Build a malloc'd "vendor, device" description string for 'dev' from
 * the loaded PCI vendor database.  Returns NULL if no database is
 * loaded, the vendor is not listed, or allocation fails; otherwise the
 * caller must free the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the description size parsed per line. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Reached the next vendor section: device not listed. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the hex device ID when no name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3906
/*
 * Bus ivar read accessor: expose the cached PCI configuration values
 * (IDs, class codes, interrupt routing, latency/cache-line settings,
 * and bus address) to child drivers.  Returns ENOENT for ivars this
 * bus does not implement.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high word) and vendor (low word) ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3989
3990int
3991pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3992{
3993	struct pci_devinfo *dinfo;
3994
3995	dinfo = device_get_ivars(child);
3996
3997	switch (which) {
3998	case PCI_IVAR_INTPIN:
3999		dinfo->cfg.intpin = value;
4000		return (0);
4001	case PCI_IVAR_ETHADDR:
4002	case PCI_IVAR_SUBVENDOR:
4003	case PCI_IVAR_SUBDEVICE:
4004	case PCI_IVAR_VENDOR:
4005	case PCI_IVAR_DEVICE:
4006	case PCI_IVAR_DEVID:
4007	case PCI_IVAR_CLASS:
4008	case PCI_IVAR_SUBCLASS:
4009	case PCI_IVAR_PROGIF:
4010	case PCI_IVAR_REVID:
4011	case PCI_IVAR_IRQ:
4012	case PCI_IVAR_DOMAIN:
4013	case PCI_IVAR_BUS:
4014	case PCI_IVAR_SLOT:
4015	case PCI_IVAR_FUNCTION:
4016		return (EINVAL);	/* disallow for now */
4017
4018	default:
4019		return (ENOENT);
4020	}
4021}
4022
4023#include "opt_ddb.h"
4024#ifdef DDB
4025#include <ddb/ddb.h>
4026#include <sys/cons.h>
4027
4028/*
4029 * List resources based on pci map registers, used for within ddb
4030 */
4031
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (driver name/unit or "none",
 * bus address, class/card/chip IDs, revision, and header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* none_count numbers devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4071#endif /* DDB */
4072
/*
 * Lazily reserve the backing resource for one of a child's BARs:
 * size the BAR (or reuse a previously probed size), check that the
 * requested resource type matches the BAR type, allocate the range
 * from the parent (inactive), record it in the child's resource list
 * as RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts the BAR's own type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4178
/*
 * Resource allocation for direct PCI children; requests from
 * grandchildren are passed straight up the tree.  IRQ requests may
 * first route a legacy interrupt; I/O and memory requests reserve the
 * underlying BAR on first use (except PCI-PCI bridge windows under
 * NEW_PCIB, which are not BARs and bypass the resource list).
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4249
4250int
4251pci_activate_resource(device_t dev, device_t child, int type, int rid,
4252    struct resource *r)
4253{
4254	struct pci_devinfo *dinfo;
4255	int error;
4256
4257	error = bus_generic_activate_resource(dev, child, type, rid, r);
4258	if (error)
4259		return (error);
4260
4261	/* Enable decoding in the command register when activating BARs. */
4262	if (device_get_parent(child) == dev) {
4263		/* Device ROMs need their decoding explicitly enabled. */
4264		dinfo = device_get_ivars(child);
4265		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4266			pci_write_bar(child, pci_find_bar(child, rid),
4267			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4268		switch (type) {
4269		case SYS_RES_IOPORT:
4270		case SYS_RES_MEMORY:
4271			error = PCI_ENABLE_IO(dev, child, type);
4272			break;
4273		}
4274	}
4275	return (error);
4276}
4277
4278int
4279pci_deactivate_resource(device_t dev, device_t child, int type,
4280    int rid, struct resource *r)
4281{
4282	struct pci_devinfo *dinfo;
4283	int error;
4284
4285	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4286	if (error)
4287		return (error);
4288
4289	/* Disable decoding for device ROMs. */
4290	if (device_get_parent(child) == dev) {
4291		dinfo = device_get_ivars(child);
4292		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4293			pci_write_bar(child, pci_find_bar(child, rid),
4294			    rman_get_start(r));
4295	}
4296	return (0);
4297}
4298
/*
 * Detach and delete a child device: disable its memory/port decode,
 * release every resource recorded on its resource list (forcibly, with
 * a diagnostic, if a driver still holds one), then delete the device
 * and free its config-space bookkeeping.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4338
/*
 * Remove a resource list entry for a direct child, unreserving the
 * backing resource first.  Refuses (with a diagnostic) if the child
 * still has the resource active or allocated.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only resources of our own direct children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4368
4369struct resource_list *
4370pci_get_resource_list (device_t dev, device_t child)
4371{
4372	struct pci_devinfo *dinfo = device_get_ivars(child);
4373
4374	return (&dinfo->resources);
4375}
4376
4377bus_dma_tag_t
4378pci_get_dma_tag(device_t bus, device_t dev)
4379{
4380	struct pci_softc *sc = device_get_softc(bus);
4381
4382	return (sc->sc_dma_tag);
4383}
4384
4385uint32_t
4386pci_read_config_method(device_t dev, device_t child, int reg, int width)
4387{
4388	struct pci_devinfo *dinfo = device_get_ivars(child);
4389	pcicfgregs *cfg = &dinfo->cfg;
4390
4391	return (PCIB_READ_CONFIG(device_get_parent(dev),
4392	    cfg->bus, cfg->slot, cfg->func, reg, width));
4393}
4394
4395void
4396pci_write_config_method(device_t dev, device_t child, int reg,
4397    uint32_t val, int width)
4398{
4399	struct pci_devinfo *dinfo = device_get_ivars(child);
4400	pcicfgregs *cfg = &dinfo->cfg;
4401
4402	PCIB_WRITE_CONFIG(device_get_parent(dev),
4403	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4404}
4405
4406int
4407pci_child_location_str_method(device_t dev, device_t child, char *buf,
4408    size_t buflen)
4409{
4410
4411	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4412	    pci_get_function(child));
4413	return (0);
4414}
4415
4416int
4417pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4418    size_t buflen)
4419{
4420	struct pci_devinfo *dinfo;
4421	pcicfgregs *cfg;
4422
4423	dinfo = device_get_ivars(child);
4424	cfg = &dinfo->cfg;
4425	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4426	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4427	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4428	    cfg->progif);
4429	return (0);
4430}
4431
4432int
4433pci_assign_interrupt_method(device_t dev, device_t child)
4434{
4435	struct pci_devinfo *dinfo = device_get_ivars(child);
4436	pcicfgregs *cfg = &dinfo->cfg;
4437
4438	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4439	    cfg->intpin));
4440}
4441
4442static int
4443pci_modevent(module_t mod, int what, void *arg)
4444{
4445	static struct cdev *pci_cdev;
4446
4447	switch (what) {
4448	case MOD_LOAD:
4449		STAILQ_INIT(&pci_devq);
4450		pci_generation = 0;
4451		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4452		    "pci");
4453		pci_load_vendor_data();
4454		break;
4455
4456	case MOD_UNLOAD:
4457		destroy_dev(pci_cdev);
4458		break;
4459	}
4460
4461	return (0);
4462}
4463
/*
 * Write the PCI Express control registers saved by pci_cfg_save_pcie()
 * back to the device.  Which registers exist depends on the capability
 * version and port type, so the gating here mirrors the save side.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" registers only exist from capability version 2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4499
4500static void
4501pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4502{
4503	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4504	    dinfo->cfg.pcix.pcix_command,  2);
4505}
4506
/*
 * Restore a type 0 device's configuration space (power state, BARs,
 * command, interrupt and latency registers, plus any PCIe, PCI-X, MSI
 * and MSI-X state) after a reset or power-state transition.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4556
/*
 * Snapshot the PCI Express control registers so that
 * pci_cfg_restore_pcie() can reprogram them later.  Registers are
 * only read where the capability version and port type say they
 * exist.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" registers only exist from capability version 2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4594
4595static void
4596pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4597{
4598	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4599	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4600}
4601
/*
 * Save a type 0 device's writable configuration registers into its
 * pci_devinfo, and optionally (setstate != 0, subject to the
 * pci_do_power_nodriver policy) put the device into the D3 power
 * state.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4687
4688/* Wrapper APIs suitable for device driver use. */
4689void
4690pci_save_state(device_t dev)
4691{
4692	struct pci_devinfo *dinfo;
4693
4694	dinfo = device_get_ivars(dev);
4695	pci_cfg_save(dev, dinfo, 0);
4696}
4697
4698void
4699pci_restore_state(device_t dev)
4700{
4701	struct pci_devinfo *dinfo;
4702
4703	dinfo = device_get_ivars(dev);
4704	pci_cfg_restore(dev, dinfo);
4705}
4706