pci.c revision 233676
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 233676 2012-03-29 19:03:22Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
/*
 * PCI_DMA_BOUNDARY is only defined when bus addresses can exceed 32 bits;
 * its value is 2^32 (4GB).
 */
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
#define	PCI_DMA_BOUNDARY	0x100000000
#endif

/*
 * Evaluates to true when 'reg' is the BIOS register that belongs to the
 * header type recorded in 'cfg': PCIR_BIOS for normal headers and
 * PCIR_BIOS_1 for bridge headers.
 *
 * Both arguments are fully parenthesized so that callers may pass
 * arbitrary expressions (e.g. "a | b") without operator-precedence
 * surprises.  'cfg' and 'reg' are each evaluated up to twice, so avoid
 * arguments with side effects.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
80
81static pci_addr_t	pci_mapbase(uint64_t mapreg);
82static const char	*pci_maptype(uint64_t mapreg);
83static int		pci_mapsize(uint64_t testval);
84static int		pci_maprange(uint64_t mapreg);
85static pci_addr_t	pci_rombase(uint64_t mapreg);
86static int		pci_romsize(uint64_t testval);
87static void		pci_fixancient(pcicfgregs *cfg);
88static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
89
90static int		pci_porten(device_t dev);
91static int		pci_memen(device_t dev);
92static void		pci_assign_interrupt(device_t bus, device_t dev,
93			    int force_route);
94static int		pci_add_map(device_t bus, device_t dev, int reg,
95			    struct resource_list *rl, int force, int prefetch);
96static int		pci_probe(device_t dev);
97static int		pci_attach(device_t dev);
98static void		pci_load_vendor_data(void);
99static int		pci_describe_parse_line(char **ptr, int *vendor,
100			    int *device, char **desc);
101static char		*pci_describe_device(device_t dev);
102static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
103static int		pci_modevent(module_t mod, int what, void *arg);
104static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105			    pcicfgregs *cfg);
106static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108			    int reg, uint32_t *data);
109#if 0
110static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111			    int reg, uint32_t data);
112#endif
113static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114static void		pci_disable_msi(device_t dev);
115static void		pci_enable_msi(device_t dev, uint64_t address,
116			    uint16_t data);
117static void		pci_enable_msix(device_t dev, u_int index,
118			    uint64_t address, uint32_t data);
119static void		pci_mask_msix(device_t dev, u_int index);
120static void		pci_unmask_msix(device_t dev, u_int index);
121static int		pci_msi_blacklisted(void);
122static void		pci_resume_msi(device_t dev);
123static void		pci_resume_msix(device_t dev);
124static int		pci_remap_intr_method(device_t bus, device_t dev,
125			    u_int irq);
126
127static device_method_t pci_methods[] = {
128	/* Device interface */
129	DEVMETHOD(device_probe,		pci_probe),
130	DEVMETHOD(device_attach,	pci_attach),
131	DEVMETHOD(device_detach,	bus_generic_detach),
132	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
133	DEVMETHOD(device_suspend,	pci_suspend),
134	DEVMETHOD(device_resume,	pci_resume),
135
136	/* Bus interface */
137	DEVMETHOD(bus_print_child,	pci_print_child),
138	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
139	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
140	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
141	DEVMETHOD(bus_driver_added,	pci_driver_added),
142	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
143	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
144
145	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
146	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
147	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
148	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
149	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
150	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
151	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
152	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
153	DEVMETHOD(bus_activate_resource, pci_activate_resource),
154	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
155	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
156	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
157	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
158
159	/* PCI interface */
160	DEVMETHOD(pci_read_config,	pci_read_config_method),
161	DEVMETHOD(pci_write_config,	pci_write_config_method),
162	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
163	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
164	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
165	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
166	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
167	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
168	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
169	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
170	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
171	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
172	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
173	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
174	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
175	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
176	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
177	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
178	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
179	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
180
181	DEVMETHOD_END
182};
183
184DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
185
186static devclass_t pci_devclass;
187DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
188MODULE_VERSION(pci, 1);
189
190static char	*pci_vendordata;
191static size_t	pci_vendordata_size;
192
/*
 * One entry of the device quirk table.
 *
 * devid: 32-bit vendor/device identifier; the entries below carry the
 *        vendor ID in the low 16 bits (e.g. 0x8086) and the device ID in
 *        the high 16 bits.
 * type:  one of the PCI_QUIRK_* values defined below.
 * arg1:  for PCI_QUIRK_MAP_REG and PCI_QUIRK_UNMAP_REG, the config-space
 *        offset of the map register concerned; 0 for the other types.
 * arg2:  zero in every entry of the table below.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
	int	arg1;
	int	arg2;
};

/*
 * Quirk table, terminated by an all-zero entry (devid == 0).
 *
 * The array is declared with a single 'const' qualifier; repeating the
 * qualifier adds nothing and only provokes compiler warnings.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
	 * VMware.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }
};
260
261/* map register information */
262#define	PCI_MAPMEM	0x01	/* memory map */
263#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264#define	PCI_MAPPORT	0x04	/* port map */
265
266struct devlist pci_devq;
267uint32_t pci_generation;
268uint32_t pci_numdevs = 0;
269static int pcie_chipset, pcix_chipset;
270
271/* sysctl vars */
272SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273
274static int pci_enable_io_modes = 1;
275TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277    &pci_enable_io_modes, 1,
278    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279enable these bits correctly.  We'd like to do this all the time, but there\n\
280are some peripherals that this causes problems with.");
281
282static int pci_do_power_nodriver = 0;
283TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
284SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
285    &pci_do_power_nodriver, 0,
286  "Place a function into D3 state when no driver attaches to it.  0 means\n\
287disable.  1 means conservatively place devices into D3 state.  2 means\n\
288agressively place devices into D3 state.  3 means put absolutely everything\n\
289in D3 state.");
290
291int pci_do_power_resume = 1;
292TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
293SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
294    &pci_do_power_resume, 1,
295  "Transition from D3 -> D0 on resume.");
296
297int pci_do_power_suspend = 1;
298TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
299SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
300    &pci_do_power_suspend, 1,
301  "Transition from D0 -> D3 on suspend.");
302
303static int pci_do_msi = 1;
304TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
305SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
306    "Enable support for MSI interrupts");
307
308static int pci_do_msix = 1;
309TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
310SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
311    "Enable support for MSI-X interrupts");
312
313static int pci_honor_msi_blacklist = 1;
314TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
315SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
316    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
317
318#if defined(__i386__) || defined(__amd64__)
319static int pci_usb_takeover = 1;
320#else
321static int pci_usb_takeover = 0;
322#endif
323TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
324SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
325    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
326Disable this if you depend on BIOS emulation of USB devices, that is\n\
327you use USB devices (like keyboard or mouse) but do not load USB drivers");
328
329/* Find a device_t by bus/slot/function in domain 0 */
330
331device_t
332pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
333{
334
335	return (pci_find_dbsf(0, bus, slot, func));
336}
337
338/* Find a device_t by domain/bus/slot/function */
339
340device_t
341pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
342{
343	struct pci_devinfo *dinfo;
344
345	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346		if ((dinfo->cfg.domain == domain) &&
347		    (dinfo->cfg.bus == bus) &&
348		    (dinfo->cfg.slot == slot) &&
349		    (dinfo->cfg.func == func)) {
350			return (dinfo->cfg.dev);
351		}
352	}
353
354	return (NULL);
355}
356
357/* Find a device_t by vendor/device ID */
358
359device_t
360pci_find_device(uint16_t vendor, uint16_t device)
361{
362	struct pci_devinfo *dinfo;
363
364	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
365		if ((dinfo->cfg.vendor == vendor) &&
366		    (dinfo->cfg.device == device)) {
367			return (dinfo->cfg.dev);
368		}
369	}
370
371	return (NULL);
372}
373
374device_t
375pci_find_class(uint8_t class, uint8_t subclass)
376{
377	struct pci_devinfo *dinfo;
378
379	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
380		if (dinfo->cfg.baseclass == class &&
381		    dinfo->cfg.subclass == subclass) {
382			return (dinfo->cfg.dev);
383		}
384	}
385
386	return (NULL);
387}
388
389static int
390pci_printf(pcicfgregs *cfg, const char *fmt, ...)
391{
392	va_list ap;
393	int retval;
394
395	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
396	    cfg->func);
397	va_start(ap, fmt);
398	retval += vprintf(fmt, ap);
399	va_end(ap);
400	return (retval);
401}
402
403/* return base address of memory or port map */
404
405static pci_addr_t
406pci_mapbase(uint64_t mapreg)
407{
408
409	if (PCI_BAR_MEM(mapreg))
410		return (mapreg & PCIM_BAR_MEM_BASE);
411	else
412		return (mapreg & PCIM_BAR_IO_BASE);
413}
414
415/* return map type of memory or port map */
416
417static const char *
418pci_maptype(uint64_t mapreg)
419{
420
421	if (PCI_BAR_IO(mapreg))
422		return ("I/O Port");
423	if (mapreg & PCIM_BAR_MEM_PREFETCH)
424		return ("Prefetchable Memory");
425	return ("Memory");
426}
427
/*
 * Return log2 of the map size decoded for a memory or port map.
 *
 * 'testval' is first reduced to its base-address bits with pci_mapbase();
 * the result is the index of the lowest set bit of those bits, or 0 when
 * no address bit is set.
 */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int bits;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count trailing zero bits; terminates because base is non-zero. */
	for (bits = 0; (base & 1) == 0; bits++)
		base >>= 1;
	return (bits);
}
446
447/* return base address of device ROM */
448
449static pci_addr_t
450pci_rombase(uint64_t mapreg)
451{
452
453	return (mapreg & PCIM_BIOS_ADDR_MASK);
454}
455
/*
 * Return log2 of the map size decoded for the device ROM.
 *
 * 'testval' is first reduced to its address bits with pci_rombase(); the
 * result is the index of the lowest set bit of those bits, or 0 when no
 * address bit is set.
 */
static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int bits;

	base = pci_rombase(testval);
	if (base == 0)
		return (0);

	/* Count trailing zero bits; terminates because base is non-zero. */
	for (bits = 0; (base & 1) == 0; bits++)
		base >>= 1;
	return (bits);
}
474
475/* return log2 of address range supported by map register */
476
477static int
478pci_maprange(uint64_t mapreg)
479{
480	int ln2range = 0;
481
482	if (PCI_BAR_IO(mapreg))
483		ln2range = 32;
484	else
485		switch (mapreg & PCIM_BAR_MEM_TYPE) {
486		case PCIM_BAR_MEM_32:
487			ln2range = 32;
488			break;
489		case PCIM_BAR_MEM_1MB:
490			ln2range = 20;
491			break;
492		case PCIM_BAR_MEM_64:
493			ln2range = 64;
494			break;
495		}
496	return (ln2range);
497}
498
499/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
500
501static void
502pci_fixancient(pcicfgregs *cfg)
503{
504	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
505		return;
506
507	/* PCI to PCI bridges use header type 1 */
508	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
509		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
510}
511
512/* extract header type specific config data */
513
514static void
515pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
516{
517#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
518	switch (cfg->hdrtype & PCIM_HDRTYPE) {
519	case PCIM_HDRTYPE_NORMAL:
520		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
521		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
522		cfg->nummaps	    = PCI_MAXMAPS_0;
523		break;
524	case PCIM_HDRTYPE_BRIDGE:
525		cfg->nummaps	    = PCI_MAXMAPS_1;
526		break;
527	case PCIM_HDRTYPE_CARDBUS:
528		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
529		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
530		cfg->nummaps	    = PCI_MAXMAPS_2;
531		break;
532	}
533#undef REG
534}
535
536/* read configuration header into pcicfgregs structure */
537struct pci_devinfo *
538pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
539{
540#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
541	pcicfgregs *cfg = NULL;
542	struct pci_devinfo *devlist_entry;
543	struct devlist *devlist_head;
544
545	devlist_head = &pci_devq;
546
547	devlist_entry = NULL;
548
549	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
550		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
551		if (devlist_entry == NULL)
552			return (NULL);
553
554		cfg = &devlist_entry->cfg;
555
556		cfg->domain		= d;
557		cfg->bus		= b;
558		cfg->slot		= s;
559		cfg->func		= f;
560		cfg->vendor		= REG(PCIR_VENDOR, 2);
561		cfg->device		= REG(PCIR_DEVICE, 2);
562		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
563		cfg->statreg		= REG(PCIR_STATUS, 2);
564		cfg->baseclass		= REG(PCIR_CLASS, 1);
565		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
566		cfg->progif		= REG(PCIR_PROGIF, 1);
567		cfg->revid		= REG(PCIR_REVID, 1);
568		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
569		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
570		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
571		cfg->intpin		= REG(PCIR_INTPIN, 1);
572		cfg->intline		= REG(PCIR_INTLINE, 1);
573
574		cfg->mingnt		= REG(PCIR_MINGNT, 1);
575		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
576
577		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
578		cfg->hdrtype		&= ~PCIM_MFDEV;
579		STAILQ_INIT(&cfg->maps);
580
581		pci_fixancient(cfg);
582		pci_hdrtypedata(pcib, b, s, f, cfg);
583
584		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
585			pci_read_cap(pcib, cfg);
586
587		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
588
589		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
590		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
591		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
592		devlist_entry->conf.pc_sel.pc_func = cfg->func;
593		devlist_entry->conf.pc_hdr = cfg->hdrtype;
594
595		devlist_entry->conf.pc_subvendor = cfg->subvendor;
596		devlist_entry->conf.pc_subdevice = cfg->subdevice;
597		devlist_entry->conf.pc_vendor = cfg->vendor;
598		devlist_entry->conf.pc_device = cfg->device;
599
600		devlist_entry->conf.pc_class = cfg->baseclass;
601		devlist_entry->conf.pc_subclass = cfg->subclass;
602		devlist_entry->conf.pc_progif = cfg->progif;
603		devlist_entry->conf.pc_revid = cfg->revid;
604
605		pci_numdevs++;
606		pci_generation++;
607	}
608	return (devlist_entry);
609#undef REG
610}
611
612static void
613pci_read_cap(device_t pcib, pcicfgregs *cfg)
614{
615#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
616#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
617#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
618	uint64_t addr;
619#endif
620	uint32_t val;
621	int	ptr, nextptr, ptrptr;
622
623	switch (cfg->hdrtype & PCIM_HDRTYPE) {
624	case PCIM_HDRTYPE_NORMAL:
625	case PCIM_HDRTYPE_BRIDGE:
626		ptrptr = PCIR_CAP_PTR;
627		break;
628	case PCIM_HDRTYPE_CARDBUS:
629		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
630		break;
631	default:
632		return;		/* no extended capabilities support */
633	}
634	nextptr = REG(ptrptr, 1);	/* sanity check? */
635
636	/*
637	 * Read capability entries.
638	 */
639	while (nextptr != 0) {
640		/* Sanity check */
641		if (nextptr > 255) {
642			printf("illegal PCI extended capability offset %d\n",
643			    nextptr);
644			return;
645		}
646		/* Find the next entry */
647		ptr = nextptr;
648		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
649
650		/* Process this entry */
651		switch (REG(ptr + PCICAP_ID, 1)) {
652		case PCIY_PMG:		/* PCI power management */
653			if (cfg->pp.pp_cap == 0) {
654				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
655				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
656				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
657				if ((nextptr - ptr) > PCIR_POWER_DATA)
658					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
659			}
660			break;
661		case PCIY_HT:		/* HyperTransport */
662			/* Determine HT-specific capability type. */
663			val = REG(ptr + PCIR_HT_COMMAND, 2);
664
665			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
666				cfg->ht.ht_slave = ptr;
667
668#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
669			switch (val & PCIM_HTCMD_CAP_MASK) {
670			case PCIM_HTCAP_MSI_MAPPING:
671				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
672					/* Sanity check the mapping window. */
673					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
674					    4);
675					addr <<= 32;
676					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
677					    4);
678					if (addr != MSI_INTEL_ADDR_BASE)
679						device_printf(pcib,
680	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
681						    cfg->domain, cfg->bus,
682						    cfg->slot, cfg->func,
683						    (long long)addr);
684				} else
685					addr = MSI_INTEL_ADDR_BASE;
686
687				cfg->ht.ht_msimap = ptr;
688				cfg->ht.ht_msictrl = val;
689				cfg->ht.ht_msiaddr = addr;
690				break;
691			}
692#endif
693			break;
694		case PCIY_MSI:		/* PCI MSI */
695			cfg->msi.msi_location = ptr;
696			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
697			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
698						     PCIM_MSICTRL_MMC_MASK)>>1);
699			break;
700		case PCIY_MSIX:		/* PCI MSI-X */
701			cfg->msix.msix_location = ptr;
702			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
703			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
704			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
705			val = REG(ptr + PCIR_MSIX_TABLE, 4);
706			cfg->msix.msix_table_bar = PCIR_BAR(val &
707			    PCIM_MSIX_BIR_MASK);
708			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
709			val = REG(ptr + PCIR_MSIX_PBA, 4);
710			cfg->msix.msix_pba_bar = PCIR_BAR(val &
711			    PCIM_MSIX_BIR_MASK);
712			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
713			break;
714		case PCIY_VPD:		/* PCI Vital Product Data */
715			cfg->vpd.vpd_reg = ptr;
716			break;
717		case PCIY_SUBVENDOR:
718			/* Should always be true. */
719			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
720			    PCIM_HDRTYPE_BRIDGE) {
721				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
722				cfg->subvendor = val & 0xffff;
723				cfg->subdevice = val >> 16;
724			}
725			break;
726		case PCIY_PCIX:		/* PCI-X */
727			/*
728			 * Assume we have a PCI-X chipset if we have
729			 * at least one PCI-PCI bridge with a PCI-X
730			 * capability.  Note that some systems with
731			 * PCI-express or HT chipsets might match on
732			 * this check as well.
733			 */
734			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
735			    PCIM_HDRTYPE_BRIDGE)
736				pcix_chipset = 1;
737			cfg->pcix.pcix_location = ptr;
738			break;
739		case PCIY_EXPRESS:	/* PCI-express */
740			/*
741			 * Assume we have a PCI-express chipset if we have
742			 * at least one PCI-express device.
743			 */
744			pcie_chipset = 1;
745			cfg->pcie.pcie_location = ptr;
746			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
747			cfg->pcie.pcie_type = val & PCIM_EXP_FLAGS_TYPE;
748			break;
749		default:
750			break;
751		}
752	}
753
754#if defined(__powerpc__)
755	/*
756	 * Enable the MSI mapping window for all HyperTransport
757	 * slaves.  PCI-PCI bridges have their windows enabled via
758	 * PCIB_MAP_MSI().
759	 */
760	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
761	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
762		device_printf(pcib,
763	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
764		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
765		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
766		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
767		     2);
768	}
769#endif
770/* REG and WREG use carry through to next functions */
771}
772
773/*
774 * PCI Vital Product Data
775 */
776
777#define	PCI_VPD_TIMEOUT		1000000
778
779static int
780pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
781{
782	int count = PCI_VPD_TIMEOUT;
783
784	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
785
786	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
787
788	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
789		if (--count < 0)
790			return (ENXIO);
791		DELAY(1);	/* limit looping */
792	}
793	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
794
795	return (0);
796}
797
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * Stores 'data' in the VPD data register, writes 'reg' (4-byte aligned)
 * with bit 0x8000 set to the VPD address register, then polls that
 * register until the bit reads back as clear.  Returns 0 on success or
 * ENXIO if the bit is still set after PCI_VPD_TIMEOUT polls.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int tries;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	for (tries = PCI_VPD_TIMEOUT;
	    (REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0; ) {
		if (--tries < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
817
818#undef PCI_VPD_TIMEOUT
819
820struct vpd_readstate {
821	device_t	pcib;
822	pcicfgregs	*cfg;
823	uint32_t	val;
824	int		bytesinval;
825	int		off;
826	uint8_t		cksum;
827};
828
829static int
830vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
831{
832	uint32_t reg;
833	uint8_t byte;
834
835	if (vrs->bytesinval == 0) {
836		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
837			return (ENXIO);
838		vrs->val = le32toh(reg);
839		vrs->off += 4;
840		byte = vrs->val & 0xff;
841		vrs->bytesinval = 3;
842	} else {
843		vrs->val = vrs->val >> 8;
844		byte = vrs->val & 0xff;
845		vrs->bytesinval--;
846	}
847
848	vrs->cksum += byte;
849	*data = byte;
850	return (0);
851}
852
853static void
854pci_read_vpd(device_t pcib, pcicfgregs *cfg)
855{
856	struct vpd_readstate vrs;
857	int state;
858	int name;
859	int remain;
860	int i;
861	int alloc, off;		/* alloc/off for RO/W arrays */
862	int cksumvalid;
863	int dflen;
864	uint8_t byte;
865	uint8_t byte2;
866
867	/* init vpd reader */
868	vrs.bytesinval = 0;
869	vrs.off = 0;
870	vrs.pcib = pcib;
871	vrs.cfg = cfg;
872	vrs.cksum = 0;
873
874	state = 0;
875	name = remain = i = 0;	/* shut up stupid gcc */
876	alloc = off = 0;	/* shut up stupid gcc */
877	dflen = 0;		/* shut up stupid gcc */
878	cksumvalid = -1;
879	while (state >= 0) {
880		if (vpd_nextbyte(&vrs, &byte)) {
881			state = -2;
882			break;
883		}
884#if 0
885		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
886		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
887		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
888#endif
889		switch (state) {
890		case 0:		/* item name */
891			if (byte & 0x80) {
892				if (vpd_nextbyte(&vrs, &byte2)) {
893					state = -2;
894					break;
895				}
896				remain = byte2;
897				if (vpd_nextbyte(&vrs, &byte2)) {
898					state = -2;
899					break;
900				}
901				remain |= byte2 << 8;
902				if (remain > (0x7f*4 - vrs.off)) {
903					state = -1;
904					pci_printf(cfg,
905					    "invalid VPD data, remain %#x\n",
906					    remain);
907				}
908				name = byte & 0x7f;
909			} else {
910				remain = byte & 0x7;
911				name = (byte >> 3) & 0xf;
912			}
913			switch (name) {
914			case 0x2:	/* String */
915				cfg->vpd.vpd_ident = malloc(remain + 1,
916				    M_DEVBUF, M_WAITOK);
917				i = 0;
918				state = 1;
919				break;
920			case 0xf:	/* End */
921				state = -1;
922				break;
923			case 0x10:	/* VPD-R */
924				alloc = 8;
925				off = 0;
926				cfg->vpd.vpd_ros = malloc(alloc *
927				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
928				    M_WAITOK | M_ZERO);
929				state = 2;
930				break;
931			case 0x11:	/* VPD-W */
932				alloc = 8;
933				off = 0;
934				cfg->vpd.vpd_w = malloc(alloc *
935				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
936				    M_WAITOK | M_ZERO);
937				state = 5;
938				break;
939			default:	/* Invalid data, abort */
940				state = -1;
941				break;
942			}
943			break;
944
945		case 1:	/* Identifier String */
946			cfg->vpd.vpd_ident[i++] = byte;
947			remain--;
948			if (remain == 0)  {
949				cfg->vpd.vpd_ident[i] = '\0';
950				state = 0;
951			}
952			break;
953
954		case 2:	/* VPD-R Keyword Header */
955			if (off == alloc) {
956				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
957				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
958				    M_DEVBUF, M_WAITOK | M_ZERO);
959			}
960			cfg->vpd.vpd_ros[off].keyword[0] = byte;
961			if (vpd_nextbyte(&vrs, &byte2)) {
962				state = -2;
963				break;
964			}
965			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
966			if (vpd_nextbyte(&vrs, &byte2)) {
967				state = -2;
968				break;
969			}
970			dflen = byte2;
971			if (dflen == 0 &&
972			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
973			    2) == 0) {
974				/*
975				 * if this happens, we can't trust the rest
976				 * of the VPD.
977				 */
978				pci_printf(cfg, "bad keyword length: %d\n",
979				    dflen);
980				cksumvalid = 0;
981				state = -1;
982				break;
983			} else if (dflen == 0) {
984				cfg->vpd.vpd_ros[off].value = malloc(1 *
985				    sizeof(*cfg->vpd.vpd_ros[off].value),
986				    M_DEVBUF, M_WAITOK);
987				cfg->vpd.vpd_ros[off].value[0] = '\x00';
988			} else
989				cfg->vpd.vpd_ros[off].value = malloc(
990				    (dflen + 1) *
991				    sizeof(*cfg->vpd.vpd_ros[off].value),
992				    M_DEVBUF, M_WAITOK);
993			remain -= 3;
994			i = 0;
			/* keep in sync w/ state 3's transitions */
996			if (dflen == 0 && remain == 0)
997				state = 0;
998			else if (dflen == 0)
999				state = 2;
1000			else
1001				state = 3;
1002			break;
1003
1004		case 3:	/* VPD-R Keyword Value */
1005			cfg->vpd.vpd_ros[off].value[i++] = byte;
1006			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1007			    "RV", 2) == 0 && cksumvalid == -1) {
1008				if (vrs.cksum == 0)
1009					cksumvalid = 1;
1010				else {
1011					if (bootverbose)
1012						pci_printf(cfg,
1013					    "bad VPD cksum, remain %hhu\n",
1014						    vrs.cksum);
1015					cksumvalid = 0;
1016					state = -1;
1017					break;
1018				}
1019			}
1020			dflen--;
1021			remain--;
			/* keep in sync w/ state 2's transitions */
1023			if (dflen == 0)
1024				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1025			if (dflen == 0 && remain == 0) {
1026				cfg->vpd.vpd_rocnt = off;
1027				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1028				    off * sizeof(*cfg->vpd.vpd_ros),
1029				    M_DEVBUF, M_WAITOK | M_ZERO);
1030				state = 0;
1031			} else if (dflen == 0)
1032				state = 2;
1033			break;
1034
1035		case 4:
1036			remain--;
1037			if (remain == 0)
1038				state = 0;
1039			break;
1040
1041		case 5:	/* VPD-W Keyword Header */
1042			if (off == alloc) {
1043				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1044				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1045				    M_DEVBUF, M_WAITOK | M_ZERO);
1046			}
1047			cfg->vpd.vpd_w[off].keyword[0] = byte;
1048			if (vpd_nextbyte(&vrs, &byte2)) {
1049				state = -2;
1050				break;
1051			}
1052			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1053			if (vpd_nextbyte(&vrs, &byte2)) {
1054				state = -2;
1055				break;
1056			}
1057			cfg->vpd.vpd_w[off].len = dflen = byte2;
1058			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1059			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1060			    sizeof(*cfg->vpd.vpd_w[off].value),
1061			    M_DEVBUF, M_WAITOK);
1062			remain -= 3;
1063			i = 0;
1064			/* keep in sync w/ state 6's transistions */
1065			if (dflen == 0 && remain == 0)
1066				state = 0;
1067			else if (dflen == 0)
1068				state = 5;
1069			else
1070				state = 6;
1071			break;
1072
1073		case 6:	/* VPD-W Keyword Value */
1074			cfg->vpd.vpd_w[off].value[i++] = byte;
1075			dflen--;
1076			remain--;
1077			/* keep in sync w/ state 5's transistions */
1078			if (dflen == 0)
1079				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1080			if (dflen == 0 && remain == 0) {
1081				cfg->vpd.vpd_wcnt = off;
1082				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1083				    off * sizeof(*cfg->vpd.vpd_w),
1084				    M_DEVBUF, M_WAITOK | M_ZERO);
1085				state = 0;
1086			} else if (dflen == 0)
1087				state = 5;
1088			break;
1089
1090		default:
1091			pci_printf(cfg, "invalid state: %d\n", state);
1092			state = -1;
1093			break;
1094		}
1095	}
1096
1097	if (cksumvalid == 0 || state < -1) {
1098		/* read-only data bad, clean up */
1099		if (cfg->vpd.vpd_ros != NULL) {
1100			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1101				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1102			free(cfg->vpd.vpd_ros, M_DEVBUF);
1103			cfg->vpd.vpd_ros = NULL;
1104		}
1105	}
1106	if (state < -1) {
1107		/* I/O error, clean up */
1108		pci_printf(cfg, "failed to read VPD data.\n");
1109		if (cfg->vpd.vpd_ident != NULL) {
1110			free(cfg->vpd.vpd_ident, M_DEVBUF);
1111			cfg->vpd.vpd_ident = NULL;
1112		}
1113		if (cfg->vpd.vpd_w != NULL) {
1114			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1115				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1116			free(cfg->vpd.vpd_w, M_DEVBUF);
1117			cfg->vpd.vpd_w = NULL;
1118		}
1119	}
1120	cfg->vpd.vpd_cached = 1;
1121#undef REG
1122#undef WREG
1123}
1124
1125int
1126pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1127{
1128	struct pci_devinfo *dinfo = device_get_ivars(child);
1129	pcicfgregs *cfg = &dinfo->cfg;
1130
1131	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1132		pci_read_vpd(device_get_parent(dev), cfg);
1133
1134	*identptr = cfg->vpd.vpd_ident;
1135
1136	if (*identptr == NULL)
1137		return (ENXIO);
1138
1139	return (0);
1140}
1141
1142int
1143pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1144	const char **vptr)
1145{
1146	struct pci_devinfo *dinfo = device_get_ivars(child);
1147	pcicfgregs *cfg = &dinfo->cfg;
1148	int i;
1149
1150	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1151		pci_read_vpd(device_get_parent(dev), cfg);
1152
1153	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1154		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1155		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1156			*vptr = cfg->vpd.vpd_ros[i].value;
1157			return (0);
1158		}
1159
1160	*vptr = NULL;
1161	return (ENXIO);
1162}
1163
1164/*
1165 * Find the requested HyperTransport capability and return the offset
1166 * in configuration space via the pointer provided.  The function
1167 * returns 0 on success and an error code otherwise.
1168 */
1169int
1170pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1171{
1172	int ptr, error;
1173	uint16_t val;
1174
1175	error = pci_find_cap(child, PCIY_HT, &ptr);
1176	if (error)
1177		return (error);
1178
1179	/*
1180	 * Traverse the capabilities list checking each HT capability
1181	 * to see if it matches the requested HT capability.
1182	 */
1183	while (ptr != 0) {
1184		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1185		if (capability == PCIM_HTCAP_SLAVE ||
1186		    capability == PCIM_HTCAP_HOST)
1187			val &= 0xe000;
1188		else
1189			val &= PCIM_HTCMD_CAP_MASK;
1190		if (val == capability) {
1191			if (capreg != NULL)
1192				*capreg = ptr;
1193			return (0);
1194		}
1195
1196		/* Skip to the next HT capability. */
1197		while (ptr != 0) {
1198			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1199			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1200			    PCIY_HT)
1201				break;
1202		}
1203	}
1204	return (ENOENT);
1205}
1206
1207/*
1208 * Find the requested capability and return the offset in
1209 * configuration space via the pointer provided.  The function returns
1210 * 0 on success and an error code otherwise.
1211 */
1212int
1213pci_find_cap_method(device_t dev, device_t child, int capability,
1214    int *capreg)
1215{
1216	struct pci_devinfo *dinfo = device_get_ivars(child);
1217	pcicfgregs *cfg = &dinfo->cfg;
1218	u_int32_t status;
1219	u_int8_t ptr;
1220
1221	/*
1222	 * Check the CAP_LIST bit of the PCI status register first.
1223	 */
1224	status = pci_read_config(child, PCIR_STATUS, 2);
1225	if (!(status & PCIM_STATUS_CAPPRESENT))
1226		return (ENXIO);
1227
1228	/*
1229	 * Determine the start pointer of the capabilities list.
1230	 */
1231	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1232	case PCIM_HDRTYPE_NORMAL:
1233	case PCIM_HDRTYPE_BRIDGE:
1234		ptr = PCIR_CAP_PTR;
1235		break;
1236	case PCIM_HDRTYPE_CARDBUS:
1237		ptr = PCIR_CAP_PTR_2;
1238		break;
1239	default:
1240		/* XXX: panic? */
1241		return (ENXIO);		/* no extended capabilities support */
1242	}
1243	ptr = pci_read_config(child, ptr, 1);
1244
1245	/*
1246	 * Traverse the capabilities list.
1247	 */
1248	while (ptr != 0) {
1249		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1250			if (capreg != NULL)
1251				*capreg = ptr;
1252			return (0);
1253		}
1254		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1255	}
1256
1257	return (ENOENT);
1258}
1259
1260/*
1261 * Find the requested extended capability and return the offset in
1262 * configuration space via the pointer provided.  The function returns
1263 * 0 on success and an error code otherwise.
1264 */
1265int
1266pci_find_extcap_method(device_t dev, device_t child, int capability,
1267    int *capreg)
1268{
1269	struct pci_devinfo *dinfo = device_get_ivars(child);
1270	pcicfgregs *cfg = &dinfo->cfg;
1271	uint32_t ecap;
1272	uint16_t ptr;
1273
1274	/* Only supported for PCI-express devices. */
1275	if (cfg->pcie.pcie_location == 0)
1276		return (ENXIO);
1277
1278	ptr = PCIR_EXTCAP;
1279	ecap = pci_read_config(child, ptr, 4);
1280	if (ecap == 0xffffffff || ecap == 0)
1281		return (ENOENT);
1282	for (;;) {
1283		if (PCI_EXTCAP_ID(ecap) == capability) {
1284			if (capreg != NULL)
1285				*capreg = ptr;
1286			return (0);
1287		}
1288		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1289		if (ptr == 0)
1290			break;
1291		ecap = pci_read_config(child, ptr, 4);
1292	}
1293
1294	return (ENOENT);
1295}
1296
1297/*
1298 * Support for MSI-X message interrupts.
1299 */
1300void
1301pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1302{
1303	struct pci_devinfo *dinfo = device_get_ivars(dev);
1304	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1305	uint32_t offset;
1306
1307	KASSERT(msix->msix_table_len > index, ("bogus index"));
1308	offset = msix->msix_table_offset + index * 16;
1309	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1310	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1311	bus_write_4(msix->msix_table_res, offset + 8, data);
1312
1313	/* Enable MSI -> HT mapping. */
1314	pci_ht_map_msi(dev, address);
1315}
1316
1317void
1318pci_mask_msix(device_t dev, u_int index)
1319{
1320	struct pci_devinfo *dinfo = device_get_ivars(dev);
1321	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1322	uint32_t offset, val;
1323
1324	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1325	offset = msix->msix_table_offset + index * 16 + 12;
1326	val = bus_read_4(msix->msix_table_res, offset);
1327	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1328		val |= PCIM_MSIX_VCTRL_MASK;
1329		bus_write_4(msix->msix_table_res, offset, val);
1330	}
1331}
1332
1333void
1334pci_unmask_msix(device_t dev, u_int index)
1335{
1336	struct pci_devinfo *dinfo = device_get_ivars(dev);
1337	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1338	uint32_t offset, val;
1339
1340	KASSERT(msix->msix_table_len > index, ("bogus index"));
1341	offset = msix->msix_table_offset + index * 16 + 12;
1342	val = bus_read_4(msix->msix_table_res, offset);
1343	if (val & PCIM_MSIX_VCTRL_MASK) {
1344		val &= ~PCIM_MSIX_VCTRL_MASK;
1345		bus_write_4(msix->msix_table_res, offset, val);
1346	}
1347}
1348
1349int
1350pci_pending_msix(device_t dev, u_int index)
1351{
1352	struct pci_devinfo *dinfo = device_get_ivars(dev);
1353	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1354	uint32_t offset, bit;
1355
1356	KASSERT(msix->msix_table_len > index, ("bogus index"));
1357	offset = msix->msix_pba_offset + (index / 32) * 4;
1358	bit = 1 << index % 32;
1359	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1360}
1361
1362/*
1363 * Restore MSI-X registers and table during resume.  If MSI-X is
1364 * enabled then walk the virtual table to restore the actual MSI-X
1365 * table.
1366 */
1367static void
1368pci_resume_msix(device_t dev)
1369{
1370	struct pci_devinfo *dinfo = device_get_ivars(dev);
1371	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1372	struct msix_table_entry *mte;
1373	struct msix_vector *mv;
1374	int i;
1375
1376	if (msix->msix_alloc > 0) {
1377		/* First, mask all vectors. */
1378		for (i = 0; i < msix->msix_msgnum; i++)
1379			pci_mask_msix(dev, i);
1380
1381		/* Second, program any messages with at least one handler. */
1382		for (i = 0; i < msix->msix_table_len; i++) {
1383			mte = &msix->msix_table[i];
1384			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1385				continue;
1386			mv = &msix->msix_vectors[mte->mte_vector - 1];
1387			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1388			pci_unmask_msix(dev, i);
1389		}
1390	}
1391	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1392	    msix->msix_ctrl, 2);
1393}
1394
1395/*
1396 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1397 * returned in *count.  After this function returns, each message will be
1398 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1399 */
1400int
1401pci_alloc_msix_method(device_t dev, device_t child, int *count)
1402{
1403	struct pci_devinfo *dinfo = device_get_ivars(child);
1404	pcicfgregs *cfg = &dinfo->cfg;
1405	struct resource_list_entry *rle;
1406	int actual, error, i, irq, max;
1407
1408	/* Don't let count == 0 get us into trouble. */
1409	if (*count == 0)
1410		return (EINVAL);
1411
1412	/* If rid 0 is allocated, then fail. */
1413	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1414	if (rle != NULL && rle->res != NULL)
1415		return (ENXIO);
1416
1417	/* Already have allocated messages? */
1418	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1419		return (ENXIO);
1420
1421	/* If MSI is blacklisted for this system, fail. */
1422	if (pci_msi_blacklisted())
1423		return (ENXIO);
1424
1425	/* MSI-X capability present? */
1426	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1427		return (ENODEV);
1428
1429	/* Make sure the appropriate BARs are mapped. */
1430	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1431	    cfg->msix.msix_table_bar);
1432	if (rle == NULL || rle->res == NULL ||
1433	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1434		return (ENXIO);
1435	cfg->msix.msix_table_res = rle->res;
1436	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1437		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1438		    cfg->msix.msix_pba_bar);
1439		if (rle == NULL || rle->res == NULL ||
1440		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1441			return (ENXIO);
1442	}
1443	cfg->msix.msix_pba_res = rle->res;
1444
1445	if (bootverbose)
1446		device_printf(child,
1447		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1448		    *count, cfg->msix.msix_msgnum);
1449	max = min(*count, cfg->msix.msix_msgnum);
1450	for (i = 0; i < max; i++) {
1451		/* Allocate a message. */
1452		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1453		if (error) {
1454			if (i == 0)
1455				return (error);
1456			break;
1457		}
1458		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1459		    irq, 1);
1460	}
1461	actual = i;
1462
1463	if (bootverbose) {
1464		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1465		if (actual == 1)
1466			device_printf(child, "using IRQ %lu for MSI-X\n",
1467			    rle->start);
1468		else {
1469			int run;
1470
1471			/*
1472			 * Be fancy and try to print contiguous runs of
1473			 * IRQ values as ranges.  'irq' is the previous IRQ.
1474			 * 'run' is true if we are in a range.
1475			 */
1476			device_printf(child, "using IRQs %lu", rle->start);
1477			irq = rle->start;
1478			run = 0;
1479			for (i = 1; i < actual; i++) {
1480				rle = resource_list_find(&dinfo->resources,
1481				    SYS_RES_IRQ, i + 1);
1482
1483				/* Still in a run? */
1484				if (rle->start == irq + 1) {
1485					run = 1;
1486					irq++;
1487					continue;
1488				}
1489
1490				/* Finish previous range. */
1491				if (run) {
1492					printf("-%d", irq);
1493					run = 0;
1494				}
1495
1496				/* Start new range. */
1497				printf(",%lu", rle->start);
1498				irq = rle->start;
1499			}
1500
1501			/* Unfinished range? */
1502			if (run)
1503				printf("-%d", irq);
1504			printf(" for MSI-X\n");
1505		}
1506	}
1507
1508	/* Mask all vectors. */
1509	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1510		pci_mask_msix(child, i);
1511
1512	/* Allocate and initialize vector data and virtual table. */
1513	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1514	    M_DEVBUF, M_WAITOK | M_ZERO);
1515	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1516	    M_DEVBUF, M_WAITOK | M_ZERO);
1517	for (i = 0; i < actual; i++) {
1518		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1519		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1520		cfg->msix.msix_table[i].mte_vector = i + 1;
1521	}
1522
1523	/* Update control register to enable MSI-X. */
1524	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1525	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1526	    cfg->msix.msix_ctrl, 2);
1527
1528	/* Update counts of alloc'd messages. */
1529	cfg->msix.msix_alloc = actual;
1530	cfg->msix.msix_table_len = actual;
1531	*count = actual;
1532	return (0);
1533}
1534
1535/*
1536 * By default, pci_alloc_msix() will assign the allocated IRQ
1537 * resources consecutively to the first N messages in the MSI-X table.
1538 * However, device drivers may want to use different layouts if they
1539 * either receive fewer messages than they asked for, or they wish to
1540 * populate the MSI-X table sparsely.  This method allows the driver
1541 * to specify what layout it wants.  It must be called after a
1542 * successful pci_alloc_msix() but before any of the associated
1543 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1544 *
1545 * The 'vectors' array contains 'count' message vectors.  The array
1546 * maps directly to the MSI-X table in that index 0 in the array
1547 * specifies the vector for the first message in the MSI-X table, etc.
1548 * The vector value in each array index can either be 0 to indicate
1549 * that no vector should be assigned to a message slot, or it can be a
1550 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1552 * vector (IRQ) to be used for the corresponding message.
1553 *
1554 * On successful return, each message with a non-zero vector will have
1555 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1556 * 1.  Additionally, if any of the IRQs allocated via the previous
1557 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1558 * will be freed back to the system automatically.
1559 *
1560 * For example, suppose a driver has a MSI-X table with 6 messages and
1561 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1562 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1563 * C.  After the call to pci_alloc_msix(), the device will be setup to
1564 * have an MSI-X table of ABC--- (where - means no vector assigned).
1565 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1566 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1567 * be freed back to the system.  This device will also have valid
1568 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1569 *
1570 * In any case, the SYS_RES_IRQ rid X will always map to the message
1571 * at MSI-X table index X - 1 and will only be valid if a vector is
1572 * assigned to that table entry.
1573 */
1574int
1575pci_remap_msix_method(device_t dev, device_t child, int count,
1576    const u_int *vectors)
1577{
1578	struct pci_devinfo *dinfo = device_get_ivars(child);
1579	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1580	struct resource_list_entry *rle;
1581	int i, irq, j, *used;
1582
1583	/*
1584	 * Have to have at least one message in the table but the
1585	 * table can't be bigger than the actual MSI-X table in the
1586	 * device.
1587	 */
1588	if (count == 0 || count > msix->msix_msgnum)
1589		return (EINVAL);
1590
1591	/* Sanity check the vectors. */
1592	for (i = 0; i < count; i++)
1593		if (vectors[i] > msix->msix_alloc)
1594			return (EINVAL);
1595
1596	/*
1597	 * Make sure there aren't any holes in the vectors to be used.
1598	 * It's a big pain to support it, and it doesn't really make
1599	 * sense anyway.  Also, at least one vector must be used.
1600	 */
1601	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1602	    M_ZERO);
1603	for (i = 0; i < count; i++)
1604		if (vectors[i] != 0)
1605			used[vectors[i] - 1] = 1;
1606	for (i = 0; i < msix->msix_alloc - 1; i++)
1607		if (used[i] == 0 && used[i + 1] == 1) {
1608			free(used, M_DEVBUF);
1609			return (EINVAL);
1610		}
1611	if (used[0] != 1) {
1612		free(used, M_DEVBUF);
1613		return (EINVAL);
1614	}
1615
1616	/* Make sure none of the resources are allocated. */
1617	for (i = 0; i < msix->msix_table_len; i++) {
1618		if (msix->msix_table[i].mte_vector == 0)
1619			continue;
1620		if (msix->msix_table[i].mte_handlers > 0)
1621			return (EBUSY);
1622		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1623		KASSERT(rle != NULL, ("missing resource"));
1624		if (rle->res != NULL)
1625			return (EBUSY);
1626	}
1627
1628	/* Free the existing resource list entries. */
1629	for (i = 0; i < msix->msix_table_len; i++) {
1630		if (msix->msix_table[i].mte_vector == 0)
1631			continue;
1632		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1633	}
1634
1635	/*
1636	 * Build the new virtual table keeping track of which vectors are
1637	 * used.
1638	 */
1639	free(msix->msix_table, M_DEVBUF);
1640	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1641	    M_DEVBUF, M_WAITOK | M_ZERO);
1642	for (i = 0; i < count; i++)
1643		msix->msix_table[i].mte_vector = vectors[i];
1644	msix->msix_table_len = count;
1645
1646	/* Free any unused IRQs and resize the vectors array if necessary. */
1647	j = msix->msix_alloc - 1;
1648	if (used[j] == 0) {
1649		struct msix_vector *vec;
1650
1651		while (used[j] == 0) {
1652			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1653			    msix->msix_vectors[j].mv_irq);
1654			j--;
1655		}
1656		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1657		    M_WAITOK);
1658		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1659		    (j + 1));
1660		free(msix->msix_vectors, M_DEVBUF);
1661		msix->msix_vectors = vec;
1662		msix->msix_alloc = j + 1;
1663	}
1664	free(used, M_DEVBUF);
1665
1666	/* Map the IRQs onto the rids. */
1667	for (i = 0; i < count; i++) {
1668		if (vectors[i] == 0)
1669			continue;
1670		irq = msix->msix_vectors[vectors[i]].mv_irq;
1671		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1672		    irq, 1);
1673	}
1674
1675	if (bootverbose) {
1676		device_printf(child, "Remapped MSI-X IRQs as: ");
1677		for (i = 0; i < count; i++) {
1678			if (i != 0)
1679				printf(", ");
1680			if (vectors[i] == 0)
1681				printf("---");
1682			else
1683				printf("%d",
1684				    msix->msix_vectors[vectors[i]].mv_irq);
1685		}
1686		printf("\n");
1687	}
1688
1689	return (0);
1690}
1691
1692static int
1693pci_release_msix(device_t dev, device_t child)
1694{
1695	struct pci_devinfo *dinfo = device_get_ivars(child);
1696	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1697	struct resource_list_entry *rle;
1698	int i;
1699
1700	/* Do we have any messages to release? */
1701	if (msix->msix_alloc == 0)
1702		return (ENODEV);
1703
1704	/* Make sure none of the resources are allocated. */
1705	for (i = 0; i < msix->msix_table_len; i++) {
1706		if (msix->msix_table[i].mte_vector == 0)
1707			continue;
1708		if (msix->msix_table[i].mte_handlers > 0)
1709			return (EBUSY);
1710		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1711		KASSERT(rle != NULL, ("missing resource"));
1712		if (rle->res != NULL)
1713			return (EBUSY);
1714	}
1715
1716	/* Update control register to disable MSI-X. */
1717	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1718	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1719	    msix->msix_ctrl, 2);
1720
1721	/* Free the resource list entries. */
1722	for (i = 0; i < msix->msix_table_len; i++) {
1723		if (msix->msix_table[i].mte_vector == 0)
1724			continue;
1725		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1726	}
1727	free(msix->msix_table, M_DEVBUF);
1728	msix->msix_table_len = 0;
1729
1730	/* Release the IRQs. */
1731	for (i = 0; i < msix->msix_alloc; i++)
1732		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1733		    msix->msix_vectors[i].mv_irq);
1734	free(msix->msix_vectors, M_DEVBUF);
1735	msix->msix_alloc = 0;
1736	return (0);
1737}
1738
1739/*
1740 * Return the max supported MSI-X messages this device supports.
1741 * Basically, assuming the MD code can alloc messages, this function
1742 * should return the maximum value that pci_alloc_msix() can return.
1743 * Thus, it is subject to the tunables, etc.
1744 */
1745int
1746pci_msix_count_method(device_t dev, device_t child)
1747{
1748	struct pci_devinfo *dinfo = device_get_ivars(child);
1749	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1750
1751	if (pci_do_msix && msix->msix_location != 0)
1752		return (msix->msix_msgnum);
1753	return (0);
1754}
1755
1756/*
1757 * HyperTransport MSI mapping control
1758 */
1759void
1760pci_ht_map_msi(device_t dev, uint64_t addr)
1761{
1762	struct pci_devinfo *dinfo = device_get_ivars(dev);
1763	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1764
1765	if (!ht->ht_msimap)
1766		return;
1767
1768	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1769	    ht->ht_msiaddr >> 20 == addr >> 20) {
1770		/* Enable MSI -> HT mapping. */
1771		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1772		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1773		    ht->ht_msictrl, 2);
1774	}
1775
1776	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1777		/* Disable MSI -> HT mapping. */
1778		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1779		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1780		    ht->ht_msictrl, 2);
1781	}
1782}
1783
1784int
1785pci_get_max_read_req(device_t dev)
1786{
1787	struct pci_devinfo *dinfo = device_get_ivars(dev);
1788	int cap;
1789	uint16_t val;
1790
1791	cap = dinfo->cfg.pcie.pcie_location;
1792	if (cap == 0)
1793		return (0);
1794	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1795	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1796	val >>= 12;
1797	return (1 << (val + 7));
1798}
1799
1800int
1801pci_set_max_read_req(device_t dev, int size)
1802{
1803	struct pci_devinfo *dinfo = device_get_ivars(dev);
1804	int cap;
1805	uint16_t val;
1806
1807	cap = dinfo->cfg.pcie.pcie_location;
1808	if (cap == 0)
1809		return (0);
1810	if (size < 128)
1811		size = 128;
1812	if (size > 4096)
1813		size = 4096;
1814	size = (1 << (fls(size) - 1));
1815	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1816	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1817	val |= (fls(size) - 8) << 12;
1818	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1819	return (size);
1820}
1821
1822/*
1823 * Support for MSI message signalled interrupts.
1824 */
1825void
1826pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1827{
1828	struct pci_devinfo *dinfo = device_get_ivars(dev);
1829	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1830
1831	/* Write data and address values. */
1832	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1833	    address & 0xffffffff, 4);
1834	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1835		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1836		    address >> 32, 4);
1837		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1838		    data, 2);
1839	} else
1840		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1841		    2);
1842
1843	/* Enable MSI in the control register. */
1844	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1845	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1846	    2);
1847
1848	/* Enable MSI -> HT mapping. */
1849	pci_ht_map_msi(dev, address);
1850}
1851
1852void
1853pci_disable_msi(device_t dev)
1854{
1855	struct pci_devinfo *dinfo = device_get_ivars(dev);
1856	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1857
1858	/* Disable MSI -> HT mapping. */
1859	pci_ht_map_msi(dev, 0);
1860
1861	/* Disable MSI in the control register. */
1862	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1863	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1864	    2);
1865}
1866
1867/*
1868 * Restore MSI registers during resume.  If MSI is enabled then
1869 * restore the data and address registers in addition to the control
1870 * register.
1871 */
1872static void
1873pci_resume_msi(device_t dev)
1874{
1875	struct pci_devinfo *dinfo = device_get_ivars(dev);
1876	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1877	uint64_t address;
1878	uint16_t data;
1879
1880	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1881		address = msi->msi_addr;
1882		data = msi->msi_data;
1883		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1884		    address & 0xffffffff, 4);
1885		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1886			pci_write_config(dev, msi->msi_location +
1887			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1888			pci_write_config(dev, msi->msi_location +
1889			    PCIR_MSI_DATA_64BIT, data, 2);
1890		} else
1891			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1892			    data, 2);
1893	}
1894	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1895	    2);
1896}
1897
1898static int
1899pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1900{
1901	struct pci_devinfo *dinfo = device_get_ivars(dev);
1902	pcicfgregs *cfg = &dinfo->cfg;
1903	struct resource_list_entry *rle;
1904	struct msix_table_entry *mte;
1905	struct msix_vector *mv;
1906	uint64_t addr;
1907	uint32_t data;
1908	int error, i, j;
1909
1910	/*
1911	 * Handle MSI first.  We try to find this IRQ among our list
1912	 * of MSI IRQs.  If we find it, we request updated address and
1913	 * data registers and apply the results.
1914	 */
1915	if (cfg->msi.msi_alloc > 0) {
1916
1917		/* If we don't have any active handlers, nothing to do. */
1918		if (cfg->msi.msi_handlers == 0)
1919			return (0);
1920		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1921			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1922			    i + 1);
1923			if (rle->start == irq) {
1924				error = PCIB_MAP_MSI(device_get_parent(bus),
1925				    dev, irq, &addr, &data);
1926				if (error)
1927					return (error);
1928				pci_disable_msi(dev);
1929				dinfo->cfg.msi.msi_addr = addr;
1930				dinfo->cfg.msi.msi_data = data;
1931				pci_enable_msi(dev, addr, data);
1932				return (0);
1933			}
1934		}
1935		return (ENOENT);
1936	}
1937
1938	/*
1939	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1940	 * we request the updated mapping info.  If that works, we go
1941	 * through all the slots that use this IRQ and update them.
1942	 */
1943	if (cfg->msix.msix_alloc > 0) {
1944		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1945			mv = &cfg->msix.msix_vectors[i];
1946			if (mv->mv_irq == irq) {
1947				error = PCIB_MAP_MSI(device_get_parent(bus),
1948				    dev, irq, &addr, &data);
1949				if (error)
1950					return (error);
1951				mv->mv_address = addr;
1952				mv->mv_data = data;
1953				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1954					mte = &cfg->msix.msix_table[j];
1955					if (mte->mte_vector != i + 1)
1956						continue;
1957					if (mte->mte_handlers == 0)
1958						continue;
1959					pci_mask_msix(dev, j);
1960					pci_enable_msix(dev, j, addr, data);
1961					pci_unmask_msix(dev, j);
1962				}
1963			}
1964		}
1965		return (ENOENT);
1966	}
1967
1968	return (ENOENT);
1969}
1970
1971/*
1972 * Returns true if the specified device is blacklisted because MSI
1973 * doesn't work.
1974 */
1975int
1976pci_msi_device_blacklisted(device_t dev)
1977{
1978	const struct pci_quirk *q;
1979
1980	if (!pci_honor_msi_blacklist)
1981		return (0);
1982
1983	for (q = &pci_quirks[0]; q->devid; q++) {
1984		if (q->devid == pci_get_devid(dev) &&
1985		    q->type == PCI_QUIRK_DISABLE_MSI)
1986			return (1);
1987	}
1988	return (0);
1989}
1990
1991/*
1992 * Returns true if a specified chipset supports MSI when it is
1993 * emulated hardware in a virtual machine.
1994 */
1995static int
1996pci_msi_vm_chipset(device_t dev)
1997{
1998	const struct pci_quirk *q;
1999
2000	for (q = &pci_quirks[0]; q->devid; q++) {
2001		if (q->devid == pci_get_devid(dev) &&
2002		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
2003			return (1);
2004	}
2005	return (0);
2006}
2007
2008/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2010 * we just check for blacklisted chipsets as represented by the
2011 * host-PCI bridge at device 0:0:0.  In the future, it may become
2012 * necessary to check other system attributes, such as the kenv values
2013 * that give the motherboard manufacturer and model number.
2014 */
2015static int
2016pci_msi_blacklisted(void)
2017{
2018	device_t dev;
2019
2020	if (!pci_honor_msi_blacklist)
2021		return (0);
2022
2023	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2024	if (!(pcie_chipset || pcix_chipset)) {
2025		if (vm_guest != VM_GUEST_NO) {
2026			dev = pci_find_bsf(0, 0, 0);
2027			if (dev != NULL)
2028				return (pci_msi_vm_chipset(dev) == 0);
2029		}
2030		return (1);
2031	}
2032
2033	dev = pci_find_bsf(0, 0, 0);
2034	if (dev != NULL)
2035		return (pci_msi_device_blacklisted(dev));
2036	return (0);
2037}
2038
2039/*
2040 * Attempt to allocate *count MSI messages.  The actual number allocated is
2041 * returned in *count.  After this function returns, each message will be
2042 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2043 */
2044int
2045pci_alloc_msi_method(device_t dev, device_t child, int *count)
2046{
2047	struct pci_devinfo *dinfo = device_get_ivars(child);
2048	pcicfgregs *cfg = &dinfo->cfg;
2049	struct resource_list_entry *rle;
2050	int actual, error, i, irqs[32];
2051	uint16_t ctrl;
2052
2053	/* Don't let count == 0 get us into trouble. */
2054	if (*count == 0)
2055		return (EINVAL);
2056
2057	/* If rid 0 is allocated, then fail. */
2058	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2059	if (rle != NULL && rle->res != NULL)
2060		return (ENXIO);
2061
2062	/* Already have allocated messages? */
2063	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2064		return (ENXIO);
2065
2066	/* If MSI is blacklisted for this system, fail. */
2067	if (pci_msi_blacklisted())
2068		return (ENXIO);
2069
2070	/* MSI capability present? */
2071	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2072		return (ENODEV);
2073
2074	if (bootverbose)
2075		device_printf(child,
2076		    "attempting to allocate %d MSI vectors (%d supported)\n",
2077		    *count, cfg->msi.msi_msgnum);
2078
2079	/* Don't ask for more than the device supports. */
2080	actual = min(*count, cfg->msi.msi_msgnum);
2081
2082	/* Don't ask for more than 32 messages. */
2083	actual = min(actual, 32);
2084
2085	/* MSI requires power of 2 number of messages. */
2086	if (!powerof2(actual))
2087		return (EINVAL);
2088
2089	for (;;) {
2090		/* Try to allocate N messages. */
2091		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2092		    actual, irqs);
2093		if (error == 0)
2094			break;
2095		if (actual == 1)
2096			return (error);
2097
2098		/* Try N / 2. */
2099		actual >>= 1;
2100	}
2101
2102	/*
2103	 * We now have N actual messages mapped onto SYS_RES_IRQ
2104	 * resources in the irqs[] array, so add new resources
2105	 * starting at rid 1.
2106	 */
2107	for (i = 0; i < actual; i++)
2108		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2109		    irqs[i], irqs[i], 1);
2110
2111	if (bootverbose) {
2112		if (actual == 1)
2113			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2114		else {
2115			int run;
2116
2117			/*
2118			 * Be fancy and try to print contiguous runs
2119			 * of IRQ values as ranges.  'run' is true if
2120			 * we are in a range.
2121			 */
2122			device_printf(child, "using IRQs %d", irqs[0]);
2123			run = 0;
2124			for (i = 1; i < actual; i++) {
2125
2126				/* Still in a run? */
2127				if (irqs[i] == irqs[i - 1] + 1) {
2128					run = 1;
2129					continue;
2130				}
2131
2132				/* Finish previous range. */
2133				if (run) {
2134					printf("-%d", irqs[i - 1]);
2135					run = 0;
2136				}
2137
2138				/* Start new range. */
2139				printf(",%d", irqs[i]);
2140			}
2141
2142			/* Unfinished range? */
2143			if (run)
2144				printf("-%d", irqs[actual - 1]);
2145			printf(" for MSI\n");
2146		}
2147	}
2148
2149	/* Update control register with actual count. */
2150	ctrl = cfg->msi.msi_ctrl;
2151	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2152	ctrl |= (ffs(actual) - 1) << 4;
2153	cfg->msi.msi_ctrl = ctrl;
2154	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2155
2156	/* Update counts of alloc'd messages. */
2157	cfg->msi.msi_alloc = actual;
2158	cfg->msi.msi_handlers = 0;
2159	*count = actual;
2160	return (0);
2161}
2162
2163/* Release the MSI messages associated with this device. */
2164int
2165pci_release_msi_method(device_t dev, device_t child)
2166{
2167	struct pci_devinfo *dinfo = device_get_ivars(child);
2168	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2169	struct resource_list_entry *rle;
2170	int error, i, irqs[32];
2171
2172	/* Try MSI-X first. */
2173	error = pci_release_msix(dev, child);
2174	if (error != ENODEV)
2175		return (error);
2176
2177	/* Do we have any messages to release? */
2178	if (msi->msi_alloc == 0)
2179		return (ENODEV);
2180	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2181
2182	/* Make sure none of the resources are allocated. */
2183	if (msi->msi_handlers > 0)
2184		return (EBUSY);
2185	for (i = 0; i < msi->msi_alloc; i++) {
2186		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2187		KASSERT(rle != NULL, ("missing MSI resource"));
2188		if (rle->res != NULL)
2189			return (EBUSY);
2190		irqs[i] = rle->start;
2191	}
2192
2193	/* Update control register with 0 count. */
2194	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2195	    ("%s: MSI still enabled", __func__));
2196	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2197	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2198	    msi->msi_ctrl, 2);
2199
2200	/* Release the messages. */
2201	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2202	for (i = 0; i < msi->msi_alloc; i++)
2203		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2204
2205	/* Update alloc count. */
2206	msi->msi_alloc = 0;
2207	msi->msi_addr = 0;
2208	msi->msi_data = 0;
2209	return (0);
2210}
2211
2212/*
2213 * Return the max supported MSI messages this device supports.
2214 * Basically, assuming the MD code can alloc messages, this function
2215 * should return the maximum value that pci_alloc_msi() can return.
2216 * Thus, it is subject to the tunables, etc.
2217 */
2218int
2219pci_msi_count_method(device_t dev, device_t child)
2220{
2221	struct pci_devinfo *dinfo = device_get_ivars(child);
2222	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2223
2224	if (pci_do_msi && msi->msi_location != 0)
2225		return (msi->msi_msgnum);
2226	return (0);
2227}
2228
2229/* free pcicfgregs structure and all depending data structures */
2230
2231int
2232pci_freecfg(struct pci_devinfo *dinfo)
2233{
2234	struct devlist *devlist_head;
2235	struct pci_map *pm, *next;
2236	int i;
2237
2238	devlist_head = &pci_devq;
2239
2240	if (dinfo->cfg.vpd.vpd_reg) {
2241		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2242		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2243			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2244		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2245		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2246			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2247		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2248	}
2249	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2250		free(pm, M_DEVBUF);
2251	}
2252	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2253	free(dinfo, M_DEVBUF);
2254
2255	/* increment the generation count */
2256	pci_generation++;
2257
2258	/* we're losing one device */
2259	pci_numdevs--;
2260	return (0);
2261}
2262
2263/*
2264 * PCI power manangement
2265 */
2266int
2267pci_set_powerstate_method(device_t dev, device_t child, int state)
2268{
2269	struct pci_devinfo *dinfo = device_get_ivars(child);
2270	pcicfgregs *cfg = &dinfo->cfg;
2271	uint16_t status;
2272	int result, oldstate, highest, delay;
2273
2274	if (cfg->pp.pp_cap == 0)
2275		return (EOPNOTSUPP);
2276
2277	/*
2278	 * Optimize a no state change request away.  While it would be OK to
2279	 * write to the hardware in theory, some devices have shown odd
2280	 * behavior when going from D3 -> D3.
2281	 */
2282	oldstate = pci_get_powerstate(child);
2283	if (oldstate == state)
2284		return (0);
2285
2286	/*
2287	 * The PCI power management specification states that after a state
2288	 * transition between PCI power states, system software must
2289	 * guarantee a minimal delay before the function accesses the device.
2290	 * Compute the worst case delay that we need to guarantee before we
2291	 * access the device.  Many devices will be responsive much more
2292	 * quickly than this delay, but there are some that don't respond
2293	 * instantly to state changes.  Transitions to/from D3 state require
2294	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2295	 * is done below with DELAY rather than a sleeper function because
2296	 * this function can be called from contexts where we cannot sleep.
2297	 */
2298	highest = (oldstate > state) ? oldstate : state;
2299	if (highest == PCI_POWERSTATE_D3)
2300	    delay = 10000;
2301	else if (highest == PCI_POWERSTATE_D2)
2302	    delay = 200;
2303	else
2304	    delay = 0;
2305	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2306	    & ~PCIM_PSTAT_DMASK;
2307	result = 0;
2308	switch (state) {
2309	case PCI_POWERSTATE_D0:
2310		status |= PCIM_PSTAT_D0;
2311		break;
2312	case PCI_POWERSTATE_D1:
2313		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2314			return (EOPNOTSUPP);
2315		status |= PCIM_PSTAT_D1;
2316		break;
2317	case PCI_POWERSTATE_D2:
2318		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2319			return (EOPNOTSUPP);
2320		status |= PCIM_PSTAT_D2;
2321		break;
2322	case PCI_POWERSTATE_D3:
2323		status |= PCIM_PSTAT_D3;
2324		break;
2325	default:
2326		return (EINVAL);
2327	}
2328
2329	if (bootverbose)
2330		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2331		    state);
2332
2333	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2334	if (delay)
2335		DELAY(delay);
2336	return (0);
2337}
2338
2339int
2340pci_get_powerstate_method(device_t dev, device_t child)
2341{
2342	struct pci_devinfo *dinfo = device_get_ivars(child);
2343	pcicfgregs *cfg = &dinfo->cfg;
2344	uint16_t status;
2345	int result;
2346
2347	if (cfg->pp.pp_cap != 0) {
2348		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2349		switch (status & PCIM_PSTAT_DMASK) {
2350		case PCIM_PSTAT_D0:
2351			result = PCI_POWERSTATE_D0;
2352			break;
2353		case PCIM_PSTAT_D1:
2354			result = PCI_POWERSTATE_D1;
2355			break;
2356		case PCIM_PSTAT_D2:
2357			result = PCI_POWERSTATE_D2;
2358			break;
2359		case PCIM_PSTAT_D3:
2360			result = PCI_POWERSTATE_D3;
2361			break;
2362		default:
2363			result = PCI_POWERSTATE_UNKNOWN;
2364			break;
2365		}
2366	} else {
2367		/* No support, device is always at D0 */
2368		result = PCI_POWERSTATE_D0;
2369	}
2370	return (result);
2371}
2372
2373/*
2374 * Some convenience functions for PCI device drivers.
2375 */
2376
2377static __inline void
2378pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2379{
2380	uint16_t	command;
2381
2382	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2383	command |= bit;
2384	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2385}
2386
2387static __inline void
2388pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2389{
2390	uint16_t	command;
2391
2392	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2393	command &= ~bit;
2394	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2395}
2396
2397int
2398pci_enable_busmaster_method(device_t dev, device_t child)
2399{
2400	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2401	return (0);
2402}
2403
2404int
2405pci_disable_busmaster_method(device_t dev, device_t child)
2406{
2407	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2408	return (0);
2409}
2410
2411int
2412pci_enable_io_method(device_t dev, device_t child, int space)
2413{
2414	uint16_t bit;
2415
2416	switch(space) {
2417	case SYS_RES_IOPORT:
2418		bit = PCIM_CMD_PORTEN;
2419		break;
2420	case SYS_RES_MEMORY:
2421		bit = PCIM_CMD_MEMEN;
2422		break;
2423	default:
2424		return (EINVAL);
2425	}
2426	pci_set_command_bit(dev, child, bit);
2427	return (0);
2428}
2429
2430int
2431pci_disable_io_method(device_t dev, device_t child, int space)
2432{
2433	uint16_t bit;
2434
2435	switch(space) {
2436	case SYS_RES_IOPORT:
2437		bit = PCIM_CMD_PORTEN;
2438		break;
2439	case SYS_RES_MEMORY:
2440		bit = PCIM_CMD_MEMEN;
2441		break;
2442	default:
2443		return (EINVAL);
2444	}
2445	pci_clear_command_bit(dev, child, bit);
2446	return (0);
2447}
2448
2449/*
2450 * New style pci driver.  Parent device is either a pci-host-bridge or a
2451 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2452 */
2453
2454void
2455pci_print_verbose(struct pci_devinfo *dinfo)
2456{
2457
2458	if (bootverbose) {
2459		pcicfgregs *cfg = &dinfo->cfg;
2460
2461		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2462		    cfg->vendor, cfg->device, cfg->revid);
2463		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2464		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2465		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2466		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2467		    cfg->mfdev);
2468		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2469		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2470		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2471		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2472		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2473		if (cfg->intpin > 0)
2474			printf("\tintpin=%c, irq=%d\n",
2475			    cfg->intpin +'a' -1, cfg->intline);
2476		if (cfg->pp.pp_cap) {
2477			uint16_t status;
2478
2479			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2480			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2481			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2482			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2483			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2484			    status & PCIM_PSTAT_DMASK);
2485		}
2486		if (cfg->msi.msi_location) {
2487			int ctrl;
2488
2489			ctrl = cfg->msi.msi_ctrl;
2490			printf("\tMSI supports %d message%s%s%s\n",
2491			    cfg->msi.msi_msgnum,
2492			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2493			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2494			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2495		}
2496		if (cfg->msix.msix_location) {
2497			printf("\tMSI-X supports %d message%s ",
2498			    cfg->msix.msix_msgnum,
2499			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2500			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2501				printf("in map 0x%x\n",
2502				    cfg->msix.msix_table_bar);
2503			else
2504				printf("in maps 0x%x and 0x%x\n",
2505				    cfg->msix.msix_table_bar,
2506				    cfg->msix.msix_pba_bar);
2507		}
2508	}
2509}
2510
2511static int
2512pci_porten(device_t dev)
2513{
2514	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2515}
2516
2517static int
2518pci_memen(device_t dev)
2519{
2520	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2521}
2522
2523static void
2524pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2525{
2526	struct pci_devinfo *dinfo;
2527	pci_addr_t map, testval;
2528	int ln2range;
2529	uint16_t cmd;
2530
2531	/*
2532	 * The device ROM BAR is special.  It is always a 32-bit
2533	 * memory BAR.  Bit 0 is special and should not be set when
2534	 * sizing the BAR.
2535	 */
2536	dinfo = device_get_ivars(dev);
2537	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2538		map = pci_read_config(dev, reg, 4);
2539		pci_write_config(dev, reg, 0xfffffffe, 4);
2540		testval = pci_read_config(dev, reg, 4);
2541		pci_write_config(dev, reg, map, 4);
2542		*mapp = map;
2543		*testvalp = testval;
2544		return;
2545	}
2546
2547	map = pci_read_config(dev, reg, 4);
2548	ln2range = pci_maprange(map);
2549	if (ln2range == 64)
2550		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2551
2552	/*
2553	 * Disable decoding via the command register before
2554	 * determining the BAR's length since we will be placing it in
2555	 * a weird state.
2556	 */
2557	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2558	pci_write_config(dev, PCIR_COMMAND,
2559	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2560
2561	/*
2562	 * Determine the BAR's length by writing all 1's.  The bottom
2563	 * log_2(size) bits of the BAR will stick as 0 when we read
2564	 * the value back.
2565	 */
2566	pci_write_config(dev, reg, 0xffffffff, 4);
2567	testval = pci_read_config(dev, reg, 4);
2568	if (ln2range == 64) {
2569		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2570		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2571	}
2572
2573	/*
2574	 * Restore the original value of the BAR.  We may have reprogrammed
2575	 * the BAR of the low-level console device and when booting verbose,
2576	 * we need the console device addressable.
2577	 */
2578	pci_write_config(dev, reg, map, 4);
2579	if (ln2range == 64)
2580		pci_write_config(dev, reg + 4, map >> 32, 4);
2581	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2582
2583	*mapp = map;
2584	*testvalp = testval;
2585}
2586
2587static void
2588pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2589{
2590	struct pci_devinfo *dinfo;
2591	int ln2range;
2592
2593	/*
2594	 * Don't disable BARs on AGP devices. In general: Don't
2595	 * disable BARs on any PCI display devices, because doing that
2596	 * can sometimes cause the main memory bus to stop working,
2597	 * causing all memory reads to return nothing but 0xFFFFFFFF,
2598	 * even though the memory location was previously written.
2599	 * After a while a privileged instruction fault will appear
2600	 * and then nothing more can be debugged.
2601	 * The reason for this behaviour is unknown.
2602	 */
2603	if (base == 0 && pci_get_class(dev) == PCIC_DISPLAY) {
2604		device_printf(device_get_parent(dev),
2605		    "pci%d:%d:%d:%d BARs on display devices "
2606		    "should not be disabled.\n",
2607		    pci_get_domain(dev),
2608		    pci_get_bus(dev),
2609		    pci_get_slot(dev),
2610		    pci_get_function(dev));
2611		return;
2612	}
2613
2614	/* The device ROM BAR is always a 32-bit memory BAR. */
2615	dinfo = device_get_ivars(dev);
2616	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2617		ln2range = 32;
2618	else
2619		ln2range = pci_maprange(pm->pm_value);
2620	pci_write_config(dev, pm->pm_reg, base, 4);
2621	if (ln2range == 64)
2622		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2623	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2624	if (ln2range == 64)
2625		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2626		    pm->pm_reg + 4, 4) << 32;
2627}
2628
2629struct pci_map *
2630pci_find_bar(device_t dev, int reg)
2631{
2632	struct pci_devinfo *dinfo;
2633	struct pci_map *pm;
2634
2635	dinfo = device_get_ivars(dev);
2636	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2637		if (pm->pm_reg == reg)
2638			return (pm);
2639	}
2640	return (NULL);
2641}
2642
2643int
2644pci_bar_enabled(device_t dev, struct pci_map *pm)
2645{
2646	struct pci_devinfo *dinfo;
2647	uint16_t cmd;
2648
2649	dinfo = device_get_ivars(dev);
2650	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2651	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2652		return (0);
2653	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2654	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2655		return ((cmd & PCIM_CMD_MEMEN) != 0);
2656	else
2657		return ((cmd & PCIM_CMD_PORTEN) != 0);
2658}
2659
2660static struct pci_map *
2661pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2662{
2663	struct pci_devinfo *dinfo;
2664	struct pci_map *pm, *prev;
2665
2666	dinfo = device_get_ivars(dev);
2667	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2668	pm->pm_reg = reg;
2669	pm->pm_value = value;
2670	pm->pm_size = size;
2671	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2672		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2673		    reg));
2674		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2675		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2676			break;
2677	}
2678	if (prev != NULL)
2679		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2680	else
2681		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2682	return (pm);
2683}
2684
2685static void
2686pci_restore_bars(device_t dev)
2687{
2688	struct pci_devinfo *dinfo;
2689	struct pci_map *pm;
2690	int ln2range;
2691
2692	dinfo = device_get_ivars(dev);
2693	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2694		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2695			ln2range = 32;
2696		else
2697			ln2range = pci_maprange(pm->pm_value);
2698		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2699		if (ln2range == 64)
2700			pci_write_config(dev, pm->pm_reg + 4,
2701			    pm->pm_value >> 32, 4);
2702	}
2703}
2704
2705/*
2706 * Add a resource based on a pci map register. Return 1 if the map
2707 * register is a 32bit map register or 2 if it is a 64bit register.
2708 */
2709static int
2710pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2711    int force, int prefetch)
2712{
2713	struct pci_map *pm;
2714	pci_addr_t base, map, testval;
2715	pci_addr_t start, end, count;
2716	int barlen, basezero, maprange, mapsize, type;
2717	uint16_t cmd;
2718	struct resource *res;
2719
2720	/*
2721	 * The BAR may already exist if the device is a CardBus card
2722	 * whose CIS is stored in this BAR.
2723	 */
2724	pm = pci_find_bar(dev, reg);
2725	if (pm != NULL) {
2726		maprange = pci_maprange(pm->pm_value);
2727		barlen = maprange == 64 ? 2 : 1;
2728		return (barlen);
2729	}
2730
2731	pci_read_bar(dev, reg, &map, &testval);
2732	if (PCI_BAR_MEM(map)) {
2733		type = SYS_RES_MEMORY;
2734		if (map & PCIM_BAR_MEM_PREFETCH)
2735			prefetch = 1;
2736	} else
2737		type = SYS_RES_IOPORT;
2738	mapsize = pci_mapsize(testval);
2739	base = pci_mapbase(map);
2740#ifdef __PCI_BAR_ZERO_VALID
2741	basezero = 0;
2742#else
2743	basezero = base == 0;
2744#endif
2745	maprange = pci_maprange(map);
2746	barlen = maprange == 64 ? 2 : 1;
2747
2748	/*
2749	 * For I/O registers, if bottom bit is set, and the next bit up
2750	 * isn't clear, we know we have a BAR that doesn't conform to the
2751	 * spec, so ignore it.  Also, sanity check the size of the data
2752	 * areas to the type of memory involved.  Memory must be at least
2753	 * 16 bytes in size, while I/O ranges must be at least 4.
2754	 */
2755	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2756		return (barlen);
2757	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2758	    (type == SYS_RES_IOPORT && mapsize < 2))
2759		return (barlen);
2760
2761	/* Save a record of this BAR. */
2762	pm = pci_add_bar(dev, reg, map, mapsize);
2763	if (bootverbose) {
2764		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2765		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2766		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2767			printf(", port disabled\n");
2768		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2769			printf(", memory disabled\n");
2770		else
2771			printf(", enabled\n");
2772	}
2773
2774	/*
2775	 * If base is 0, then we have problems if this architecture does
2776	 * not allow that.  It is best to ignore such entries for the
2777	 * moment.  These will be allocated later if the driver specifically
2778	 * requests them.  However, some removable busses look better when
2779	 * all resources are allocated, so allow '0' to be overriden.
2780	 *
2781	 * Similarly treat maps whose values is the same as the test value
2782	 * read back.  These maps have had all f's written to them by the
2783	 * BIOS in an attempt to disable the resources.
2784	 */
2785	if (!force && (basezero || map == testval))
2786		return (barlen);
2787	if ((u_long)base != base) {
2788		device_printf(bus,
2789		    "pci%d:%d:%d:%d bar %#x too many address bits",
2790		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2791		    pci_get_function(dev), reg);
2792		return (barlen);
2793	}
2794
2795	/*
2796	 * This code theoretically does the right thing, but has
2797	 * undesirable side effects in some cases where peripherals
2798	 * respond oddly to having these bits enabled.  Let the user
2799	 * be able to turn them off (since pci_enable_io_modes is 1 by
2800	 * default).
2801	 */
2802	if (pci_enable_io_modes) {
2803		/* Turn on resources that have been left off by a lazy BIOS */
2804		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2805			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2806			cmd |= PCIM_CMD_PORTEN;
2807			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2808		}
2809		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2810			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2811			cmd |= PCIM_CMD_MEMEN;
2812			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2813		}
2814	} else {
2815		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2816			return (barlen);
2817		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2818			return (barlen);
2819	}
2820
2821	count = (pci_addr_t)1 << mapsize;
2822	if (basezero || base == pci_mapbase(testval)) {
2823		start = 0;	/* Let the parent decide. */
2824		end = ~0ul;
2825	} else {
2826		start = base;
2827		end = base + count - 1;
2828	}
2829	resource_list_add(rl, type, reg, start, end, count);
2830
2831	/*
2832	 * Try to allocate the resource for this BAR from our parent
2833	 * so that this resource range is already reserved.  The
2834	 * driver for this device will later inherit this resource in
2835	 * pci_alloc_resource().
2836	 */
2837	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2838	    prefetch ? RF_PREFETCHABLE : 0);
2839	if (res == NULL) {
2840		/*
2841		 * If the allocation fails, clear the BAR and delete
2842		 * the resource list entry to force
2843		 * pci_alloc_resource() to allocate resources from the
2844		 * parent.
2845		 */
2846		resource_list_delete(rl, type, reg);
2847		start = 0;
2848	} else
2849		start = rman_get_start(res);
2850	pci_write_bar(dev, pm, start);
2851	return (barlen);
2852}
2853
2854/*
2855 * For ATA devices we need to decide early what addressing mode to use.
2856 * Legacy demands that the primary and secondary ATA ports sits on the
2857 * same addresses that old ISA hardware did. This dictates that we use
2858 * those addresses and ignore the BAR's if we cannot set PCI native
2859 * addressing mode.
2860 */
2861static void
2862pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2863    uint32_t prefetchmask)
2864{
2865	struct resource *r;
2866	int rid, type, progif;
2867#if 0
2868	/* if this device supports PCI native addressing use it */
2869	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2870	if ((progif & 0x8a) == 0x8a) {
2871		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2872		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2873			printf("Trying ATA native PCI addressing mode\n");
2874			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2875		}
2876	}
2877#endif
2878	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2879	type = SYS_RES_IOPORT;
2880	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2881		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2882		    prefetchmask & (1 << 0));
2883		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2884		    prefetchmask & (1 << 1));
2885	} else {
2886		rid = PCIR_BAR(0);
2887		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2888		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2889		    0x1f7, 8, 0);
2890		rid = PCIR_BAR(1);
2891		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2892		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2893		    0x3f6, 1, 0);
2894	}
2895	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2896		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2897		    prefetchmask & (1 << 2));
2898		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2899		    prefetchmask & (1 << 3));
2900	} else {
2901		rid = PCIR_BAR(2);
2902		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2903		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2904		    0x177, 8, 0);
2905		rid = PCIR_BAR(3);
2906		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2907		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2908		    0x376, 1, 0);
2909	}
2910	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2911	    prefetchmask & (1 << 4));
2912	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2913	    prefetchmask & (1 << 5));
2914}
2915
2916static void
2917pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2918{
2919	struct pci_devinfo *dinfo = device_get_ivars(dev);
2920	pcicfgregs *cfg = &dinfo->cfg;
2921	char tunable_name[64];
2922	int irq;
2923
2924	/* Has to have an intpin to have an interrupt. */
2925	if (cfg->intpin == 0)
2926		return;
2927
2928	/* Let the user override the IRQ with a tunable. */
2929	irq = PCI_INVALID_IRQ;
2930	snprintf(tunable_name, sizeof(tunable_name),
2931	    "hw.pci%d.%d.%d.INT%c.irq",
2932	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2933	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2934		irq = PCI_INVALID_IRQ;
2935
2936	/*
2937	 * If we didn't get an IRQ via the tunable, then we either use the
2938	 * IRQ value in the intline register or we ask the bus to route an
2939	 * interrupt for us.  If force_route is true, then we only use the
2940	 * value in the intline register if the bus was unable to assign an
2941	 * IRQ.
2942	 */
2943	if (!PCI_INTERRUPT_VALID(irq)) {
2944		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2945			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2946		if (!PCI_INTERRUPT_VALID(irq))
2947			irq = cfg->intline;
2948	}
2949
2950	/* If after all that we don't have an IRQ, just bail. */
2951	if (!PCI_INTERRUPT_VALID(irq))
2952		return;
2953
2954	/* Update the config register if it changed. */
2955	if (irq != cfg->intline) {
2956		cfg->intline = irq;
2957		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2958	}
2959
2960	/* Add this IRQ as rid 0 interrupt resource. */
2961	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2962}
2963
2964/* Perform early OHCI takeover from SMM. */
2965static void
2966ohci_early_takeover(device_t self)
2967{
2968	struct resource *res;
2969	uint32_t ctl;
2970	int rid;
2971	int i;
2972
2973	rid = PCIR_BAR(0);
2974	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2975	if (res == NULL)
2976		return;
2977
2978	ctl = bus_read_4(res, OHCI_CONTROL);
2979	if (ctl & OHCI_IR) {
2980		if (bootverbose)
2981			printf("ohci early: "
2982			    "SMM active, request owner change\n");
2983		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2984		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2985			DELAY(1000);
2986			ctl = bus_read_4(res, OHCI_CONTROL);
2987		}
2988		if (ctl & OHCI_IR) {
2989			if (bootverbose)
2990				printf("ohci early: "
2991				    "SMM does not respond, resetting\n");
2992			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2993		}
2994		/* Disable interrupts */
2995		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2996	}
2997
2998	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2999}
3000
3001/* Perform early UHCI takeover from SMM. */
3002static void
3003uhci_early_takeover(device_t self)
3004{
3005	struct resource *res;
3006	int rid;
3007
3008	/*
3009	 * Set the PIRQD enable bit and switch off all the others. We don't
3010	 * want legacy support to interfere with us XXX Does this also mean
3011	 * that the BIOS won't touch the keyboard anymore if it is connected
3012	 * to the ports of the root hub?
3013	 */
3014	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3015
3016	/* Disable interrupts */
3017	rid = PCI_UHCI_BASE_REG;
3018	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3019	if (res != NULL) {
3020		bus_write_2(res, UHCI_INTR, 0);
3021		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3022	}
3023}
3024
3025/* Perform early EHCI takeover from SMM. */
3026static void
3027ehci_early_takeover(device_t self)
3028{
3029	struct resource *res;
3030	uint32_t cparams;
3031	uint32_t eec;
3032	uint8_t eecp;
3033	uint8_t bios_sem;
3034	uint8_t offs;
3035	int rid;
3036	int i;
3037
3038	rid = PCIR_BAR(0);
3039	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3040	if (res == NULL)
3041		return;
3042
3043	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3044
3045	/* Synchronise with the BIOS if it owns the controller. */
3046	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3047	    eecp = EHCI_EECP_NEXT(eec)) {
3048		eec = pci_read_config(self, eecp, 4);
3049		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3050			continue;
3051		}
3052		bios_sem = pci_read_config(self, eecp +
3053		    EHCI_LEGSUP_BIOS_SEM, 1);
3054		if (bios_sem == 0) {
3055			continue;
3056		}
3057		if (bootverbose)
3058			printf("ehci early: "
3059			    "SMM active, request owner change\n");
3060
3061		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3062
3063		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3064			DELAY(1000);
3065			bios_sem = pci_read_config(self, eecp +
3066			    EHCI_LEGSUP_BIOS_SEM, 1);
3067		}
3068
3069		if (bios_sem != 0) {
3070			if (bootverbose)
3071				printf("ehci early: "
3072				    "SMM does not respond\n");
3073		}
3074		/* Disable interrupts */
3075		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3076		bus_write_4(res, offs + EHCI_USBINTR, 0);
3077	}
3078	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3079}
3080
3081/* Perform early XHCI takeover from SMM. */
3082static void
3083xhci_early_takeover(device_t self)
3084{
3085	struct resource *res;
3086	uint32_t cparams;
3087	uint32_t eec;
3088	uint8_t eecp;
3089	uint8_t bios_sem;
3090	uint8_t offs;
3091	int rid;
3092	int i;
3093
3094	rid = PCIR_BAR(0);
3095	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3096	if (res == NULL)
3097		return;
3098
3099	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3100
3101	eec = -1;
3102
3103	/* Synchronise with the BIOS if it owns the controller. */
3104	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3105	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3106		eec = bus_read_4(res, eecp);
3107
3108		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3109			continue;
3110
3111		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3112		if (bios_sem == 0)
3113			continue;
3114
3115		if (bootverbose)
3116			printf("xhci early: "
3117			    "SMM active, request owner change\n");
3118
3119		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3120
3121		/* wait a maximum of 5 second */
3122
3123		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3124			DELAY(1000);
3125			bios_sem = bus_read_1(res, eecp +
3126			    XHCI_XECP_BIOS_SEM);
3127		}
3128
3129		if (bios_sem != 0) {
3130			if (bootverbose)
3131				printf("xhci early: "
3132				    "SMM does not respond\n");
3133		}
3134
3135		/* Disable interrupts */
3136		offs = bus_read_1(res, XHCI_CAPLENGTH);
3137		bus_write_4(res, offs + XHCI_USBCMD, 0);
3138		bus_read_4(res, offs + XHCI_USBSTS);
3139	}
3140	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3141}
3142
3143void
3144pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3145{
3146	struct pci_devinfo *dinfo;
3147	pcicfgregs *cfg;
3148	struct resource_list *rl;
3149	const struct pci_quirk *q;
3150	uint32_t devid;
3151	int i;
3152
3153	dinfo = device_get_ivars(dev);
3154	cfg = &dinfo->cfg;
3155	rl = &dinfo->resources;
3156	devid = (cfg->device << 16) | cfg->vendor;
3157
3158	/* ATA devices needs special map treatment */
3159	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3160	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3161	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3162	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3163	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3164		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3165	else
3166		for (i = 0; i < cfg->nummaps;) {
3167			/*
3168			 * Skip quirked resources.
3169			 */
3170			for (q = &pci_quirks[0]; q->devid != 0; q++)
3171				if (q->devid == devid &&
3172				    q->type == PCI_QUIRK_UNMAP_REG &&
3173				    q->arg1 == PCIR_BAR(i))
3174					break;
3175			if (q->devid != 0) {
3176				i++;
3177				continue;
3178			}
3179			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3180			    prefetchmask & (1 << i));
3181		}
3182
3183	/*
3184	 * Add additional, quirked resources.
3185	 */
3186	for (q = &pci_quirks[0]; q->devid != 0; q++)
3187		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
3188			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3189
3190	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3191#ifdef __PCI_REROUTE_INTERRUPT
3192		/*
3193		 * Try to re-route interrupts. Sometimes the BIOS or
3194		 * firmware may leave bogus values in these registers.
3195		 * If the re-route fails, then just stick with what we
3196		 * have.
3197		 */
3198		pci_assign_interrupt(bus, dev, 1);
3199#else
3200		pci_assign_interrupt(bus, dev, 0);
3201#endif
3202	}
3203
3204	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3205	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3206		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3207			xhci_early_takeover(dev);
3208		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3209			ehci_early_takeover(dev);
3210		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3211			ohci_early_takeover(dev);
3212		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3213			uhci_early_takeover(dev);
3214	}
3215}
3216
3217void
3218pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3219{
3220#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3221	device_t pcib = device_get_parent(dev);
3222	struct pci_devinfo *dinfo;
3223	int maxslots;
3224	int s, f, pcifunchigh;
3225	uint8_t hdrtype;
3226
3227	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3228	    ("dinfo_size too small"));
3229	maxslots = PCIB_MAXSLOTS(pcib);
3230	for (s = 0; s <= maxslots; s++) {
3231		pcifunchigh = 0;
3232		f = 0;
3233		DELAY(1);
3234		hdrtype = REG(PCIR_HDRTYPE, 1);
3235		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3236			continue;
3237		if (hdrtype & PCIM_MFDEV)
3238			pcifunchigh = PCI_FUNCMAX;
3239		for (f = 0; f <= pcifunchigh; f++) {
3240			dinfo = pci_read_device(pcib, domain, busno, s, f,
3241			    dinfo_size);
3242			if (dinfo != NULL) {
3243				pci_add_child(dev, dinfo);
3244			}
3245		}
3246	}
3247#undef REG
3248}
3249
3250void
3251pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3252{
3253	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3254	device_set_ivars(dinfo->cfg.dev, dinfo);
3255	resource_list_init(&dinfo->resources);
3256	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3257	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3258	pci_print_verbose(dinfo);
3259	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3260}
3261
3262static int
3263pci_probe(device_t dev)
3264{
3265
3266	device_set_desc(dev, "PCI bus");
3267
3268	/* Allow other subclasses to override this driver. */
3269	return (BUS_PROBE_GENERIC);
3270}
3271
/*
 * Attach work shared by PCI bus drivers: report the domain/bus numbers
 * when booting verbosely and set up the DMA tag stored in the softc.
 * Always returns 0.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a bus-specific DMA tag using PCI_DMA_BOUNDARY, but only
	 * when our grandparent (the parent of our bridge) is not itself a
	 * "pci" device.  A failure is reported and is not fatal.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/*
	 * NB: this "if" governs the assignment after the #endif.  Without
	 * PCI_DMA_BOUNDARY the assignment below is unconditional.
	 */
	if (!tag_valid)
#endif
		/* Fall back to the DMA tag obtained from bus_get_dma_tag(). */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3306
3307static int
3308pci_attach(device_t dev)
3309{
3310	int busno, domain, error;
3311
3312	error = pci_attach_common(dev);
3313	if (error)
3314		return (error);
3315
3316	/*
3317	 * Since there can be multiple independantly numbered PCI
3318	 * busses on systems with multiple PCI domains, we can't use
3319	 * the unit number to decide which bus we are probing. We ask
3320	 * the parent pcib what our domain and bus numbers are.
3321	 */
3322	domain = pcib_get_domain(dev);
3323	busno = pcib_get_bus(dev);
3324	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3325	return (bus_generic_attach(dev));
3326}
3327
3328static void
3329pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3330    int state)
3331{
3332	device_t child, pcib;
3333	struct pci_devinfo *dinfo;
3334	int dstate, i;
3335
3336	/*
3337	 * Set the device to the given state.  If the firmware suggests
3338	 * a different power state, use it instead.  If power management
3339	 * is not present, the firmware is responsible for managing
3340	 * device power.  Skip children who aren't attached since they
3341	 * are handled separately.
3342	 */
3343	pcib = device_get_parent(dev);
3344	for (i = 0; i < numdevs; i++) {
3345		child = devlist[i];
3346		dinfo = device_get_ivars(child);
3347		dstate = state;
3348		if (device_is_attached(child) &&
3349		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3350			pci_set_powerstate(child, dstate);
3351	}
3352}
3353
3354int
3355pci_suspend(device_t dev)
3356{
3357	device_t child, *devlist;
3358	struct pci_devinfo *dinfo;
3359	int error, i, numdevs;
3360
3361	/*
3362	 * Save the PCI configuration space for each child and set the
3363	 * device in the appropriate power state for this sleep state.
3364	 */
3365	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3366		return (error);
3367	for (i = 0; i < numdevs; i++) {
3368		child = devlist[i];
3369		dinfo = device_get_ivars(child);
3370		pci_cfg_save(child, dinfo, 0);
3371	}
3372
3373	/* Suspend devices before potentially powering them down. */
3374	error = bus_generic_suspend(dev);
3375	if (error) {
3376		free(devlist, M_TEMP);
3377		return (error);
3378	}
3379	if (pci_do_power_suspend)
3380		pci_set_power_children(dev, devlist, numdevs,
3381		    PCI_POWERSTATE_D3);
3382	free(devlist, M_TEMP);
3383	return (0);
3384}
3385
3386int
3387pci_resume(device_t dev)
3388{
3389	device_t child, *devlist;
3390	struct pci_devinfo *dinfo;
3391	int error, i, numdevs;
3392
3393	/*
3394	 * Set each child to D0 and restore its PCI configuration space.
3395	 */
3396	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3397		return (error);
3398	if (pci_do_power_resume)
3399		pci_set_power_children(dev, devlist, numdevs,
3400		    PCI_POWERSTATE_D0);
3401
3402	/* Now the device is powered up, restore its config space. */
3403	for (i = 0; i < numdevs; i++) {
3404		child = devlist[i];
3405		dinfo = device_get_ivars(child);
3406
3407		pci_cfg_restore(child, dinfo);
3408		if (!device_is_attached(child))
3409			pci_cfg_save(child, dinfo, 1);
3410	}
3411
3412	/*
3413	 * Resume critical devices first, then everything else later.
3414	 */
3415	for (i = 0; i < numdevs; i++) {
3416		child = devlist[i];
3417		switch (pci_get_class(child)) {
3418		case PCIC_DISPLAY:
3419		case PCIC_MEMORY:
3420		case PCIC_BRIDGE:
3421		case PCIC_BASEPERIPH:
3422			DEVICE_RESUME(child);
3423			break;
3424		}
3425	}
3426	for (i = 0; i < numdevs; i++) {
3427		child = devlist[i];
3428		switch (pci_get_class(child)) {
3429		case PCIC_DISPLAY:
3430		case PCIC_MEMORY:
3431		case PCIC_BRIDGE:
3432		case PCIC_BASEPERIPH:
3433			break;
3434		default:
3435			DEVICE_RESUME(child);
3436		}
3437	}
3438	free(devlist, M_TEMP);
3439	return (0);
3440}
3441
3442static void
3443pci_load_vendor_data(void)
3444{
3445	caddr_t data;
3446	void *ptr;
3447	size_t sz;
3448
3449	data = preload_search_by_type("pci_vendor_data");
3450	if (data != NULL) {
3451		ptr = preload_fetch_addr(data);
3452		sz = preload_fetch_size(data);
3453		if (ptr != NULL && sz != 0) {
3454			pci_vendordata = ptr;
3455			pci_vendordata_size = sz;
3456			/* terminate the database */
3457			pci_vendordata[pci_vendordata_size] = '\n';
3458		}
3459	}
3460}
3461
3462void
3463pci_driver_added(device_t dev, driver_t *driver)
3464{
3465	int numdevs;
3466	device_t *devlist;
3467	device_t child;
3468	struct pci_devinfo *dinfo;
3469	int i;
3470
3471	if (bootverbose)
3472		device_printf(dev, "driver added\n");
3473	DEVICE_IDENTIFY(driver, dev);
3474	if (device_get_children(dev, &devlist, &numdevs) != 0)
3475		return;
3476	for (i = 0; i < numdevs; i++) {
3477		child = devlist[i];
3478		if (device_get_state(child) != DS_NOTPRESENT)
3479			continue;
3480		dinfo = device_get_ivars(child);
3481		pci_print_verbose(dinfo);
3482		if (bootverbose)
3483			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3484		pci_cfg_restore(child, dinfo);
3485		if (device_probe_and_attach(child) != 0)
3486			pci_cfg_save(child, dinfo, 1);
3487	}
3488	free(devlist, M_TEMP);
3489}
3490
/*
 * Bus method: install an interrupt handler for a child.
 *
 * The generic setup is done first.  For direct children the PCI side
 * is then configured: rid 0 is legacy INTx, any other rid is an MSI or
 * MSI-X message.  On success the handler cookie is stored through
 * cookiep and 0 is returned; on failure the generic handler is torn
 * down again and the error is returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map once, when no address is cached yet. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* The first handler enables MSI in the device. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rids are 1-based; table and vector arrays 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector once, when no address is cached. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* The first handler programs and unmasks the entry. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * Reached both on success (error == 0) and from the
		 * mapping failures above, which skip the INTx disable.
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3582
3583int
3584pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3585    void *cookie)
3586{
3587	struct msix_table_entry *mte;
3588	struct resource_list_entry *rle;
3589	struct pci_devinfo *dinfo;
3590	int error, rid;
3591
3592	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3593		return (EINVAL);
3594
3595	/* If this isn't a direct child, just bail out */
3596	if (device_get_parent(child) != dev)
3597		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3598
3599	rid = rman_get_rid(irq);
3600	if (rid == 0) {
3601		/* Mask INTx */
3602		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3603	} else {
3604		/*
3605		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3606		 * decrement the appropriate handlers count and mask the
3607		 * MSI-X message, or disable MSI messages if the count
3608		 * drops to 0.
3609		 */
3610		dinfo = device_get_ivars(child);
3611		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3612		if (rle->res != irq)
3613			return (EINVAL);
3614		if (dinfo->cfg.msi.msi_alloc > 0) {
3615			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3616			    ("MSI-X index too high"));
3617			if (dinfo->cfg.msi.msi_handlers == 0)
3618				return (EINVAL);
3619			dinfo->cfg.msi.msi_handlers--;
3620			if (dinfo->cfg.msi.msi_handlers == 0)
3621				pci_disable_msi(child);
3622		} else {
3623			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3624			    ("No MSI or MSI-X interrupts allocated"));
3625			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3626			    ("MSI-X index too high"));
3627			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3628			if (mte->mte_handlers == 0)
3629				return (EINVAL);
3630			mte->mte_handlers--;
3631			if (mte->mte_handlers == 0)
3632				pci_mask_msix(child, rid - 1);
3633		}
3634	}
3635	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3636	if (rid > 0)
3637		KASSERT(error == 0,
3638		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3639	return (error);
3640}
3641
3642int
3643pci_print_child(device_t dev, device_t child)
3644{
3645	struct pci_devinfo *dinfo;
3646	struct resource_list *rl;
3647	int retval = 0;
3648
3649	dinfo = device_get_ivars(child);
3650	rl = &dinfo->resources;
3651
3652	retval += bus_print_child_header(dev, child);
3653
3654	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3655	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3656	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3657	if (device_get_flags(dev))
3658		retval += printf(" flags %#x", device_get_flags(dev));
3659
3660	retval += printf(" at device %d.%d", pci_get_slot(child),
3661	    pci_get_function(child));
3662
3663	retval += bus_print_child_footer(dev, child);
3664
3665	return (retval);
3666}
3667
/*
 * Generic descriptions used by pci_probe_nomatch() for devices that no
 * driver claimed.  An entry with subclass -1 describes a whole class;
 * any other entry describes one subclass of that class.  The table is
 * terminated by an entry whose desc is NULL.
 */
static struct
{
	int	class;		/* PCI base class code */
	int	subclass;	/* PCI subclass code, or -1 for the class */
	char	*desc;		/* description; NULL ends the table */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3759
3760void
3761pci_probe_nomatch(device_t dev, device_t child)
3762{
3763	int	i;
3764	char	*cp, *scp, *device;
3765
3766	/*
3767	 * Look for a listing for this device in a loaded device database.
3768	 */
3769	if ((device = pci_describe_device(child)) != NULL) {
3770		device_printf(dev, "<%s>", device);
3771		free(device, M_DEVBUF);
3772	} else {
3773		/*
3774		 * Scan the class/subclass descriptions for a general
3775		 * description.
3776		 */
3777		cp = "unknown";
3778		scp = NULL;
3779		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3780			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3781				if (pci_nomatch_tab[i].subclass == -1) {
3782					cp = pci_nomatch_tab[i].desc;
3783				} else if (pci_nomatch_tab[i].subclass ==
3784				    pci_get_subclass(child)) {
3785					scp = pci_nomatch_tab[i].desc;
3786				}
3787			}
3788		}
3789		device_printf(dev, "<%s%s%s>",
3790		    cp ? cp : "",
3791		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3792		    scp ? scp : "");
3793	}
3794	printf(" at device %d.%d (no driver attached)\n",
3795	    pci_get_slot(child), pci_get_function(child));
3796	pci_cfg_save(child, device_get_ivars(child), 1);
3797	return;
3798}
3799
3800/*
3801 * Parse the PCI device database, if loaded, and return a pointer to a
3802 * description of the device.
3803 *
3804 * The database is flat text formatted as follows:
3805 *
3806 * Any line not in a valid format is ignored.
3807 * Lines are terminated with newline '\n' characters.
3808 *
3809 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3810 * the vendor name.
3811 *
3812 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3813 * - devices cannot be listed without a corresponding VENDOR line.
3814 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3815 * another TAB, then the device name.
3816 */
3817
3818/*
3819 * Assuming (ptr) points to the beginning of a line in the database,
3820 * return the vendor or device and description of the next entry.
3821 * The value of (vendor) or (device) inappropriate for the entry type
3822 * is set to -1.  Returns nonzero at the end of the database.
3823 *
3824 * Note that this is slightly unrobust in the face of corrupt data;
3825 * we attempt to safeguard against this by spamming the end of the
3826 * database with a newline when we initialise.
3827 */
3828static int
3829pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3830{
3831	char	*cp = *ptr;
3832	int	left;
3833
3834	*device = -1;
3835	*vendor = -1;
3836	**desc = '\0';
3837	for (;;) {
3838		left = pci_vendordata_size - (cp - pci_vendordata);
3839		if (left <= 0) {
3840			*ptr = cp;
3841			return(1);
3842		}
3843
3844		/* vendor entry? */
3845		if (*cp != '\t' &&
3846		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3847			break;
3848		/* device entry? */
3849		if (*cp == '\t' &&
3850		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3851			break;
3852
3853		/* skip to next line */
3854		while (*cp != '\n' && left > 0) {
3855			cp++;
3856			left--;
3857		}
3858		if (*cp == '\n') {
3859			cp++;
3860			left--;
3861		}
3862	}
3863	/* skip to next line */
3864	while (*cp != '\n' && left > 0) {
3865		cp++;
3866		left--;
3867	}
3868	if (*cp == '\n' && left > 0)
3869		cp++;
3870	*ptr = cp;
3871	return(0);
3872}
3873
3874static char *
3875pci_describe_device(device_t dev)
3876{
3877	int	vendor, device;
3878	char	*desc, *vp, *dp, *line;
3879
3880	desc = vp = dp = NULL;
3881
3882	/*
3883	 * If we have no vendor data, we can't do anything.
3884	 */
3885	if (pci_vendordata == NULL)
3886		goto out;
3887
3888	/*
3889	 * Scan the vendor data looking for this device
3890	 */
3891	line = pci_vendordata;
3892	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3893		goto out;
3894	for (;;) {
3895		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3896			goto out;
3897		if (vendor == pci_get_vendor(dev))
3898			break;
3899	}
3900	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3901		goto out;
3902	for (;;) {
3903		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3904			*dp = 0;
3905			break;
3906		}
3907		if (vendor != -1) {
3908			*dp = 0;
3909			break;
3910		}
3911		if (device == pci_get_device(dev))
3912			break;
3913	}
3914	if (dp[0] == '\0')
3915		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3916	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3917	    NULL)
3918		sprintf(desc, "%s, %s", vp, dp);
3919 out:
3920	if (vp != NULL)
3921		free(vp, M_DEVBUF);
3922	if (dp != NULL)
3923		free(dp, M_DEVBUF);
3924	return(desc);
3925}
3926
3927int
3928pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3929{
3930	struct pci_devinfo *dinfo;
3931	pcicfgregs *cfg;
3932
3933	dinfo = device_get_ivars(child);
3934	cfg = &dinfo->cfg;
3935
3936	switch (which) {
3937	case PCI_IVAR_ETHADDR:
3938		/*
3939		 * The generic accessor doesn't deal with failure, so
3940		 * we set the return value, then return an error.
3941		 */
3942		*((uint8_t **) result) = NULL;
3943		return (EINVAL);
3944	case PCI_IVAR_SUBVENDOR:
3945		*result = cfg->subvendor;
3946		break;
3947	case PCI_IVAR_SUBDEVICE:
3948		*result = cfg->subdevice;
3949		break;
3950	case PCI_IVAR_VENDOR:
3951		*result = cfg->vendor;
3952		break;
3953	case PCI_IVAR_DEVICE:
3954		*result = cfg->device;
3955		break;
3956	case PCI_IVAR_DEVID:
3957		*result = (cfg->device << 16) | cfg->vendor;
3958		break;
3959	case PCI_IVAR_CLASS:
3960		*result = cfg->baseclass;
3961		break;
3962	case PCI_IVAR_SUBCLASS:
3963		*result = cfg->subclass;
3964		break;
3965	case PCI_IVAR_PROGIF:
3966		*result = cfg->progif;
3967		break;
3968	case PCI_IVAR_REVID:
3969		*result = cfg->revid;
3970		break;
3971	case PCI_IVAR_INTPIN:
3972		*result = cfg->intpin;
3973		break;
3974	case PCI_IVAR_IRQ:
3975		*result = cfg->intline;
3976		break;
3977	case PCI_IVAR_DOMAIN:
3978		*result = cfg->domain;
3979		break;
3980	case PCI_IVAR_BUS:
3981		*result = cfg->bus;
3982		break;
3983	case PCI_IVAR_SLOT:
3984		*result = cfg->slot;
3985		break;
3986	case PCI_IVAR_FUNCTION:
3987		*result = cfg->func;
3988		break;
3989	case PCI_IVAR_CMDREG:
3990		*result = cfg->cmdreg;
3991		break;
3992	case PCI_IVAR_CACHELNSZ:
3993		*result = cfg->cachelnsz;
3994		break;
3995	case PCI_IVAR_MINGNT:
3996		*result = cfg->mingnt;
3997		break;
3998	case PCI_IVAR_MAXLAT:
3999		*result = cfg->maxlat;
4000		break;
4001	case PCI_IVAR_LATTIMER:
4002		*result = cfg->lattimer;
4003		break;
4004	default:
4005		return (ENOENT);
4006	}
4007	return (0);
4008}
4009
4010int
4011pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4012{
4013	struct pci_devinfo *dinfo;
4014
4015	dinfo = device_get_ivars(child);
4016
4017	switch (which) {
4018	case PCI_IVAR_INTPIN:
4019		dinfo->cfg.intpin = value;
4020		return (0);
4021	case PCI_IVAR_ETHADDR:
4022	case PCI_IVAR_SUBVENDOR:
4023	case PCI_IVAR_SUBDEVICE:
4024	case PCI_IVAR_VENDOR:
4025	case PCI_IVAR_DEVICE:
4026	case PCI_IVAR_DEVID:
4027	case PCI_IVAR_CLASS:
4028	case PCI_IVAR_SUBCLASS:
4029	case PCI_IVAR_PROGIF:
4030	case PCI_IVAR_REVID:
4031	case PCI_IVAR_IRQ:
4032	case PCI_IVAR_DOMAIN:
4033	case PCI_IVAR_BUS:
4034	case PCI_IVAR_SLOT:
4035	case PCI_IVAR_FUNCTION:
4036		return (EINVAL);	/* disallow for now */
4037
4038	default:
4039		return (ENOENT);
4040	}
4041}
4042
4043#include "opt_ddb.h"
4044#ifdef DDB
4045#include <ddb/ddb.h>
4046#include <sys/cons.h>
4047
/*
 * List resources based on pci map registers; intended for use from
 * within ddb.
 */
4051
4052DB_SHOW_COMMAND(pciregs, db_pci_dump)
4053{
4054	struct pci_devinfo *dinfo;
4055	struct devlist *devlist_head;
4056	struct pci_conf *p;
4057	const char *name;
4058	int i, error, none_count;
4059
4060	none_count = 0;
4061	/* get the head of the device queue */
4062	devlist_head = &pci_devq;
4063
4064	/*
4065	 * Go through the list of devices and print out devices
4066	 */
4067	for (error = 0, i = 0,
4068	     dinfo = STAILQ_FIRST(devlist_head);
4069	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4070	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4071
4072		/* Populate pd_name and pd_unit */
4073		name = NULL;
4074		if (dinfo->cfg.dev)
4075			name = device_get_name(dinfo->cfg.dev);
4076
4077		p = &dinfo->conf;
4078		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4079			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4080			(name && *name) ? name : "none",
4081			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4082			none_count++,
4083			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4084			p->pc_sel.pc_func, (p->pc_class << 16) |
4085			(p->pc_subclass << 8) | p->pc_progif,
4086			(p->pc_subdevice << 16) | p->pc_subvendor,
4087			(p->pc_device << 16) | p->pc_vendor,
4088			p->pc_revid, p->pc_hdr);
4089	}
4090}
4091#endif /* DDB */
4092
4093static struct resource *
4094pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4095    u_long start, u_long end, u_long count, u_int flags)
4096{
4097	struct pci_devinfo *dinfo = device_get_ivars(child);
4098	struct resource_list *rl = &dinfo->resources;
4099	struct resource_list_entry *rle;
4100	struct resource *res;
4101	struct pci_map *pm;
4102	pci_addr_t map, testval;
4103	int mapsize;
4104
4105	res = NULL;
4106	pm = pci_find_bar(child, *rid);
4107	if (pm != NULL) {
4108		/* This is a BAR that we failed to allocate earlier. */
4109		mapsize = pm->pm_size;
4110		map = pm->pm_value;
4111	} else {
4112		/*
4113		 * Weed out the bogons, and figure out how large the
4114		 * BAR/map is.  BARs that read back 0 here are bogus
4115		 * and unimplemented.  Note: atapci in legacy mode are
4116		 * special and handled elsewhere in the code.  If you
4117		 * have a atapci device in legacy mode and it fails
4118		 * here, that other code is broken.
4119		 */
4120		pci_read_bar(child, *rid, &map, &testval);
4121
4122		/*
4123		 * Determine the size of the BAR and ignore BARs with a size
4124		 * of 0.  Device ROM BARs use a different mask value.
4125		 */
4126		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4127			mapsize = pci_romsize(testval);
4128		else
4129			mapsize = pci_mapsize(testval);
4130		if (mapsize == 0)
4131			goto out;
4132		pm = pci_add_bar(child, *rid, map, mapsize);
4133	}
4134
4135	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4136		if (type != SYS_RES_MEMORY) {
4137			if (bootverbose)
4138				device_printf(dev,
4139				    "child %s requested type %d for rid %#x,"
4140				    " but the BAR says it is an memio\n",
4141				    device_get_nameunit(child), type, *rid);
4142			goto out;
4143		}
4144	} else {
4145		if (type != SYS_RES_IOPORT) {
4146			if (bootverbose)
4147				device_printf(dev,
4148				    "child %s requested type %d for rid %#x,"
4149				    " but the BAR says it is an ioport\n",
4150				    device_get_nameunit(child), type, *rid);
4151			goto out;
4152		}
4153	}
4154
4155	/*
4156	 * For real BARs, we need to override the size that
4157	 * the driver requests, because that's what the BAR
4158	 * actually uses and we would otherwise have a
4159	 * situation where we might allocate the excess to
4160	 * another driver, which won't work.
4161	 */
4162	count = (pci_addr_t)1 << mapsize;
4163	if (RF_ALIGNMENT(flags) < mapsize)
4164		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4165	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4166		flags |= RF_PREFETCHABLE;
4167
4168	/*
4169	 * Allocate enough resource, and then write back the
4170	 * appropriate BAR for that resource.
4171	 */
4172	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
4173	    start, end, count, flags & ~RF_ACTIVE);
4174	if (res == NULL) {
4175		device_printf(child,
4176		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4177		    count, *rid, type, start, end);
4178		goto out;
4179	}
4180	resource_list_add(rl, type, *rid, start, end, count);
4181	rle = resource_list_find(rl, type, *rid);
4182	if (rle == NULL)
4183		panic("pci_reserve_map: unexpectedly can't find resource.");
4184	rle->res = res;
4185	rle->start = rman_get_start(res);
4186	rle->end = rman_get_end(res);
4187	rle->count = count;
4188	rle->flags = RLE_RESERVED;
4189	if (bootverbose)
4190		device_printf(child,
4191		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4192		    count, *rid, type, rman_get_start(res));
4193	map = rman_get_start(res);
4194	pci_write_bar(child, pm, map);
4195out:;
4196	return (res);
4197}
4198
4199struct resource *
4200pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4201		   u_long start, u_long end, u_long count, u_int flags)
4202{
4203	struct pci_devinfo *dinfo = device_get_ivars(child);
4204	struct resource_list *rl = &dinfo->resources;
4205	struct resource_list_entry *rle;
4206	struct resource *res;
4207	pcicfgregs *cfg = &dinfo->cfg;
4208
4209	if (device_get_parent(child) != dev)
4210		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4211		    type, rid, start, end, count, flags));
4212
4213	/*
4214	 * Perform lazy resource allocation
4215	 */
4216	switch (type) {
4217	case SYS_RES_IRQ:
4218		/*
4219		 * Can't alloc legacy interrupt once MSI messages have
4220		 * been allocated.
4221		 */
4222		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4223		    cfg->msix.msix_alloc > 0))
4224			return (NULL);
4225
4226		/*
4227		 * If the child device doesn't have an interrupt
4228		 * routed and is deserving of an interrupt, try to
4229		 * assign it one.
4230		 */
4231		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4232		    (cfg->intpin != 0))
4233			pci_assign_interrupt(dev, child, 0);
4234		break;
4235	case SYS_RES_IOPORT:
4236	case SYS_RES_MEMORY:
4237#ifdef NEW_PCIB
4238		/*
4239		 * PCI-PCI bridge I/O window resources are not BARs.
4240		 * For those allocations just pass the request up the
4241		 * tree.
4242		 */
4243		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4244			switch (*rid) {
4245			case PCIR_IOBASEL_1:
4246			case PCIR_MEMBASE_1:
4247			case PCIR_PMBASEL_1:
4248				/*
4249				 * XXX: Should we bother creating a resource
4250				 * list entry?
4251				 */
4252				return (bus_generic_alloc_resource(dev, child,
4253				    type, rid, start, end, count, flags));
4254			}
4255		}
4256#endif
4257		/* Reserve resources for this BAR if needed. */
4258		rle = resource_list_find(rl, type, *rid);
4259		if (rle == NULL) {
4260			res = pci_reserve_map(dev, child, type, rid, start, end,
4261			    count, flags);
4262			if (res == NULL)
4263				return (NULL);
4264		}
4265	}
4266	return (resource_list_alloc(rl, dev, child, type, rid,
4267	    start, end, count, flags));
4268}
4269
4270int
4271pci_activate_resource(device_t dev, device_t child, int type, int rid,
4272    struct resource *r)
4273{
4274	struct pci_devinfo *dinfo;
4275	int error;
4276
4277	error = bus_generic_activate_resource(dev, child, type, rid, r);
4278	if (error)
4279		return (error);
4280
4281	/* Enable decoding in the command register when activating BARs. */
4282	if (device_get_parent(child) == dev) {
4283		/* Device ROMs need their decoding explicitly enabled. */
4284		dinfo = device_get_ivars(child);
4285		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4286			pci_write_bar(child, pci_find_bar(child, rid),
4287			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4288		switch (type) {
4289		case SYS_RES_IOPORT:
4290		case SYS_RES_MEMORY:
4291			error = PCI_ENABLE_IO(dev, child, type);
4292			break;
4293		}
4294	}
4295	return (error);
4296}
4297
4298int
4299pci_deactivate_resource(device_t dev, device_t child, int type,
4300    int rid, struct resource *r)
4301{
4302	struct pci_devinfo *dinfo;
4303	int error;
4304
4305	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4306	if (error)
4307		return (error);
4308
4309	/* Disable decoding for device ROMs. */
4310	if (device_get_parent(child) == dev) {
4311		dinfo = device_get_ivars(child);
4312		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4313			pci_write_bar(child, pci_find_bar(child, rid),
4314			    rman_get_start(r));
4315	}
4316	return (0);
4317}
4318
4319void
4320pci_delete_child(device_t dev, device_t child)
4321{
4322	struct resource_list_entry *rle;
4323	struct resource_list *rl;
4324	struct pci_devinfo *dinfo;
4325
4326	dinfo = device_get_ivars(child);
4327	rl = &dinfo->resources;
4328
4329	if (device_is_attached(child))
4330		device_detach(child);
4331
4332	/* Turn off access to resources we're about to free */
4333	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4334	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4335
4336	/* Free all allocated resources */
4337	STAILQ_FOREACH(rle, rl, link) {
4338		if (rle->res) {
4339			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4340			    resource_list_busy(rl, rle->type, rle->rid)) {
4341				pci_printf(&dinfo->cfg,
4342				    "Resource still owned, oops. "
4343				    "(type=%d, rid=%d, addr=%lx)\n",
4344				    rle->type, rle->rid,
4345				    rman_get_start(rle->res));
4346				bus_release_resource(child, rle->type, rle->rid,
4347				    rle->res);
4348			}
4349			resource_list_unreserve(rl, dev, child, rle->type,
4350			    rle->rid);
4351		}
4352	}
4353	resource_list_free(rl);
4354
4355	device_delete_child(dev, child);
4356	pci_freecfg(dinfo);
4357}
4358
4359void
4360pci_delete_resource(device_t dev, device_t child, int type, int rid)
4361{
4362	struct pci_devinfo *dinfo;
4363	struct resource_list *rl;
4364	struct resource_list_entry *rle;
4365
4366	if (device_get_parent(child) != dev)
4367		return;
4368
4369	dinfo = device_get_ivars(child);
4370	rl = &dinfo->resources;
4371	rle = resource_list_find(rl, type, rid);
4372	if (rle == NULL)
4373		return;
4374
4375	if (rle->res) {
4376		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4377		    resource_list_busy(rl, type, rid)) {
4378			device_printf(dev, "delete_resource: "
4379			    "Resource still owned by child, oops. "
4380			    "(type=%d, rid=%d, addr=%lx)\n",
4381			    type, rid, rman_get_start(rle->res));
4382			return;
4383		}
4384
4385#ifndef __PCI_BAR_ZERO_VALID
4386		/*
4387		 * If this is a BAR, clear the BAR so it stops
4388		 * decoding before releasing the resource.
4389		 */
4390		switch (type) {
4391		case SYS_RES_IOPORT:
4392		case SYS_RES_MEMORY:
4393			pci_write_bar(child, pci_find_bar(child, rid), 0);
4394			break;
4395		}
4396#endif
4397		resource_list_unreserve(rl, dev, child, type, rid);
4398	}
4399	resource_list_delete(rl, type, rid);
4400}
4401
4402struct resource_list *
4403pci_get_resource_list (device_t dev, device_t child)
4404{
4405	struct pci_devinfo *dinfo = device_get_ivars(child);
4406
4407	return (&dinfo->resources);
4408}
4409
4410bus_dma_tag_t
4411pci_get_dma_tag(device_t bus, device_t dev)
4412{
4413	struct pci_softc *sc = device_get_softc(bus);
4414
4415	return (sc->sc_dma_tag);
4416}
4417
4418uint32_t
4419pci_read_config_method(device_t dev, device_t child, int reg, int width)
4420{
4421	struct pci_devinfo *dinfo = device_get_ivars(child);
4422	pcicfgregs *cfg = &dinfo->cfg;
4423
4424	return (PCIB_READ_CONFIG(device_get_parent(dev),
4425	    cfg->bus, cfg->slot, cfg->func, reg, width));
4426}
4427
4428void
4429pci_write_config_method(device_t dev, device_t child, int reg,
4430    uint32_t val, int width)
4431{
4432	struct pci_devinfo *dinfo = device_get_ivars(child);
4433	pcicfgregs *cfg = &dinfo->cfg;
4434
4435	PCIB_WRITE_CONFIG(device_get_parent(dev),
4436	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4437}
4438
4439int
4440pci_child_location_str_method(device_t dev, device_t child, char *buf,
4441    size_t buflen)
4442{
4443
4444	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4445	    pci_get_function(child));
4446	return (0);
4447}
4448
4449int
4450pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4451    size_t buflen)
4452{
4453	struct pci_devinfo *dinfo;
4454	pcicfgregs *cfg;
4455
4456	dinfo = device_get_ivars(child);
4457	cfg = &dinfo->cfg;
4458	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4459	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4460	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4461	    cfg->progif);
4462	return (0);
4463}
4464
4465int
4466pci_assign_interrupt_method(device_t dev, device_t child)
4467{
4468	struct pci_devinfo *dinfo = device_get_ivars(child);
4469	pcicfgregs *cfg = &dinfo->cfg;
4470
4471	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4472	    cfg->intpin));
4473}
4474
4475static int
4476pci_modevent(module_t mod, int what, void *arg)
4477{
4478	static struct cdev *pci_cdev;
4479
4480	switch (what) {
4481	case MOD_LOAD:
4482		STAILQ_INIT(&pci_devq);
4483		pci_generation = 0;
4484		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4485		    "pci");
4486		pci_load_vendor_data();
4487		break;
4488
4489	case MOD_UNLOAD:
4490		destroy_dev(pci_cdev);
4491		break;
4492	}
4493
4494	return (0);
4495}
4496
4497static void
4498pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
4499{
4500#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
4501	struct pcicfg_pcie *cfg;
4502	int version, pos;
4503
4504	cfg = &dinfo->cfg.pcie;
4505	pos = cfg->pcie_location;
4506
4507	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;
4508
4509	WREG(PCIR_EXPRESS_DEVICE_CTL, cfg->pcie_device_ctl);
4510
4511	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4512	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
4513	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
4514		WREG(PCIR_EXPRESS_LINK_CTL, cfg->pcie_link_ctl);
4515
4516	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4517	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
4518	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
4519		WREG(PCIR_EXPRESS_SLOT_CTL, cfg->pcie_slot_ctl);
4520
4521	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4522	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
4523		WREG(PCIR_EXPRESS_ROOT_CTL, cfg->pcie_root_ctl);
4524
4525	if (version > 1) {
4526		WREG(PCIR_EXPRESS_DEVICE_CTL2, cfg->pcie_device_ctl2);
4527		WREG(PCIR_EXPRESS_LINK_CTL2, cfg->pcie_link_ctl2);
4528		WREG(PCIR_EXPRESS_SLOT_CTL2, cfg->pcie_slot_ctl2);
4529	}
4530#undef WREG
4531}
4532
4533static void
4534pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4535{
4536	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4537	    dinfo->cfg.pcix.pcix_command,  2);
4538}
4539
4540void
4541pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4542{
4543
4544	/*
4545	 * Only do header type 0 devices.  Type 1 devices are bridges,
4546	 * which we know need special treatment.  Type 2 devices are
4547	 * cardbus bridges which also require special treatment.
4548	 * Other types are unknown, and we err on the side of safety
4549	 * by ignoring them.
4550	 */
4551	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4552		return;
4553
4554	/*
4555	 * Restore the device to full power mode.  We must do this
4556	 * before we restore the registers because moving from D3 to
4557	 * D0 will cause the chip's BARs and some other registers to
4558	 * be reset to some unknown power on reset values.  Cut down
4559	 * the noise on boot by doing nothing if we are already in
4560	 * state D0.
4561	 */
4562	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
4563		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4564	pci_restore_bars(dev);
4565	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4566	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4567	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4568	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4569	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4570	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4571	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4572	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4573	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4574
4575	/*
4576	 * Restore extended capabilities for PCI-Express and PCI-X
4577	 */
4578	if (dinfo->cfg.pcie.pcie_location != 0)
4579		pci_cfg_restore_pcie(dev, dinfo);
4580	if (dinfo->cfg.pcix.pcix_location != 0)
4581		pci_cfg_restore_pcix(dev, dinfo);
4582
4583	/* Restore MSI and MSI-X configurations if they are present. */
4584	if (dinfo->cfg.msi.msi_location != 0)
4585		pci_resume_msi(dev);
4586	if (dinfo->cfg.msix.msix_location != 0)
4587		pci_resume_msix(dev);
4588}
4589
4590static void
4591pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
4592{
4593#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
4594	struct pcicfg_pcie *cfg;
4595	int version, pos;
4596
4597	cfg = &dinfo->cfg.pcie;
4598	pos = cfg->pcie_location;
4599
4600	cfg->pcie_flags = RREG(PCIR_EXPRESS_FLAGS);
4601
4602	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;
4603
4604	cfg->pcie_device_ctl = RREG(PCIR_EXPRESS_DEVICE_CTL);
4605
4606	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4607	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
4608	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
4609		cfg->pcie_link_ctl = RREG(PCIR_EXPRESS_LINK_CTL);
4610
4611	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4612	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
4613	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
4614		cfg->pcie_slot_ctl = RREG(PCIR_EXPRESS_SLOT_CTL);
4615
4616	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4617	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
4618		cfg->pcie_root_ctl = RREG(PCIR_EXPRESS_ROOT_CTL);
4619
4620	if (version > 1) {
4621		cfg->pcie_device_ctl2 = RREG(PCIR_EXPRESS_DEVICE_CTL2);
4622		cfg->pcie_link_ctl2 = RREG(PCIR_EXPRESS_LINK_CTL2);
4623		cfg->pcie_slot_ctl2 = RREG(PCIR_EXPRESS_SLOT_CTL2);
4624	}
4625#undef RREG
4626}
4627
4628static void
4629pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4630{
4631	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4632	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4633}
4634
4635void
4636pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4637{
4638	uint32_t cls;
4639	int ps;
4640
4641	/*
4642	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4643	 * we know need special treatment.  Type 2 devices are cardbus bridges
4644	 * which also require special treatment.  Other types are unknown, and
4645	 * we err on the side of safety by ignoring them.  Powering down
4646	 * bridges should not be undertaken lightly.
4647	 */
4648	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4649		return;
4650
4651	/*
4652	 * Some drivers apparently write to these registers w/o updating our
4653	 * cached copy.  No harm happens if we update the copy, so do so here
4654	 * so we can restore them.  The COMMAND register is modified by the
4655	 * bus w/o updating the cache.  This should represent the normally
4656	 * writable portion of the 'defined' part of type 0 headers.  In
4657	 * theory we also need to save/restore the PCI capability structures
4658	 * we know about, but apart from power we don't know any that are
4659	 * writable.
4660	 */
4661	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4662	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4663	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4664	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4665	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4666	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4667	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4668	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4669	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4670	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4671	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4672	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4673	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4674	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4675	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4676
4677	if (dinfo->cfg.pcie.pcie_location != 0)
4678		pci_cfg_save_pcie(dev, dinfo);
4679
4680	if (dinfo->cfg.pcix.pcix_location != 0)
4681		pci_cfg_save_pcix(dev, dinfo);
4682
4683	/*
4684	 * don't set the state for display devices, base peripherals and
4685	 * memory devices since bad things happen when they are powered down.
4686	 * We should (a) have drivers that can easily detach and (b) use
4687	 * generic drivers for these devices so that some device actually
4688	 * attaches.  We need to make sure that when we implement (a) we don't
4689	 * power the device down on a reattach.
4690	 */
4691	cls = pci_get_class(dev);
4692	if (!setstate)
4693		return;
4694	switch (pci_do_power_nodriver)
4695	{
4696		case 0:		/* NO powerdown at all */
4697			return;
4698		case 1:		/* Conservative about what to power down */
4699			if (cls == PCIC_STORAGE)
4700				return;
4701			/*FALLTHROUGH*/
4702		case 2:		/* Agressive about what to power down */
4703			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4704			    cls == PCIC_BASEPERIPH)
4705				return;
4706			/*FALLTHROUGH*/
4707		case 3:		/* Power down everything */
4708			break;
4709	}
4710	/*
4711	 * PCI spec says we can only go into D3 state from D0 state.
4712	 * Transition from D[12] into D0 before going to D3 state.
4713	 */
4714	ps = pci_get_powerstate(dev);
4715	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4716		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4717	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4718		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4719}
4720
4721/* Wrapper APIs suitable for device driver use. */
4722void
4723pci_save_state(device_t dev)
4724{
4725	struct pci_devinfo *dinfo;
4726
4727	dinfo = device_get_ivars(dev);
4728	pci_cfg_save(dev, dinfo, 0);
4729}
4730
4731void
4732pci_restore_state(device_t dev)
4733{
4734	struct pci_devinfo *dinfo;
4735
4736	dinfo = device_get_ivars(dev);
4737	pci_cfg_restore(dev, dinfo);
4738}
4739