pci.c revision 222753
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 222753 2011-06-06 13:21:11Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72#define	PCIR_IS_BIOS(cfg, reg)						\
73	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
74	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
75
76
77static pci_addr_t	pci_mapbase(uint64_t mapreg);
78static const char	*pci_maptype(uint64_t mapreg);
79static int		pci_mapsize(uint64_t testval);
80static int		pci_maprange(uint64_t mapreg);
81static pci_addr_t	pci_rombase(uint64_t mapreg);
82static int		pci_romsize(uint64_t testval);
83static void		pci_fixancient(pcicfgregs *cfg);
84static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85
86static int		pci_porten(device_t dev);
87static int		pci_memen(device_t dev);
88static void		pci_assign_interrupt(device_t bus, device_t dev,
89			    int force_route);
90static int		pci_add_map(device_t bus, device_t dev, int reg,
91			    struct resource_list *rl, int force, int prefetch);
92static int		pci_probe(device_t dev);
93static int		pci_attach(device_t dev);
94static void		pci_load_vendor_data(void);
95static int		pci_describe_parse_line(char **ptr, int *vendor,
96			    int *device, char **desc);
97static char		*pci_describe_device(device_t dev);
98static int		pci_modevent(module_t mod, int what, void *arg);
99static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100			    pcicfgregs *cfg);
101static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103			    int reg, uint32_t *data);
104#if 0
105static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106			    int reg, uint32_t data);
107#endif
108static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109static void		pci_disable_msi(device_t dev);
110static void		pci_enable_msi(device_t dev, uint64_t address,
111			    uint16_t data);
112static void		pci_enable_msix(device_t dev, u_int index,
113			    uint64_t address, uint32_t data);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static void		pci_resume_msi(device_t dev);
118static void		pci_resume_msix(device_t dev);
119static int		pci_remap_intr_method(device_t bus, device_t dev,
120			    u_int irq);
121
122static device_method_t pci_methods[] = {
123	/* Device interface */
124	DEVMETHOD(device_probe,		pci_probe),
125	DEVMETHOD(device_attach,	pci_attach),
126	DEVMETHOD(device_detach,	bus_generic_detach),
127	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
128	DEVMETHOD(device_suspend,	pci_suspend),
129	DEVMETHOD(device_resume,	pci_resume),
130
131	/* Bus interface */
132	DEVMETHOD(bus_print_child,	pci_print_child),
133	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
134	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
135	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
136	DEVMETHOD(bus_driver_added,	pci_driver_added),
137	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
138	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
139
140	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
141	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
142	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
143	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
144	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
145	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
146	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
147	DEVMETHOD(bus_activate_resource, pci_activate_resource),
148	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
149	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
150	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
151	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
152
153	/* PCI interface */
154	DEVMETHOD(pci_read_config,	pci_read_config_method),
155	DEVMETHOD(pci_write_config,	pci_write_config_method),
156	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
157	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
158	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
159	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
160	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
161	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
162	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
163	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
164	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
165	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
166	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
167	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
168	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
169	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
170	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
171	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
172
173	{ 0, 0 }
174};
175
176DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
177
178static devclass_t pci_devclass;
179DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
180MODULE_VERSION(pci, 1);
181
182static char	*pci_vendordata;
183static size_t	pci_vendordata_size;
184
185
186struct pci_quirk {
187	uint32_t devid;	/* Vendor/device of the card */
188	int	type;
189#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
190#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
191#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
192	int	arg1;
193	int	arg2;
194};
195
196struct pci_quirk pci_quirks[] = {
197	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
198	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
199	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
200	/* As does the Serverworks OSB4 (the SMBus mapping register) */
201	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
202
203	/*
204	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
205	 * or the CMIC-SL (AKA ServerWorks GC_LE).
206	 */
207	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
209
210	/*
211	 * MSI doesn't work on earlier Intel chipsets including
212	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
213	 */
214	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
215	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
218	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
219	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
220	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
221
222	/*
223	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
224	 * bridge.
225	 */
226	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227
228	/*
229	 * Some virtualization environments emulate an older chipset
230	 * but support MSI just fine.  QEMU uses the Intel 82440.
231	 */
232	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
233
234	{ 0 }
235};
236
237/* map register information */
238#define	PCI_MAPMEM	0x01	/* memory map */
239#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
240#define	PCI_MAPPORT	0x04	/* port map */
241
242struct devlist pci_devq;
243uint32_t pci_generation;
244uint32_t pci_numdevs = 0;
245static int pcie_chipset, pcix_chipset;
246
247/* sysctl vars */
248SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
249
250static int pci_enable_io_modes = 1;
251TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
252SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
253    &pci_enable_io_modes, 1,
254    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
255enable these bits correctly.  We'd like to do this all the time, but there\n\
256are some peripherals that this causes problems with.");
257
258static int pci_do_power_nodriver = 0;
259TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
260SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
261    &pci_do_power_nodriver, 0,
262  "Place a function into D3 state when no driver attaches to it.  0 means\n\
263disable.  1 means conservatively place devices into D3 state.  2 means\n\
264agressively place devices into D3 state.  3 means put absolutely everything\n\
265in D3 state.");
266
267int pci_do_power_resume = 1;
268TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
269SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
270    &pci_do_power_resume, 1,
271  "Transition from D3 -> D0 on resume.");
272
273int pci_do_power_suspend = 1;
274TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
275SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
276    &pci_do_power_suspend, 1,
277  "Transition from D0 -> D3 on suspend.");
278
279static int pci_do_msi = 1;
280TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
281SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
282    "Enable support for MSI interrupts");
283
284static int pci_do_msix = 1;
285TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
286SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
287    "Enable support for MSI-X interrupts");
288
289static int pci_honor_msi_blacklist = 1;
290TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
291SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
292    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
293
294#if defined(__i386__) || defined(__amd64__)
295static int pci_usb_takeover = 1;
296#else
297static int pci_usb_takeover = 0;
298#endif
299TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
300SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
301    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
302Disable this if you depend on BIOS emulation of USB devices, that is\n\
303you use USB devices (like keyboard or mouse) but do not load USB drivers");
304
305/* Find a device_t by bus/slot/function in domain 0 */
306
307device_t
308pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
309{
310
311	return (pci_find_dbsf(0, bus, slot, func));
312}
313
314/* Find a device_t by domain/bus/slot/function */
315
316device_t
317pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
318{
319	struct pci_devinfo *dinfo;
320
321	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322		if ((dinfo->cfg.domain == domain) &&
323		    (dinfo->cfg.bus == bus) &&
324		    (dinfo->cfg.slot == slot) &&
325		    (dinfo->cfg.func == func)) {
326			return (dinfo->cfg.dev);
327		}
328	}
329
330	return (NULL);
331}
332
333/* Find a device_t by vendor/device ID */
334
335device_t
336pci_find_device(uint16_t vendor, uint16_t device)
337{
338	struct pci_devinfo *dinfo;
339
340	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
341		if ((dinfo->cfg.vendor == vendor) &&
342		    (dinfo->cfg.device == device)) {
343			return (dinfo->cfg.dev);
344		}
345	}
346
347	return (NULL);
348}
349
350static int
351pci_printf(pcicfgregs *cfg, const char *fmt, ...)
352{
353	va_list ap;
354	int retval;
355
356	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
357	    cfg->func);
358	va_start(ap, fmt);
359	retval += vprintf(fmt, ap);
360	va_end(ap);
361	return (retval);
362}
363
364/* return base address of memory or port map */
365
366static pci_addr_t
367pci_mapbase(uint64_t mapreg)
368{
369
370	if (PCI_BAR_MEM(mapreg))
371		return (mapreg & PCIM_BAR_MEM_BASE);
372	else
373		return (mapreg & PCIM_BAR_IO_BASE);
374}
375
376/* return map type of memory or port map */
377
378static const char *
379pci_maptype(uint64_t mapreg)
380{
381
382	if (PCI_BAR_IO(mapreg))
383		return ("I/O Port");
384	if (mapreg & PCIM_BAR_MEM_PREFETCH)
385		return ("Prefetchable Memory");
386	return ("Memory");
387}
388
389/* return log2 of map size decoded for memory or port map */
390
391static int
392pci_mapsize(uint64_t testval)
393{
394	int ln2size;
395
396	testval = pci_mapbase(testval);
397	ln2size = 0;
398	if (testval != 0) {
399		while ((testval & 1) == 0)
400		{
401			ln2size++;
402			testval >>= 1;
403		}
404	}
405	return (ln2size);
406}
407
408/* return base address of device ROM */
409
410static pci_addr_t
411pci_rombase(uint64_t mapreg)
412{
413
414	return (mapreg & PCIM_BIOS_ADDR_MASK);
415}
416
417/* return log2 of map size decided for device ROM */
418
419static int
420pci_romsize(uint64_t testval)
421{
422	int ln2size;
423
424	testval = pci_rombase(testval);
425	ln2size = 0;
426	if (testval != 0) {
427		while ((testval & 1) == 0)
428		{
429			ln2size++;
430			testval >>= 1;
431		}
432	}
433	return (ln2size);
434}
435
436/* return log2 of address range supported by map register */
437
438static int
439pci_maprange(uint64_t mapreg)
440{
441	int ln2range = 0;
442
443	if (PCI_BAR_IO(mapreg))
444		ln2range = 32;
445	else
446		switch (mapreg & PCIM_BAR_MEM_TYPE) {
447		case PCIM_BAR_MEM_32:
448			ln2range = 32;
449			break;
450		case PCIM_BAR_MEM_1MB:
451			ln2range = 20;
452			break;
453		case PCIM_BAR_MEM_64:
454			ln2range = 64;
455			break;
456		}
457	return (ln2range);
458}
459
460/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
461
462static void
463pci_fixancient(pcicfgregs *cfg)
464{
465	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
466		return;
467
468	/* PCI to PCI bridges use header type 1 */
469	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
470		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
471}
472
473/* extract header type specific config data */
474
475static void
476pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
477{
478#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
479	switch (cfg->hdrtype & PCIM_HDRTYPE) {
480	case PCIM_HDRTYPE_NORMAL:
481		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
482		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
483		cfg->nummaps	    = PCI_MAXMAPS_0;
484		break;
485	case PCIM_HDRTYPE_BRIDGE:
486		cfg->nummaps	    = PCI_MAXMAPS_1;
487		break;
488	case PCIM_HDRTYPE_CARDBUS:
489		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
490		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
491		cfg->nummaps	    = PCI_MAXMAPS_2;
492		break;
493	}
494#undef REG
495}
496
497/* read configuration header into pcicfgregs structure */
498struct pci_devinfo *
499pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
500{
501#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
502	pcicfgregs *cfg = NULL;
503	struct pci_devinfo *devlist_entry;
504	struct devlist *devlist_head;
505
506	devlist_head = &pci_devq;
507
508	devlist_entry = NULL;
509
510	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
511		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
512		if (devlist_entry == NULL)
513			return (NULL);
514
515		cfg = &devlist_entry->cfg;
516
517		cfg->domain		= d;
518		cfg->bus		= b;
519		cfg->slot		= s;
520		cfg->func		= f;
521		cfg->vendor		= REG(PCIR_VENDOR, 2);
522		cfg->device		= REG(PCIR_DEVICE, 2);
523		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
524		cfg->statreg		= REG(PCIR_STATUS, 2);
525		cfg->baseclass		= REG(PCIR_CLASS, 1);
526		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
527		cfg->progif		= REG(PCIR_PROGIF, 1);
528		cfg->revid		= REG(PCIR_REVID, 1);
529		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
530		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
531		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
532		cfg->intpin		= REG(PCIR_INTPIN, 1);
533		cfg->intline		= REG(PCIR_INTLINE, 1);
534
535		cfg->mingnt		= REG(PCIR_MINGNT, 1);
536		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
537
538		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
539		cfg->hdrtype		&= ~PCIM_MFDEV;
540		STAILQ_INIT(&cfg->maps);
541
542		pci_fixancient(cfg);
543		pci_hdrtypedata(pcib, b, s, f, cfg);
544
545		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
546			pci_read_cap(pcib, cfg);
547
548		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
549
550		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
551		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
552		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
553		devlist_entry->conf.pc_sel.pc_func = cfg->func;
554		devlist_entry->conf.pc_hdr = cfg->hdrtype;
555
556		devlist_entry->conf.pc_subvendor = cfg->subvendor;
557		devlist_entry->conf.pc_subdevice = cfg->subdevice;
558		devlist_entry->conf.pc_vendor = cfg->vendor;
559		devlist_entry->conf.pc_device = cfg->device;
560
561		devlist_entry->conf.pc_class = cfg->baseclass;
562		devlist_entry->conf.pc_subclass = cfg->subclass;
563		devlist_entry->conf.pc_progif = cfg->progif;
564		devlist_entry->conf.pc_revid = cfg->revid;
565
566		pci_numdevs++;
567		pci_generation++;
568	}
569	return (devlist_entry);
570#undef REG
571}
572
573static void
574pci_read_cap(device_t pcib, pcicfgregs *cfg)
575{
576#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
577#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
578#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
579	uint64_t addr;
580#endif
581	uint32_t val;
582	int	ptr, nextptr, ptrptr;
583
584	switch (cfg->hdrtype & PCIM_HDRTYPE) {
585	case PCIM_HDRTYPE_NORMAL:
586	case PCIM_HDRTYPE_BRIDGE:
587		ptrptr = PCIR_CAP_PTR;
588		break;
589	case PCIM_HDRTYPE_CARDBUS:
590		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
591		break;
592	default:
593		return;		/* no extended capabilities support */
594	}
595	nextptr = REG(ptrptr, 1);	/* sanity check? */
596
597	/*
598	 * Read capability entries.
599	 */
600	while (nextptr != 0) {
601		/* Sanity check */
602		if (nextptr > 255) {
603			printf("illegal PCI extended capability offset %d\n",
604			    nextptr);
605			return;
606		}
607		/* Find the next entry */
608		ptr = nextptr;
609		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
610
611		/* Process this entry */
612		switch (REG(ptr + PCICAP_ID, 1)) {
613		case PCIY_PMG:		/* PCI power management */
614			if (cfg->pp.pp_cap == 0) {
615				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
616				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
617				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
618				if ((nextptr - ptr) > PCIR_POWER_DATA)
619					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
620			}
621			break;
622		case PCIY_HT:		/* HyperTransport */
623			/* Determine HT-specific capability type. */
624			val = REG(ptr + PCIR_HT_COMMAND, 2);
625
626			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
627				cfg->ht.ht_slave = ptr;
628
629#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
630			switch (val & PCIM_HTCMD_CAP_MASK) {
631			case PCIM_HTCAP_MSI_MAPPING:
632				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
633					/* Sanity check the mapping window. */
634					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
635					    4);
636					addr <<= 32;
637					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
638					    4);
639					if (addr != MSI_INTEL_ADDR_BASE)
640						device_printf(pcib,
641	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
642						    cfg->domain, cfg->bus,
643						    cfg->slot, cfg->func,
644						    (long long)addr);
645				} else
646					addr = MSI_INTEL_ADDR_BASE;
647
648				cfg->ht.ht_msimap = ptr;
649				cfg->ht.ht_msictrl = val;
650				cfg->ht.ht_msiaddr = addr;
651				break;
652			}
653#endif
654			break;
655		case PCIY_MSI:		/* PCI MSI */
656			cfg->msi.msi_location = ptr;
657			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
658			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
659						     PCIM_MSICTRL_MMC_MASK)>>1);
660			break;
661		case PCIY_MSIX:		/* PCI MSI-X */
662			cfg->msix.msix_location = ptr;
663			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
664			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
665			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
666			val = REG(ptr + PCIR_MSIX_TABLE, 4);
667			cfg->msix.msix_table_bar = PCIR_BAR(val &
668			    PCIM_MSIX_BIR_MASK);
669			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
670			val = REG(ptr + PCIR_MSIX_PBA, 4);
671			cfg->msix.msix_pba_bar = PCIR_BAR(val &
672			    PCIM_MSIX_BIR_MASK);
673			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
674			break;
675		case PCIY_VPD:		/* PCI Vital Product Data */
676			cfg->vpd.vpd_reg = ptr;
677			break;
678		case PCIY_SUBVENDOR:
679			/* Should always be true. */
680			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
681			    PCIM_HDRTYPE_BRIDGE) {
682				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
683				cfg->subvendor = val & 0xffff;
684				cfg->subdevice = val >> 16;
685			}
686			break;
687		case PCIY_PCIX:		/* PCI-X */
688			/*
689			 * Assume we have a PCI-X chipset if we have
690			 * at least one PCI-PCI bridge with a PCI-X
691			 * capability.  Note that some systems with
692			 * PCI-express or HT chipsets might match on
693			 * this check as well.
694			 */
695			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
696			    PCIM_HDRTYPE_BRIDGE)
697				pcix_chipset = 1;
698			break;
699		case PCIY_EXPRESS:	/* PCI-express */
700			/*
701			 * Assume we have a PCI-express chipset if we have
702			 * at least one PCI-express device.
703			 */
704			pcie_chipset = 1;
705			break;
706		default:
707			break;
708		}
709	}
710
711
712#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
713	/*
714	 * Enable the MSI mapping window for all HyperTransport
715	 * slaves.  PCI-PCI bridges have their windows enabled via
716	 * PCIB_MAP_MSI().
717	 */
718	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
719	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
720		device_printf(pcib,
721	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
722		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
723		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
724		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
725		     2);
726	}
727#endif
728/* REG and WREG use carry through to next functions */
729}
730
731/*
732 * PCI Vital Product Data
733 */
734
735#define	PCI_VPD_TIMEOUT		1000000
736
737static int
738pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
739{
740	int count = PCI_VPD_TIMEOUT;
741
742	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
743
744	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
745
746	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
747		if (--count < 0)
748			return (ENXIO);
749		DELAY(1);	/* limit looping */
750	}
751	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
752
753	return (0);
754}
755
756#if 0
757static int
758pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
759{
760	int count = PCI_VPD_TIMEOUT;
761
762	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
763
764	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
765	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
766	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
767		if (--count < 0)
768			return (ENXIO);
769		DELAY(1);	/* limit looping */
770	}
771
772	return (0);
773}
774#endif
775
776#undef PCI_VPD_TIMEOUT
777
778struct vpd_readstate {
779	device_t	pcib;
780	pcicfgregs	*cfg;
781	uint32_t	val;
782	int		bytesinval;
783	int		off;
784	uint8_t		cksum;
785};
786
787static int
788vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
789{
790	uint32_t reg;
791	uint8_t byte;
792
793	if (vrs->bytesinval == 0) {
794		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
795			return (ENXIO);
796		vrs->val = le32toh(reg);
797		vrs->off += 4;
798		byte = vrs->val & 0xff;
799		vrs->bytesinval = 3;
800	} else {
801		vrs->val = vrs->val >> 8;
802		byte = vrs->val & 0xff;
803		vrs->bytesinval--;
804	}
805
806	vrs->cksum += byte;
807	*data = byte;
808	return (0);
809}
810
811static void
812pci_read_vpd(device_t pcib, pcicfgregs *cfg)
813{
814	struct vpd_readstate vrs;
815	int state;
816	int name;
817	int remain;
818	int i;
819	int alloc, off;		/* alloc/off for RO/W arrays */
820	int cksumvalid;
821	int dflen;
822	uint8_t byte;
823	uint8_t byte2;
824
825	/* init vpd reader */
826	vrs.bytesinval = 0;
827	vrs.off = 0;
828	vrs.pcib = pcib;
829	vrs.cfg = cfg;
830	vrs.cksum = 0;
831
832	state = 0;
833	name = remain = i = 0;	/* shut up stupid gcc */
834	alloc = off = 0;	/* shut up stupid gcc */
835	dflen = 0;		/* shut up stupid gcc */
836	cksumvalid = -1;
837	while (state >= 0) {
838		if (vpd_nextbyte(&vrs, &byte)) {
839			state = -2;
840			break;
841		}
842#if 0
843		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
844		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
845		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
846#endif
847		switch (state) {
848		case 0:		/* item name */
849			if (byte & 0x80) {
850				if (vpd_nextbyte(&vrs, &byte2)) {
851					state = -2;
852					break;
853				}
854				remain = byte2;
855				if (vpd_nextbyte(&vrs, &byte2)) {
856					state = -2;
857					break;
858				}
859				remain |= byte2 << 8;
860				if (remain > (0x7f*4 - vrs.off)) {
861					state = -1;
862					printf(
863			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
864					    cfg->domain, cfg->bus, cfg->slot,
865					    cfg->func, remain);
866				}
867				name = byte & 0x7f;
868			} else {
869				remain = byte & 0x7;
870				name = (byte >> 3) & 0xf;
871			}
872			switch (name) {
873			case 0x2:	/* String */
874				cfg->vpd.vpd_ident = malloc(remain + 1,
875				    M_DEVBUF, M_WAITOK);
876				i = 0;
877				state = 1;
878				break;
879			case 0xf:	/* End */
880				state = -1;
881				break;
882			case 0x10:	/* VPD-R */
883				alloc = 8;
884				off = 0;
885				cfg->vpd.vpd_ros = malloc(alloc *
886				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
887				    M_WAITOK | M_ZERO);
888				state = 2;
889				break;
890			case 0x11:	/* VPD-W */
891				alloc = 8;
892				off = 0;
893				cfg->vpd.vpd_w = malloc(alloc *
894				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
895				    M_WAITOK | M_ZERO);
896				state = 5;
897				break;
898			default:	/* Invalid data, abort */
899				state = -1;
900				break;
901			}
902			break;
903
904		case 1:	/* Identifier String */
905			cfg->vpd.vpd_ident[i++] = byte;
906			remain--;
907			if (remain == 0)  {
908				cfg->vpd.vpd_ident[i] = '\0';
909				state = 0;
910			}
911			break;
912
913		case 2:	/* VPD-R Keyword Header */
914			if (off == alloc) {
915				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
916				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
917				    M_DEVBUF, M_WAITOK | M_ZERO);
918			}
919			cfg->vpd.vpd_ros[off].keyword[0] = byte;
920			if (vpd_nextbyte(&vrs, &byte2)) {
921				state = -2;
922				break;
923			}
924			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
925			if (vpd_nextbyte(&vrs, &byte2)) {
926				state = -2;
927				break;
928			}
929			dflen = byte2;
930			if (dflen == 0 &&
931			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
932			    2) == 0) {
933				/*
934				 * if this happens, we can't trust the rest
935				 * of the VPD.
936				 */
937				printf(
938				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
939				    cfg->domain, cfg->bus, cfg->slot,
940				    cfg->func, dflen);
941				cksumvalid = 0;
942				state = -1;
943				break;
944			} else if (dflen == 0) {
945				cfg->vpd.vpd_ros[off].value = malloc(1 *
946				    sizeof(*cfg->vpd.vpd_ros[off].value),
947				    M_DEVBUF, M_WAITOK);
948				cfg->vpd.vpd_ros[off].value[0] = '\x00';
949			} else
950				cfg->vpd.vpd_ros[off].value = malloc(
951				    (dflen + 1) *
952				    sizeof(*cfg->vpd.vpd_ros[off].value),
953				    M_DEVBUF, M_WAITOK);
954			remain -= 3;
955			i = 0;
956			/* keep in sync w/ state 3's transistions */
957			if (dflen == 0 && remain == 0)
958				state = 0;
959			else if (dflen == 0)
960				state = 2;
961			else
962				state = 3;
963			break;
964
965		case 3:	/* VPD-R Keyword Value */
966			cfg->vpd.vpd_ros[off].value[i++] = byte;
967			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
968			    "RV", 2) == 0 && cksumvalid == -1) {
969				if (vrs.cksum == 0)
970					cksumvalid = 1;
971				else {
972					if (bootverbose)
973						printf(
974				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
975						    cfg->domain, cfg->bus,
976						    cfg->slot, cfg->func,
977						    vrs.cksum);
978					cksumvalid = 0;
979					state = -1;
980					break;
981				}
982			}
983			dflen--;
984			remain--;
985			/* keep in sync w/ state 2's transistions */
986			if (dflen == 0)
987				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
988			if (dflen == 0 && remain == 0) {
989				cfg->vpd.vpd_rocnt = off;
990				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
991				    off * sizeof(*cfg->vpd.vpd_ros),
992				    M_DEVBUF, M_WAITOK | M_ZERO);
993				state = 0;
994			} else if (dflen == 0)
995				state = 2;
996			break;
997
998		case 4:
999			remain--;
1000			if (remain == 0)
1001				state = 0;
1002			break;
1003
1004		case 5:	/* VPD-W Keyword Header */
1005			if (off == alloc) {
1006				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1007				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1008				    M_DEVBUF, M_WAITOK | M_ZERO);
1009			}
1010			cfg->vpd.vpd_w[off].keyword[0] = byte;
1011			if (vpd_nextbyte(&vrs, &byte2)) {
1012				state = -2;
1013				break;
1014			}
1015			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1016			if (vpd_nextbyte(&vrs, &byte2)) {
1017				state = -2;
1018				break;
1019			}
1020			cfg->vpd.vpd_w[off].len = dflen = byte2;
1021			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1022			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1023			    sizeof(*cfg->vpd.vpd_w[off].value),
1024			    M_DEVBUF, M_WAITOK);
1025			remain -= 3;
1026			i = 0;
1027			/* keep in sync w/ state 6's transistions */
1028			if (dflen == 0 && remain == 0)
1029				state = 0;
1030			else if (dflen == 0)
1031				state = 5;
1032			else
1033				state = 6;
1034			break;
1035
1036		case 6:	/* VPD-W Keyword Value */
1037			cfg->vpd.vpd_w[off].value[i++] = byte;
1038			dflen--;
1039			remain--;
1040			/* keep in sync w/ state 5's transistions */
1041			if (dflen == 0)
1042				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1043			if (dflen == 0 && remain == 0) {
1044				cfg->vpd.vpd_wcnt = off;
1045				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1046				    off * sizeof(*cfg->vpd.vpd_w),
1047				    M_DEVBUF, M_WAITOK | M_ZERO);
1048				state = 0;
1049			} else if (dflen == 0)
1050				state = 5;
1051			break;
1052
1053		default:
1054			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1055			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1056			    state);
1057			state = -1;
1058			break;
1059		}
1060	}
1061
1062	if (cksumvalid == 0 || state < -1) {
1063		/* read-only data bad, clean up */
1064		if (cfg->vpd.vpd_ros != NULL) {
1065			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1066				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1067			free(cfg->vpd.vpd_ros, M_DEVBUF);
1068			cfg->vpd.vpd_ros = NULL;
1069		}
1070	}
1071	if (state < -1) {
1072		/* I/O error, clean up */
1073		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1074		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1075		if (cfg->vpd.vpd_ident != NULL) {
1076			free(cfg->vpd.vpd_ident, M_DEVBUF);
1077			cfg->vpd.vpd_ident = NULL;
1078		}
1079		if (cfg->vpd.vpd_w != NULL) {
1080			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1081				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1082			free(cfg->vpd.vpd_w, M_DEVBUF);
1083			cfg->vpd.vpd_w = NULL;
1084		}
1085	}
1086	cfg->vpd.vpd_cached = 1;
1087#undef REG
1088#undef WREG
1089}
1090
1091int
1092pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1093{
1094	struct pci_devinfo *dinfo = device_get_ivars(child);
1095	pcicfgregs *cfg = &dinfo->cfg;
1096
1097	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1098		pci_read_vpd(device_get_parent(dev), cfg);
1099
1100	*identptr = cfg->vpd.vpd_ident;
1101
1102	if (*identptr == NULL)
1103		return (ENXIO);
1104
1105	return (0);
1106}
1107
1108int
1109pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1110	const char **vptr)
1111{
1112	struct pci_devinfo *dinfo = device_get_ivars(child);
1113	pcicfgregs *cfg = &dinfo->cfg;
1114	int i;
1115
1116	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1117		pci_read_vpd(device_get_parent(dev), cfg);
1118
1119	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1120		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1121		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1122			*vptr = cfg->vpd.vpd_ros[i].value;
1123		}
1124
1125	if (i != cfg->vpd.vpd_rocnt)
1126		return (0);
1127
1128	*vptr = NULL;
1129	return (ENXIO);
1130}
1131
1132/*
1133 * Find the requested extended capability and return the offset in
1134 * configuration space via the pointer provided. The function returns
1135 * 0 on success and error code otherwise.
1136 */
1137int
1138pci_find_extcap_method(device_t dev, device_t child, int capability,
1139    int *capreg)
1140{
1141	struct pci_devinfo *dinfo = device_get_ivars(child);
1142	pcicfgregs *cfg = &dinfo->cfg;
1143	u_int32_t status;
1144	u_int8_t ptr;
1145
1146	/*
1147	 * Check the CAP_LIST bit of the PCI status register first.
1148	 */
1149	status = pci_read_config(child, PCIR_STATUS, 2);
1150	if (!(status & PCIM_STATUS_CAPPRESENT))
1151		return (ENXIO);
1152
1153	/*
1154	 * Determine the start pointer of the capabilities list.
1155	 */
1156	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1157	case PCIM_HDRTYPE_NORMAL:
1158	case PCIM_HDRTYPE_BRIDGE:
1159		ptr = PCIR_CAP_PTR;
1160		break;
1161	case PCIM_HDRTYPE_CARDBUS:
1162		ptr = PCIR_CAP_PTR_2;
1163		break;
1164	default:
1165		/* XXX: panic? */
1166		return (ENXIO);		/* no extended capabilities support */
1167	}
1168	ptr = pci_read_config(child, ptr, 1);
1169
1170	/*
1171	 * Traverse the capabilities list.
1172	 */
1173	while (ptr != 0) {
1174		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1175			if (capreg != NULL)
1176				*capreg = ptr;
1177			return (0);
1178		}
1179		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1180	}
1181
1182	return (ENOENT);
1183}
1184
1185/*
1186 * Support for MSI-X message interrupts.
1187 */
1188void
1189pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1190{
1191	struct pci_devinfo *dinfo = device_get_ivars(dev);
1192	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1193	uint32_t offset;
1194
1195	KASSERT(msix->msix_table_len > index, ("bogus index"));
1196	offset = msix->msix_table_offset + index * 16;
1197	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1198	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1199	bus_write_4(msix->msix_table_res, offset + 8, data);
1200
1201	/* Enable MSI -> HT mapping. */
1202	pci_ht_map_msi(dev, address);
1203}
1204
1205void
1206pci_mask_msix(device_t dev, u_int index)
1207{
1208	struct pci_devinfo *dinfo = device_get_ivars(dev);
1209	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1210	uint32_t offset, val;
1211
1212	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1213	offset = msix->msix_table_offset + index * 16 + 12;
1214	val = bus_read_4(msix->msix_table_res, offset);
1215	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1216		val |= PCIM_MSIX_VCTRL_MASK;
1217		bus_write_4(msix->msix_table_res, offset, val);
1218	}
1219}
1220
1221void
1222pci_unmask_msix(device_t dev, u_int index)
1223{
1224	struct pci_devinfo *dinfo = device_get_ivars(dev);
1225	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1226	uint32_t offset, val;
1227
1228	KASSERT(msix->msix_table_len > index, ("bogus index"));
1229	offset = msix->msix_table_offset + index * 16 + 12;
1230	val = bus_read_4(msix->msix_table_res, offset);
1231	if (val & PCIM_MSIX_VCTRL_MASK) {
1232		val &= ~PCIM_MSIX_VCTRL_MASK;
1233		bus_write_4(msix->msix_table_res, offset, val);
1234	}
1235}
1236
1237int
1238pci_pending_msix(device_t dev, u_int index)
1239{
1240	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1242	uint32_t offset, bit;
1243
1244	KASSERT(msix->msix_table_len > index, ("bogus index"));
1245	offset = msix->msix_pba_offset + (index / 32) * 4;
1246	bit = 1 << index % 32;
1247	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1248}
1249
1250/*
1251 * Restore MSI-X registers and table during resume.  If MSI-X is
1252 * enabled then walk the virtual table to restore the actual MSI-X
1253 * table.
1254 */
1255static void
1256pci_resume_msix(device_t dev)
1257{
1258	struct pci_devinfo *dinfo = device_get_ivars(dev);
1259	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1260	struct msix_table_entry *mte;
1261	struct msix_vector *mv;
1262	int i;
1263
1264	if (msix->msix_alloc > 0) {
1265		/* First, mask all vectors. */
1266		for (i = 0; i < msix->msix_msgnum; i++)
1267			pci_mask_msix(dev, i);
1268
1269		/* Second, program any messages with at least one handler. */
1270		for (i = 0; i < msix->msix_table_len; i++) {
1271			mte = &msix->msix_table[i];
1272			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1273				continue;
1274			mv = &msix->msix_vectors[mte->mte_vector - 1];
1275			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1276			pci_unmask_msix(dev, i);
1277		}
1278	}
1279	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1280	    msix->msix_ctrl, 2);
1281}
1282
1283/*
1284 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1285 * returned in *count.  After this function returns, each message will be
1286 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1287 */
1288int
1289pci_alloc_msix_method(device_t dev, device_t child, int *count)
1290{
1291	struct pci_devinfo *dinfo = device_get_ivars(child);
1292	pcicfgregs *cfg = &dinfo->cfg;
1293	struct resource_list_entry *rle;
1294	int actual, error, i, irq, max;
1295
1296	/* Don't let count == 0 get us into trouble. */
1297	if (*count == 0)
1298		return (EINVAL);
1299
1300	/* If rid 0 is allocated, then fail. */
1301	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1302	if (rle != NULL && rle->res != NULL)
1303		return (ENXIO);
1304
1305	/* Already have allocated messages? */
1306	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1307		return (ENXIO);
1308
1309	/* If MSI is blacklisted for this system, fail. */
1310	if (pci_msi_blacklisted())
1311		return (ENXIO);
1312
1313	/* MSI-X capability present? */
1314	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1315		return (ENODEV);
1316
1317	/* Make sure the appropriate BARs are mapped. */
1318	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1319	    cfg->msix.msix_table_bar);
1320	if (rle == NULL || rle->res == NULL ||
1321	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1322		return (ENXIO);
1323	cfg->msix.msix_table_res = rle->res;
1324	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1325		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1326		    cfg->msix.msix_pba_bar);
1327		if (rle == NULL || rle->res == NULL ||
1328		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1329			return (ENXIO);
1330	}
1331	cfg->msix.msix_pba_res = rle->res;
1332
1333	if (bootverbose)
1334		device_printf(child,
1335		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1336		    *count, cfg->msix.msix_msgnum);
1337	max = min(*count, cfg->msix.msix_msgnum);
1338	for (i = 0; i < max; i++) {
1339		/* Allocate a message. */
1340		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1341		if (error)
1342			break;
1343		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1344		    irq, 1);
1345	}
1346	actual = i;
1347
1348	if (bootverbose) {
1349		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1350		if (actual == 1)
1351			device_printf(child, "using IRQ %lu for MSI-X\n",
1352			    rle->start);
1353		else {
1354			int run;
1355
1356			/*
1357			 * Be fancy and try to print contiguous runs of
1358			 * IRQ values as ranges.  'irq' is the previous IRQ.
1359			 * 'run' is true if we are in a range.
1360			 */
1361			device_printf(child, "using IRQs %lu", rle->start);
1362			irq = rle->start;
1363			run = 0;
1364			for (i = 1; i < actual; i++) {
1365				rle = resource_list_find(&dinfo->resources,
1366				    SYS_RES_IRQ, i + 1);
1367
1368				/* Still in a run? */
1369				if (rle->start == irq + 1) {
1370					run = 1;
1371					irq++;
1372					continue;
1373				}
1374
1375				/* Finish previous range. */
1376				if (run) {
1377					printf("-%d", irq);
1378					run = 0;
1379				}
1380
1381				/* Start new range. */
1382				printf(",%lu", rle->start);
1383				irq = rle->start;
1384			}
1385
1386			/* Unfinished range? */
1387			if (run)
1388				printf("-%d", irq);
1389			printf(" for MSI-X\n");
1390		}
1391	}
1392
1393	/* Mask all vectors. */
1394	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1395		pci_mask_msix(child, i);
1396
1397	/* Allocate and initialize vector data and virtual table. */
1398	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1399	    M_DEVBUF, M_WAITOK | M_ZERO);
1400	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1401	    M_DEVBUF, M_WAITOK | M_ZERO);
1402	for (i = 0; i < actual; i++) {
1403		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1404		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1405		cfg->msix.msix_table[i].mte_vector = i + 1;
1406	}
1407
1408	/* Update control register to enable MSI-X. */
1409	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1410	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1411	    cfg->msix.msix_ctrl, 2);
1412
1413	/* Update counts of alloc'd messages. */
1414	cfg->msix.msix_alloc = actual;
1415	cfg->msix.msix_table_len = actual;
1416	*count = actual;
1417	return (0);
1418}
1419
1420/*
1421 * By default, pci_alloc_msix() will assign the allocated IRQ
1422 * resources consecutively to the first N messages in the MSI-X table.
1423 * However, device drivers may want to use different layouts if they
1424 * either receive fewer messages than they asked for, or they wish to
1425 * populate the MSI-X table sparsely.  This method allows the driver
1426 * to specify what layout it wants.  It must be called after a
1427 * successful pci_alloc_msix() but before any of the associated
1428 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1429 *
1430 * The 'vectors' array contains 'count' message vectors.  The array
1431 * maps directly to the MSI-X table in that index 0 in the array
1432 * specifies the vector for the first message in the MSI-X table, etc.
1433 * The vector value in each array index can either be 0 to indicate
1434 * that no vector should be assigned to a message slot, or it can be a
1435 * number from 1 to N (where N is the count returned from a
1436 * succcessful call to pci_alloc_msix()) to indicate which message
1437 * vector (IRQ) to be used for the corresponding message.
1438 *
1439 * On successful return, each message with a non-zero vector will have
1440 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1441 * 1.  Additionally, if any of the IRQs allocated via the previous
1442 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1443 * will be freed back to the system automatically.
1444 *
1445 * For example, suppose a driver has a MSI-X table with 6 messages and
1446 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1447 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1448 * C.  After the call to pci_alloc_msix(), the device will be setup to
1449 * have an MSI-X table of ABC--- (where - means no vector assigned).
1450 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1451 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1452 * be freed back to the system.  This device will also have valid
1453 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1454 *
1455 * In any case, the SYS_RES_IRQ rid X will always map to the message
1456 * at MSI-X table index X - 1 and will only be valid if a vector is
1457 * assigned to that table entry.
1458 */
1459int
1460pci_remap_msix_method(device_t dev, device_t child, int count,
1461    const u_int *vectors)
1462{
1463	struct pci_devinfo *dinfo = device_get_ivars(child);
1464	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465	struct resource_list_entry *rle;
1466	int i, irq, j, *used;
1467
1468	/*
1469	 * Have to have at least one message in the table but the
1470	 * table can't be bigger than the actual MSI-X table in the
1471	 * device.
1472	 */
1473	if (count == 0 || count > msix->msix_msgnum)
1474		return (EINVAL);
1475
1476	/* Sanity check the vectors. */
1477	for (i = 0; i < count; i++)
1478		if (vectors[i] > msix->msix_alloc)
1479			return (EINVAL);
1480
1481	/*
1482	 * Make sure there aren't any holes in the vectors to be used.
1483	 * It's a big pain to support it, and it doesn't really make
1484	 * sense anyway.  Also, at least one vector must be used.
1485	 */
1486	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1487	    M_ZERO);
1488	for (i = 0; i < count; i++)
1489		if (vectors[i] != 0)
1490			used[vectors[i] - 1] = 1;
1491	for (i = 0; i < msix->msix_alloc - 1; i++)
1492		if (used[i] == 0 && used[i + 1] == 1) {
1493			free(used, M_DEVBUF);
1494			return (EINVAL);
1495		}
1496	if (used[0] != 1) {
1497		free(used, M_DEVBUF);
1498		return (EINVAL);
1499	}
1500
1501	/* Make sure none of the resources are allocated. */
1502	for (i = 0; i < msix->msix_table_len; i++) {
1503		if (msix->msix_table[i].mte_vector == 0)
1504			continue;
1505		if (msix->msix_table[i].mte_handlers > 0)
1506			return (EBUSY);
1507		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1508		KASSERT(rle != NULL, ("missing resource"));
1509		if (rle->res != NULL)
1510			return (EBUSY);
1511	}
1512
1513	/* Free the existing resource list entries. */
1514	for (i = 0; i < msix->msix_table_len; i++) {
1515		if (msix->msix_table[i].mte_vector == 0)
1516			continue;
1517		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1518	}
1519
1520	/*
1521	 * Build the new virtual table keeping track of which vectors are
1522	 * used.
1523	 */
1524	free(msix->msix_table, M_DEVBUF);
1525	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1526	    M_DEVBUF, M_WAITOK | M_ZERO);
1527	for (i = 0; i < count; i++)
1528		msix->msix_table[i].mte_vector = vectors[i];
1529	msix->msix_table_len = count;
1530
1531	/* Free any unused IRQs and resize the vectors array if necessary. */
1532	j = msix->msix_alloc - 1;
1533	if (used[j] == 0) {
1534		struct msix_vector *vec;
1535
1536		while (used[j] == 0) {
1537			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1538			    msix->msix_vectors[j].mv_irq);
1539			j--;
1540		}
1541		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1542		    M_WAITOK);
1543		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1544		    (j + 1));
1545		free(msix->msix_vectors, M_DEVBUF);
1546		msix->msix_vectors = vec;
1547		msix->msix_alloc = j + 1;
1548	}
1549	free(used, M_DEVBUF);
1550
1551	/* Map the IRQs onto the rids. */
1552	for (i = 0; i < count; i++) {
1553		if (vectors[i] == 0)
1554			continue;
1555		irq = msix->msix_vectors[vectors[i]].mv_irq;
1556		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1557		    irq, 1);
1558	}
1559
1560	if (bootverbose) {
1561		device_printf(child, "Remapped MSI-X IRQs as: ");
1562		for (i = 0; i < count; i++) {
1563			if (i != 0)
1564				printf(", ");
1565			if (vectors[i] == 0)
1566				printf("---");
1567			else
1568				printf("%d",
1569				    msix->msix_vectors[vectors[i]].mv_irq);
1570		}
1571		printf("\n");
1572	}
1573
1574	return (0);
1575}
1576
1577static int
1578pci_release_msix(device_t dev, device_t child)
1579{
1580	struct pci_devinfo *dinfo = device_get_ivars(child);
1581	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1582	struct resource_list_entry *rle;
1583	int i;
1584
1585	/* Do we have any messages to release? */
1586	if (msix->msix_alloc == 0)
1587		return (ENODEV);
1588
1589	/* Make sure none of the resources are allocated. */
1590	for (i = 0; i < msix->msix_table_len; i++) {
1591		if (msix->msix_table[i].mte_vector == 0)
1592			continue;
1593		if (msix->msix_table[i].mte_handlers > 0)
1594			return (EBUSY);
1595		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1596		KASSERT(rle != NULL, ("missing resource"));
1597		if (rle->res != NULL)
1598			return (EBUSY);
1599	}
1600
1601	/* Update control register to disable MSI-X. */
1602	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1603	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1604	    msix->msix_ctrl, 2);
1605
1606	/* Free the resource list entries. */
1607	for (i = 0; i < msix->msix_table_len; i++) {
1608		if (msix->msix_table[i].mte_vector == 0)
1609			continue;
1610		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1611	}
1612	free(msix->msix_table, M_DEVBUF);
1613	msix->msix_table_len = 0;
1614
1615	/* Release the IRQs. */
1616	for (i = 0; i < msix->msix_alloc; i++)
1617		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1618		    msix->msix_vectors[i].mv_irq);
1619	free(msix->msix_vectors, M_DEVBUF);
1620	msix->msix_alloc = 0;
1621	return (0);
1622}
1623
1624/*
1625 * Return the max supported MSI-X messages this device supports.
1626 * Basically, assuming the MD code can alloc messages, this function
1627 * should return the maximum value that pci_alloc_msix() can return.
1628 * Thus, it is subject to the tunables, etc.
1629 */
1630int
1631pci_msix_count_method(device_t dev, device_t child)
1632{
1633	struct pci_devinfo *dinfo = device_get_ivars(child);
1634	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635
1636	if (pci_do_msix && msix->msix_location != 0)
1637		return (msix->msix_msgnum);
1638	return (0);
1639}
1640
1641/*
1642 * HyperTransport MSI mapping control
1643 */
1644void
1645pci_ht_map_msi(device_t dev, uint64_t addr)
1646{
1647	struct pci_devinfo *dinfo = device_get_ivars(dev);
1648	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1649
1650	if (!ht->ht_msimap)
1651		return;
1652
1653	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1654	    ht->ht_msiaddr >> 20 == addr >> 20) {
1655		/* Enable MSI -> HT mapping. */
1656		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1657		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1658		    ht->ht_msictrl, 2);
1659	}
1660
1661	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1662		/* Disable MSI -> HT mapping. */
1663		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1664		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1665		    ht->ht_msictrl, 2);
1666	}
1667}
1668
1669int
1670pci_get_max_read_req(device_t dev)
1671{
1672	int cap;
1673	uint16_t val;
1674
1675	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1676		return (0);
1677	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1678	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1679	val >>= 12;
1680	return (1 << (val + 7));
1681}
1682
1683int
1684pci_set_max_read_req(device_t dev, int size)
1685{
1686	int cap;
1687	uint16_t val;
1688
1689	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1690		return (0);
1691	if (size < 128)
1692		size = 128;
1693	if (size > 4096)
1694		size = 4096;
1695	size = (1 << (fls(size) - 1));
1696	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1697	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1698	val |= (fls(size) - 8) << 12;
1699	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1700	return (size);
1701}
1702
1703/*
1704 * Support for MSI message signalled interrupts.
1705 */
1706void
1707pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1708{
1709	struct pci_devinfo *dinfo = device_get_ivars(dev);
1710	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1711
1712	/* Write data and address values. */
1713	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1714	    address & 0xffffffff, 4);
1715	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1716		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1717		    address >> 32, 4);
1718		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1719		    data, 2);
1720	} else
1721		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1722		    2);
1723
1724	/* Enable MSI in the control register. */
1725	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1726	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1727	    2);
1728
1729	/* Enable MSI -> HT mapping. */
1730	pci_ht_map_msi(dev, address);
1731}
1732
1733void
1734pci_disable_msi(device_t dev)
1735{
1736	struct pci_devinfo *dinfo = device_get_ivars(dev);
1737	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1738
1739	/* Disable MSI -> HT mapping. */
1740	pci_ht_map_msi(dev, 0);
1741
1742	/* Disable MSI in the control register. */
1743	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1744	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1745	    2);
1746}
1747
1748/*
1749 * Restore MSI registers during resume.  If MSI is enabled then
1750 * restore the data and address registers in addition to the control
1751 * register.
1752 */
1753static void
1754pci_resume_msi(device_t dev)
1755{
1756	struct pci_devinfo *dinfo = device_get_ivars(dev);
1757	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1758	uint64_t address;
1759	uint16_t data;
1760
1761	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1762		address = msi->msi_addr;
1763		data = msi->msi_data;
1764		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1765		    address & 0xffffffff, 4);
1766		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1767			pci_write_config(dev, msi->msi_location +
1768			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1769			pci_write_config(dev, msi->msi_location +
1770			    PCIR_MSI_DATA_64BIT, data, 2);
1771		} else
1772			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1773			    data, 2);
1774	}
1775	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1776	    2);
1777}
1778
1779static int
1780pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1781{
1782	struct pci_devinfo *dinfo = device_get_ivars(dev);
1783	pcicfgregs *cfg = &dinfo->cfg;
1784	struct resource_list_entry *rle;
1785	struct msix_table_entry *mte;
1786	struct msix_vector *mv;
1787	uint64_t addr;
1788	uint32_t data;
1789	int error, i, j;
1790
1791	/*
1792	 * Handle MSI first.  We try to find this IRQ among our list
1793	 * of MSI IRQs.  If we find it, we request updated address and
1794	 * data registers and apply the results.
1795	 */
1796	if (cfg->msi.msi_alloc > 0) {
1797
1798		/* If we don't have any active handlers, nothing to do. */
1799		if (cfg->msi.msi_handlers == 0)
1800			return (0);
1801		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1802			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1803			    i + 1);
1804			if (rle->start == irq) {
1805				error = PCIB_MAP_MSI(device_get_parent(bus),
1806				    dev, irq, &addr, &data);
1807				if (error)
1808					return (error);
1809				pci_disable_msi(dev);
1810				dinfo->cfg.msi.msi_addr = addr;
1811				dinfo->cfg.msi.msi_data = data;
1812				pci_enable_msi(dev, addr, data);
1813				return (0);
1814			}
1815		}
1816		return (ENOENT);
1817	}
1818
1819	/*
1820	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1821	 * we request the updated mapping info.  If that works, we go
1822	 * through all the slots that use this IRQ and update them.
1823	 */
1824	if (cfg->msix.msix_alloc > 0) {
1825		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1826			mv = &cfg->msix.msix_vectors[i];
1827			if (mv->mv_irq == irq) {
1828				error = PCIB_MAP_MSI(device_get_parent(bus),
1829				    dev, irq, &addr, &data);
1830				if (error)
1831					return (error);
1832				mv->mv_address = addr;
1833				mv->mv_data = data;
1834				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1835					mte = &cfg->msix.msix_table[j];
1836					if (mte->mte_vector != i + 1)
1837						continue;
1838					if (mte->mte_handlers == 0)
1839						continue;
1840					pci_mask_msix(dev, j);
1841					pci_enable_msix(dev, j, addr, data);
1842					pci_unmask_msix(dev, j);
1843				}
1844			}
1845		}
1846		return (ENOENT);
1847	}
1848
1849	return (ENOENT);
1850}
1851
1852/*
1853 * Returns true if the specified device is blacklisted because MSI
1854 * doesn't work.
1855 */
1856int
1857pci_msi_device_blacklisted(device_t dev)
1858{
1859	struct pci_quirk *q;
1860
1861	if (!pci_honor_msi_blacklist)
1862		return (0);
1863
1864	for (q = &pci_quirks[0]; q->devid; q++) {
1865		if (q->devid == pci_get_devid(dev) &&
1866		    q->type == PCI_QUIRK_DISABLE_MSI)
1867			return (1);
1868	}
1869	return (0);
1870}
1871
1872/*
1873 * Returns true if a specified chipset supports MSI when it is
1874 * emulated hardware in a virtual machine.
1875 */
1876static int
1877pci_msi_vm_chipset(device_t dev)
1878{
1879	struct pci_quirk *q;
1880
1881	for (q = &pci_quirks[0]; q->devid; q++) {
1882		if (q->devid == pci_get_devid(dev) &&
1883		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1884			return (1);
1885	}
1886	return (0);
1887}
1888
1889/*
1890 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1891 * we just check for blacklisted chipsets as represented by the
1892 * host-PCI bridge at device 0:0:0.  In the future, it may become
1893 * necessary to check other system attributes, such as the kenv values
1894 * that give the motherboard manufacturer and model number.
1895 */
1896static int
1897pci_msi_blacklisted(void)
1898{
1899	device_t dev;
1900
1901	if (!pci_honor_msi_blacklist)
1902		return (0);
1903
1904	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1905	if (!(pcie_chipset || pcix_chipset)) {
1906		if (vm_guest != VM_GUEST_NO) {
1907			dev = pci_find_bsf(0, 0, 0);
1908			if (dev != NULL)
1909				return (pci_msi_vm_chipset(dev) == 0);
1910		}
1911		return (1);
1912	}
1913
1914	dev = pci_find_bsf(0, 0, 0);
1915	if (dev != NULL)
1916		return (pci_msi_device_blacklisted(dev));
1917	return (0);
1918}
1919
1920/*
1921 * Attempt to allocate *count MSI messages.  The actual number allocated is
1922 * returned in *count.  After this function returns, each message will be
1923 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1924 */
1925int
1926pci_alloc_msi_method(device_t dev, device_t child, int *count)
1927{
1928	struct pci_devinfo *dinfo = device_get_ivars(child);
1929	pcicfgregs *cfg = &dinfo->cfg;
1930	struct resource_list_entry *rle;
1931	int actual, error, i, irqs[32];
1932	uint16_t ctrl;
1933
1934	/* Don't let count == 0 get us into trouble. */
1935	if (*count == 0)
1936		return (EINVAL);
1937
1938	/* If rid 0 is allocated, then fail. */
1939	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1940	if (rle != NULL && rle->res != NULL)
1941		return (ENXIO);
1942
1943	/* Already have allocated messages? */
1944	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1945		return (ENXIO);
1946
1947	/* If MSI is blacklisted for this system, fail. */
1948	if (pci_msi_blacklisted())
1949		return (ENXIO);
1950
1951	/* MSI capability present? */
1952	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1953		return (ENODEV);
1954
1955	if (bootverbose)
1956		device_printf(child,
1957		    "attempting to allocate %d MSI vectors (%d supported)\n",
1958		    *count, cfg->msi.msi_msgnum);
1959
1960	/* Don't ask for more than the device supports. */
1961	actual = min(*count, cfg->msi.msi_msgnum);
1962
1963	/* Don't ask for more than 32 messages. */
1964	actual = min(actual, 32);
1965
1966	/* MSI requires power of 2 number of messages. */
1967	if (!powerof2(actual))
1968		return (EINVAL);
1969
1970	for (;;) {
1971		/* Try to allocate N messages. */
1972		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1973		    actual, irqs);
1974		if (error == 0)
1975			break;
1976		if (actual == 1)
1977			return (error);
1978
1979		/* Try N / 2. */
1980		actual >>= 1;
1981	}
1982
1983	/*
1984	 * We now have N actual messages mapped onto SYS_RES_IRQ
1985	 * resources in the irqs[] array, so add new resources
1986	 * starting at rid 1.
1987	 */
1988	for (i = 0; i < actual; i++)
1989		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1990		    irqs[i], irqs[i], 1);
1991
1992	if (bootverbose) {
1993		if (actual == 1)
1994			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1995		else {
1996			int run;
1997
1998			/*
1999			 * Be fancy and try to print contiguous runs
2000			 * of IRQ values as ranges.  'run' is true if
2001			 * we are in a range.
2002			 */
2003			device_printf(child, "using IRQs %d", irqs[0]);
2004			run = 0;
2005			for (i = 1; i < actual; i++) {
2006
2007				/* Still in a run? */
2008				if (irqs[i] == irqs[i - 1] + 1) {
2009					run = 1;
2010					continue;
2011				}
2012
2013				/* Finish previous range. */
2014				if (run) {
2015					printf("-%d", irqs[i - 1]);
2016					run = 0;
2017				}
2018
2019				/* Start new range. */
2020				printf(",%d", irqs[i]);
2021			}
2022
2023			/* Unfinished range? */
2024			if (run)
2025				printf("-%d", irqs[actual - 1]);
2026			printf(" for MSI\n");
2027		}
2028	}
2029
2030	/* Update control register with actual count. */
2031	ctrl = cfg->msi.msi_ctrl;
2032	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2033	ctrl |= (ffs(actual) - 1) << 4;
2034	cfg->msi.msi_ctrl = ctrl;
2035	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2036
2037	/* Update counts of alloc'd messages. */
2038	cfg->msi.msi_alloc = actual;
2039	cfg->msi.msi_handlers = 0;
2040	*count = actual;
2041	return (0);
2042}
2043
2044/* Release the MSI messages associated with this device. */
2045int
2046pci_release_msi_method(device_t dev, device_t child)
2047{
2048	struct pci_devinfo *dinfo = device_get_ivars(child);
2049	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2050	struct resource_list_entry *rle;
2051	int error, i, irqs[32];
2052
2053	/* Try MSI-X first. */
2054	error = pci_release_msix(dev, child);
2055	if (error != ENODEV)
2056		return (error);
2057
2058	/* Do we have any messages to release? */
2059	if (msi->msi_alloc == 0)
2060		return (ENODEV);
2061	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2062
2063	/* Make sure none of the resources are allocated. */
2064	if (msi->msi_handlers > 0)
2065		return (EBUSY);
2066	for (i = 0; i < msi->msi_alloc; i++) {
2067		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2068		KASSERT(rle != NULL, ("missing MSI resource"));
2069		if (rle->res != NULL)
2070			return (EBUSY);
2071		irqs[i] = rle->start;
2072	}
2073
2074	/* Update control register with 0 count. */
2075	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2076	    ("%s: MSI still enabled", __func__));
2077	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2078	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2079	    msi->msi_ctrl, 2);
2080
2081	/* Release the messages. */
2082	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2083	for (i = 0; i < msi->msi_alloc; i++)
2084		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2085
2086	/* Update alloc count. */
2087	msi->msi_alloc = 0;
2088	msi->msi_addr = 0;
2089	msi->msi_data = 0;
2090	return (0);
2091}
2092
2093/*
2094 * Return the max supported MSI messages this device supports.
2095 * Basically, assuming the MD code can alloc messages, this function
2096 * should return the maximum value that pci_alloc_msi() can return.
2097 * Thus, it is subject to the tunables, etc.
2098 */
2099int
2100pci_msi_count_method(device_t dev, device_t child)
2101{
2102	struct pci_devinfo *dinfo = device_get_ivars(child);
2103	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2104
2105	if (pci_do_msi && msi->msi_location != 0)
2106		return (msi->msi_msgnum);
2107	return (0);
2108}
2109
2110/* free pcicfgregs structure and all depending data structures */
2111
2112int
2113pci_freecfg(struct pci_devinfo *dinfo)
2114{
2115	struct devlist *devlist_head;
2116	struct pci_map *pm, *next;
2117	int i;
2118
2119	devlist_head = &pci_devq;
2120
2121	if (dinfo->cfg.vpd.vpd_reg) {
2122		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2123		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2124			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2125		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2126		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2127			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2128		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2129	}
2130	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2131		free(pm, M_DEVBUF);
2132	}
2133	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2134	free(dinfo, M_DEVBUF);
2135
2136	/* increment the generation count */
2137	pci_generation++;
2138
2139	/* we're losing one device */
2140	pci_numdevs--;
2141	return (0);
2142}
2143
2144/*
2145 * PCI power manangement
2146 */
2147int
2148pci_set_powerstate_method(device_t dev, device_t child, int state)
2149{
2150	struct pci_devinfo *dinfo = device_get_ivars(child);
2151	pcicfgregs *cfg = &dinfo->cfg;
2152	uint16_t status;
2153	int result, oldstate, highest, delay;
2154
2155	if (cfg->pp.pp_cap == 0)
2156		return (EOPNOTSUPP);
2157
2158	/*
2159	 * Optimize a no state change request away.  While it would be OK to
2160	 * write to the hardware in theory, some devices have shown odd
2161	 * behavior when going from D3 -> D3.
2162	 */
2163	oldstate = pci_get_powerstate(child);
2164	if (oldstate == state)
2165		return (0);
2166
2167	/*
2168	 * The PCI power management specification states that after a state
2169	 * transition between PCI power states, system software must
2170	 * guarantee a minimal delay before the function accesses the device.
2171	 * Compute the worst case delay that we need to guarantee before we
2172	 * access the device.  Many devices will be responsive much more
2173	 * quickly than this delay, but there are some that don't respond
2174	 * instantly to state changes.  Transitions to/from D3 state require
2175	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2176	 * is done below with DELAY rather than a sleeper function because
2177	 * this function can be called from contexts where we cannot sleep.
2178	 */
2179	highest = (oldstate > state) ? oldstate : state;
2180	if (highest == PCI_POWERSTATE_D3)
2181	    delay = 10000;
2182	else if (highest == PCI_POWERSTATE_D2)
2183	    delay = 200;
2184	else
2185	    delay = 0;
2186	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2187	    & ~PCIM_PSTAT_DMASK;
2188	result = 0;
2189	switch (state) {
2190	case PCI_POWERSTATE_D0:
2191		status |= PCIM_PSTAT_D0;
2192		break;
2193	case PCI_POWERSTATE_D1:
2194		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2195			return (EOPNOTSUPP);
2196		status |= PCIM_PSTAT_D1;
2197		break;
2198	case PCI_POWERSTATE_D2:
2199		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2200			return (EOPNOTSUPP);
2201		status |= PCIM_PSTAT_D2;
2202		break;
2203	case PCI_POWERSTATE_D3:
2204		status |= PCIM_PSTAT_D3;
2205		break;
2206	default:
2207		return (EINVAL);
2208	}
2209
2210	if (bootverbose)
2211		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2212		    state);
2213
2214	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2215	if (delay)
2216		DELAY(delay);
2217	return (0);
2218}
2219
2220int
2221pci_get_powerstate_method(device_t dev, device_t child)
2222{
2223	struct pci_devinfo *dinfo = device_get_ivars(child);
2224	pcicfgregs *cfg = &dinfo->cfg;
2225	uint16_t status;
2226	int result;
2227
2228	if (cfg->pp.pp_cap != 0) {
2229		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2230		switch (status & PCIM_PSTAT_DMASK) {
2231		case PCIM_PSTAT_D0:
2232			result = PCI_POWERSTATE_D0;
2233			break;
2234		case PCIM_PSTAT_D1:
2235			result = PCI_POWERSTATE_D1;
2236			break;
2237		case PCIM_PSTAT_D2:
2238			result = PCI_POWERSTATE_D2;
2239			break;
2240		case PCIM_PSTAT_D3:
2241			result = PCI_POWERSTATE_D3;
2242			break;
2243		default:
2244			result = PCI_POWERSTATE_UNKNOWN;
2245			break;
2246		}
2247	} else {
2248		/* No support, device is always at D0 */
2249		result = PCI_POWERSTATE_D0;
2250	}
2251	return (result);
2252}
2253
2254/*
2255 * Some convenience functions for PCI device drivers.
2256 */
2257
2258static __inline void
2259pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2260{
2261	uint16_t	command;
2262
2263	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2264	command |= bit;
2265	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2266}
2267
2268static __inline void
2269pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2270{
2271	uint16_t	command;
2272
2273	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2274	command &= ~bit;
2275	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2276}
2277
2278int
2279pci_enable_busmaster_method(device_t dev, device_t child)
2280{
2281	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2282	return (0);
2283}
2284
2285int
2286pci_disable_busmaster_method(device_t dev, device_t child)
2287{
2288	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2289	return (0);
2290}
2291
2292int
2293pci_enable_io_method(device_t dev, device_t child, int space)
2294{
2295	uint16_t bit;
2296
2297	switch(space) {
2298	case SYS_RES_IOPORT:
2299		bit = PCIM_CMD_PORTEN;
2300		break;
2301	case SYS_RES_MEMORY:
2302		bit = PCIM_CMD_MEMEN;
2303		break;
2304	default:
2305		return (EINVAL);
2306	}
2307	pci_set_command_bit(dev, child, bit);
2308	return (0);
2309}
2310
2311int
2312pci_disable_io_method(device_t dev, device_t child, int space)
2313{
2314	uint16_t bit;
2315
2316	switch(space) {
2317	case SYS_RES_IOPORT:
2318		bit = PCIM_CMD_PORTEN;
2319		break;
2320	case SYS_RES_MEMORY:
2321		bit = PCIM_CMD_MEMEN;
2322		break;
2323	default:
2324		return (EINVAL);
2325	}
2326	pci_clear_command_bit(dev, child, bit);
2327	return (0);
2328}
2329
2330/*
2331 * New style pci driver.  Parent device is either a pci-host-bridge or a
2332 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2333 */
2334
2335void
2336pci_print_verbose(struct pci_devinfo *dinfo)
2337{
2338
2339	if (bootverbose) {
2340		pcicfgregs *cfg = &dinfo->cfg;
2341
2342		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2343		    cfg->vendor, cfg->device, cfg->revid);
2344		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2345		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2346		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2347		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2348		    cfg->mfdev);
2349		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2350		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2351		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2352		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2353		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2354		if (cfg->intpin > 0)
2355			printf("\tintpin=%c, irq=%d\n",
2356			    cfg->intpin +'a' -1, cfg->intline);
2357		if (cfg->pp.pp_cap) {
2358			uint16_t status;
2359
2360			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2361			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2362			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2363			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2364			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2365			    status & PCIM_PSTAT_DMASK);
2366		}
2367		if (cfg->msi.msi_location) {
2368			int ctrl;
2369
2370			ctrl = cfg->msi.msi_ctrl;
2371			printf("\tMSI supports %d message%s%s%s\n",
2372			    cfg->msi.msi_msgnum,
2373			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2374			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2375			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2376		}
2377		if (cfg->msix.msix_location) {
2378			printf("\tMSI-X supports %d message%s ",
2379			    cfg->msix.msix_msgnum,
2380			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2381			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2382				printf("in map 0x%x\n",
2383				    cfg->msix.msix_table_bar);
2384			else
2385				printf("in maps 0x%x and 0x%x\n",
2386				    cfg->msix.msix_table_bar,
2387				    cfg->msix.msix_pba_bar);
2388		}
2389	}
2390}
2391
2392static int
2393pci_porten(device_t dev)
2394{
2395	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2396}
2397
2398static int
2399pci_memen(device_t dev)
2400{
2401	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2402}
2403
2404static void
2405pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2406{
2407	struct pci_devinfo *dinfo;
2408	pci_addr_t map, testval;
2409	int ln2range;
2410	uint16_t cmd;
2411
2412	/*
2413	 * The device ROM BAR is special.  It is always a 32-bit
2414	 * memory BAR.  Bit 0 is special and should not be set when
2415	 * sizing the BAR.
2416	 */
2417	dinfo = device_get_ivars(dev);
2418	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2419		map = pci_read_config(dev, reg, 4);
2420		pci_write_config(dev, reg, 0xfffffffe, 4);
2421		testval = pci_read_config(dev, reg, 4);
2422		pci_write_config(dev, reg, map, 4);
2423		*mapp = map;
2424		*testvalp = testval;
2425		return;
2426	}
2427
2428	map = pci_read_config(dev, reg, 4);
2429	ln2range = pci_maprange(map);
2430	if (ln2range == 64)
2431		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2432
2433	/*
2434	 * Disable decoding via the command register before
2435	 * determining the BAR's length since we will be placing it in
2436	 * a weird state.
2437	 */
2438	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2439	pci_write_config(dev, PCIR_COMMAND,
2440	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2441
2442	/*
2443	 * Determine the BAR's length by writing all 1's.  The bottom
2444	 * log_2(size) bits of the BAR will stick as 0 when we read
2445	 * the value back.
2446	 */
2447	pci_write_config(dev, reg, 0xffffffff, 4);
2448	testval = pci_read_config(dev, reg, 4);
2449	if (ln2range == 64) {
2450		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2451		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2452	}
2453
2454	/*
2455	 * Restore the original value of the BAR.  We may have reprogrammed
2456	 * the BAR of the low-level console device and when booting verbose,
2457	 * we need the console device addressable.
2458	 */
2459	pci_write_config(dev, reg, map, 4);
2460	if (ln2range == 64)
2461		pci_write_config(dev, reg + 4, map >> 32, 4);
2462	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2463
2464	*mapp = map;
2465	*testvalp = testval;
2466}
2467
2468static void
2469pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2470{
2471	struct pci_devinfo *dinfo;
2472	int ln2range;
2473
2474	/* The device ROM BAR is always a 32-bit memory BAR. */
2475	dinfo = device_get_ivars(dev);
2476	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2477		ln2range = 32;
2478	else
2479		ln2range = pci_maprange(pm->pm_value);
2480	pci_write_config(dev, pm->pm_reg, base, 4);
2481	if (ln2range == 64)
2482		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2483	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2484	if (ln2range == 64)
2485		pm->pm_value |= (pci_addr_t)pci_read_config(dev, pm->pm_reg + 4, 4) << 32;
2486}
2487
2488struct pci_map *
2489pci_find_bar(device_t dev, int reg)
2490{
2491	struct pci_devinfo *dinfo;
2492	struct pci_map *pm;
2493
2494	dinfo = device_get_ivars(dev);
2495	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2496		if (pm->pm_reg == reg)
2497			return (pm);
2498	}
2499	return (NULL);
2500}
2501
2502int
2503pci_bar_enabled(device_t dev, struct pci_map *pm)
2504{
2505	struct pci_devinfo *dinfo;
2506	uint16_t cmd;
2507
2508	dinfo = device_get_ivars(dev);
2509	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2510	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2511		return (0);
2512	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2513	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2514		return ((cmd & PCIM_CMD_MEMEN) != 0);
2515	else
2516		return ((cmd & PCIM_CMD_PORTEN) != 0);
2517}
2518
2519static struct pci_map *
2520pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2521{
2522	struct pci_devinfo *dinfo;
2523	struct pci_map *pm, *prev;
2524
2525	dinfo = device_get_ivars(dev);
2526	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2527	pm->pm_reg = reg;
2528	pm->pm_value = value;
2529	pm->pm_size = size;
2530	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2531		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2532		    reg));
2533		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2534		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2535			break;
2536	}
2537	if (prev != NULL)
2538		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2539	else
2540		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2541	return (pm);
2542}
2543
2544static void
2545pci_restore_bars(device_t dev)
2546{
2547	struct pci_devinfo *dinfo;
2548	struct pci_map *pm;
2549	int ln2range;
2550
2551	dinfo = device_get_ivars(dev);
2552	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2553		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2554			ln2range = 32;
2555		else
2556			ln2range = pci_maprange(pm->pm_value);
2557		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2558		if (ln2range == 64)
2559			pci_write_config(dev, pm->pm_reg + 4,
2560			    pm->pm_value >> 32, 4);
2561	}
2562}
2563
2564/*
2565 * Add a resource based on a pci map register. Return 1 if the map
2566 * register is a 32bit map register or 2 if it is a 64bit register.
2567 */
2568static int
2569pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2570    int force, int prefetch)
2571{
2572	struct pci_map *pm;
2573	pci_addr_t base, map, testval;
2574	pci_addr_t start, end, count;
2575	int barlen, basezero, maprange, mapsize, type;
2576	uint16_t cmd;
2577	struct resource *res;
2578
2579	/*
2580	 * The BAR may already exist if the device is a CardBus card
2581	 * whose CIS is stored in this BAR.
2582	 */
2583	pm = pci_find_bar(dev, reg);
2584	if (pm != NULL) {
2585		maprange = pci_maprange(pm->pm_value);
2586		barlen = maprange == 64 ? 2 : 1;
2587		return (barlen);
2588	}
2589
2590	pci_read_bar(dev, reg, &map, &testval);
2591	if (PCI_BAR_MEM(map)) {
2592		type = SYS_RES_MEMORY;
2593		if (map & PCIM_BAR_MEM_PREFETCH)
2594			prefetch = 1;
2595	} else
2596		type = SYS_RES_IOPORT;
2597	mapsize = pci_mapsize(testval);
2598	base = pci_mapbase(map);
2599#ifdef __PCI_BAR_ZERO_VALID
2600	basezero = 0;
2601#else
2602	basezero = base == 0;
2603#endif
2604	maprange = pci_maprange(map);
2605	barlen = maprange == 64 ? 2 : 1;
2606
2607	/*
2608	 * For I/O registers, if bottom bit is set, and the next bit up
2609	 * isn't clear, we know we have a BAR that doesn't conform to the
2610	 * spec, so ignore it.  Also, sanity check the size of the data
2611	 * areas to the type of memory involved.  Memory must be at least
2612	 * 16 bytes in size, while I/O ranges must be at least 4.
2613	 */
2614	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2615		return (barlen);
2616	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2617	    (type == SYS_RES_IOPORT && mapsize < 2))
2618		return (barlen);
2619
2620	/* Save a record of this BAR. */
2621	pm = pci_add_bar(dev, reg, map, mapsize);
2622	if (bootverbose) {
2623		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2624		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2625		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2626			printf(", port disabled\n");
2627		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2628			printf(", memory disabled\n");
2629		else
2630			printf(", enabled\n");
2631	}
2632
2633	/*
2634	 * If base is 0, then we have problems if this architecture does
2635	 * not allow that.  It is best to ignore such entries for the
2636	 * moment.  These will be allocated later if the driver specifically
2637	 * requests them.  However, some removable busses look better when
2638	 * all resources are allocated, so allow '0' to be overriden.
2639	 *
2640	 * Similarly treat maps whose values is the same as the test value
2641	 * read back.  These maps have had all f's written to them by the
2642	 * BIOS in an attempt to disable the resources.
2643	 */
2644	if (!force && (basezero || map == testval))
2645		return (barlen);
2646	if ((u_long)base != base) {
2647		device_printf(bus,
2648		    "pci%d:%d:%d:%d bar %#x too many address bits",
2649		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2650		    pci_get_function(dev), reg);
2651		return (barlen);
2652	}
2653
2654	/*
2655	 * This code theoretically does the right thing, but has
2656	 * undesirable side effects in some cases where peripherals
2657	 * respond oddly to having these bits enabled.  Let the user
2658	 * be able to turn them off (since pci_enable_io_modes is 1 by
2659	 * default).
2660	 */
2661	if (pci_enable_io_modes) {
2662		/* Turn on resources that have been left off by a lazy BIOS */
2663		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2664			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2665			cmd |= PCIM_CMD_PORTEN;
2666			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2667		}
2668		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2669			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2670			cmd |= PCIM_CMD_MEMEN;
2671			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2672		}
2673	} else {
2674		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2675			return (barlen);
2676		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2677			return (barlen);
2678	}
2679
2680	count = (pci_addr_t)1 << mapsize;
2681	if (basezero || base == pci_mapbase(testval)) {
2682		start = 0;	/* Let the parent decide. */
2683		end = ~0ULL;
2684	} else {
2685		start = base;
2686		end = base + count - 1;
2687	}
2688	resource_list_add(rl, type, reg, start, end, count);
2689
2690	/*
2691	 * Try to allocate the resource for this BAR from our parent
2692	 * so that this resource range is already reserved.  The
2693	 * driver for this device will later inherit this resource in
2694	 * pci_alloc_resource().
2695	 */
2696	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2697	    prefetch ? RF_PREFETCHABLE : 0);
2698	if (res == NULL) {
2699		/*
2700		 * If the allocation fails, clear the BAR and delete
2701		 * the resource list entry to force
2702		 * pci_alloc_resource() to allocate resources from the
2703		 * parent.
2704		 */
2705		resource_list_delete(rl, type, reg);
2706		start = 0;
2707	} else
2708		start = rman_get_start(res);
2709	pci_write_bar(dev, pm, start);
2710	return (barlen);
2711}
2712
2713/*
2714 * For ATA devices we need to decide early what addressing mode to use.
2715 * Legacy demands that the primary and secondary ATA ports sits on the
2716 * same addresses that old ISA hardware did. This dictates that we use
2717 * those addresses and ignore the BAR's if we cannot set PCI native
2718 * addressing mode.
2719 */
2720static void
2721pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2722    uint32_t prefetchmask)
2723{
2724	struct resource *r;
2725	int rid, type, progif;
2726#if 0
2727	/* if this device supports PCI native addressing use it */
2728	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2729	if ((progif & 0x8a) == 0x8a) {
2730		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2731		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2732			printf("Trying ATA native PCI addressing mode\n");
2733			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2734		}
2735	}
2736#endif
2737	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2738	type = SYS_RES_IOPORT;
2739	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2740		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2741		    prefetchmask & (1 << 0));
2742		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2743		    prefetchmask & (1 << 1));
2744	} else {
2745		rid = PCIR_BAR(0);
2746		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2747		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2748		    0x1f7, 8, 0);
2749		rid = PCIR_BAR(1);
2750		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2751		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2752		    0x3f6, 1, 0);
2753	}
2754	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2755		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2756		    prefetchmask & (1 << 2));
2757		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2758		    prefetchmask & (1 << 3));
2759	} else {
2760		rid = PCIR_BAR(2);
2761		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2762		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2763		    0x177, 8, 0);
2764		rid = PCIR_BAR(3);
2765		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2766		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2767		    0x376, 1, 0);
2768	}
2769	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2770	    prefetchmask & (1 << 4));
2771	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2772	    prefetchmask & (1 << 5));
2773}
2774
2775static void
2776pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2777{
2778	struct pci_devinfo *dinfo = device_get_ivars(dev);
2779	pcicfgregs *cfg = &dinfo->cfg;
2780	char tunable_name[64];
2781	int irq;
2782
2783	/* Has to have an intpin to have an interrupt. */
2784	if (cfg->intpin == 0)
2785		return;
2786
2787	/* Let the user override the IRQ with a tunable. */
2788	irq = PCI_INVALID_IRQ;
2789	snprintf(tunable_name, sizeof(tunable_name),
2790	    "hw.pci%d.%d.%d.INT%c.irq",
2791	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2792	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2793		irq = PCI_INVALID_IRQ;
2794
2795	/*
2796	 * If we didn't get an IRQ via the tunable, then we either use the
2797	 * IRQ value in the intline register or we ask the bus to route an
2798	 * interrupt for us.  If force_route is true, then we only use the
2799	 * value in the intline register if the bus was unable to assign an
2800	 * IRQ.
2801	 */
2802	if (!PCI_INTERRUPT_VALID(irq)) {
2803		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2804			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2805		if (!PCI_INTERRUPT_VALID(irq))
2806			irq = cfg->intline;
2807	}
2808
2809	/* If after all that we don't have an IRQ, just bail. */
2810	if (!PCI_INTERRUPT_VALID(irq))
2811		return;
2812
2813	/* Update the config register if it changed. */
2814	if (irq != cfg->intline) {
2815		cfg->intline = irq;
2816		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2817	}
2818
2819	/* Add this IRQ as rid 0 interrupt resource. */
2820	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2821}
2822
2823/* Perform early OHCI takeover from SMM. */
2824static void
2825ohci_early_takeover(device_t self)
2826{
2827	struct resource *res;
2828	uint32_t ctl;
2829	int rid;
2830	int i;
2831
2832	rid = PCIR_BAR(0);
2833	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2834	if (res == NULL)
2835		return;
2836
2837	ctl = bus_read_4(res, OHCI_CONTROL);
2838	if (ctl & OHCI_IR) {
2839		if (bootverbose)
2840			printf("ohci early: "
2841			    "SMM active, request owner change\n");
2842		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2843		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2844			DELAY(1000);
2845			ctl = bus_read_4(res, OHCI_CONTROL);
2846		}
2847		if (ctl & OHCI_IR) {
2848			if (bootverbose)
2849				printf("ohci early: "
2850				    "SMM does not respond, resetting\n");
2851			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2852		}
2853		/* Disable interrupts */
2854		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2855	}
2856
2857	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2858}
2859
2860/* Perform early UHCI takeover from SMM. */
2861static void
2862uhci_early_takeover(device_t self)
2863{
2864	struct resource *res;
2865	int rid;
2866
2867	/*
2868	 * Set the PIRQD enable bit and switch off all the others. We don't
2869	 * want legacy support to interfere with us XXX Does this also mean
2870	 * that the BIOS won't touch the keyboard anymore if it is connected
2871	 * to the ports of the root hub?
2872	 */
2873	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2874
2875	/* Disable interrupts */
2876	rid = PCI_UHCI_BASE_REG;
2877	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2878	if (res != NULL) {
2879		bus_write_2(res, UHCI_INTR, 0);
2880		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2881	}
2882}
2883
2884/* Perform early EHCI takeover from SMM. */
2885static void
2886ehci_early_takeover(device_t self)
2887{
2888	struct resource *res;
2889	uint32_t cparams;
2890	uint32_t eec;
2891	uint8_t eecp;
2892	uint8_t bios_sem;
2893	uint8_t offs;
2894	int rid;
2895	int i;
2896
2897	rid = PCIR_BAR(0);
2898	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2899	if (res == NULL)
2900		return;
2901
2902	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2903
2904	/* Synchronise with the BIOS if it owns the controller. */
2905	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2906	    eecp = EHCI_EECP_NEXT(eec)) {
2907		eec = pci_read_config(self, eecp, 4);
2908		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2909			continue;
2910		}
2911		bios_sem = pci_read_config(self, eecp +
2912		    EHCI_LEGSUP_BIOS_SEM, 1);
2913		if (bios_sem == 0) {
2914			continue;
2915		}
2916		if (bootverbose)
2917			printf("ehci early: "
2918			    "SMM active, request owner change\n");
2919
2920		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2921
2922		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2923			DELAY(1000);
2924			bios_sem = pci_read_config(self, eecp +
2925			    EHCI_LEGSUP_BIOS_SEM, 1);
2926		}
2927
2928		if (bios_sem != 0) {
2929			if (bootverbose)
2930				printf("ehci early: "
2931				    "SMM does not respond\n");
2932		}
2933		/* Disable interrupts */
2934		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2935		bus_write_4(res, offs + EHCI_USBINTR, 0);
2936	}
2937	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2938}
2939
2940void
2941pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2942{
2943	struct pci_devinfo *dinfo = device_get_ivars(dev);
2944	pcicfgregs *cfg = &dinfo->cfg;
2945	struct resource_list *rl = &dinfo->resources;
2946	struct pci_quirk *q;
2947	int i;
2948
2949	/* ATA devices needs special map treatment */
2950	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2951	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2952	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2953	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2954	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2955		pci_ata_maps(bus, dev, rl, force, prefetchmask);
2956	else
2957		for (i = 0; i < cfg->nummaps;)
2958			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
2959			    prefetchmask & (1 << i));
2960
2961	/*
2962	 * Add additional, quirked resources.
2963	 */
2964	for (q = &pci_quirks[0]; q->devid; q++) {
2965		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2966		    && q->type == PCI_QUIRK_MAP_REG)
2967			pci_add_map(bus, dev, q->arg1, rl, force, 0);
2968	}
2969
2970	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2971#ifdef __PCI_REROUTE_INTERRUPT
2972		/*
2973		 * Try to re-route interrupts. Sometimes the BIOS or
2974		 * firmware may leave bogus values in these registers.
2975		 * If the re-route fails, then just stick with what we
2976		 * have.
2977		 */
2978		pci_assign_interrupt(bus, dev, 1);
2979#else
2980		pci_assign_interrupt(bus, dev, 0);
2981#endif
2982	}
2983
2984	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
2985	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
2986		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
2987			ehci_early_takeover(dev);
2988		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
2989			ohci_early_takeover(dev);
2990		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
2991			uhci_early_takeover(dev);
2992	}
2993}
2994
2995void
2996pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2997{
2998#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2999	device_t pcib = device_get_parent(dev);
3000	struct pci_devinfo *dinfo;
3001	int maxslots;
3002	int s, f, pcifunchigh;
3003	uint8_t hdrtype;
3004
3005	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3006	    ("dinfo_size too small"));
3007	maxslots = PCIB_MAXSLOTS(pcib);
3008	for (s = 0; s <= maxslots; s++) {
3009		pcifunchigh = 0;
3010		f = 0;
3011		DELAY(1);
3012		hdrtype = REG(PCIR_HDRTYPE, 1);
3013		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3014			continue;
3015		if (hdrtype & PCIM_MFDEV)
3016			pcifunchigh = PCI_FUNCMAX;
3017		for (f = 0; f <= pcifunchigh; f++) {
3018			dinfo = pci_read_device(pcib, domain, busno, s, f,
3019			    dinfo_size);
3020			if (dinfo != NULL) {
3021				pci_add_child(dev, dinfo);
3022			}
3023		}
3024	}
3025#undef REG
3026}
3027
3028void
3029pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3030{
3031	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3032	device_set_ivars(dinfo->cfg.dev, dinfo);
3033	resource_list_init(&dinfo->resources);
3034	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3035	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3036	pci_print_verbose(dinfo);
3037	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3038}
3039
3040static int
3041pci_probe(device_t dev)
3042{
3043
3044	device_set_desc(dev, "PCI bus");
3045
3046	/* Allow other subclasses to override this driver. */
3047	return (BUS_PROBE_GENERIC);
3048}
3049
3050static int
3051pci_attach(device_t dev)
3052{
3053	int busno, domain;
3054
3055	/*
3056	 * Since there can be multiple independantly numbered PCI
3057	 * busses on systems with multiple PCI domains, we can't use
3058	 * the unit number to decide which bus we are probing. We ask
3059	 * the parent pcib what our domain and bus numbers are.
3060	 */
3061	domain = pcib_get_domain(dev);
3062	busno = pcib_get_bus(dev);
3063	if (bootverbose)
3064		device_printf(dev, "domain=%d, physical bus=%d\n",
3065		    domain, busno);
3066	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3067	return (bus_generic_attach(dev));
3068}
3069
3070static void
3071pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3072    int state)
3073{
3074	device_t child, pcib;
3075	struct pci_devinfo *dinfo;
3076	int dstate, i;
3077
3078	/*
3079	 * Set the device to the given state.  If the firmware suggests
3080	 * a different power state, use it instead.  If power management
3081	 * is not present, the firmware is responsible for managing
3082	 * device power.  Skip children who aren't attached since they
3083	 * are handled separately.
3084	 */
3085	pcib = device_get_parent(dev);
3086	for (i = 0; i < numdevs; i++) {
3087		child = devlist[i];
3088		dinfo = device_get_ivars(child);
3089		dstate = state;
3090		if (device_is_attached(child) &&
3091		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3092			pci_set_powerstate(child, dstate);
3093	}
3094}
3095
3096int
3097pci_suspend(device_t dev)
3098{
3099	device_t child, *devlist;
3100	struct pci_devinfo *dinfo;
3101	int error, i, numdevs;
3102
3103	/*
3104	 * Save the PCI configuration space for each child and set the
3105	 * device in the appropriate power state for this sleep state.
3106	 */
3107	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3108		return (error);
3109	for (i = 0; i < numdevs; i++) {
3110		child = devlist[i];
3111		dinfo = device_get_ivars(child);
3112		pci_cfg_save(child, dinfo, 0);
3113	}
3114
3115	/* Suspend devices before potentially powering them down. */
3116	error = bus_generic_suspend(dev);
3117	if (error) {
3118		free(devlist, M_TEMP);
3119		return (error);
3120	}
3121	if (pci_do_power_suspend)
3122		pci_set_power_children(dev, devlist, numdevs,
3123		    PCI_POWERSTATE_D3);
3124	free(devlist, M_TEMP);
3125	return (0);
3126}
3127
3128int
3129pci_resume(device_t dev)
3130{
3131	device_t child, *devlist;
3132	struct pci_devinfo *dinfo;
3133	int error, i, numdevs;
3134
3135	/*
3136	 * Set each child to D0 and restore its PCI configuration space.
3137	 */
3138	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3139		return (error);
3140	if (pci_do_power_resume)
3141		pci_set_power_children(dev, devlist, numdevs,
3142		    PCI_POWERSTATE_D0);
3143
3144	/* Now the device is powered up, restore its config space. */
3145	for (i = 0; i < numdevs; i++) {
3146		child = devlist[i];
3147		dinfo = device_get_ivars(child);
3148
3149		pci_cfg_restore(child, dinfo);
3150		if (!device_is_attached(child))
3151			pci_cfg_save(child, dinfo, 1);
3152	}
3153
3154	/*
3155	 * Resume critical devices first, then everything else later.
3156	 */
3157	for (i = 0; i < numdevs; i++) {
3158		child = devlist[i];
3159		switch (pci_get_class(child)) {
3160		case PCIC_DISPLAY:
3161		case PCIC_MEMORY:
3162		case PCIC_BRIDGE:
3163		case PCIC_BASEPERIPH:
3164			DEVICE_RESUME(child);
3165			break;
3166		}
3167	}
3168	for (i = 0; i < numdevs; i++) {
3169		child = devlist[i];
3170		switch (pci_get_class(child)) {
3171		case PCIC_DISPLAY:
3172		case PCIC_MEMORY:
3173		case PCIC_BRIDGE:
3174		case PCIC_BASEPERIPH:
3175			break;
3176		default:
3177			DEVICE_RESUME(child);
3178		}
3179	}
3180	free(devlist, M_TEMP);
3181	return (0);
3182}
3183
3184static void
3185pci_load_vendor_data(void)
3186{
3187	caddr_t data;
3188	void *ptr;
3189	size_t sz;
3190
3191	data = preload_search_by_type("pci_vendor_data");
3192	if (data != NULL) {
3193		ptr = preload_fetch_addr(data);
3194		sz = preload_fetch_size(data);
3195		if (ptr != NULL && sz != 0) {
3196			pci_vendordata = ptr;
3197			pci_vendordata_size = sz;
3198			/* terminate the database */
3199			pci_vendordata[pci_vendordata_size] = '\n';
3200		}
3201	}
3202}
3203
3204void
3205pci_driver_added(device_t dev, driver_t *driver)
3206{
3207	int numdevs;
3208	device_t *devlist;
3209	device_t child;
3210	struct pci_devinfo *dinfo;
3211	int i;
3212
3213	if (bootverbose)
3214		device_printf(dev, "driver added\n");
3215	DEVICE_IDENTIFY(driver, dev);
3216	if (device_get_children(dev, &devlist, &numdevs) != 0)
3217		return;
3218	for (i = 0; i < numdevs; i++) {
3219		child = devlist[i];
3220		if (device_get_state(child) != DS_NOTPRESENT)
3221			continue;
3222		dinfo = device_get_ivars(child);
3223		pci_print_verbose(dinfo);
3224		if (bootverbose)
3225			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3226		pci_cfg_restore(child, dinfo);
3227		if (device_probe_and_attach(child) != 0)
3228			pci_cfg_save(child, dinfo, 1);
3229	}
3230	free(devlist, M_TEMP);
3231}
3232
3233int
3234pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3235    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3236{
3237	struct pci_devinfo *dinfo;
3238	struct msix_table_entry *mte;
3239	struct msix_vector *mv;
3240	uint64_t addr;
3241	uint32_t data;
3242	void *cookie;
3243	int error, rid;
3244
3245	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3246	    arg, &cookie);
3247	if (error)
3248		return (error);
3249
3250	/* If this is not a direct child, just bail out. */
3251	if (device_get_parent(child) != dev) {
3252		*cookiep = cookie;
3253		return(0);
3254	}
3255
3256	rid = rman_get_rid(irq);
3257	if (rid == 0) {
3258		/* Make sure that INTx is enabled */
3259		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3260	} else {
3261		/*
3262		 * Check to see if the interrupt is MSI or MSI-X.
3263		 * Ask our parent to map the MSI and give
3264		 * us the address and data register values.
3265		 * If we fail for some reason, teardown the
3266		 * interrupt handler.
3267		 */
3268		dinfo = device_get_ivars(child);
3269		if (dinfo->cfg.msi.msi_alloc > 0) {
3270			if (dinfo->cfg.msi.msi_addr == 0) {
3271				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3272			    ("MSI has handlers, but vectors not mapped"));
3273				error = PCIB_MAP_MSI(device_get_parent(dev),
3274				    child, rman_get_start(irq), &addr, &data);
3275				if (error)
3276					goto bad;
3277				dinfo->cfg.msi.msi_addr = addr;
3278				dinfo->cfg.msi.msi_data = data;
3279			}
3280			if (dinfo->cfg.msi.msi_handlers == 0)
3281				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3282				    dinfo->cfg.msi.msi_data);
3283			dinfo->cfg.msi.msi_handlers++;
3284		} else {
3285			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3286			    ("No MSI or MSI-X interrupts allocated"));
3287			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3288			    ("MSI-X index too high"));
3289			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3290			KASSERT(mte->mte_vector != 0, ("no message vector"));
3291			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3292			KASSERT(mv->mv_irq == rman_get_start(irq),
3293			    ("IRQ mismatch"));
3294			if (mv->mv_address == 0) {
3295				KASSERT(mte->mte_handlers == 0,
3296		    ("MSI-X table entry has handlers, but vector not mapped"));
3297				error = PCIB_MAP_MSI(device_get_parent(dev),
3298				    child, rman_get_start(irq), &addr, &data);
3299				if (error)
3300					goto bad;
3301				mv->mv_address = addr;
3302				mv->mv_data = data;
3303			}
3304			if (mte->mte_handlers == 0) {
3305				pci_enable_msix(child, rid - 1, mv->mv_address,
3306				    mv->mv_data);
3307				pci_unmask_msix(child, rid - 1);
3308			}
3309			mte->mte_handlers++;
3310		}
3311
3312		/* Make sure that INTx is disabled if we are using MSI/MSIX */
3313		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3314	bad:
3315		if (error) {
3316			(void)bus_generic_teardown_intr(dev, child, irq,
3317			    cookie);
3318			return (error);
3319		}
3320	}
3321	*cookiep = cookie;
3322	return (0);
3323}
3324
3325int
3326pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3327    void *cookie)
3328{
3329	struct msix_table_entry *mte;
3330	struct resource_list_entry *rle;
3331	struct pci_devinfo *dinfo;
3332	int error, rid;
3333
3334	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3335		return (EINVAL);
3336
3337	/* If this isn't a direct child, just bail out */
3338	if (device_get_parent(child) != dev)
3339		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3340
3341	rid = rman_get_rid(irq);
3342	if (rid == 0) {
3343		/* Mask INTx */
3344		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3345	} else {
3346		/*
3347		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3348		 * decrement the appropriate handlers count and mask the
3349		 * MSI-X message, or disable MSI messages if the count
3350		 * drops to 0.
3351		 */
3352		dinfo = device_get_ivars(child);
3353		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3354		if (rle->res != irq)
3355			return (EINVAL);
3356		if (dinfo->cfg.msi.msi_alloc > 0) {
3357			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3358			    ("MSI-X index too high"));
3359			if (dinfo->cfg.msi.msi_handlers == 0)
3360				return (EINVAL);
3361			dinfo->cfg.msi.msi_handlers--;
3362			if (dinfo->cfg.msi.msi_handlers == 0)
3363				pci_disable_msi(child);
3364		} else {
3365			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3366			    ("No MSI or MSI-X interrupts allocated"));
3367			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3368			    ("MSI-X index too high"));
3369			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3370			if (mte->mte_handlers == 0)
3371				return (EINVAL);
3372			mte->mte_handlers--;
3373			if (mte->mte_handlers == 0)
3374				pci_mask_msix(child, rid - 1);
3375		}
3376	}
3377	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3378	if (rid > 0)
3379		KASSERT(error == 0,
3380		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3381	return (error);
3382}
3383
3384int
3385pci_print_child(device_t dev, device_t child)
3386{
3387	struct pci_devinfo *dinfo;
3388	struct resource_list *rl;
3389	int retval = 0;
3390
3391	dinfo = device_get_ivars(child);
3392	rl = &dinfo->resources;
3393
3394	retval += bus_print_child_header(dev, child);
3395
3396	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3397	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3398	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3399	if (device_get_flags(dev))
3400		retval += printf(" flags %#x", device_get_flags(dev));
3401
3402	retval += printf(" at device %d.%d", pci_get_slot(child),
3403	    pci_get_function(child));
3404
3405	retval += bus_print_child_footer(dev, child);
3406
3407	return (retval);
3408}
3409
3410static struct
3411{
3412	int	class;
3413	int	subclass;
3414	char	*desc;
3415} pci_nomatch_tab[] = {
3416	{PCIC_OLD,		-1,			"old"},
3417	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3418	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3419	{PCIC_STORAGE,		-1,			"mass storage"},
3420	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3421	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3422	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3423	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3424	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3425	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3426	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3427	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3428	{PCIC_NETWORK,		-1,			"network"},
3429	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3430	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3431	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3432	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3433	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3434	{PCIC_DISPLAY,		-1,			"display"},
3435	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3436	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3437	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3438	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3439	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3440	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3441	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3442	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3443	{PCIC_MEMORY,		-1,			"memory"},
3444	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3445	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3446	{PCIC_BRIDGE,		-1,			"bridge"},
3447	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3448	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3449	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3450	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3451	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3452	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3453	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3454	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3455	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3456	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3457	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3458	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3459	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3460	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3461	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3462	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3463	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3464	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3465	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3466	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3467	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3468	{PCIC_INPUTDEV,		-1,			"input device"},
3469	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3470	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3471	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3472	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3473	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3474	{PCIC_DOCKING,		-1,			"docking station"},
3475	{PCIC_PROCESSOR,	-1,			"processor"},
3476	{PCIC_SERIALBUS,	-1,			"serial bus"},
3477	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3478	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3479	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3480	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3481	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3482	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3483	{PCIC_WIRELESS,		-1,			"wireless controller"},
3484	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3485	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3486	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3487	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3488	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3489	{PCIC_SATCOM,		-1,			"satellite communication"},
3490	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3491	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3492	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3493	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3494	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3495	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3496	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3497	{PCIC_DASP,		-1,			"dasp"},
3498	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3499	{0, 0,		NULL}
3500};
3501
3502void
3503pci_probe_nomatch(device_t dev, device_t child)
3504{
3505	int	i;
3506	char	*cp, *scp, *device;
3507
3508	/*
3509	 * Look for a listing for this device in a loaded device database.
3510	 */
3511	if ((device = pci_describe_device(child)) != NULL) {
3512		device_printf(dev, "<%s>", device);
3513		free(device, M_DEVBUF);
3514	} else {
3515		/*
3516		 * Scan the class/subclass descriptions for a general
3517		 * description.
3518		 */
3519		cp = "unknown";
3520		scp = NULL;
3521		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3522			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3523				if (pci_nomatch_tab[i].subclass == -1) {
3524					cp = pci_nomatch_tab[i].desc;
3525				} else if (pci_nomatch_tab[i].subclass ==
3526				    pci_get_subclass(child)) {
3527					scp = pci_nomatch_tab[i].desc;
3528				}
3529			}
3530		}
3531		device_printf(dev, "<%s%s%s>",
3532		    cp ? cp : "",
3533		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3534		    scp ? scp : "");
3535	}
3536	printf(" at device %d.%d (no driver attached)\n",
3537	    pci_get_slot(child), pci_get_function(child));
3538	pci_cfg_save(child, device_get_ivars(child), 1);
3539	return;
3540}
3541
3542/*
3543 * Parse the PCI device database, if loaded, and return a pointer to a
3544 * description of the device.
3545 *
3546 * The database is flat text formatted as follows:
3547 *
3548 * Any line not in a valid format is ignored.
3549 * Lines are terminated with newline '\n' characters.
3550 *
3551 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3552 * the vendor name.
3553 *
3554 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3555 * - devices cannot be listed without a corresponding VENDOR line.
3556 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3557 * another TAB, then the device name.
3558 */
3559
3560/*
3561 * Assuming (ptr) points to the beginning of a line in the database,
3562 * return the vendor or device and description of the next entry.
3563 * The value of (vendor) or (device) inappropriate for the entry type
3564 * is set to -1.  Returns nonzero at the end of the database.
3565 *
3566 * Note that this is slightly unrobust in the face of corrupt data;
3567 * we attempt to safeguard against this by spamming the end of the
3568 * database with a newline when we initialise.
3569 */
3570static int
3571pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3572{
3573	char	*cp = *ptr;
3574	int	left;
3575
3576	*device = -1;
3577	*vendor = -1;
3578	**desc = '\0';
3579	for (;;) {
3580		left = pci_vendordata_size - (cp - pci_vendordata);
3581		if (left <= 0) {
3582			*ptr = cp;
3583			return(1);
3584		}
3585
3586		/* vendor entry? */
3587		if (*cp != '\t' &&
3588		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3589			break;
3590		/* device entry? */
3591		if (*cp == '\t' &&
3592		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3593			break;
3594
3595		/* skip to next line */
3596		while (*cp != '\n' && left > 0) {
3597			cp++;
3598			left--;
3599		}
3600		if (*cp == '\n') {
3601			cp++;
3602			left--;
3603		}
3604	}
3605	/* skip to next line */
3606	while (*cp != '\n' && left > 0) {
3607		cp++;
3608		left--;
3609	}
3610	if (*cp == '\n' && left > 0)
3611		cp++;
3612	*ptr = cp;
3613	return(0);
3614}
3615
3616static char *
3617pci_describe_device(device_t dev)
3618{
3619	int	vendor, device;
3620	char	*desc, *vp, *dp, *line;
3621
3622	desc = vp = dp = NULL;
3623
3624	/*
3625	 * If we have no vendor data, we can't do anything.
3626	 */
3627	if (pci_vendordata == NULL)
3628		goto out;
3629
3630	/*
3631	 * Scan the vendor data looking for this device
3632	 */
3633	line = pci_vendordata;
3634	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3635		goto out;
3636	for (;;) {
3637		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3638			goto out;
3639		if (vendor == pci_get_vendor(dev))
3640			break;
3641	}
3642	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3643		goto out;
3644	for (;;) {
3645		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3646			*dp = 0;
3647			break;
3648		}
3649		if (vendor != -1) {
3650			*dp = 0;
3651			break;
3652		}
3653		if (device == pci_get_device(dev))
3654			break;
3655	}
3656	if (dp[0] == '\0')
3657		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3658	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3659	    NULL)
3660		sprintf(desc, "%s, %s", vp, dp);
3661 out:
3662	if (vp != NULL)
3663		free(vp, M_DEVBUF);
3664	if (dp != NULL)
3665		free(dp, M_DEVBUF);
3666	return(desc);
3667}
3668
3669int
3670pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3671{
3672	struct pci_devinfo *dinfo;
3673	pcicfgregs *cfg;
3674
3675	dinfo = device_get_ivars(child);
3676	cfg = &dinfo->cfg;
3677
3678	switch (which) {
3679	case PCI_IVAR_ETHADDR:
3680		/*
3681		 * The generic accessor doesn't deal with failure, so
3682		 * we set the return value, then return an error.
3683		 */
3684		*((uint8_t **) result) = NULL;
3685		return (EINVAL);
3686	case PCI_IVAR_SUBVENDOR:
3687		*result = cfg->subvendor;
3688		break;
3689	case PCI_IVAR_SUBDEVICE:
3690		*result = cfg->subdevice;
3691		break;
3692	case PCI_IVAR_VENDOR:
3693		*result = cfg->vendor;
3694		break;
3695	case PCI_IVAR_DEVICE:
3696		*result = cfg->device;
3697		break;
3698	case PCI_IVAR_DEVID:
3699		*result = (cfg->device << 16) | cfg->vendor;
3700		break;
3701	case PCI_IVAR_CLASS:
3702		*result = cfg->baseclass;
3703		break;
3704	case PCI_IVAR_SUBCLASS:
3705		*result = cfg->subclass;
3706		break;
3707	case PCI_IVAR_PROGIF:
3708		*result = cfg->progif;
3709		break;
3710	case PCI_IVAR_REVID:
3711		*result = cfg->revid;
3712		break;
3713	case PCI_IVAR_INTPIN:
3714		*result = cfg->intpin;
3715		break;
3716	case PCI_IVAR_IRQ:
3717		*result = cfg->intline;
3718		break;
3719	case PCI_IVAR_DOMAIN:
3720		*result = cfg->domain;
3721		break;
3722	case PCI_IVAR_BUS:
3723		*result = cfg->bus;
3724		break;
3725	case PCI_IVAR_SLOT:
3726		*result = cfg->slot;
3727		break;
3728	case PCI_IVAR_FUNCTION:
3729		*result = cfg->func;
3730		break;
3731	case PCI_IVAR_CMDREG:
3732		*result = cfg->cmdreg;
3733		break;
3734	case PCI_IVAR_CACHELNSZ:
3735		*result = cfg->cachelnsz;
3736		break;
3737	case PCI_IVAR_MINGNT:
3738		*result = cfg->mingnt;
3739		break;
3740	case PCI_IVAR_MAXLAT:
3741		*result = cfg->maxlat;
3742		break;
3743	case PCI_IVAR_LATTIMER:
3744		*result = cfg->lattimer;
3745		break;
3746	default:
3747		return (ENOENT);
3748	}
3749	return (0);
3750}
3751
3752int
3753pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3754{
3755	struct pci_devinfo *dinfo;
3756
3757	dinfo = device_get_ivars(child);
3758
3759	switch (which) {
3760	case PCI_IVAR_INTPIN:
3761		dinfo->cfg.intpin = value;
3762		return (0);
3763	case PCI_IVAR_ETHADDR:
3764	case PCI_IVAR_SUBVENDOR:
3765	case PCI_IVAR_SUBDEVICE:
3766	case PCI_IVAR_VENDOR:
3767	case PCI_IVAR_DEVICE:
3768	case PCI_IVAR_DEVID:
3769	case PCI_IVAR_CLASS:
3770	case PCI_IVAR_SUBCLASS:
3771	case PCI_IVAR_PROGIF:
3772	case PCI_IVAR_REVID:
3773	case PCI_IVAR_IRQ:
3774	case PCI_IVAR_DOMAIN:
3775	case PCI_IVAR_BUS:
3776	case PCI_IVAR_SLOT:
3777	case PCI_IVAR_FUNCTION:
3778		return (EINVAL);	/* disallow for now */
3779
3780	default:
3781		return (ENOENT);
3782	}
3783}
3784
3785
3786#include "opt_ddb.h"
3787#ifdef DDB
3788#include <ddb/ddb.h>
3789#include <sys/cons.h>
3790
3791/*
3792 * List resources based on pci map registers, used for within ddb
3793 */
3794
3795DB_SHOW_COMMAND(pciregs, db_pci_dump)
3796{
3797	struct pci_devinfo *dinfo;
3798	struct devlist *devlist_head;
3799	struct pci_conf *p;
3800	const char *name;
3801	int i, error, none_count;
3802
3803	none_count = 0;
3804	/* get the head of the device queue */
3805	devlist_head = &pci_devq;
3806
3807	/*
3808	 * Go through the list of devices and print out devices
3809	 */
3810	for (error = 0, i = 0,
3811	     dinfo = STAILQ_FIRST(devlist_head);
3812	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3813	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3814
3815		/* Populate pd_name and pd_unit */
3816		name = NULL;
3817		if (dinfo->cfg.dev)
3818			name = device_get_name(dinfo->cfg.dev);
3819
3820		p = &dinfo->conf;
3821		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3822			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3823			(name && *name) ? name : "none",
3824			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3825			none_count++,
3826			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3827			p->pc_sel.pc_func, (p->pc_class << 16) |
3828			(p->pc_subclass << 8) | p->pc_progif,
3829			(p->pc_subdevice << 16) | p->pc_subvendor,
3830			(p->pc_device << 16) | p->pc_vendor,
3831			p->pc_revid, p->pc_hdr);
3832	}
3833}
3834#endif /* DDB */
3835
3836static struct resource *
3837pci_reserve_map(device_t dev, device_t child, int type, int *rid,
3838    u_long start, u_long end, u_long count, u_int flags)
3839{
3840	struct pci_devinfo *dinfo = device_get_ivars(child);
3841	struct resource_list *rl = &dinfo->resources;
3842	struct resource_list_entry *rle;
3843	struct resource *res;
3844	struct pci_map *pm;
3845	pci_addr_t map, testval;
3846	int mapsize;
3847
3848	res = NULL;
3849	pm = pci_find_bar(child, *rid);
3850	if (pm != NULL) {
3851		/* This is a BAR that we failed to allocate earlier. */
3852		mapsize = pm->pm_size;
3853		map = pm->pm_value;
3854	} else {
3855		/*
3856		 * Weed out the bogons, and figure out how large the
3857		 * BAR/map is.  BARs that read back 0 here are bogus
3858		 * and unimplemented.  Note: atapci in legacy mode are
3859		 * special and handled elsewhere in the code.  If you
3860		 * have a atapci device in legacy mode and it fails
3861		 * here, that other code is broken.
3862		 */
3863		pci_read_bar(child, *rid, &map, &testval);
3864
3865		/*
3866		 * Determine the size of the BAR and ignore BARs with a size
3867		 * of 0.  Device ROM BARs use a different mask value.
3868		 */
3869		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
3870			mapsize = pci_romsize(testval);
3871		else
3872			mapsize = pci_mapsize(testval);
3873		if (mapsize == 0)
3874			goto out;
3875		pm = pci_add_bar(child, *rid, map, mapsize);
3876	}
3877
3878	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
3879		if (type != SYS_RES_MEMORY) {
3880			if (bootverbose)
3881				device_printf(dev,
3882				    "child %s requested type %d for rid %#x,"
3883				    " but the BAR says it is an memio\n",
3884				    device_get_nameunit(child), type, *rid);
3885			goto out;
3886		}
3887	} else {
3888		if (type != SYS_RES_IOPORT) {
3889			if (bootverbose)
3890				device_printf(dev,
3891				    "child %s requested type %d for rid %#x,"
3892				    " but the BAR says it is an ioport\n",
3893				    device_get_nameunit(child), type, *rid);
3894			goto out;
3895		}
3896	}
3897
3898	/*
3899	 * For real BARs, we need to override the size that
3900	 * the driver requests, because that's what the BAR
3901	 * actually uses and we would otherwise have a
3902	 * situation where we might allocate the excess to
3903	 * another driver, which won't work.
3904	 */
3905	count = (pci_addr_t)1 << mapsize;
3906	if (RF_ALIGNMENT(flags) < mapsize)
3907		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3908	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
3909		flags |= RF_PREFETCHABLE;
3910
3911	/*
3912	 * Allocate enough resource, and then write back the
3913	 * appropriate BAR for that resource.
3914	 */
3915	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3916	    start, end, count, flags & ~RF_ACTIVE);
3917	if (res == NULL) {
3918		device_printf(child,
3919		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3920		    count, *rid, type, start, end);
3921		goto out;
3922	}
3923	resource_list_add(rl, type, *rid, start, end, count);
3924	rle = resource_list_find(rl, type, *rid);
3925	if (rle == NULL)
3926		panic("pci_reserve_map: unexpectedly can't find resource.");
3927	rle->res = res;
3928	rle->start = rman_get_start(res);
3929	rle->end = rman_get_end(res);
3930	rle->count = count;
3931	rle->flags = RLE_RESERVED;
3932	if (bootverbose)
3933		device_printf(child,
3934		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3935		    count, *rid, type, rman_get_start(res));
3936	map = rman_get_start(res);
3937	pci_write_bar(child, pm, map);
3938out:;
3939	return (res);
3940}
3941
3942
3943struct resource *
3944pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3945		   u_long start, u_long end, u_long count, u_int flags)
3946{
3947	struct pci_devinfo *dinfo = device_get_ivars(child);
3948	struct resource_list *rl = &dinfo->resources;
3949	struct resource_list_entry *rle;
3950	struct resource *res;
3951	pcicfgregs *cfg = &dinfo->cfg;
3952
3953	if (device_get_parent(child) != dev)
3954		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3955		    type, rid, start, end, count, flags));
3956
3957	/*
3958	 * Perform lazy resource allocation
3959	 */
3960	switch (type) {
3961	case SYS_RES_IRQ:
3962		/*
3963		 * Can't alloc legacy interrupt once MSI messages have
3964		 * been allocated.
3965		 */
3966		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3967		    cfg->msix.msix_alloc > 0))
3968			return (NULL);
3969
3970		/*
3971		 * If the child device doesn't have an interrupt
3972		 * routed and is deserving of an interrupt, try to
3973		 * assign it one.
3974		 */
3975		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3976		    (cfg->intpin != 0))
3977			pci_assign_interrupt(dev, child, 0);
3978		break;
3979	case SYS_RES_IOPORT:
3980	case SYS_RES_MEMORY:
3981#ifdef NEW_PCIB
3982		/*
3983		 * PCI-PCI bridge I/O window resources are not BARs.
3984		 * For those allocations just pass the request up the
3985		 * tree.
3986		 */
3987		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
3988			switch (*rid) {
3989			case PCIR_IOBASEL_1:
3990			case PCIR_MEMBASE_1:
3991			case PCIR_PMBASEL_1:
3992				/*
3993				 * XXX: Should we bother creating a resource
3994				 * list entry?
3995				 */
3996				return (bus_generic_alloc_resource(dev, child,
3997				    type, rid, start, end, count, flags));
3998			}
3999		}
4000#endif
4001		/* Reserve resources for this BAR if needed. */
4002		rle = resource_list_find(rl, type, *rid);
4003		if (rle == NULL) {
4004			res = pci_reserve_map(dev, child, type, rid, start, end,
4005			    count, flags);
4006			if (res == NULL)
4007				return (NULL);
4008		}
4009	}
4010	return (resource_list_alloc(rl, dev, child, type, rid,
4011	    start, end, count, flags));
4012}
4013
4014int
4015pci_activate_resource(device_t dev, device_t child, int type, int rid,
4016    struct resource *r)
4017{
4018	struct pci_devinfo *dinfo;
4019	int error;
4020
4021	error = bus_generic_activate_resource(dev, child, type, rid, r);
4022	if (error)
4023		return (error);
4024
4025	/* Enable decoding in the command register when activating BARs. */
4026	if (device_get_parent(child) == dev) {
4027		/* Device ROMs need their decoding explicitly enabled. */
4028		dinfo = device_get_ivars(child);
4029		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4030			pci_write_bar(child, pci_find_bar(child, rid),
4031			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4032		switch (type) {
4033		case SYS_RES_IOPORT:
4034		case SYS_RES_MEMORY:
4035			error = PCI_ENABLE_IO(dev, child, type);
4036			break;
4037		}
4038	}
4039	return (error);
4040}
4041
4042int
4043pci_deactivate_resource(device_t dev, device_t child, int type,
4044    int rid, struct resource *r)
4045{
4046	struct pci_devinfo *dinfo;
4047	int error;
4048
4049	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4050	if (error)
4051		return (error);
4052
4053	/* Disable decoding for device ROMs. */
4054	if (device_get_parent(child) == dev) {
4055		dinfo = device_get_ivars(child);
4056		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4057			pci_write_bar(child, pci_find_bar(child, rid),
4058			    rman_get_start(r));
4059	}
4060	return (0);
4061}
4062
4063void
4064pci_delete_child(device_t dev, device_t child)
4065{
4066	struct resource_list_entry *rle;
4067	struct resource_list *rl;
4068	struct pci_devinfo *dinfo;
4069
4070	dinfo = device_get_ivars(child);
4071	rl = &dinfo->resources;
4072
4073	if (device_is_attached(child))
4074		device_detach(child);
4075
4076	/* Turn off access to resources we're about to free */
4077	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4078	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4079
4080	/* Free all allocated resources */
4081	STAILQ_FOREACH(rle, rl, link) {
4082		if (rle->res) {
4083			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4084			    resource_list_busy(rl, rle->type, rle->rid)) {
4085				pci_printf(&dinfo->cfg,
4086				    "Resource still owned, oops. "
4087				    "(type=%d, rid=%d, addr=%lx)\n",
4088				    rle->type, rle->rid,
4089				    rman_get_start(rle->res));
4090				bus_release_resource(child, rle->type, rle->rid,
4091				    rle->res);
4092			}
4093			resource_list_unreserve(rl, dev, child, rle->type,
4094			    rle->rid);
4095		}
4096	}
4097	resource_list_free(rl);
4098
4099	device_delete_child(dev, child);
4100	pci_freecfg(dinfo);
4101}
4102
4103void
4104pci_delete_resource(device_t dev, device_t child, int type, int rid)
4105{
4106	struct pci_devinfo *dinfo;
4107	struct resource_list *rl;
4108	struct resource_list_entry *rle;
4109
4110	if (device_get_parent(child) != dev)
4111		return;
4112
4113	dinfo = device_get_ivars(child);
4114	rl = &dinfo->resources;
4115	rle = resource_list_find(rl, type, rid);
4116	if (rle == NULL)
4117		return;
4118
4119	if (rle->res) {
4120		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4121		    resource_list_busy(rl, type, rid)) {
4122			device_printf(dev, "delete_resource: "
4123			    "Resource still owned by child, oops. "
4124			    "(type=%d, rid=%d, addr=%lx)\n",
4125			    type, rid, rman_get_start(rle->res));
4126			return;
4127		}
4128
4129#ifndef __PCI_BAR_ZERO_VALID
4130		/*
4131		 * If this is a BAR, clear the BAR so it stops
4132		 * decoding before releasing the resource.
4133		 */
4134		switch (type) {
4135		case SYS_RES_IOPORT:
4136		case SYS_RES_MEMORY:
4137			pci_write_bar(child, pci_find_bar(child, rid), 0);
4138			break;
4139		}
4140#endif
4141		resource_list_unreserve(rl, dev, child, type, rid);
4142	}
4143	resource_list_delete(rl, type, rid);
4144}
4145
4146struct resource_list *
4147pci_get_resource_list (device_t dev, device_t child)
4148{
4149	struct pci_devinfo *dinfo = device_get_ivars(child);
4150
4151	return (&dinfo->resources);
4152}
4153
4154uint32_t
4155pci_read_config_method(device_t dev, device_t child, int reg, int width)
4156{
4157	struct pci_devinfo *dinfo = device_get_ivars(child);
4158	pcicfgregs *cfg = &dinfo->cfg;
4159
4160	return (PCIB_READ_CONFIG(device_get_parent(dev),
4161	    cfg->bus, cfg->slot, cfg->func, reg, width));
4162}
4163
4164void
4165pci_write_config_method(device_t dev, device_t child, int reg,
4166    uint32_t val, int width)
4167{
4168	struct pci_devinfo *dinfo = device_get_ivars(child);
4169	pcicfgregs *cfg = &dinfo->cfg;
4170
4171	PCIB_WRITE_CONFIG(device_get_parent(dev),
4172	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4173}
4174
4175int
4176pci_child_location_str_method(device_t dev, device_t child, char *buf,
4177    size_t buflen)
4178{
4179
4180	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4181	    pci_get_function(child));
4182	return (0);
4183}
4184
4185int
4186pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4187    size_t buflen)
4188{
4189	struct pci_devinfo *dinfo;
4190	pcicfgregs *cfg;
4191
4192	dinfo = device_get_ivars(child);
4193	cfg = &dinfo->cfg;
4194	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4195	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4196	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4197	    cfg->progif);
4198	return (0);
4199}
4200
4201int
4202pci_assign_interrupt_method(device_t dev, device_t child)
4203{
4204	struct pci_devinfo *dinfo = device_get_ivars(child);
4205	pcicfgregs *cfg = &dinfo->cfg;
4206
4207	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4208	    cfg->intpin));
4209}
4210
4211static int
4212pci_modevent(module_t mod, int what, void *arg)
4213{
4214	static struct cdev *pci_cdev;
4215
4216	switch (what) {
4217	case MOD_LOAD:
4218		STAILQ_INIT(&pci_devq);
4219		pci_generation = 0;
4220		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4221		    "pci");
4222		pci_load_vendor_data();
4223		break;
4224
4225	case MOD_UNLOAD:
4226		destroy_dev(pci_cdev);
4227		break;
4228	}
4229
4230	return (0);
4231}
4232
4233void
4234pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4235{
4236
4237	/*
4238	 * Only do header type 0 devices.  Type 1 devices are bridges,
4239	 * which we know need special treatment.  Type 2 devices are
4240	 * cardbus bridges which also require special treatment.
4241	 * Other types are unknown, and we err on the side of safety
4242	 * by ignoring them.
4243	 */
4244	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4245		return;
4246
4247	/*
4248	 * Restore the device to full power mode.  We must do this
4249	 * before we restore the registers because moving from D3 to
4250	 * D0 will cause the chip's BARs and some other registers to
4251	 * be reset to some unknown power on reset values.  Cut down
4252	 * the noise on boot by doing nothing if we are already in
4253	 * state D0.
4254	 */
4255	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
4256		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4257	pci_restore_bars(dev);
4258	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4259	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4260	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4261	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4262	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4263	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4264	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4265	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4266	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4267
4268	/* Restore MSI and MSI-X configurations if they are present. */
4269	if (dinfo->cfg.msi.msi_location != 0)
4270		pci_resume_msi(dev);
4271	if (dinfo->cfg.msix.msix_location != 0)
4272		pci_resume_msix(dev);
4273}
4274
4275void
4276pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4277{
4278	uint32_t cls;
4279	int ps;
4280
4281	/*
4282	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4283	 * we know need special treatment.  Type 2 devices are cardbus bridges
4284	 * which also require special treatment.  Other types are unknown, and
4285	 * we err on the side of safety by ignoring them.  Powering down
4286	 * bridges should not be undertaken lightly.
4287	 */
4288	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4289		return;
4290
4291	/*
4292	 * Some drivers apparently write to these registers w/o updating our
4293	 * cached copy.  No harm happens if we update the copy, so do so here
4294	 * so we can restore them.  The COMMAND register is modified by the
4295	 * bus w/o updating the cache.  This should represent the normally
4296	 * writable portion of the 'defined' part of type 0 headers.  In
4297	 * theory we also need to save/restore the PCI capability structures
4298	 * we know about, but apart from power we don't know any that are
4299	 * writable.
4300	 */
4301	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4302	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4303	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4304	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4305	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4306	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4307	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4308	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4309	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4310	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4311	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4312	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4313	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4314	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4315	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4316
4317	/*
4318	 * don't set the state for display devices, base peripherals and
4319	 * memory devices since bad things happen when they are powered down.
4320	 * We should (a) have drivers that can easily detach and (b) use
4321	 * generic drivers for these devices so that some device actually
4322	 * attaches.  We need to make sure that when we implement (a) we don't
4323	 * power the device down on a reattach.
4324	 */
4325	cls = pci_get_class(dev);
4326	if (!setstate)
4327		return;
4328	switch (pci_do_power_nodriver)
4329	{
4330		case 0:		/* NO powerdown at all */
4331			return;
4332		case 1:		/* Conservative about what to power down */
4333			if (cls == PCIC_STORAGE)
4334				return;
4335			/*FALLTHROUGH*/
4336		case 2:		/* Agressive about what to power down */
4337			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4338			    cls == PCIC_BASEPERIPH)
4339				return;
4340			/*FALLTHROUGH*/
4341		case 3:		/* Power down everything */
4342			break;
4343	}
4344	/*
4345	 * PCI spec says we can only go into D3 state from D0 state.
4346	 * Transition from D[12] into D0 before going to D3 state.
4347	 */
4348	ps = pci_get_powerstate(dev);
4349	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4350		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4351	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4352		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4353}
4354