pci.c revision 223371
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 223371 2011-06-21 19:31:31Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
/*
 * True if config register 'reg' is the expansion-ROM (BIOS) BAR for the
 * given header type.  Type 0 (normal) devices keep it at PCIR_BIOS;
 * type 1 (PCI-PCI bridge) headers keep it at PCIR_BIOS_1.
 *
 * Fix: parenthesize the 'reg' macro argument so that expression
 * arguments (e.g. "base + off") compare correctly.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
75
76
/* BAR / ROM map register decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

/* Resource decoding/assignment and newbus driver entry points. */
static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);

/* Vital Product Data (VPD) access. */
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);

/* MSI / MSI-X management. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
121
/*
 * newbus method table for the PCI bus driver.  Entries fall into three
 * groups: generic device lifecycle, bus (resource/child management)
 * methods, and the PCI-specific kobj interface from pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Table terminator. */
	{ 0, 0 }
};
175
/* Declare the "pci" driver class and attach it below pcib (PCI bridges). */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Buffer holding vendor/device description data; filled in by
 * pci_load_vendor_data() (defined later in this file).
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
184
185
/* One entry in the device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* codes below. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* Quirk-specific argument (e.g. the map register). */
	int	arg2;
};
195
/* Table of known-broken (or known-good) devices, keyed by devid. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* Terminator. */
};
236
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* List of every enumerated PCI function; appended to by pci_read_device(). */
struct devlist pci_devq;
/* Incremented whenever a device is added, so consumers can detect changes. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set during capability scan when a PCIe device / PCI-X bridge is found. */
static int pcie_chipset, pcix_chipset;
246
247/* sysctl vars */
248SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
249
250static int pci_enable_io_modes = 1;
251TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
252SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
253    &pci_enable_io_modes, 1,
254    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
255enable these bits correctly.  We'd like to do this all the time, but there\n\
256are some peripherals that this causes problems with.");
257
258static int pci_do_power_nodriver = 0;
259TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
260SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
261    &pci_do_power_nodriver, 0,
262  "Place a function into D3 state when no driver attaches to it.  0 means\n\
263disable.  1 means conservatively place devices into D3 state.  2 means\n\
264agressively place devices into D3 state.  3 means put absolutely everything\n\
265in D3 state.");
266
267int pci_do_power_resume = 1;
268TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
269SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
270    &pci_do_power_resume, 1,
271  "Transition from D3 -> D0 on resume.");
272
273int pci_do_power_suspend = 1;
274TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
275SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
276    &pci_do_power_suspend, 1,
277  "Transition from D0 -> D3 on suspend.");
278
279static int pci_do_msi = 1;
280TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
281SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
282    "Enable support for MSI interrupts");
283
284static int pci_do_msix = 1;
285TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
286SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
287    "Enable support for MSI-X interrupts");
288
289static int pci_honor_msi_blacklist = 1;
290TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
291SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
292    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
293
294#if defined(__i386__) || defined(__amd64__)
295static int pci_usb_takeover = 1;
296#else
297static int pci_usb_takeover = 0;
298#endif
299TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
300SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
301    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
302Disable this if you depend on BIOS emulation of USB devices, that is\n\
303you use USB devices (like keyboard or mouse) but do not load USB drivers");
304
305/* Find a device_t by bus/slot/function in domain 0 */
306
307device_t
308pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
309{
310
311	return (pci_find_dbsf(0, bus, slot, func));
312}
313
314/* Find a device_t by domain/bus/slot/function */
315
316device_t
317pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
318{
319	struct pci_devinfo *dinfo;
320
321	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322		if ((dinfo->cfg.domain == domain) &&
323		    (dinfo->cfg.bus == bus) &&
324		    (dinfo->cfg.slot == slot) &&
325		    (dinfo->cfg.func == func)) {
326			return (dinfo->cfg.dev);
327		}
328	}
329
330	return (NULL);
331}
332
333/* Find a device_t by vendor/device ID */
334
335device_t
336pci_find_device(uint16_t vendor, uint16_t device)
337{
338	struct pci_devinfo *dinfo;
339
340	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
341		if ((dinfo->cfg.vendor == vendor) &&
342		    (dinfo->cfg.device == device)) {
343			return (dinfo->cfg.dev);
344		}
345	}
346
347	return (NULL);
348}
349
350static int
351pci_printf(pcicfgregs *cfg, const char *fmt, ...)
352{
353	va_list ap;
354	int retval;
355
356	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
357	    cfg->func);
358	va_start(ap, fmt);
359	retval += vprintf(fmt, ap);
360	va_end(ap);
361	return (retval);
362}
363
364/* return base address of memory or port map */
365
366static pci_addr_t
367pci_mapbase(uint64_t mapreg)
368{
369
370	if (PCI_BAR_MEM(mapreg))
371		return (mapreg & PCIM_BAR_MEM_BASE);
372	else
373		return (mapreg & PCIM_BAR_IO_BASE);
374}
375
376/* return map type of memory or port map */
377
378static const char *
379pci_maptype(uint64_t mapreg)
380{
381
382	if (PCI_BAR_IO(mapreg))
383		return ("I/O Port");
384	if (mapreg & PCIM_BAR_MEM_PREFETCH)
385		return ("Prefetchable Memory");
386	return ("Memory");
387}
388
389/* return log2 of map size decoded for memory or port map */
390
391static int
392pci_mapsize(uint64_t testval)
393{
394	int ln2size;
395
396	testval = pci_mapbase(testval);
397	ln2size = 0;
398	if (testval != 0) {
399		while ((testval & 1) == 0)
400		{
401			ln2size++;
402			testval >>= 1;
403		}
404	}
405	return (ln2size);
406}
407
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
416
417/* return log2 of map size decided for device ROM */
418
419static int
420pci_romsize(uint64_t testval)
421{
422	int ln2size;
423
424	testval = pci_rombase(testval);
425	ln2size = 0;
426	if (testval != 0) {
427		while ((testval & 1) == 0)
428		{
429			ln2size++;
430			testval >>= 1;
431		}
432	}
433	return (ln2size);
434}
435
436/* return log2 of address range supported by map register */
437
438static int
439pci_maprange(uint64_t mapreg)
440{
441	int ln2range = 0;
442
443	if (PCI_BAR_IO(mapreg))
444		ln2range = 32;
445	else
446		switch (mapreg & PCIM_BAR_MEM_TYPE) {
447		case PCIM_BAR_MEM_32:
448			ln2range = 32;
449			break;
450		case PCIM_BAR_MEM_1MB:
451			ln2range = 20;
452			break;
453		case PCIM_BAR_MEM_64:
454			ln2range = 64;
455			break;
456		}
457	return (ln2range);
458}
459
460/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
461
462static void
463pci_fixancient(pcicfgregs *cfg)
464{
465	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
466		return;
467
468	/* PCI to PCI bridges use header type 1 */
469	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
470		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
471}
472
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * Subsystem IDs and the number of BARs live at different config
	 * offsets depending on the header type.  Type 1 (bridge) headers
	 * have no standard subvendor/subdevice registers here; those may
	 * be filled in later from the PCIY_SUBVENDOR capability.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
496
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device register means "no device here". */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/*
		 * 'size' lets callers allocate a structure larger than
		 * struct pci_devinfo with extra trailing space.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common (type-independent) config header. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror into the pciconf(8)-visible pci_conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
572
/*
 * Walk the device's capability list and record the location/contents of
 * the capabilities this driver cares about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express).
 * Note: the REG/WREG macros defined here deliberately remain defined
 * for the VPD helpers that follow (#undef is at the end of
 * pci_read_vpd()).
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capabilities pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Multiple-message-capable field is log2-encoded. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table/PBA registers encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
730
731/*
732 * PCI Vital Product Data
733 */
734
#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one 32-bit dword of VPD at offset 'reg'.  Writes the address to
 * the VPD address register and then polls for the hardware to set the
 * completion flag (bit 15).  Returns 0 on success with the dword in
 * *data, or ENXIO if the device never completes within the timeout.
 * (Uses the REG/WREG macros still in effect from pci_read_cap().)
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Bit 15 is set by hardware when the read data is valid. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
755
#if 0
/*
 * Write one 32-bit dword of VPD at offset 'reg'.  Mirrors
 * pci_read_vpd_reg(): the write is started by setting bit 15 of the
 * address register, and hardware clears it on completion.  Currently
 * unused (compiled out).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Hardware clears bit 15 when the write has been accepted. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
775
776#undef PCI_VPD_TIMEOUT
777
/* Streaming-read cursor over a device's VPD, used by vpd_nextbyte(). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword fetched */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* next dword-aligned VPD offset */
	uint8_t		cksum;		/* running sum; must end at 0 */
};
786
787static int
788vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
789{
790	uint32_t reg;
791	uint8_t byte;
792
793	if (vrs->bytesinval == 0) {
794		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
795			return (ENXIO);
796		vrs->val = le32toh(reg);
797		vrs->off += 4;
798		byte = vrs->val & 0xff;
799		vrs->bytesinval = 3;
800	} else {
801		vrs->val = vrs->val >> 8;
802		byte = vrs->val & 0xff;
803		vrs->bytesinval--;
804	}
805
806	vrs->cksum += byte;
807	*data = byte;
808	return (0);
809}
810
/*
 * Parse the device's Vital Product Data into cfg->vpd.  VPD is a tagged
 * stream: an identifier string, a read-only (VPD-R) keyword section
 * whose "RV" keyword carries a checksum, and an optional read/write
 * (VPD-W) section.  Implemented as a byte-at-a-time state machine:
 *
 *   state 0  - tag/item name
 *   state 1  - identifier string bytes
 *   state 2  - VPD-R keyword header
 *   state 3  - VPD-R keyword value bytes
 *   state 4  - skip bytes
 *   state 5  - VPD-W keyword header
 *   state 6  - VPD-W keyword value bytes
 *   state -1 - normal termination
 *   state -2 - I/O error
 *
 * On checksum failure the read-only data is discarded; on I/O error
 * everything is discarded.  cfg->vpd.vpd_cached is set either way so
 * the (possibly failed) parse is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD cannot extend past 0x7f dwords. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" carries the checksum byte: after adding it,
			 * the running sum over all VPD-R bytes must be 0.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				/* Trim the array down to its final size. */
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember the VPD offset so the value can be rewritten. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1090
1091int
1092pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1093{
1094	struct pci_devinfo *dinfo = device_get_ivars(child);
1095	pcicfgregs *cfg = &dinfo->cfg;
1096
1097	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1098		pci_read_vpd(device_get_parent(dev), cfg);
1099
1100	*identptr = cfg->vpd.vpd_ident;
1101
1102	if (*identptr == NULL)
1103		return (ENXIO);
1104
1105	return (0);
1106}
1107
1108int
1109pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1110	const char **vptr)
1111{
1112	struct pci_devinfo *dinfo = device_get_ivars(child);
1113	pcicfgregs *cfg = &dinfo->cfg;
1114	int i;
1115
1116	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1117		pci_read_vpd(device_get_parent(dev), cfg);
1118
1119	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1120		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1121		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1122			*vptr = cfg->vpd.vpd_ros[i].value;
1123		}
1124
1125	if (i != cfg->vpd.vpd_rocnt)
1126		return (0);
1127
1128	*vptr = NULL;
1129	return (ENXIO);
1130}
1131
1132/*
1133 * Find the requested extended capability and return the offset in
1134 * configuration space via the pointer provided. The function returns
1135 * 0 on success and error code otherwise.
1136 */
1137int
1138pci_find_extcap_method(device_t dev, device_t child, int capability,
1139    int *capreg)
1140{
1141	struct pci_devinfo *dinfo = device_get_ivars(child);
1142	pcicfgregs *cfg = &dinfo->cfg;
1143	u_int32_t status;
1144	u_int8_t ptr;
1145
1146	/*
1147	 * Check the CAP_LIST bit of the PCI status register first.
1148	 */
1149	status = pci_read_config(child, PCIR_STATUS, 2);
1150	if (!(status & PCIM_STATUS_CAPPRESENT))
1151		return (ENXIO);
1152
1153	/*
1154	 * Determine the start pointer of the capabilities list.
1155	 */
1156	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1157	case PCIM_HDRTYPE_NORMAL:
1158	case PCIM_HDRTYPE_BRIDGE:
1159		ptr = PCIR_CAP_PTR;
1160		break;
1161	case PCIM_HDRTYPE_CARDBUS:
1162		ptr = PCIR_CAP_PTR_2;
1163		break;
1164	default:
1165		/* XXX: panic? */
1166		return (ENXIO);		/* no extended capabilities support */
1167	}
1168	ptr = pci_read_config(child, ptr, 1);
1169
1170	/*
1171	 * Traverse the capabilities list.
1172	 */
1173	while (ptr != 0) {
1174		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1175			if (capreg != NULL)
1176				*capreg = ptr;
1177			return (0);
1178		}
1179		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1180	}
1181
1182	return (ENOENT);
1183}
1184
1185/*
1186 * Support for MSI-X message interrupts.
1187 */
1188void
1189pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1190{
1191	struct pci_devinfo *dinfo = device_get_ivars(dev);
1192	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1193	uint32_t offset;
1194
1195	KASSERT(msix->msix_table_len > index, ("bogus index"));
1196	offset = msix->msix_table_offset + index * 16;
1197	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1198	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1199	bus_write_4(msix->msix_table_res, offset + 8, data);
1200
1201	/* Enable MSI -> HT mapping. */
1202	pci_ht_map_msi(dev, address);
1203}
1204
1205void
1206pci_mask_msix(device_t dev, u_int index)
1207{
1208	struct pci_devinfo *dinfo = device_get_ivars(dev);
1209	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1210	uint32_t offset, val;
1211
1212	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1213	offset = msix->msix_table_offset + index * 16 + 12;
1214	val = bus_read_4(msix->msix_table_res, offset);
1215	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1216		val |= PCIM_MSIX_VCTRL_MASK;
1217		bus_write_4(msix->msix_table_res, offset, val);
1218	}
1219}
1220
1221void
1222pci_unmask_msix(device_t dev, u_int index)
1223{
1224	struct pci_devinfo *dinfo = device_get_ivars(dev);
1225	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1226	uint32_t offset, val;
1227
1228	KASSERT(msix->msix_table_len > index, ("bogus index"));
1229	offset = msix->msix_table_offset + index * 16 + 12;
1230	val = bus_read_4(msix->msix_table_res, offset);
1231	if (val & PCIM_MSIX_VCTRL_MASK) {
1232		val &= ~PCIM_MSIX_VCTRL_MASK;
1233		bus_write_4(msix->msix_table_res, offset, val);
1234	}
1235}
1236
1237int
1238pci_pending_msix(device_t dev, u_int index)
1239{
1240	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1242	uint32_t offset, bit;
1243
1244	KASSERT(msix->msix_table_len > index, ("bogus index"));
1245	offset = msix->msix_pba_offset + (index / 32) * 4;
1246	bit = 1 << index % 32;
1247	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1248}
1249
1250/*
1251 * Restore MSI-X registers and table during resume.  If MSI-X is
1252 * enabled then walk the virtual table to restore the actual MSI-X
1253 * table.
1254 */
1255static void
1256pci_resume_msix(device_t dev)
1257{
1258	struct pci_devinfo *dinfo = device_get_ivars(dev);
1259	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1260	struct msix_table_entry *mte;
1261	struct msix_vector *mv;
1262	int i;
1263
1264	if (msix->msix_alloc > 0) {
1265		/* First, mask all vectors. */
1266		for (i = 0; i < msix->msix_msgnum; i++)
1267			pci_mask_msix(dev, i);
1268
1269		/* Second, program any messages with at least one handler. */
1270		for (i = 0; i < msix->msix_table_len; i++) {
1271			mte = &msix->msix_table[i];
1272			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1273				continue;
1274			mv = &msix->msix_vectors[mte->mte_vector - 1];
1275			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1276			pci_unmask_msix(dev, i);
1277		}
1278	}
1279	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1280	    msix->msix_ctrl, 2);
1281}
1282
1283/*
1284 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1285 * returned in *count.  After this function returns, each message will be
1286 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1287 */
1288int
1289pci_alloc_msix_method(device_t dev, device_t child, int *count)
1290{
1291	struct pci_devinfo *dinfo = device_get_ivars(child);
1292	pcicfgregs *cfg = &dinfo->cfg;
1293	struct resource_list_entry *rle;
1294	int actual, error, i, irq, max;
1295
1296	/* Don't let count == 0 get us into trouble. */
1297	if (*count == 0)
1298		return (EINVAL);
1299
1300	/* If rid 0 is allocated, then fail. */
1301	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1302	if (rle != NULL && rle->res != NULL)
1303		return (ENXIO);
1304
1305	/* Already have allocated messages? */
1306	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1307		return (ENXIO);
1308
1309	/* If MSI is blacklisted for this system, fail. */
1310	if (pci_msi_blacklisted())
1311		return (ENXIO);
1312
1313	/* MSI-X capability present? */
1314	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1315		return (ENODEV);
1316
1317	/* Make sure the appropriate BARs are mapped. */
1318	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1319	    cfg->msix.msix_table_bar);
1320	if (rle == NULL || rle->res == NULL ||
1321	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1322		return (ENXIO);
1323	cfg->msix.msix_table_res = rle->res;
1324	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1325		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1326		    cfg->msix.msix_pba_bar);
1327		if (rle == NULL || rle->res == NULL ||
1328		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1329			return (ENXIO);
1330	}
1331	cfg->msix.msix_pba_res = rle->res;
1332
1333	if (bootverbose)
1334		device_printf(child,
1335		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1336		    *count, cfg->msix.msix_msgnum);
1337	max = min(*count, cfg->msix.msix_msgnum);
1338	for (i = 0; i < max; i++) {
1339		/* Allocate a message. */
1340		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1341		if (error)
1342			break;
1343		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1344		    irq, 1);
1345	}
1346	actual = i;
1347
1348	if (bootverbose) {
1349		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1350		if (actual == 1)
1351			device_printf(child, "using IRQ %lu for MSI-X\n",
1352			    rle->start);
1353		else {
1354			int run;
1355
1356			/*
1357			 * Be fancy and try to print contiguous runs of
1358			 * IRQ values as ranges.  'irq' is the previous IRQ.
1359			 * 'run' is true if we are in a range.
1360			 */
1361			device_printf(child, "using IRQs %lu", rle->start);
1362			irq = rle->start;
1363			run = 0;
1364			for (i = 1; i < actual; i++) {
1365				rle = resource_list_find(&dinfo->resources,
1366				    SYS_RES_IRQ, i + 1);
1367
1368				/* Still in a run? */
1369				if (rle->start == irq + 1) {
1370					run = 1;
1371					irq++;
1372					continue;
1373				}
1374
1375				/* Finish previous range. */
1376				if (run) {
1377					printf("-%d", irq);
1378					run = 0;
1379				}
1380
1381				/* Start new range. */
1382				printf(",%lu", rle->start);
1383				irq = rle->start;
1384			}
1385
1386			/* Unfinished range? */
1387			if (run)
1388				printf("-%d", irq);
1389			printf(" for MSI-X\n");
1390		}
1391	}
1392
1393	/* Mask all vectors. */
1394	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1395		pci_mask_msix(child, i);
1396
1397	/* Allocate and initialize vector data and virtual table. */
1398	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1399	    M_DEVBUF, M_WAITOK | M_ZERO);
1400	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1401	    M_DEVBUF, M_WAITOK | M_ZERO);
1402	for (i = 0; i < actual; i++) {
1403		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1404		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1405		cfg->msix.msix_table[i].mte_vector = i + 1;
1406	}
1407
1408	/* Update control register to enable MSI-X. */
1409	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1410	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1411	    cfg->msix.msix_ctrl, 2);
1412
1413	/* Update counts of alloc'd messages. */
1414	cfg->msix.msix_alloc = actual;
1415	cfg->msix.msix_table_len = actual;
1416	*count = actual;
1417	return (0);
1418}
1419
1420/*
1421 * By default, pci_alloc_msix() will assign the allocated IRQ
1422 * resources consecutively to the first N messages in the MSI-X table.
1423 * However, device drivers may want to use different layouts if they
1424 * either receive fewer messages than they asked for, or they wish to
1425 * populate the MSI-X table sparsely.  This method allows the driver
1426 * to specify what layout it wants.  It must be called after a
1427 * successful pci_alloc_msix() but before any of the associated
1428 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1429 *
1430 * The 'vectors' array contains 'count' message vectors.  The array
1431 * maps directly to the MSI-X table in that index 0 in the array
1432 * specifies the vector for the first message in the MSI-X table, etc.
1433 * The vector value in each array index can either be 0 to indicate
1434 * that no vector should be assigned to a message slot, or it can be a
1435 * number from 1 to N (where N is the count returned from a
1436 * succcessful call to pci_alloc_msix()) to indicate which message
1437 * vector (IRQ) to be used for the corresponding message.
1438 *
1439 * On successful return, each message with a non-zero vector will have
1440 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1441 * 1.  Additionally, if any of the IRQs allocated via the previous
1442 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1443 * will be freed back to the system automatically.
1444 *
1445 * For example, suppose a driver has a MSI-X table with 6 messages and
1446 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1447 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1448 * C.  After the call to pci_alloc_msix(), the device will be setup to
1449 * have an MSI-X table of ABC--- (where - means no vector assigned).
1450 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1451 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1452 * be freed back to the system.  This device will also have valid
1453 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1454 *
1455 * In any case, the SYS_RES_IRQ rid X will always map to the message
1456 * at MSI-X table index X - 1 and will only be valid if a vector is
1457 * assigned to that table entry.
1458 */
1459int
1460pci_remap_msix_method(device_t dev, device_t child, int count,
1461    const u_int *vectors)
1462{
1463	struct pci_devinfo *dinfo = device_get_ivars(child);
1464	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465	struct resource_list_entry *rle;
1466	int i, irq, j, *used;
1467
1468	/*
1469	 * Have to have at least one message in the table but the
1470	 * table can't be bigger than the actual MSI-X table in the
1471	 * device.
1472	 */
1473	if (count == 0 || count > msix->msix_msgnum)
1474		return (EINVAL);
1475
1476	/* Sanity check the vectors. */
1477	for (i = 0; i < count; i++)
1478		if (vectors[i] > msix->msix_alloc)
1479			return (EINVAL);
1480
1481	/*
1482	 * Make sure there aren't any holes in the vectors to be used.
1483	 * It's a big pain to support it, and it doesn't really make
1484	 * sense anyway.  Also, at least one vector must be used.
1485	 */
1486	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1487	    M_ZERO);
1488	for (i = 0; i < count; i++)
1489		if (vectors[i] != 0)
1490			used[vectors[i] - 1] = 1;
1491	for (i = 0; i < msix->msix_alloc - 1; i++)
1492		if (used[i] == 0 && used[i + 1] == 1) {
1493			free(used, M_DEVBUF);
1494			return (EINVAL);
1495		}
1496	if (used[0] != 1) {
1497		free(used, M_DEVBUF);
1498		return (EINVAL);
1499	}
1500
1501	/* Make sure none of the resources are allocated. */
1502	for (i = 0; i < msix->msix_table_len; i++) {
1503		if (msix->msix_table[i].mte_vector == 0)
1504			continue;
1505		if (msix->msix_table[i].mte_handlers > 0)
1506			return (EBUSY);
1507		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1508		KASSERT(rle != NULL, ("missing resource"));
1509		if (rle->res != NULL)
1510			return (EBUSY);
1511	}
1512
1513	/* Free the existing resource list entries. */
1514	for (i = 0; i < msix->msix_table_len; i++) {
1515		if (msix->msix_table[i].mte_vector == 0)
1516			continue;
1517		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1518	}
1519
1520	/*
1521	 * Build the new virtual table keeping track of which vectors are
1522	 * used.
1523	 */
1524	free(msix->msix_table, M_DEVBUF);
1525	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1526	    M_DEVBUF, M_WAITOK | M_ZERO);
1527	for (i = 0; i < count; i++)
1528		msix->msix_table[i].mte_vector = vectors[i];
1529	msix->msix_table_len = count;
1530
1531	/* Free any unused IRQs and resize the vectors array if necessary. */
1532	j = msix->msix_alloc - 1;
1533	if (used[j] == 0) {
1534		struct msix_vector *vec;
1535
1536		while (used[j] == 0) {
1537			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1538			    msix->msix_vectors[j].mv_irq);
1539			j--;
1540		}
1541		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1542		    M_WAITOK);
1543		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1544		    (j + 1));
1545		free(msix->msix_vectors, M_DEVBUF);
1546		msix->msix_vectors = vec;
1547		msix->msix_alloc = j + 1;
1548	}
1549	free(used, M_DEVBUF);
1550
1551	/* Map the IRQs onto the rids. */
1552	for (i = 0; i < count; i++) {
1553		if (vectors[i] == 0)
1554			continue;
1555		irq = msix->msix_vectors[vectors[i]].mv_irq;
1556		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1557		    irq, 1);
1558	}
1559
1560	if (bootverbose) {
1561		device_printf(child, "Remapped MSI-X IRQs as: ");
1562		for (i = 0; i < count; i++) {
1563			if (i != 0)
1564				printf(", ");
1565			if (vectors[i] == 0)
1566				printf("---");
1567			else
1568				printf("%d",
1569				    msix->msix_vectors[vectors[i]].mv_irq);
1570		}
1571		printf("\n");
1572	}
1573
1574	return (0);
1575}
1576
1577static int
1578pci_release_msix(device_t dev, device_t child)
1579{
1580	struct pci_devinfo *dinfo = device_get_ivars(child);
1581	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1582	struct resource_list_entry *rle;
1583	int i;
1584
1585	/* Do we have any messages to release? */
1586	if (msix->msix_alloc == 0)
1587		return (ENODEV);
1588
1589	/* Make sure none of the resources are allocated. */
1590	for (i = 0; i < msix->msix_table_len; i++) {
1591		if (msix->msix_table[i].mte_vector == 0)
1592			continue;
1593		if (msix->msix_table[i].mte_handlers > 0)
1594			return (EBUSY);
1595		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1596		KASSERT(rle != NULL, ("missing resource"));
1597		if (rle->res != NULL)
1598			return (EBUSY);
1599	}
1600
1601	/* Update control register to disable MSI-X. */
1602	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1603	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1604	    msix->msix_ctrl, 2);
1605
1606	/* Free the resource list entries. */
1607	for (i = 0; i < msix->msix_table_len; i++) {
1608		if (msix->msix_table[i].mte_vector == 0)
1609			continue;
1610		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1611	}
1612	free(msix->msix_table, M_DEVBUF);
1613	msix->msix_table_len = 0;
1614
1615	/* Release the IRQs. */
1616	for (i = 0; i < msix->msix_alloc; i++)
1617		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1618		    msix->msix_vectors[i].mv_irq);
1619	free(msix->msix_vectors, M_DEVBUF);
1620	msix->msix_alloc = 0;
1621	return (0);
1622}
1623
1624/*
1625 * Return the max supported MSI-X messages this device supports.
1626 * Basically, assuming the MD code can alloc messages, this function
1627 * should return the maximum value that pci_alloc_msix() can return.
1628 * Thus, it is subject to the tunables, etc.
1629 */
1630int
1631pci_msix_count_method(device_t dev, device_t child)
1632{
1633	struct pci_devinfo *dinfo = device_get_ivars(child);
1634	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635
1636	if (pci_do_msix && msix->msix_location != 0)
1637		return (msix->msix_msgnum);
1638	return (0);
1639}
1640
1641/*
1642 * HyperTransport MSI mapping control
1643 */
1644void
1645pci_ht_map_msi(device_t dev, uint64_t addr)
1646{
1647	struct pci_devinfo *dinfo = device_get_ivars(dev);
1648	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1649
1650	if (!ht->ht_msimap)
1651		return;
1652
1653	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1654	    ht->ht_msiaddr >> 20 == addr >> 20) {
1655		/* Enable MSI -> HT mapping. */
1656		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1657		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1658		    ht->ht_msictrl, 2);
1659	}
1660
1661	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1662		/* Disable MSI -> HT mapping. */
1663		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1664		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1665		    ht->ht_msictrl, 2);
1666	}
1667}
1668
1669int
1670pci_get_max_read_req(device_t dev)
1671{
1672	int cap;
1673	uint16_t val;
1674
1675	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1676		return (0);
1677	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1678	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1679	val >>= 12;
1680	return (1 << (val + 7));
1681}
1682
1683int
1684pci_set_max_read_req(device_t dev, int size)
1685{
1686	int cap;
1687	uint16_t val;
1688
1689	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1690		return (0);
1691	if (size < 128)
1692		size = 128;
1693	if (size > 4096)
1694		size = 4096;
1695	size = (1 << (fls(size) - 1));
1696	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1697	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1698	val |= (fls(size) - 8) << 12;
1699	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1700	return (size);
1701}
1702
1703/*
1704 * Support for MSI message signalled interrupts.
1705 */
1706void
1707pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1708{
1709	struct pci_devinfo *dinfo = device_get_ivars(dev);
1710	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1711
1712	/* Write data and address values. */
1713	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1714	    address & 0xffffffff, 4);
1715	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1716		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1717		    address >> 32, 4);
1718		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1719		    data, 2);
1720	} else
1721		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1722		    2);
1723
1724	/* Enable MSI in the control register. */
1725	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1726	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1727	    2);
1728
1729	/* Enable MSI -> HT mapping. */
1730	pci_ht_map_msi(dev, address);
1731}
1732
1733void
1734pci_disable_msi(device_t dev)
1735{
1736	struct pci_devinfo *dinfo = device_get_ivars(dev);
1737	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1738
1739	/* Disable MSI -> HT mapping. */
1740	pci_ht_map_msi(dev, 0);
1741
1742	/* Disable MSI in the control register. */
1743	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1744	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1745	    2);
1746}
1747
1748/*
1749 * Restore MSI registers during resume.  If MSI is enabled then
1750 * restore the data and address registers in addition to the control
1751 * register.
1752 */
1753static void
1754pci_resume_msi(device_t dev)
1755{
1756	struct pci_devinfo *dinfo = device_get_ivars(dev);
1757	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1758	uint64_t address;
1759	uint16_t data;
1760
1761	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1762		address = msi->msi_addr;
1763		data = msi->msi_data;
1764		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1765		    address & 0xffffffff, 4);
1766		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1767			pci_write_config(dev, msi->msi_location +
1768			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1769			pci_write_config(dev, msi->msi_location +
1770			    PCIR_MSI_DATA_64BIT, data, 2);
1771		} else
1772			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1773			    data, 2);
1774	}
1775	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1776	    2);
1777}
1778
1779static int
1780pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1781{
1782	struct pci_devinfo *dinfo = device_get_ivars(dev);
1783	pcicfgregs *cfg = &dinfo->cfg;
1784	struct resource_list_entry *rle;
1785	struct msix_table_entry *mte;
1786	struct msix_vector *mv;
1787	uint64_t addr;
1788	uint32_t data;
1789	int error, i, j;
1790
1791	/*
1792	 * Handle MSI first.  We try to find this IRQ among our list
1793	 * of MSI IRQs.  If we find it, we request updated address and
1794	 * data registers and apply the results.
1795	 */
1796	if (cfg->msi.msi_alloc > 0) {
1797
1798		/* If we don't have any active handlers, nothing to do. */
1799		if (cfg->msi.msi_handlers == 0)
1800			return (0);
1801		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1802			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1803			    i + 1);
1804			if (rle->start == irq) {
1805				error = PCIB_MAP_MSI(device_get_parent(bus),
1806				    dev, irq, &addr, &data);
1807				if (error)
1808					return (error);
1809				pci_disable_msi(dev);
1810				dinfo->cfg.msi.msi_addr = addr;
1811				dinfo->cfg.msi.msi_data = data;
1812				pci_enable_msi(dev, addr, data);
1813				return (0);
1814			}
1815		}
1816		return (ENOENT);
1817	}
1818
1819	/*
1820	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1821	 * we request the updated mapping info.  If that works, we go
1822	 * through all the slots that use this IRQ and update them.
1823	 */
1824	if (cfg->msix.msix_alloc > 0) {
1825		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1826			mv = &cfg->msix.msix_vectors[i];
1827			if (mv->mv_irq == irq) {
1828				error = PCIB_MAP_MSI(device_get_parent(bus),
1829				    dev, irq, &addr, &data);
1830				if (error)
1831					return (error);
1832				mv->mv_address = addr;
1833				mv->mv_data = data;
1834				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1835					mte = &cfg->msix.msix_table[j];
1836					if (mte->mte_vector != i + 1)
1837						continue;
1838					if (mte->mte_handlers == 0)
1839						continue;
1840					pci_mask_msix(dev, j);
1841					pci_enable_msix(dev, j, addr, data);
1842					pci_unmask_msix(dev, j);
1843				}
1844			}
1845		}
1846		return (ENOENT);
1847	}
1848
1849	return (ENOENT);
1850}
1851
1852/*
1853 * Returns true if the specified device is blacklisted because MSI
1854 * doesn't work.
1855 */
1856int
1857pci_msi_device_blacklisted(device_t dev)
1858{
1859	struct pci_quirk *q;
1860
1861	if (!pci_honor_msi_blacklist)
1862		return (0);
1863
1864	for (q = &pci_quirks[0]; q->devid; q++) {
1865		if (q->devid == pci_get_devid(dev) &&
1866		    q->type == PCI_QUIRK_DISABLE_MSI)
1867			return (1);
1868	}
1869	return (0);
1870}
1871
1872/*
1873 * Returns true if a specified chipset supports MSI when it is
1874 * emulated hardware in a virtual machine.
1875 */
1876static int
1877pci_msi_vm_chipset(device_t dev)
1878{
1879	struct pci_quirk *q;
1880
1881	for (q = &pci_quirks[0]; q->devid; q++) {
1882		if (q->devid == pci_get_devid(dev) &&
1883		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1884			return (1);
1885	}
1886	return (0);
1887}
1888
1889/*
1890 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1891 * we just check for blacklisted chipsets as represented by the
1892 * host-PCI bridge at device 0:0:0.  In the future, it may become
1893 * necessary to check other system attributes, such as the kenv values
1894 * that give the motherboard manufacturer and model number.
1895 */
1896static int
1897pci_msi_blacklisted(void)
1898{
1899	device_t dev;
1900
1901	if (!pci_honor_msi_blacklist)
1902		return (0);
1903
1904	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1905	if (!(pcie_chipset || pcix_chipset)) {
1906		if (vm_guest != VM_GUEST_NO) {
1907			dev = pci_find_bsf(0, 0, 0);
1908			if (dev != NULL)
1909				return (pci_msi_vm_chipset(dev) == 0);
1910		}
1911		return (1);
1912	}
1913
1914	dev = pci_find_bsf(0, 0, 0);
1915	if (dev != NULL)
1916		return (pci_msi_device_blacklisted(dev));
1917	return (0);
1918}
1919
1920/*
1921 * Attempt to allocate *count MSI messages.  The actual number allocated is
1922 * returned in *count.  After this function returns, each message will be
1923 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1924 */
1925int
1926pci_alloc_msi_method(device_t dev, device_t child, int *count)
1927{
1928	struct pci_devinfo *dinfo = device_get_ivars(child);
1929	pcicfgregs *cfg = &dinfo->cfg;
1930	struct resource_list_entry *rle;
1931	int actual, error, i, irqs[32];
1932	uint16_t ctrl;
1933
1934	/* Don't let count == 0 get us into trouble. */
1935	if (*count == 0)
1936		return (EINVAL);
1937
1938	/* If rid 0 is allocated, then fail. */
1939	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1940	if (rle != NULL && rle->res != NULL)
1941		return (ENXIO);
1942
1943	/* Already have allocated messages? */
1944	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1945		return (ENXIO);
1946
1947	/* If MSI is blacklisted for this system, fail. */
1948	if (pci_msi_blacklisted())
1949		return (ENXIO);
1950
1951	/* MSI capability present? */
1952	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1953		return (ENODEV);
1954
1955	if (bootverbose)
1956		device_printf(child,
1957		    "attempting to allocate %d MSI vectors (%d supported)\n",
1958		    *count, cfg->msi.msi_msgnum);
1959
1960	/* Don't ask for more than the device supports. */
1961	actual = min(*count, cfg->msi.msi_msgnum);
1962
1963	/* Don't ask for more than 32 messages. */
1964	actual = min(actual, 32);
1965
1966	/* MSI requires power of 2 number of messages. */
1967	if (!powerof2(actual))
1968		return (EINVAL);
1969
1970	for (;;) {
1971		/* Try to allocate N messages. */
1972		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1973		    actual, irqs);
1974		if (error == 0)
1975			break;
1976		if (actual == 1)
1977			return (error);
1978
1979		/* Try N / 2. */
1980		actual >>= 1;
1981	}
1982
1983	/*
1984	 * We now have N actual messages mapped onto SYS_RES_IRQ
1985	 * resources in the irqs[] array, so add new resources
1986	 * starting at rid 1.
1987	 */
1988	for (i = 0; i < actual; i++)
1989		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1990		    irqs[i], irqs[i], 1);
1991
1992	if (bootverbose) {
1993		if (actual == 1)
1994			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1995		else {
1996			int run;
1997
1998			/*
1999			 * Be fancy and try to print contiguous runs
2000			 * of IRQ values as ranges.  'run' is true if
2001			 * we are in a range.
2002			 */
2003			device_printf(child, "using IRQs %d", irqs[0]);
2004			run = 0;
2005			for (i = 1; i < actual; i++) {
2006
2007				/* Still in a run? */
2008				if (irqs[i] == irqs[i - 1] + 1) {
2009					run = 1;
2010					continue;
2011				}
2012
2013				/* Finish previous range. */
2014				if (run) {
2015					printf("-%d", irqs[i - 1]);
2016					run = 0;
2017				}
2018
2019				/* Start new range. */
2020				printf(",%d", irqs[i]);
2021			}
2022
2023			/* Unfinished range? */
2024			if (run)
2025				printf("-%d", irqs[actual - 1]);
2026			printf(" for MSI\n");
2027		}
2028	}
2029
2030	/* Update control register with actual count. */
2031	ctrl = cfg->msi.msi_ctrl;
2032	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2033	ctrl |= (ffs(actual) - 1) << 4;
2034	cfg->msi.msi_ctrl = ctrl;
2035	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2036
2037	/* Update counts of alloc'd messages. */
2038	cfg->msi.msi_alloc = actual;
2039	cfg->msi.msi_handlers = 0;
2040	*count = actual;
2041	return (0);
2042}
2043
/*
 * Release the MSI messages associated with this device.
 *
 * Returns 0 on success, ENODEV if neither MSI nor MSI-X messages are
 * allocated, EBUSY if any message still has a handler established or
 * an IRQ resource outstanding, or the error from pci_release_msix().
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Collect the IRQ numbers while verifying no resource is in use. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2092
2093/*
2094 * Return the max supported MSI messages this device supports.
2095 * Basically, assuming the MD code can alloc messages, this function
2096 * should return the maximum value that pci_alloc_msi() can return.
2097 * Thus, it is subject to the tunables, etc.
2098 */
2099int
2100pci_msi_count_method(device_t dev, device_t child)
2101{
2102	struct pci_devinfo *dinfo = device_get_ivars(child);
2103	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2104
2105	if (pci_do_msi && msi->msi_location != 0)
2106		return (msi->msi_msgnum);
2107	return (0);
2108}
2109
/*
 * Free a pci_devinfo and all dependent data structures (cached VPD
 * data and the BAR map list), remove it from the global device list,
 * and update the global generation count and device count.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free cached Vital Product Data, if any was read. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* _SAFE variant because each entry is freed during traversal. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2143
2144/*
2145 * PCI power manangement
2146 */
2147int
2148pci_set_powerstate_method(device_t dev, device_t child, int state)
2149{
2150	struct pci_devinfo *dinfo = device_get_ivars(child);
2151	pcicfgregs *cfg = &dinfo->cfg;
2152	uint16_t status;
2153	int result, oldstate, highest, delay;
2154
2155	if (cfg->pp.pp_cap == 0)
2156		return (EOPNOTSUPP);
2157
2158	/*
2159	 * Optimize a no state change request away.  While it would be OK to
2160	 * write to the hardware in theory, some devices have shown odd
2161	 * behavior when going from D3 -> D3.
2162	 */
2163	oldstate = pci_get_powerstate(child);
2164	if (oldstate == state)
2165		return (0);
2166
2167	/*
2168	 * The PCI power management specification states that after a state
2169	 * transition between PCI power states, system software must
2170	 * guarantee a minimal delay before the function accesses the device.
2171	 * Compute the worst case delay that we need to guarantee before we
2172	 * access the device.  Many devices will be responsive much more
2173	 * quickly than this delay, but there are some that don't respond
2174	 * instantly to state changes.  Transitions to/from D3 state require
2175	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2176	 * is done below with DELAY rather than a sleeper function because
2177	 * this function can be called from contexts where we cannot sleep.
2178	 */
2179	highest = (oldstate > state) ? oldstate : state;
2180	if (highest == PCI_POWERSTATE_D3)
2181	    delay = 10000;
2182	else if (highest == PCI_POWERSTATE_D2)
2183	    delay = 200;
2184	else
2185	    delay = 0;
2186	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2187	    & ~PCIM_PSTAT_DMASK;
2188	result = 0;
2189	switch (state) {
2190	case PCI_POWERSTATE_D0:
2191		status |= PCIM_PSTAT_D0;
2192		break;
2193	case PCI_POWERSTATE_D1:
2194		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2195			return (EOPNOTSUPP);
2196		status |= PCIM_PSTAT_D1;
2197		break;
2198	case PCI_POWERSTATE_D2:
2199		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2200			return (EOPNOTSUPP);
2201		status |= PCIM_PSTAT_D2;
2202		break;
2203	case PCI_POWERSTATE_D3:
2204		status |= PCIM_PSTAT_D3;
2205		break;
2206	default:
2207		return (EINVAL);
2208	}
2209
2210	if (bootverbose)
2211		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2212		    state);
2213
2214	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2215	if (delay)
2216		DELAY(delay);
2217	return (0);
2218}
2219
2220int
2221pci_get_powerstate_method(device_t dev, device_t child)
2222{
2223	struct pci_devinfo *dinfo = device_get_ivars(child);
2224	pcicfgregs *cfg = &dinfo->cfg;
2225	uint16_t status;
2226	int result;
2227
2228	if (cfg->pp.pp_cap != 0) {
2229		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2230		switch (status & PCIM_PSTAT_DMASK) {
2231		case PCIM_PSTAT_D0:
2232			result = PCI_POWERSTATE_D0;
2233			break;
2234		case PCIM_PSTAT_D1:
2235			result = PCI_POWERSTATE_D1;
2236			break;
2237		case PCIM_PSTAT_D2:
2238			result = PCI_POWERSTATE_D2;
2239			break;
2240		case PCIM_PSTAT_D3:
2241			result = PCI_POWERSTATE_D3;
2242			break;
2243		default:
2244			result = PCI_POWERSTATE_UNKNOWN;
2245			break;
2246		}
2247	} else {
2248		/* No support, device is always at D0 */
2249		result = PCI_POWERSTATE_D0;
2250	}
2251	return (result);
2252}
2253
2254/*
2255 * Some convenience functions for PCI device drivers.
2256 */
2257
2258static __inline void
2259pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2260{
2261	uint16_t	command;
2262
2263	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2264	command |= bit;
2265	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2266}
2267
2268static __inline void
2269pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2270{
2271	uint16_t	command;
2272
2273	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2274	command &= ~bit;
2275	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2276}
2277
/* Enable bus mastering for the child device.  Always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2284
/* Disable bus mastering for the child device.  Always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2291
2292int
2293pci_enable_io_method(device_t dev, device_t child, int space)
2294{
2295	uint16_t bit;
2296
2297	switch(space) {
2298	case SYS_RES_IOPORT:
2299		bit = PCIM_CMD_PORTEN;
2300		break;
2301	case SYS_RES_MEMORY:
2302		bit = PCIM_CMD_MEMEN;
2303		break;
2304	default:
2305		return (EINVAL);
2306	}
2307	pci_set_command_bit(dev, child, bit);
2308	return (0);
2309}
2310
2311int
2312pci_disable_io_method(device_t dev, device_t child, int space)
2313{
2314	uint16_t bit;
2315
2316	switch(space) {
2317	case SYS_RES_IOPORT:
2318		bit = PCIM_CMD_PORTEN;
2319		break;
2320	case SYS_RES_MEMORY:
2321		bit = PCIM_CMD_MEMEN;
2322		break;
2323	default:
2324		return (EINVAL);
2325	}
2326	pci_clear_command_bit(dev, child, bit);
2327	return (0);
2328}
2329
2330/*
2331 * New style pci driver.  Parent device is either a pci-host-bridge or a
2332 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2333 */
2334
/*
 * Print a verbose description of a device's config header, including
 * power management, MSI, and MSI-X capability details.  Emits output
 * only when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability: supported and current states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability: message count and feature flags. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability: message count and table/PBA BAR(s). */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2391
2392static int
2393pci_porten(device_t dev)
2394{
2395	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2396}
2397
2398static int
2399pci_memen(device_t dev)
2400{
2401	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2402}
2403
/*
 * Read a BAR's current value and its size-probe value.
 *
 * On return, '*mapp' holds the BAR's original contents (including the
 * upper 32 bits of a 64-bit memory BAR) and '*testvalp' holds the
 * value read back after writing all 1's to the BAR, from which the
 * caller can derive the BAR's size.  The original BAR value and the
 * command register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2467
/*
 * Program the BAR described by 'pm' with address 'base', writing the
 * upper dword as well for a 64-bit BAR, then re-read the hardware to
 * refresh the cached pm_value.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the hardware accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2488
2489struct pci_map *
2490pci_find_bar(device_t dev, int reg)
2491{
2492	struct pci_devinfo *dinfo;
2493	struct pci_map *pm;
2494
2495	dinfo = device_get_ivars(dev);
2496	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2497		if (pm->pm_reg == reg)
2498			return (pm);
2499	}
2500	return (NULL);
2501}
2502
2503int
2504pci_bar_enabled(device_t dev, struct pci_map *pm)
2505{
2506	struct pci_devinfo *dinfo;
2507	uint16_t cmd;
2508
2509	dinfo = device_get_ivars(dev);
2510	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2511	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2512		return (0);
2513	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2514	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2515		return ((cmd & PCIM_CMD_MEMEN) != 0);
2516	else
2517		return ((cmd & PCIM_CMD_PORTEN) != 0);
2518}
2519
/*
 * Allocate a new pci_map record for BAR register 'reg' and insert it
 * into the device's map list, which is kept sorted by register offset.
 *
 * NOTE(review): the insertion always goes after 'prev', so a new reg
 * that sorts before the current list head would land out of order;
 * presumably callers only add BARs in ascending register order --
 * verify before relying on the sort invariant.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the record to insert after; stop before a larger reg. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2544
2545static void
2546pci_restore_bars(device_t dev)
2547{
2548	struct pci_devinfo *dinfo;
2549	struct pci_map *pm;
2550	int ln2range;
2551
2552	dinfo = device_get_ivars(dev);
2553	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2554		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2555			ln2range = 32;
2556		else
2557			ln2range = pci_maprange(pm->pm_value);
2558		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2559		if (ln2range == 64)
2560			pci_write_config(dev, pm->pm_reg + 4,
2561			    pm->pm_value >> 32, 4);
2562	}
2563}
2564
/*
 * Add a resource based on a pci map register. Return 1 if the map
 * register is a 32bit map register or 2 if it is a 64bit register.
 *
 * Sizes the BAR at 'reg', records it, adds a matching entry of 'type'
 * to resource list 'rl', and tries to reserve the range from the
 * parent bus.  'force' overrides the skip of zero/disabled BARs;
 * 'prefetch' requests RF_PREFETCHABLE on the reservation.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject a base that does not fit in a u_long on this platform. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2713
2714/*
2715 * For ATA devices we need to decide early what addressing mode to use.
2716 * Legacy demands that the primary and secondary ATA ports sits on the
2717 * same addresses that old ISA hardware did. This dictates that we use
2718 * those addresses and ignore the BAR's if we cannot set PCI native
2719 * addressing mode.
2720 */
2721static void
2722pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2723    uint32_t prefetchmask)
2724{
2725	struct resource *r;
2726	int rid, type, progif;
2727#if 0
2728	/* if this device supports PCI native addressing use it */
2729	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2730	if ((progif & 0x8a) == 0x8a) {
2731		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2732		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2733			printf("Trying ATA native PCI addressing mode\n");
2734			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2735		}
2736	}
2737#endif
2738	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2739	type = SYS_RES_IOPORT;
2740	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2741		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2742		    prefetchmask & (1 << 0));
2743		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2744		    prefetchmask & (1 << 1));
2745	} else {
2746		rid = PCIR_BAR(0);
2747		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2748		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2749		    0x1f7, 8, 0);
2750		rid = PCIR_BAR(1);
2751		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2752		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2753		    0x3f6, 1, 0);
2754	}
2755	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2756		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2757		    prefetchmask & (1 << 2));
2758		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2759		    prefetchmask & (1 << 3));
2760	} else {
2761		rid = PCIR_BAR(2);
2762		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2763		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2764		    0x177, 8, 0);
2765		rid = PCIR_BAR(3);
2766		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2767		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2768		    0x376, 1, 0);
2769	}
2770	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2771	    prefetchmask & (1 << 4));
2772	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2773	    prefetchmask & (1 << 5));
2774}
2775
/*
 * Determine the IRQ for a device's legacy INTx pin and add it as the
 * rid 0 SYS_RES_IRQ entry in the device's resource list.  The IRQ can
 * come from a "hw.pci<d>.<b>.<s>.INT<p>.irq" tunable, from the bus's
 * interrupt routing, or from the intline config register; when
 * 'force_route' is set, bus routing is preferred over intline.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside 1-254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2823
/*
 * Perform early OHCI takeover from SMM: if the BIOS owns the
 * controller (OHCI_IR set), request an ownership change, poll for up
 * to ~100ms, reset the controller if the BIOS does not respond, and
 * disable all controller interrupts.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 times, 1ms apart, for the BIOS to let go. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2860
2861/* Perform early UHCI takeover from SMM. */
2862static void
2863uhci_early_takeover(device_t self)
2864{
2865	struct resource *res;
2866	int rid;
2867
2868	/*
2869	 * Set the PIRQD enable bit and switch off all the others. We don't
2870	 * want legacy support to interfere with us XXX Does this also mean
2871	 * that the BIOS won't touch the keyboard anymore if it is connected
2872	 * to the ports of the root hub?
2873	 */
2874	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2875
2876	/* Disable interrupts */
2877	rid = PCI_UHCI_BASE_REG;
2878	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2879	if (res != NULL) {
2880		bus_write_2(res, UHCI_INTR, 0);
2881		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2882	}
2883}
2884
/*
 * Perform early EHCI takeover from SMM: walk the extended capability
 * list for the legacy-support capability, and if the BIOS semaphore
 * is held, set the OS semaphore, poll for up to ~100ms for the BIOS
 * to release, then disable controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 times, 1ms apart, for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2940
/*
 * Populate a new child device's resource list: size and reserve its
 * BARs (with ATA-specific handling for legacy IDE), add any
 * quirk-specified extra map registers, route its legacy INTx
 * interrupt, and take over USB controllers from the BIOS/SMM early
 * when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 (32- vs 64-bit BAR). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take over USB host controllers from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2995
/*
 * Enumerate all slots and functions on bus 'busno' in domain 'domain'
 * and add a child device for every function found.  'dinfo_size'
 * allows subclasses to embed struct pci_devinfo in a larger per-device
 * structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		/* f must be 0 here: the REG() macro below reads from it. */
		f = 0;
		/* NOTE(review): 1us pause before the header read;
		 * presumably lets the slot settle -- confirm. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with no device or an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3028
/*
 * Create a newbus child for 'dinfo', attach the devinfo as its ivars,
 * initialize its resource list, synchronize saved/live config state
 * (pci_cfg_save then pci_cfg_restore), print the verbose description,
 * and add its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3040
/* Generic PCI bus probe; always matches with a low-priority result. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3050
/*
 * Attach the PCI bus: query the parent bridge for our domain and bus
 * number, enumerate children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3070
3071static void
3072pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3073    int state)
3074{
3075	device_t child, pcib;
3076	struct pci_devinfo *dinfo;
3077	int dstate, i;
3078
3079	/*
3080	 * Set the device to the given state.  If the firmware suggests
3081	 * a different power state, use it instead.  If power management
3082	 * is not present, the firmware is responsible for managing
3083	 * device power.  Skip children who aren't attached since they
3084	 * are handled separately.
3085	 */
3086	pcib = device_get_parent(dev);
3087	for (i = 0; i < numdevs; i++) {
3088		child = devlist[i];
3089		dinfo = device_get_ivars(child);
3090		dstate = state;
3091		if (device_is_attached(child) &&
3092		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3093			pci_set_powerstate(child, dstate);
3094	}
3095}
3096
/*
 * Bus suspend method: save each child's config space, suspend the
 * child drivers, and then (only if pci_do_power_suspend is set) place
 * the children in D3 to minimize power draw while asleep.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): third arg 0 presumably means "save
		 * without powering down" -- confirm against
		 * pci_cfg_save().
		 */
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3128
/*
 * Bus resume method: power children back up (only if
 * pci_do_power_resume is set), restore their saved config space, and
 * resume the drivers in two passes -- display, memory, bridge and
 * base-peripheral devices first, everything else afterwards.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/*
		 * NOTE(review): third arg 1 presumably re-saves and
		 * powers an unclaimed device back down -- confirm
		 * against pci_cfg_save().
		 */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: resume the remaining (non-critical) classes. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3184
/*
 * Locate the preloaded "pci_vendor_data" module (the flat-text PCI
 * vendor/device database) and publish its address and size through
 * the pci_vendordata globals for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this writes one byte past the
			 * reported size and so assumes the preload area
			 * has at least one spare byte -- confirm.  The
			 * parser relies on this trailing newline as a
			 * guard against corrupt data.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3204
/*
 * Called when a new driver is added to the bus: re-run the driver's
 * identify method and try to attach any child that is still
 * unclaimed, restoring its config space first in case it was powered
 * down after an earlier failed probe.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		/*
		 * NOTE(review): on a failed probe, pci_cfg_save(.., 1)
		 * presumably powers the still-unclaimed device back
		 * down -- confirm against pci_cfg_save().
		 */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3233
/*
 * Bus setup_intr method.  Hook up the handler via the generic bus
 * code, then configure the hardware side: enable INTx for legacy
 * interrupts (rid 0), or lazily map and enable the MSI/MSI-X message
 * for message-signalled interrupts (rid > 0).
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI vector lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is installed. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: resource rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the MSI-X vector lazily on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Reached normally with error == 0; undo setup on failure. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3325
3326int
3327pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3328    void *cookie)
3329{
3330	struct msix_table_entry *mte;
3331	struct resource_list_entry *rle;
3332	struct pci_devinfo *dinfo;
3333	int error, rid;
3334
3335	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3336		return (EINVAL);
3337
3338	/* If this isn't a direct child, just bail out */
3339	if (device_get_parent(child) != dev)
3340		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3341
3342	rid = rman_get_rid(irq);
3343	if (rid == 0) {
3344		/* Mask INTx */
3345		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3346	} else {
3347		/*
3348		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3349		 * decrement the appropriate handlers count and mask the
3350		 * MSI-X message, or disable MSI messages if the count
3351		 * drops to 0.
3352		 */
3353		dinfo = device_get_ivars(child);
3354		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3355		if (rle->res != irq)
3356			return (EINVAL);
3357		if (dinfo->cfg.msi.msi_alloc > 0) {
3358			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3359			    ("MSI-X index too high"));
3360			if (dinfo->cfg.msi.msi_handlers == 0)
3361				return (EINVAL);
3362			dinfo->cfg.msi.msi_handlers--;
3363			if (dinfo->cfg.msi.msi_handlers == 0)
3364				pci_disable_msi(child);
3365		} else {
3366			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3367			    ("No MSI or MSI-X interrupts allocated"));
3368			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3369			    ("MSI-X index too high"));
3370			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3371			if (mte->mte_handlers == 0)
3372				return (EINVAL);
3373			mte->mte_handlers--;
3374			if (mte->mte_handlers == 0)
3375				pci_mask_msix(child, rid - 1);
3376		}
3377	}
3378	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3379	if (rid > 0)
3380		KASSERT(error == 0,
3381		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3382	return (error);
3383}
3384
3385int
3386pci_print_child(device_t dev, device_t child)
3387{
3388	struct pci_devinfo *dinfo;
3389	struct resource_list *rl;
3390	int retval = 0;
3391
3392	dinfo = device_get_ivars(child);
3393	rl = &dinfo->resources;
3394
3395	retval += bus_print_child_header(dev, child);
3396
3397	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3398	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3399	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3400	if (device_get_flags(dev))
3401		retval += printf(" flags %#x", device_get_flags(dev));
3402
3403	retval += printf(" at device %d.%d", pci_get_slot(child),
3404	    pci_get_function(child));
3405
3406	retval += bus_print_child_footer(dev, child);
3407
3408	return (retval);
3409}
3410
/*
 * Table of generic class/subclass descriptions, used by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry for the device.  A subclass of -1 provides the
 * fallback description for the whole class; the table is terminated
 * by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3502
3503void
3504pci_probe_nomatch(device_t dev, device_t child)
3505{
3506	int	i;
3507	char	*cp, *scp, *device;
3508
3509	/*
3510	 * Look for a listing for this device in a loaded device database.
3511	 */
3512	if ((device = pci_describe_device(child)) != NULL) {
3513		device_printf(dev, "<%s>", device);
3514		free(device, M_DEVBUF);
3515	} else {
3516		/*
3517		 * Scan the class/subclass descriptions for a general
3518		 * description.
3519		 */
3520		cp = "unknown";
3521		scp = NULL;
3522		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3523			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3524				if (pci_nomatch_tab[i].subclass == -1) {
3525					cp = pci_nomatch_tab[i].desc;
3526				} else if (pci_nomatch_tab[i].subclass ==
3527				    pci_get_subclass(child)) {
3528					scp = pci_nomatch_tab[i].desc;
3529				}
3530			}
3531		}
3532		device_printf(dev, "<%s%s%s>",
3533		    cp ? cp : "",
3534		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3535		    scp ? scp : "");
3536	}
3537	printf(" at device %d.%d (no driver attached)\n",
3538	    pci_get_slot(child), pci_get_function(child));
3539	pci_cfg_save(child, device_get_ivars(child), 1);
3540	return;
3541}
3542
3543/*
3544 * Parse the PCI device database, if loaded, and return a pointer to a
3545 * description of the device.
3546 *
3547 * The database is flat text formatted as follows:
3548 *
3549 * Any line not in a valid format is ignored.
3550 * Lines are terminated with newline '\n' characters.
3551 *
3552 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3553 * the vendor name.
3554 *
3555 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3556 * - devices cannot be listed without a corresponding VENDOR line.
3557 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3558 * another TAB, then the device name.
3559 */
3560
3561/*
3562 * Assuming (ptr) points to the beginning of a line in the database,
3563 * return the vendor or device and description of the next entry.
3564 * The value of (vendor) or (device) inappropriate for the entry type
3565 * is set to -1.  Returns nonzero at the end of the database.
3566 *
3567 * Note that this is slightly unrobust in the face of corrupt data;
3568 * we attempt to safeguard against this by spamming the end of the
3569 * database with a newline when we initialise.
3570 */
/*
 * NOTE(review): the "%80[^\n]" conversions below store up to 80
 * characters plus a terminating NUL into *desc, so callers must
 * supply a buffer of at least 81 bytes.  Also, the skip loops can
 * evaluate *cp when left == 0; that relies on the trailing '\n'
 * guard byte written by pci_load_vendor_data() -- confirm.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Entry type unknown until a line parses; description empty. */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			/* Ran off the end of the database. */
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3616
3617static char *
3618pci_describe_device(device_t dev)
3619{
3620	int	vendor, device;
3621	char	*desc, *vp, *dp, *line;
3622
3623	desc = vp = dp = NULL;
3624
3625	/*
3626	 * If we have no vendor data, we can't do anything.
3627	 */
3628	if (pci_vendordata == NULL)
3629		goto out;
3630
3631	/*
3632	 * Scan the vendor data looking for this device
3633	 */
3634	line = pci_vendordata;
3635	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3636		goto out;
3637	for (;;) {
3638		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3639			goto out;
3640		if (vendor == pci_get_vendor(dev))
3641			break;
3642	}
3643	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3644		goto out;
3645	for (;;) {
3646		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3647			*dp = 0;
3648			break;
3649		}
3650		if (vendor != -1) {
3651			*dp = 0;
3652			break;
3653		}
3654		if (device == pci_get_device(dev))
3655			break;
3656	}
3657	if (dp[0] == '\0')
3658		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3659	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3660	    NULL)
3661		sprintf(desc, "%s, %s", vp, dp);
3662 out:
3663	if (vp != NULL)
3664		free(vp, M_DEVBUF);
3665	if (dp != NULL)
3666		free(dp, M_DEVBUF);
3667	return(desc);
3668}
3669
/*
 * Read method for PCI instance variables.  Returns the requested
 * field from the child's cached config registers in *result, or
 * ENOENT for an unknown ivar.  PCI_IVAR_ETHADDR is not supported at
 * this layer and returns EINVAL (with *result set to NULL).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3752
/*
 * Write method for PCI instance variables.  Only the interrupt pin
 * may be changed; the identity/location ivars are read-only and
 * return EINVAL.  Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3785
3786
3787#include "opt_ddb.h"
3788#ifdef DDB
3789#include <ddb/ddb.h>
3790#include <sys/cons.h>
3791
3792/*
3793 * List resources based on pci map registers, used for within ddb
3794 */
3795
/*
 * "show pciregs" DDB command: walk the global pci_devq list and print
 * one summary line per device (driver name/unit or "none<N>",
 * selector, class triple, subsystem/device IDs, revision, header
 * type).  Stops early if the pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unnamed devices are numbered "none0", "none1", ... */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3835#endif /* DDB */
3836
/*
 * Lazily reserve the resource backing a BAR that was not reserved at
 * enumeration time: size the BAR (or reuse a previously recorded
 * failed allocation), sanity-check the requested resource type
 * against the BAR type, allocate a suitably sized and aligned range
 * from the parent, record it in the child's resource list as
 * RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we just allocated. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
3942
3943
/*
 * Bus alloc_resource method.  For direct children, perform lazy
 * interrupt routing and lazy BAR reservation before satisfying the
 * request from the child's resource list; for grandchildren, simply
 * pass the request up the tree.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (a sub-allocation of) the reserved resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4014
/*
 * Bus activate_resource method.  Activate via the generic bus code,
 * then for a direct child's port/memory resource enable the matching
 * decode in the command register; device ROM BARs additionally need
 * their enable bit set in the BAR itself.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4042
4043int
4044pci_deactivate_resource(device_t dev, device_t child, int type,
4045    int rid, struct resource *r)
4046{
4047	struct pci_devinfo *dinfo;
4048	int error;
4049
4050	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4051	if (error)
4052		return (error);
4053
4054	/* Disable decoding for device ROMs. */
4055	if (device_get_parent(child) == dev) {
4056		dinfo = device_get_ivars(child);
4057		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4058			pci_write_bar(child, pci_find_bar(child, rid),
4059			    rman_get_start(r));
4060	}
4061	return (0);
4062}
4063
/*
 * Detach and destroy a PCI child device: detach the driver, disable
 * memory/port decoding, release any still-active resources (with a
 * diagnostic, since the driver should have released them), unreserve
 * everything, and free the devinfo.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4103
/*
 * Remove a single resource entry (type, rid) from a direct PCI
 * child's resource list, releasing the underlying reservation if one
 * exists.  Refuses to act if the resource is still active or busy,
 * since yanking it would leave a driver with a dangling mapping.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our own immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	/* Drop the bookkeeping entry whether or not it was reserved. */
	resource_list_delete(rl, type, rid);
}
4146
4147struct resource_list *
4148pci_get_resource_list (device_t dev, device_t child)
4149{
4150	struct pci_devinfo *dinfo = device_get_ivars(child);
4151
4152	return (&dinfo->resources);
4153}
4154
4155uint32_t
4156pci_read_config_method(device_t dev, device_t child, int reg, int width)
4157{
4158	struct pci_devinfo *dinfo = device_get_ivars(child);
4159	pcicfgregs *cfg = &dinfo->cfg;
4160
4161	return (PCIB_READ_CONFIG(device_get_parent(dev),
4162	    cfg->bus, cfg->slot, cfg->func, reg, width));
4163}
4164
4165void
4166pci_write_config_method(device_t dev, device_t child, int reg,
4167    uint32_t val, int width)
4168{
4169	struct pci_devinfo *dinfo = device_get_ivars(child);
4170	pcicfgregs *cfg = &dinfo->cfg;
4171
4172	PCIB_WRITE_CONFIG(device_get_parent(dev),
4173	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4174}
4175
4176int
4177pci_child_location_str_method(device_t dev, device_t child, char *buf,
4178    size_t buflen)
4179{
4180
4181	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4182	    pci_get_function(child));
4183	return (0);
4184}
4185
4186int
4187pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4188    size_t buflen)
4189{
4190	struct pci_devinfo *dinfo;
4191	pcicfgregs *cfg;
4192
4193	dinfo = device_get_ivars(child);
4194	cfg = &dinfo->cfg;
4195	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4196	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4197	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4198	    cfg->progif);
4199	return (0);
4200}
4201
4202int
4203pci_assign_interrupt_method(device_t dev, device_t child)
4204{
4205	struct pci_devinfo *dinfo = device_get_ivars(child);
4206	pcicfgregs *cfg = &dinfo->cfg;
4207
4208	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4209	    cfg->intpin));
4210}
4211
4212static int
4213pci_modevent(module_t mod, int what, void *arg)
4214{
4215	static struct cdev *pci_cdev;
4216
4217	switch (what) {
4218	case MOD_LOAD:
4219		STAILQ_INIT(&pci_devq);
4220		pci_generation = 0;
4221		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4222		    "pci");
4223		pci_load_vendor_data();
4224		break;
4225
4226	case MOD_UNLOAD:
4227		destroy_dev(pci_cdev);
4228		break;
4229	}
4230
4231	return (0);
4232}
4233
/*
 * Restore a device's saved config-space registers, typically after a
 * suspend/resume cycle or after the device was powered down.  Uses
 * the values cached in dinfo by pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* BARs first, then the writable type-0 header registers. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4275
/*
 * Snapshot a device's writable config-space registers into dinfo so
 * that pci_cfg_restore() can put them back later, and optionally
 * (setstate != 0) power the device down to D3 according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Cases below deliberately fall through to broaden the policy. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4355