pci.c revision 171649
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 171649 2007-07-29 02:44:41Z marcel $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static const char	*pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static void		pci_disable_msi(device_t dev);
105static void		pci_enable_msi(device_t dev, uint64_t address,
106			    uint16_t data);
107static void		pci_enable_msix(device_t dev, u_int index,
108			    uint64_t address, uint32_t data);
109static void		pci_mask_msix(device_t dev, u_int index);
110static void		pci_unmask_msix(device_t dev, u_int index);
111static int		pci_msi_blacklisted(void);
112static void		pci_resume_msi(device_t dev);
113static void		pci_resume_msix(device_t dev);
114
115static device_method_t pci_methods[] = {
116	/* Device interface */
117	DEVMETHOD(device_probe,		pci_probe),
118	DEVMETHOD(device_attach,	pci_attach),
119	DEVMETHOD(device_detach,	bus_generic_detach),
120	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
121	DEVMETHOD(device_suspend,	pci_suspend),
122	DEVMETHOD(device_resume,	pci_resume),
123
124	/* Bus interface */
125	DEVMETHOD(bus_print_child,	pci_print_child),
126	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
127	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
128	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
129	DEVMETHOD(bus_driver_added,	pci_driver_added),
130	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
131	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
132
133	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
134	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
135	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
136	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
137	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
138	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
139	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
140	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
141	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
142	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
143
144	/* PCI interface */
145	DEVMETHOD(pci_read_config,	pci_read_config_method),
146	DEVMETHOD(pci_write_config,	pci_write_config_method),
147	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
148	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
149	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
150	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
151	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
152	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
153	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
154	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
155	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
156	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
157	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
158	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
159	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
160	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
161	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
162	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
163
164	{ 0, 0 }
165};
166
167DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
168
169static devclass_t pci_devclass;
170DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
171MODULE_VERSION(pci, 1);
172
173static char	*pci_vendordata;
174static size_t	pci_vendordata_size;
175
176
/* Values for the 'type' member of struct pci_quirk. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */

/*
 * One entry of the quirk table: a vendor/device ID, the kind of quirk
 * that applies to it, and two quirk-specific arguments.
 */
struct pci_quirk {
	uint32_t	devid;	/* Vendor/device of the card */
	int		type;	/* One of the PCI_QUIRK_* values */
	int		arg1;
	int		arg2;
};
185
186struct pci_quirk pci_quirks[] = {
187	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
188	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
189	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
190	/* As does the Serverworks OSB4 (the SMBus mapping register) */
191	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
192
193	/*
194	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
195	 * or the CMIC-SL (AKA ServerWorks GC_LE).
196	 */
197	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
198	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
199
200	/*
201	 * MSI doesn't work on earlier Intel chipsets including
202	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
203	 */
204	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
205	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
206	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
207	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
209	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
210	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
211
212	/*
213	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
214	 * bridge.
215	 */
216	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217
218	{ 0 }
219};
220
221/* map register information */
222#define	PCI_MAPMEM	0x01	/* memory map */
223#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
224#define	PCI_MAPPORT	0x04	/* port map */
225
226struct devlist pci_devq;
227uint32_t pci_generation;
228uint32_t pci_numdevs = 0;
229static int pcie_chipset, pcix_chipset;
230
231/* sysctl vars */
232SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
233
234static int pci_enable_io_modes = 1;
235TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
236SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
237    &pci_enable_io_modes, 1,
238    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
239enable these bits correctly.  We'd like to do this all the time, but there\n\
240are some peripherals that this causes problems with.");
241
242static int pci_do_power_nodriver = 0;
243TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
244SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
245    &pci_do_power_nodriver, 0,
246  "Place a function into D3 state when no driver attaches to it.  0 means\n\
247disable.  1 means conservatively place devices into D3 state.  2 means\n\
248agressively place devices into D3 state.  3 means put absolutely everything\n\
249in D3 state.");
250
251static int pci_do_power_resume = 1;
252TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
253SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
254    &pci_do_power_resume, 1,
255  "Transition from D3 -> D0 on resume.");
256
257static int pci_do_vpd = 1;
258TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
259SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
260    "Enable support for VPD.");
261
262static int pci_do_msi = 1;
263TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
264SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
265    "Enable support for MSI interrupts");
266
267static int pci_do_msix = 1;
268TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
269SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
270    "Enable support for MSI-X interrupts");
271
272static int pci_honor_msi_blacklist = 1;
273TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
274SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
275    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
276
277/* Find a device_t by bus/slot/function */
278
279device_t
280pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
281{
282	struct pci_devinfo *dinfo;
283
284	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
285		if ((dinfo->cfg.bus == bus) &&
286		    (dinfo->cfg.slot == slot) &&
287		    (dinfo->cfg.func == func)) {
288			return (dinfo->cfg.dev);
289		}
290	}
291
292	return (NULL);
293}
294
295/* Find a device_t by vendor/device ID */
296
297device_t
298pci_find_device(uint16_t vendor, uint16_t device)
299{
300	struct pci_devinfo *dinfo;
301
302	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
303		if ((dinfo->cfg.vendor == vendor) &&
304		    (dinfo->cfg.device == device)) {
305			return (dinfo->cfg.dev);
306		}
307	}
308
309	return (NULL);
310}
311
312/* return base address of memory or port map */
313
314static uint32_t
315pci_mapbase(uint32_t mapreg)
316{
317
318	if (PCI_BAR_MEM(mapreg))
319		return (mapreg & PCIM_BAR_MEM_BASE);
320	else
321		return (mapreg & PCIM_BAR_IO_BASE);
322}
323
324/* return map type of memory or port map */
325
326static const char *
327pci_maptype(unsigned mapreg)
328{
329
330	if (PCI_BAR_IO(mapreg))
331		return ("I/O Port");
332	if (mapreg & PCIM_BAR_MEM_PREFETCH)
333		return ("Prefetchable Memory");
334	return ("Memory");
335}
336
/*
 * Return log2 of the map size decoded for a memory or port map.
 *
 * The type bits are stripped with pci_mapbase() and the number of
 * trailing zero bits in what remains is returned.  A zero base yields 0.
 */
static int
pci_mapsize(uint32_t testval)
{
	uint32_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* base is non-zero here, so a set bit is always reached. */
	for (shift = 0; (base & 1) == 0; base >>= 1)
		shift++;
	return (shift);
}
355
356/* return log2 of address range supported by map register */
357
358static int
359pci_maprange(unsigned mapreg)
360{
361	int ln2range = 0;
362
363	if (PCI_BAR_IO(mapreg))
364		ln2range = 32;
365	else
366		switch (mapreg & PCIM_BAR_MEM_TYPE) {
367		case PCIM_BAR_MEM_32:
368			ln2range = 32;
369			break;
370		case PCIM_BAR_MEM_1MB:
371			ln2range = 20;
372			break;
373		case PCIM_BAR_MEM_64:
374			ln2range = 64;
375			break;
376		}
377	return (ln2range);
378}
379
380/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
381
382static void
383pci_fixancient(pcicfgregs *cfg)
384{
385	if (cfg->hdrtype != 0)
386		return;
387
388	/* PCI to PCI bridges use header type 1 */
389	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
390		cfg->hdrtype = 1;
391}
392
393/* extract header type specific config data */
394
395static void
396pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
397{
398#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
399	switch (cfg->hdrtype) {
400	case 0:
401		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
402		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
403		cfg->nummaps	    = PCI_MAXMAPS_0;
404		break;
405	case 1:
406		cfg->nummaps	    = PCI_MAXMAPS_1;
407		break;
408	case 2:
409		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
410		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
411		cfg->nummaps	    = PCI_MAXMAPS_2;
412		break;
413	}
414#undef REG
415}
416
417/* read configuration header into pcicfgregs structure */
418struct pci_devinfo *
419pci_read_device(device_t pcib, int b, int s, int f, size_t size)
420{
421#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
422	pcicfgregs *cfg = NULL;
423	struct pci_devinfo *devlist_entry;
424	struct devlist *devlist_head;
425
426	devlist_head = &pci_devq;
427
428	devlist_entry = NULL;
429
430	if (REG(PCIR_DEVVENDOR, 4) != -1) {
431		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
432		if (devlist_entry == NULL)
433			return (NULL);
434
435		cfg = &devlist_entry->cfg;
436
437		cfg->bus		= b;
438		cfg->slot		= s;
439		cfg->func		= f;
440		cfg->vendor		= REG(PCIR_VENDOR, 2);
441		cfg->device		= REG(PCIR_DEVICE, 2);
442		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
443		cfg->statreg		= REG(PCIR_STATUS, 2);
444		cfg->baseclass		= REG(PCIR_CLASS, 1);
445		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
446		cfg->progif		= REG(PCIR_PROGIF, 1);
447		cfg->revid		= REG(PCIR_REVID, 1);
448		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
449		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
450		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
451		cfg->intpin		= REG(PCIR_INTPIN, 1);
452		cfg->intline		= REG(PCIR_INTLINE, 1);
453
454		cfg->mingnt		= REG(PCIR_MINGNT, 1);
455		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
456
457		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
458		cfg->hdrtype		&= ~PCIM_MFDEV;
459
460		pci_fixancient(cfg);
461		pci_hdrtypedata(pcib, b, s, f, cfg);
462
463		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
464			pci_read_extcap(pcib, cfg);
465
466		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
467
468		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
469		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
470		devlist_entry->conf.pc_sel.pc_func = cfg->func;
471		devlist_entry->conf.pc_hdr = cfg->hdrtype;
472
473		devlist_entry->conf.pc_subvendor = cfg->subvendor;
474		devlist_entry->conf.pc_subdevice = cfg->subdevice;
475		devlist_entry->conf.pc_vendor = cfg->vendor;
476		devlist_entry->conf.pc_device = cfg->device;
477
478		devlist_entry->conf.pc_class = cfg->baseclass;
479		devlist_entry->conf.pc_subclass = cfg->subclass;
480		devlist_entry->conf.pc_progif = cfg->progif;
481		devlist_entry->conf.pc_revid = cfg->revid;
482
483		pci_numdevs++;
484		pci_generation++;
485	}
486	return (devlist_entry);
487#undef REG
488}
489
490static void
491pci_read_extcap(device_t pcib, pcicfgregs *cfg)
492{
493#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
494#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
495#if defined(__i386__) || defined(__amd64__)
496	uint64_t addr;
497#endif
498	uint32_t val;
499	int	ptr, nextptr, ptrptr;
500
501	switch (cfg->hdrtype & PCIM_HDRTYPE) {
502	case 0:
503	case 1:
504		ptrptr = PCIR_CAP_PTR;
505		break;
506	case 2:
507		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
508		break;
509	default:
510		return;		/* no extended capabilities support */
511	}
512	nextptr = REG(ptrptr, 1);	/* sanity check? */
513
514	/*
515	 * Read capability entries.
516	 */
517	while (nextptr != 0) {
518		/* Sanity check */
519		if (nextptr > 255) {
520			printf("illegal PCI extended capability offset %d\n",
521			    nextptr);
522			return;
523		}
524		/* Find the next entry */
525		ptr = nextptr;
526		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
527
528		/* Process this entry */
529		switch (REG(ptr + PCICAP_ID, 1)) {
530		case PCIY_PMG:		/* PCI power management */
531			if (cfg->pp.pp_cap == 0) {
532				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
533				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
534				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
535				if ((nextptr - ptr) > PCIR_POWER_DATA)
536					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
537			}
538			break;
539#if defined(__i386__) || defined(__amd64__)
540		case PCIY_HT:		/* HyperTransport */
541			/* Determine HT-specific capability type. */
542			val = REG(ptr + PCIR_HT_COMMAND, 2);
543			switch (val & PCIM_HTCMD_CAP_MASK) {
544			case PCIM_HTCAP_MSI_MAPPING:
545				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
546					/* Sanity check the mapping window. */
547					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
548					    4);
549					addr <<= 32;
550					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
551					    4);
552					if (addr != MSI_INTEL_ADDR_BASE)
553						device_printf(pcib,
554		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
555						    cfg->bus, cfg->slot,
556						    cfg->func, (long long)addr);
557				}
558
559				/* Enable MSI -> HT mapping. */
560				val |= PCIM_HTCMD_MSI_ENABLE;
561				WREG(ptr + PCIR_HT_COMMAND, val, 2);
562				break;
563			}
564			break;
565#endif
566		case PCIY_MSI:		/* PCI MSI */
567			cfg->msi.msi_location = ptr;
568			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
569			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
570						     PCIM_MSICTRL_MMC_MASK)>>1);
571			break;
572		case PCIY_MSIX:		/* PCI MSI-X */
573			cfg->msix.msix_location = ptr;
574			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
575			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
576			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
577			val = REG(ptr + PCIR_MSIX_TABLE, 4);
578			cfg->msix.msix_table_bar = PCIR_BAR(val &
579			    PCIM_MSIX_BIR_MASK);
580			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
581			val = REG(ptr + PCIR_MSIX_PBA, 4);
582			cfg->msix.msix_pba_bar = PCIR_BAR(val &
583			    PCIM_MSIX_BIR_MASK);
584			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
585			break;
586		case PCIY_VPD:		/* PCI Vital Product Data */
587			cfg->vpd.vpd_reg = ptr;
588			break;
589		case PCIY_SUBVENDOR:
590			/* Should always be true. */
591			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
592				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
593				cfg->subvendor = val & 0xffff;
594				cfg->subdevice = val >> 16;
595			}
596			break;
597		case PCIY_PCIX:		/* PCI-X */
598			/*
599			 * Assume we have a PCI-X chipset if we have
600			 * at least one PCI-PCI bridge with a PCI-X
601			 * capability.  Note that some systems with
602			 * PCI-express or HT chipsets might match on
603			 * this check as well.
604			 */
605			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
606				pcix_chipset = 1;
607			break;
608		case PCIY_EXPRESS:	/* PCI-express */
609			/*
610			 * Assume we have a PCI-express chipset if we have
611			 * at least one PCI-express root port.
612			 */
613			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
614			if ((val & PCIM_EXP_FLAGS_TYPE) ==
615			    PCIM_EXP_TYPE_ROOT_PORT)
616				pcie_chipset = 1;
617			break;
618		default:
619			break;
620		}
621	}
622/* REG and WREG use carry through to next functions */
623}
624
625/*
626 * PCI Vital Product Data
627 */
628static uint32_t
629pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
630{
631
632	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
633
634	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
635	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
636		DELAY(1);	/* limit looping */
637
638	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
639}
640
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * The data register is loaded first, then the 4-byte aligned offset is
 * written to the address register with bit 0x8000 set, and the function
 * polls until that bit reads back clear.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int vpd_base;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	vpd_base = cfg->vpd.vpd_reg;
	WREG(vpd_base + PCIR_VPD_DATA, data, 4);
	WREG(vpd_base + PCIR_VPD_ADDR, reg | 0x8000, 2);
	for (;;) {
		if ((REG(vpd_base + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
			break;
		DELAY(1);	/* limit looping */
	}
}
#endif
655
656struct vpd_readstate {
657	device_t	pcib;
658	pcicfgregs	*cfg;
659	uint32_t	val;
660	int		bytesinval;
661	int		off;
662	uint8_t		cksum;
663};
664
665static uint8_t
666vpd_nextbyte(struct vpd_readstate *vrs)
667{
668	uint8_t byte;
669
670	if (vrs->bytesinval == 0) {
671		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
672		    vrs->off));
673		vrs->off += 4;
674		byte = vrs->val & 0xff;
675		vrs->bytesinval = 3;
676	} else {
677		vrs->val = vrs->val >> 8;
678		byte = vrs->val & 0xff;
679		vrs->bytesinval--;
680	}
681
682	vrs->cksum += byte;
683	return (byte);
684}
685
686static void
687pci_read_vpd(device_t pcib, pcicfgregs *cfg)
688{
689	struct vpd_readstate vrs;
690	int state;
691	int name;
692	int remain;
693	int end;
694	int i;
695	uint8_t byte;
696	int alloc, off;		/* alloc/off for RO/W arrays */
697	int cksumvalid;
698	int dflen;
699
700	if (!pci_do_vpd) {
701		cfg->vpd.vpd_cached = 1;
702		return;
703	}
704
705	/* init vpd reader */
706	vrs.bytesinval = 0;
707	vrs.off = 0;
708	vrs.pcib = pcib;
709	vrs.cfg = cfg;
710	vrs.cksum = 0;
711
712	state = 0;
713	name = remain = i = 0;	/* shut up stupid gcc */
714	alloc = off = 0;	/* shut up stupid gcc */
715	dflen = 0;		/* shut up stupid gcc */
716	end = 0;
717	cksumvalid = -1;
718	for (; !end;) {
719		byte = vpd_nextbyte(&vrs);
720#if 0
721		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
722		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
723		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
724#endif
725		switch (state) {
726		case 0:		/* item name */
727			if (byte & 0x80) {
728				remain = vpd_nextbyte(&vrs);
729				remain |= vpd_nextbyte(&vrs) << 8;
730				if (remain > (0x7f*4 - vrs.off)) {
731					end = 1;
732					printf(
733			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
734					    cfg->bus, cfg->slot, cfg->func,
735					    remain);
736				}
737				name = byte & 0x7f;
738			} else {
739				remain = byte & 0x7;
740				name = (byte >> 3) & 0xf;
741			}
742			switch (name) {
743			case 0x2:	/* String */
744				cfg->vpd.vpd_ident = malloc(remain + 1,
745				    M_DEVBUF, M_WAITOK);
746				i = 0;
747				state = 1;
748				break;
749			case 0xf:	/* End */
750				end = 1;
751				state = -1;
752				break;
753			case 0x10:	/* VPD-R */
754				alloc = 8;
755				off = 0;
756				cfg->vpd.vpd_ros = malloc(alloc *
757				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
758				    M_WAITOK);
759				state = 2;
760				break;
761			case 0x11:	/* VPD-W */
762				alloc = 8;
763				off = 0;
764				cfg->vpd.vpd_w = malloc(alloc *
765				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
766				    M_WAITOK);
767				state = 5;
768				break;
769			default:	/* Invalid data, abort */
770				end = 1;
771				continue;
772			}
773			break;
774
775		case 1:	/* Identifier String */
776			cfg->vpd.vpd_ident[i++] = byte;
777			remain--;
778			if (remain == 0)  {
779				cfg->vpd.vpd_ident[i] = '\0';
780				state = 0;
781			}
782			break;
783
784		case 2:	/* VPD-R Keyword Header */
785			if (off == alloc) {
786				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
787				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
788				    M_DEVBUF, M_WAITOK);
789			}
790			cfg->vpd.vpd_ros[off].keyword[0] = byte;
791			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
792			dflen = vpd_nextbyte(&vrs);
793			if (dflen == 0 &&
794			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
795			    2) == 0) {
796				/*
797				 * if this happens, we can't trust the rest
798				 * of the VPD.
799				 */
800				printf("pci%d:%d:%d: bad keyword length: %d\n",
801				    cfg->bus, cfg->slot, cfg->func, dflen);
802				cksumvalid = 0;
803				end = 1;
804				break;
805			} else if (dflen == 0) {
806				cfg->vpd.vpd_ros[off].value = malloc(1 *
807				    sizeof *cfg->vpd.vpd_ros[off].value,
808				    M_DEVBUF, M_WAITOK);
809				cfg->vpd.vpd_ros[off].value[0] = '\x00';
810			} else
811				cfg->vpd.vpd_ros[off].value = malloc(
812				    (dflen + 1) *
813				    sizeof *cfg->vpd.vpd_ros[off].value,
814				    M_DEVBUF, M_WAITOK);
815			remain -= 3;
816			i = 0;
817			/* keep in sync w/ state 3's transistions */
818			if (dflen == 0 && remain == 0)
819				state = 0;
820			else if (dflen == 0)
821				state = 2;
822			else
823				state = 3;
824			break;
825
826		case 3:	/* VPD-R Keyword Value */
827			cfg->vpd.vpd_ros[off].value[i++] = byte;
828			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
829			    "RV", 2) == 0 && cksumvalid == -1) {
830				if (vrs.cksum == 0)
831					cksumvalid = 1;
832				else {
833					printf(
834				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
835					    cfg->bus, cfg->slot, cfg->func,
836					    vrs.cksum);
837					cksumvalid = 0;
838					end = 1;
839					break;
840				}
841			}
842			dflen--;
843			remain--;
844			/* keep in sync w/ state 2's transistions */
845			if (dflen == 0)
846				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
847			if (dflen == 0 && remain == 0) {
848				cfg->vpd.vpd_rocnt = off;
849				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
850				    off * sizeof *cfg->vpd.vpd_ros,
851				    M_DEVBUF, M_WAITOK);
852				state = 0;
853			} else if (dflen == 0)
854				state = 2;
855			break;
856
857		case 4:
858			remain--;
859			if (remain == 0)
860				state = 0;
861			break;
862
863		case 5:	/* VPD-W Keyword Header */
864			if (off == alloc) {
865				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
866				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
867				    M_DEVBUF, M_WAITOK);
868			}
869			cfg->vpd.vpd_w[off].keyword[0] = byte;
870			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
871			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
872			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
873			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
874			    sizeof *cfg->vpd.vpd_w[off].value,
875			    M_DEVBUF, M_WAITOK);
876			remain -= 3;
877			i = 0;
878			/* keep in sync w/ state 6's transistions */
879			if (dflen == 0 && remain == 0)
880				state = 0;
881			else if (dflen == 0)
882				state = 5;
883			else
884				state = 6;
885			break;
886
887		case 6:	/* VPD-W Keyword Value */
888			cfg->vpd.vpd_w[off].value[i++] = byte;
889			dflen--;
890			remain--;
891			/* keep in sync w/ state 5's transistions */
892			if (dflen == 0)
893				cfg->vpd.vpd_w[off++].value[i++] = '\0';
894			if (dflen == 0 && remain == 0) {
895				cfg->vpd.vpd_wcnt = off;
896				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
897				    off * sizeof *cfg->vpd.vpd_w,
898				    M_DEVBUF, M_WAITOK);
899				state = 0;
900			} else if (dflen == 0)
901				state = 5;
902			break;
903
904		default:
905			printf("pci%d:%d:%d: invalid state: %d\n",
906			    cfg->bus, cfg->slot, cfg->func, state);
907			end = 1;
908			break;
909		}
910	}
911
912	if (cksumvalid == 0) {
913		/* read-only data bad, clean up */
914		for (; off; off--)
915			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
916
917		free(cfg->vpd.vpd_ros, M_DEVBUF);
918		cfg->vpd.vpd_ros = NULL;
919	}
920	cfg->vpd.vpd_cached = 1;
921#undef REG
922#undef WREG
923}
924
925int
926pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
927{
928	struct pci_devinfo *dinfo = device_get_ivars(child);
929	pcicfgregs *cfg = &dinfo->cfg;
930
931	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
932		pci_read_vpd(device_get_parent(dev), cfg);
933
934	*identptr = cfg->vpd.vpd_ident;
935
936	if (*identptr == NULL)
937		return (ENXIO);
938
939	return (0);
940}
941
942int
943pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
944	const char **vptr)
945{
946	struct pci_devinfo *dinfo = device_get_ivars(child);
947	pcicfgregs *cfg = &dinfo->cfg;
948	int i;
949
950	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
951		pci_read_vpd(device_get_parent(dev), cfg);
952
953	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
954		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
955		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
956			*vptr = cfg->vpd.vpd_ros[i].value;
957		}
958
959	if (i != cfg->vpd.vpd_rocnt)
960		return (0);
961
962	*vptr = NULL;
963	return (ENXIO);
964}
965
966/*
967 * Return the offset in configuration space of the requested extended
968 * capability entry or 0 if the specified capability was not found.
969 */
970int
971pci_find_extcap_method(device_t dev, device_t child, int capability,
972    int *capreg)
973{
974	struct pci_devinfo *dinfo = device_get_ivars(child);
975	pcicfgregs *cfg = &dinfo->cfg;
976	u_int32_t status;
977	u_int8_t ptr;
978
979	/*
980	 * Check the CAP_LIST bit of the PCI status register first.
981	 */
982	status = pci_read_config(child, PCIR_STATUS, 2);
983	if (!(status & PCIM_STATUS_CAPPRESENT))
984		return (ENXIO);
985
986	/*
987	 * Determine the start pointer of the capabilities list.
988	 */
989	switch (cfg->hdrtype & PCIM_HDRTYPE) {
990	case 0:
991	case 1:
992		ptr = PCIR_CAP_PTR;
993		break;
994	case 2:
995		ptr = PCIR_CAP_PTR_2;
996		break;
997	default:
998		/* XXX: panic? */
999		return (ENXIO);		/* no extended capabilities support */
1000	}
1001	ptr = pci_read_config(child, ptr, 1);
1002
1003	/*
1004	 * Traverse the capabilities list.
1005	 */
1006	while (ptr != 0) {
1007		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1008			if (capreg != NULL)
1009				*capreg = ptr;
1010			return (0);
1011		}
1012		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1013	}
1014
1015	return (ENOENT);
1016}
1017
1018/*
1019 * Support for MSI-X message interrupts.
1020 */
1021void
1022pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1023{
1024	struct pci_devinfo *dinfo = device_get_ivars(dev);
1025	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1026	uint32_t offset;
1027
1028	KASSERT(msix->msix_table_len > index, ("bogus index"));
1029	offset = msix->msix_table_offset + index * 16;
1030	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1031	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1032	bus_write_4(msix->msix_table_res, offset + 8, data);
1033}
1034
1035void
1036pci_mask_msix(device_t dev, u_int index)
1037{
1038	struct pci_devinfo *dinfo = device_get_ivars(dev);
1039	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1040	uint32_t offset, val;
1041
1042	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1043	offset = msix->msix_table_offset + index * 16 + 12;
1044	val = bus_read_4(msix->msix_table_res, offset);
1045	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1046		val |= PCIM_MSIX_VCTRL_MASK;
1047		bus_write_4(msix->msix_table_res, offset, val);
1048	}
1049}
1050
1051void
1052pci_unmask_msix(device_t dev, u_int index)
1053{
1054	struct pci_devinfo *dinfo = device_get_ivars(dev);
1055	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1056	uint32_t offset, val;
1057
1058	KASSERT(msix->msix_table_len > index, ("bogus index"));
1059	offset = msix->msix_table_offset + index * 16 + 12;
1060	val = bus_read_4(msix->msix_table_res, offset);
1061	if (val & PCIM_MSIX_VCTRL_MASK) {
1062		val &= ~PCIM_MSIX_VCTRL_MASK;
1063		bus_write_4(msix->msix_table_res, offset, val);
1064	}
1065}
1066
1067int
1068pci_pending_msix(device_t dev, u_int index)
1069{
1070	struct pci_devinfo *dinfo = device_get_ivars(dev);
1071	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1072	uint32_t offset, bit;
1073
1074	KASSERT(msix->msix_table_len > index, ("bogus index"));
1075	offset = msix->msix_pba_offset + (index / 32) * 4;
1076	bit = 1 << index % 32;
1077	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1078}
1079
1080/*
1081 * Restore MSI-X registers and table during resume.  If MSI-X is
1082 * enabled then walk the virtual table to restore the actual MSI-X
1083 * table.
1084 */
1085static void
1086pci_resume_msix(device_t dev)
1087{
1088	struct pci_devinfo *dinfo = device_get_ivars(dev);
1089	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1090	struct msix_table_entry *mte;
1091	struct msix_vector *mv;
1092	int i;
1093
1094	if (msix->msix_alloc > 0) {
1095		/* First, mask all vectors. */
1096		for (i = 0; i < msix->msix_msgnum; i++)
1097			pci_mask_msix(dev, i);
1098
1099		/* Second, program any messages with at least one handler. */
1100		for (i = 0; i < msix->msix_table_len; i++) {
1101			mte = &msix->msix_table[i];
1102			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1103				continue;
1104			mv = &msix->msix_vectors[mte->mte_vector - 1];
1105			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1106			pci_unmask_msix(dev, i);
1107		}
1108	}
1109	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1110	    msix->msix_ctrl, 2);
1111}
1112
1113/*
1114 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1115 * returned in *count.  After this function returns, each message will be
1116 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1117 */
1118int
1119pci_alloc_msix_method(device_t dev, device_t child, int *count)
1120{
1121	struct pci_devinfo *dinfo = device_get_ivars(child);
1122	pcicfgregs *cfg = &dinfo->cfg;
1123	struct resource_list_entry *rle;
1124	int actual, error, i, irq, max;
1125
1126	/* Don't let count == 0 get us into trouble. */
1127	if (*count == 0)
1128		return (EINVAL);
1129
1130	/* If rid 0 is allocated, then fail. */
1131	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1132	if (rle != NULL && rle->res != NULL)
1133		return (ENXIO);
1134
1135	/* Already have allocated messages? */
1136	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1137		return (ENXIO);
1138
1139	/* If MSI is blacklisted for this system, fail. */
1140	if (pci_msi_blacklisted())
1141		return (ENXIO);
1142
1143	/* MSI-X capability present? */
1144	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1145		return (ENODEV);
1146
1147	/* Make sure the appropriate BARs are mapped. */
1148	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1149	    cfg->msix.msix_table_bar);
1150	if (rle == NULL || rle->res == NULL ||
1151	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1152		return (ENXIO);
1153	cfg->msix.msix_table_res = rle->res;
1154	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1155		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1156		    cfg->msix.msix_pba_bar);
1157		if (rle == NULL || rle->res == NULL ||
1158		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1159			return (ENXIO);
1160	}
1161	cfg->msix.msix_pba_res = rle->res;
1162
1163	if (bootverbose)
1164		device_printf(child,
1165		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1166		    *count, cfg->msix.msix_msgnum);
1167	max = min(*count, cfg->msix.msix_msgnum);
1168	for (i = 0; i < max; i++) {
1169		/* Allocate a message. */
1170		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1171		if (error)
1172			break;
1173		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1174		    irq, 1);
1175	}
1176	actual = i;
1177
1178	if (bootverbose) {
1179		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1180		if (actual == 1)
1181			device_printf(child, "using IRQ %lu for MSI-X\n",
1182			    rle->start);
1183		else {
1184			int run;
1185
1186			/*
1187			 * Be fancy and try to print contiguous runs of
1188			 * IRQ values as ranges.  'irq' is the previous IRQ.
1189			 * 'run' is true if we are in a range.
1190			 */
1191			device_printf(child, "using IRQs %lu", rle->start);
1192			irq = rle->start;
1193			run = 0;
1194			for (i = 1; i < actual; i++) {
1195				rle = resource_list_find(&dinfo->resources,
1196				    SYS_RES_IRQ, i + 1);
1197
1198				/* Still in a run? */
1199				if (rle->start == irq + 1) {
1200					run = 1;
1201					irq++;
1202					continue;
1203				}
1204
1205				/* Finish previous range. */
1206				if (run) {
1207					printf("-%d", irq);
1208					run = 0;
1209				}
1210
1211				/* Start new range. */
1212				printf(",%lu", rle->start);
1213				irq = rle->start;
1214			}
1215
1216			/* Unfinished range? */
1217			if (run)
1218				printf("-%d", irq);
1219			printf(" for MSI-X\n");
1220		}
1221	}
1222
1223	/* Mask all vectors. */
1224	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1225		pci_mask_msix(child, i);
1226
1227	/* Allocate and initialize vector data and virtual table. */
1228	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1229	    M_DEVBUF, M_WAITOK | M_ZERO);
1230	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1231	    M_DEVBUF, M_WAITOK | M_ZERO);
1232	for (i = 0; i < actual; i++) {
1233		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1234		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1235		cfg->msix.msix_table[i].mte_vector = i + 1;
1236	}
1237
1238	/* Update control register to enable MSI-X. */
1239	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1240	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1241	    cfg->msix.msix_ctrl, 2);
1242
1243	/* Update counts of alloc'd messages. */
1244	cfg->msix.msix_alloc = actual;
1245	cfg->msix.msix_table_len = actual;
1246	*count = actual;
1247	return (0);
1248}
1249
1250/*
1251 * By default, pci_alloc_msix() will assign the allocated IRQ
1252 * resources consecutively to the first N messages in the MSI-X table.
1253 * However, device drivers may want to use different layouts if they
1254 * either receive fewer messages than they asked for, or they wish to
1255 * populate the MSI-X table sparsely.  This method allows the driver
1256 * to specify what layout it wants.  It must be called after a
1257 * successful pci_alloc_msix() but before any of the associated
1258 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1259 *
1260 * The 'vectors' array contains 'count' message vectors.  The array
1261 * maps directly to the MSI-X table in that index 0 in the array
1262 * specifies the vector for the first message in the MSI-X table, etc.
1263 * The vector value in each array index can either be 0 to indicate
1264 * that no vector should be assigned to a message slot, or it can be a
1265 * number from 1 to N (where N is the count returned from a
1266 * succcessful call to pci_alloc_msix()) to indicate which message
1267 * vector (IRQ) to be used for the corresponding message.
1268 *
1269 * On successful return, each message with a non-zero vector will have
1270 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1271 * 1.  Additionally, if any of the IRQs allocated via the previous
1272 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1273 * will be freed back to the system automatically.
1274 *
1275 * For example, suppose a driver has a MSI-X table with 6 messages and
1276 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1277 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1278 * C.  After the call to pci_alloc_msix(), the device will be setup to
1279 * have an MSI-X table of ABC--- (where - means no vector assigned).
1280 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1281 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1282 * be freed back to the system.  This device will also have valid
1283 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1284 *
1285 * In any case, the SYS_RES_IRQ rid X will always map to the message
1286 * at MSI-X table index X - 1 and will only be valid if a vector is
1287 * assigned to that table entry.
1288 */
1289int
1290pci_remap_msix_method(device_t dev, device_t child, int count,
1291    const u_int *vectors)
1292{
1293	struct pci_devinfo *dinfo = device_get_ivars(child);
1294	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1295	struct resource_list_entry *rle;
1296	int i, irq, j, *used;
1297
1298	/*
1299	 * Have to have at least one message in the table but the
1300	 * table can't be bigger than the actual MSI-X table in the
1301	 * device.
1302	 */
1303	if (count == 0 || count > msix->msix_msgnum)
1304		return (EINVAL);
1305
1306	/* Sanity check the vectors. */
1307	for (i = 0; i < count; i++)
1308		if (vectors[i] > msix->msix_alloc)
1309			return (EINVAL);
1310
1311	/*
1312	 * Make sure there aren't any holes in the vectors to be used.
1313	 * It's a big pain to support it, and it doesn't really make
1314	 * sense anyway.  Also, at least one vector must be used.
1315	 */
1316	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1317	    M_ZERO);
1318	for (i = 0; i < count; i++)
1319		if (vectors[i] != 0)
1320			used[vectors[i] - 1] = 1;
1321	for (i = 0; i < msix->msix_alloc - 1; i++)
1322		if (used[i] == 0 && used[i + 1] == 1) {
1323			free(used, M_DEVBUF);
1324			return (EINVAL);
1325		}
1326	if (used[0] != 1) {
1327		free(used, M_DEVBUF);
1328		return (EINVAL);
1329	}
1330
1331	/* Make sure none of the resources are allocated. */
1332	for (i = 0; i < msix->msix_table_len; i++) {
1333		if (msix->msix_table[i].mte_vector == 0)
1334			continue;
1335		if (msix->msix_table[i].mte_handlers > 0)
1336			return (EBUSY);
1337		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1338		KASSERT(rle != NULL, ("missing resource"));
1339		if (rle->res != NULL)
1340			return (EBUSY);
1341	}
1342
1343	/* Free the existing resource list entries. */
1344	for (i = 0; i < msix->msix_table_len; i++) {
1345		if (msix->msix_table[i].mte_vector == 0)
1346			continue;
1347		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1348	}
1349
1350	/*
1351	 * Build the new virtual table keeping track of which vectors are
1352	 * used.
1353	 */
1354	free(msix->msix_table, M_DEVBUF);
1355	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1356	    M_DEVBUF, M_WAITOK | M_ZERO);
1357	for (i = 0; i < count; i++)
1358		msix->msix_table[i].mte_vector = vectors[i];
1359	msix->msix_table_len = count;
1360
1361	/* Free any unused IRQs and resize the vectors array if necessary. */
1362	j = msix->msix_alloc - 1;
1363	if (used[j] == 0) {
1364		struct msix_vector *vec;
1365
1366		while (used[j] == 0) {
1367			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1368			    msix->msix_vectors[j].mv_irq);
1369			j--;
1370		}
1371		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1372		    M_WAITOK);
1373		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1374		    (j + 1));
1375		free(msix->msix_vectors, M_DEVBUF);
1376		msix->msix_vectors = vec;
1377		msix->msix_alloc = j + 1;
1378	}
1379	free(used, M_DEVBUF);
1380
1381	/* Map the IRQs onto the rids. */
1382	for (i = 0; i < count; i++) {
1383		if (vectors[i] == 0)
1384			continue;
1385		irq = msix->msix_vectors[vectors[i]].mv_irq;
1386		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1387		    irq, 1);
1388	}
1389
1390	if (bootverbose) {
1391		device_printf(child, "Remapped MSI-X IRQs as: ");
1392		for (i = 0; i < count; i++) {
1393			if (i != 0)
1394				printf(", ");
1395			if (vectors[i] == 0)
1396				printf("---");
1397			else
1398				printf("%d",
1399				    msix->msix_vectors[vectors[i]].mv_irq);
1400		}
1401		printf("\n");
1402	}
1403
1404	return (0);
1405}
1406
1407static int
1408pci_release_msix(device_t dev, device_t child)
1409{
1410	struct pci_devinfo *dinfo = device_get_ivars(child);
1411	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1412	struct resource_list_entry *rle;
1413	int i;
1414
1415	/* Do we have any messages to release? */
1416	if (msix->msix_alloc == 0)
1417		return (ENODEV);
1418
1419	/* Make sure none of the resources are allocated. */
1420	for (i = 0; i < msix->msix_table_len; i++) {
1421		if (msix->msix_table[i].mte_vector == 0)
1422			continue;
1423		if (msix->msix_table[i].mte_handlers > 0)
1424			return (EBUSY);
1425		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1426		KASSERT(rle != NULL, ("missing resource"));
1427		if (rle->res != NULL)
1428			return (EBUSY);
1429	}
1430
1431	/* Update control register to disable MSI-X. */
1432	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1433	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1434	    msix->msix_ctrl, 2);
1435
1436	/* Free the resource list entries. */
1437	for (i = 0; i < msix->msix_table_len; i++) {
1438		if (msix->msix_table[i].mte_vector == 0)
1439			continue;
1440		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1441	}
1442	free(msix->msix_table, M_DEVBUF);
1443	msix->msix_table_len = 0;
1444
1445	/* Release the IRQs. */
1446	for (i = 0; i < msix->msix_alloc; i++)
1447		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1448		    msix->msix_vectors[i].mv_irq);
1449	free(msix->msix_vectors, M_DEVBUF);
1450	msix->msix_alloc = 0;
1451	return (0);
1452}
1453
1454/*
1455 * Return the max supported MSI-X messages this device supports.
1456 * Basically, assuming the MD code can alloc messages, this function
1457 * should return the maximum value that pci_alloc_msix() can return.
1458 * Thus, it is subject to the tunables, etc.
1459 */
1460int
1461pci_msix_count_method(device_t dev, device_t child)
1462{
1463	struct pci_devinfo *dinfo = device_get_ivars(child);
1464	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465
1466	if (pci_do_msix && msix->msix_location != 0)
1467		return (msix->msix_msgnum);
1468	return (0);
1469}
1470
1471/*
1472 * Support for MSI message signalled interrupts.
1473 */
1474void
1475pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1476{
1477	struct pci_devinfo *dinfo = device_get_ivars(dev);
1478	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1479
1480	/* Write data and address values. */
1481	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1482	    address & 0xffffffff, 4);
1483	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1484		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1485		    address >> 32, 4);
1486		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1487		    data, 2);
1488	} else
1489		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1490		    2);
1491
1492	/* Enable MSI in the control register. */
1493	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1494	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1495	    2);
1496}
1497
1498void
1499pci_disable_msi(device_t dev)
1500{
1501	struct pci_devinfo *dinfo = device_get_ivars(dev);
1502	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1503
1504	/* Disable MSI in the control register. */
1505	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1506	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1507	    2);
1508}
1509
1510/*
1511 * Restore MSI registers during resume.  If MSI is enabled then
1512 * restore the data and address registers in addition to the control
1513 * register.
1514 */
1515static void
1516pci_resume_msi(device_t dev)
1517{
1518	struct pci_devinfo *dinfo = device_get_ivars(dev);
1519	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1520	uint64_t address;
1521	uint16_t data;
1522
1523	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1524		address = msi->msi_addr;
1525		data = msi->msi_data;
1526		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1527		    address & 0xffffffff, 4);
1528		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1529			pci_write_config(dev, msi->msi_location +
1530			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1531			pci_write_config(dev, msi->msi_location +
1532			    PCIR_MSI_DATA_64BIT, data, 2);
1533		} else
1534			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1535			    data, 2);
1536	}
1537	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1538	    2);
1539}
1540
1541int
1542pci_remap_msi_irq(device_t dev, u_int irq)
1543{
1544	struct pci_devinfo *dinfo = device_get_ivars(dev);
1545	pcicfgregs *cfg = &dinfo->cfg;
1546	struct resource_list_entry *rle;
1547	struct msix_table_entry *mte;
1548	struct msix_vector *mv;
1549	device_t bus;
1550	uint64_t addr;
1551	uint32_t data;
1552	int error, i, j;
1553
1554	bus = device_get_parent(dev);
1555
1556	/*
1557	 * Handle MSI first.  We try to find this IRQ among our list
1558	 * of MSI IRQs.  If we find it, we request updated address and
1559	 * data registers and apply the results.
1560	 */
1561	if (cfg->msi.msi_alloc > 0) {
1562
1563		/* If we don't have any active handlers, nothing to do. */
1564		if (cfg->msi.msi_handlers == 0)
1565			return (0);
1566		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1567			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1568			    i + 1);
1569			if (rle->start == irq) {
1570				error = PCIB_MAP_MSI(device_get_parent(bus),
1571				    dev, irq, &addr, &data);
1572				if (error)
1573					return (error);
1574				pci_disable_msi(dev);
1575				dinfo->cfg.msi.msi_addr = addr;
1576				dinfo->cfg.msi.msi_data = data;
1577				pci_enable_msi(dev, addr, data);
1578				return (0);
1579			}
1580		}
1581		return (ENOENT);
1582	}
1583
1584	/*
1585	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1586	 * we request the updated mapping info.  If that works, we go
1587	 * through all the slots that use this IRQ and update them.
1588	 */
1589	if (cfg->msix.msix_alloc > 0) {
1590		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1591			mv = &cfg->msix.msix_vectors[i];
1592			if (mv->mv_irq == irq) {
1593				error = PCIB_MAP_MSI(device_get_parent(bus),
1594				    dev, irq, &addr, &data);
1595				if (error)
1596					return (error);
1597				mv->mv_address = addr;
1598				mv->mv_data = data;
1599				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1600					mte = &cfg->msix.msix_table[j];
1601					if (mte->mte_vector != i + 1)
1602						continue;
1603					if (mte->mte_handlers == 0)
1604						continue;
1605					pci_mask_msix(dev, j);
1606					pci_enable_msix(dev, j, addr, data);
1607					pci_unmask_msix(dev, j);
1608				}
1609			}
1610		}
1611		return (ENOENT);
1612	}
1613
1614	return (ENOENT);
1615}
1616
1617/*
1618 * Returns true if the specified device is blacklisted because MSI
1619 * doesn't work.
1620 */
1621int
1622pci_msi_device_blacklisted(device_t dev)
1623{
1624	struct pci_quirk *q;
1625
1626	if (!pci_honor_msi_blacklist)
1627		return (0);
1628
1629	for (q = &pci_quirks[0]; q->devid; q++) {
1630		if (q->devid == pci_get_devid(dev) &&
1631		    q->type == PCI_QUIRK_DISABLE_MSI)
1632			return (1);
1633	}
1634	return (0);
1635}
1636
1637/*
1638 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1639 * we just check for blacklisted chipsets as represented by the
1640 * host-PCI bridge at device 0:0:0.  In the future, it may become
1641 * necessary to check other system attributes, such as the kenv values
1642 * that give the motherboard manufacturer and model number.
1643 */
1644static int
1645pci_msi_blacklisted(void)
1646{
1647	device_t dev;
1648
1649	if (!pci_honor_msi_blacklist)
1650		return (0);
1651
1652	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1653	if (!(pcie_chipset || pcix_chipset))
1654		return (1);
1655
1656	dev = pci_find_bsf(0, 0, 0);
1657	if (dev != NULL)
1658		return (pci_msi_device_blacklisted(dev));
1659	return (0);
1660}
1661
1662/*
1663 * Attempt to allocate *count MSI messages.  The actual number allocated is
1664 * returned in *count.  After this function returns, each message will be
1665 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1666 */
1667int
1668pci_alloc_msi_method(device_t dev, device_t child, int *count)
1669{
1670	struct pci_devinfo *dinfo = device_get_ivars(child);
1671	pcicfgregs *cfg = &dinfo->cfg;
1672	struct resource_list_entry *rle;
1673	int actual, error, i, irqs[32];
1674	uint16_t ctrl;
1675
1676	/* Don't let count == 0 get us into trouble. */
1677	if (*count == 0)
1678		return (EINVAL);
1679
1680	/* If rid 0 is allocated, then fail. */
1681	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1682	if (rle != NULL && rle->res != NULL)
1683		return (ENXIO);
1684
1685	/* Already have allocated messages? */
1686	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1687		return (ENXIO);
1688
1689	/* If MSI is blacklisted for this system, fail. */
1690	if (pci_msi_blacklisted())
1691		return (ENXIO);
1692
1693	/* MSI capability present? */
1694	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1695		return (ENODEV);
1696
1697	if (bootverbose)
1698		device_printf(child,
1699		    "attempting to allocate %d MSI vectors (%d supported)\n",
1700		    *count, cfg->msi.msi_msgnum);
1701
1702	/* Don't ask for more than the device supports. */
1703	actual = min(*count, cfg->msi.msi_msgnum);
1704
1705	/* Don't ask for more than 32 messages. */
1706	actual = min(actual, 32);
1707
1708	/* MSI requires power of 2 number of messages. */
1709	if (!powerof2(actual))
1710		return (EINVAL);
1711
1712	for (;;) {
1713		/* Try to allocate N messages. */
1714		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1715		    cfg->msi.msi_msgnum, irqs);
1716		if (error == 0)
1717			break;
1718		if (actual == 1)
1719			return (error);
1720
1721		/* Try N / 2. */
1722		actual >>= 1;
1723	}
1724
1725	/*
1726	 * We now have N actual messages mapped onto SYS_RES_IRQ
1727	 * resources in the irqs[] array, so add new resources
1728	 * starting at rid 1.
1729	 */
1730	for (i = 0; i < actual; i++)
1731		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1732		    irqs[i], irqs[i], 1);
1733
1734	if (bootverbose) {
1735		if (actual == 1)
1736			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1737		else {
1738			int run;
1739
1740			/*
1741			 * Be fancy and try to print contiguous runs
1742			 * of IRQ values as ranges.  'run' is true if
1743			 * we are in a range.
1744			 */
1745			device_printf(child, "using IRQs %d", irqs[0]);
1746			run = 0;
1747			for (i = 1; i < actual; i++) {
1748
1749				/* Still in a run? */
1750				if (irqs[i] == irqs[i - 1] + 1) {
1751					run = 1;
1752					continue;
1753				}
1754
1755				/* Finish previous range. */
1756				if (run) {
1757					printf("-%d", irqs[i - 1]);
1758					run = 0;
1759				}
1760
1761				/* Start new range. */
1762				printf(",%d", irqs[i]);
1763			}
1764
1765			/* Unfinished range? */
1766			if (run)
1767				printf("-%d", irqs[actual - 1]);
1768			printf(" for MSI\n");
1769		}
1770	}
1771
1772	/* Update control register with actual count. */
1773	ctrl = cfg->msi.msi_ctrl;
1774	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1775	ctrl |= (ffs(actual) - 1) << 4;
1776	cfg->msi.msi_ctrl = ctrl;
1777	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1778
1779	/* Update counts of alloc'd messages. */
1780	cfg->msi.msi_alloc = actual;
1781	cfg->msi.msi_handlers = 0;
1782	*count = actual;
1783	return (0);
1784}
1785
1786/* Release the MSI messages associated with this device. */
1787int
1788pci_release_msi_method(device_t dev, device_t child)
1789{
1790	struct pci_devinfo *dinfo = device_get_ivars(child);
1791	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1792	struct resource_list_entry *rle;
1793	int error, i, irqs[32];
1794
1795	/* Try MSI-X first. */
1796	error = pci_release_msix(dev, child);
1797	if (error != ENODEV)
1798		return (error);
1799
1800	/* Do we have any messages to release? */
1801	if (msi->msi_alloc == 0)
1802		return (ENODEV);
1803	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1804
1805	/* Make sure none of the resources are allocated. */
1806	if (msi->msi_handlers > 0)
1807		return (EBUSY);
1808	for (i = 0; i < msi->msi_alloc; i++) {
1809		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1810		KASSERT(rle != NULL, ("missing MSI resource"));
1811		if (rle->res != NULL)
1812			return (EBUSY);
1813		irqs[i] = rle->start;
1814	}
1815
1816	/* Update control register with 0 count. */
1817	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1818	    ("%s: MSI still enabled", __func__));
1819	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1820	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1821	    msi->msi_ctrl, 2);
1822
1823	/* Release the messages. */
1824	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1825	for (i = 0; i < msi->msi_alloc; i++)
1826		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1827
1828	/* Update alloc count. */
1829	msi->msi_alloc = 0;
1830	msi->msi_addr = 0;
1831	msi->msi_data = 0;
1832	return (0);
1833}
1834
1835/*
1836 * Return the max supported MSI messages this device supports.
1837 * Basically, assuming the MD code can alloc messages, this function
1838 * should return the maximum value that pci_alloc_msi() can return.
1839 * Thus, it is subject to the tunables, etc.
1840 */
1841int
1842pci_msi_count_method(device_t dev, device_t child)
1843{
1844	struct pci_devinfo *dinfo = device_get_ivars(child);
1845	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1846
1847	if (pci_do_msi && msi->msi_location != 0)
1848		return (msi->msi_msgnum);
1849	return (0);
1850}
1851
1852/* free pcicfgregs structure and all depending data structures */
1853
1854int
1855pci_freecfg(struct pci_devinfo *dinfo)
1856{
1857	struct devlist *devlist_head;
1858	int i;
1859
1860	devlist_head = &pci_devq;
1861
1862	if (dinfo->cfg.vpd.vpd_reg) {
1863		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1864		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1865			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1866		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1867		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1868			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1869		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1870	}
1871	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1872	free(dinfo, M_DEVBUF);
1873
1874	/* increment the generation count */
1875	pci_generation++;
1876
1877	/* we're losing one device */
1878	pci_numdevs--;
1879	return (0);
1880}
1881
1882/*
1883 * PCI power manangement
1884 */
1885int
1886pci_set_powerstate_method(device_t dev, device_t child, int state)
1887{
1888	struct pci_devinfo *dinfo = device_get_ivars(child);
1889	pcicfgregs *cfg = &dinfo->cfg;
1890	uint16_t status;
1891	int result, oldstate, highest, delay;
1892
1893	if (cfg->pp.pp_cap == 0)
1894		return (EOPNOTSUPP);
1895
1896	/*
1897	 * Optimize a no state change request away.  While it would be OK to
1898	 * write to the hardware in theory, some devices have shown odd
1899	 * behavior when going from D3 -> D3.
1900	 */
1901	oldstate = pci_get_powerstate(child);
1902	if (oldstate == state)
1903		return (0);
1904
1905	/*
1906	 * The PCI power management specification states that after a state
1907	 * transition between PCI power states, system software must
1908	 * guarantee a minimal delay before the function accesses the device.
1909	 * Compute the worst case delay that we need to guarantee before we
1910	 * access the device.  Many devices will be responsive much more
1911	 * quickly than this delay, but there are some that don't respond
1912	 * instantly to state changes.  Transitions to/from D3 state require
1913	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1914	 * is done below with DELAY rather than a sleeper function because
1915	 * this function can be called from contexts where we cannot sleep.
1916	 */
1917	highest = (oldstate > state) ? oldstate : state;
1918	if (highest == PCI_POWERSTATE_D3)
1919	    delay = 10000;
1920	else if (highest == PCI_POWERSTATE_D2)
1921	    delay = 200;
1922	else
1923	    delay = 0;
1924	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1925	    & ~PCIM_PSTAT_DMASK;
1926	result = 0;
1927	switch (state) {
1928	case PCI_POWERSTATE_D0:
1929		status |= PCIM_PSTAT_D0;
1930		break;
1931	case PCI_POWERSTATE_D1:
1932		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1933			return (EOPNOTSUPP);
1934		status |= PCIM_PSTAT_D1;
1935		break;
1936	case PCI_POWERSTATE_D2:
1937		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1938			return (EOPNOTSUPP);
1939		status |= PCIM_PSTAT_D2;
1940		break;
1941	case PCI_POWERSTATE_D3:
1942		status |= PCIM_PSTAT_D3;
1943		break;
1944	default:
1945		return (EINVAL);
1946	}
1947
1948	if (bootverbose)
1949		printf(
1950		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1951		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1952		    oldstate, state);
1953
1954	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1955	if (delay)
1956		DELAY(delay);
1957	return (0);
1958}
1959
1960int
1961pci_get_powerstate_method(device_t dev, device_t child)
1962{
1963	struct pci_devinfo *dinfo = device_get_ivars(child);
1964	pcicfgregs *cfg = &dinfo->cfg;
1965	uint16_t status;
1966	int result;
1967
1968	if (cfg->pp.pp_cap != 0) {
1969		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1970		switch (status & PCIM_PSTAT_DMASK) {
1971		case PCIM_PSTAT_D0:
1972			result = PCI_POWERSTATE_D0;
1973			break;
1974		case PCIM_PSTAT_D1:
1975			result = PCI_POWERSTATE_D1;
1976			break;
1977		case PCIM_PSTAT_D2:
1978			result = PCI_POWERSTATE_D2;
1979			break;
1980		case PCIM_PSTAT_D3:
1981			result = PCI_POWERSTATE_D3;
1982			break;
1983		default:
1984			result = PCI_POWERSTATE_UNKNOWN;
1985			break;
1986		}
1987	} else {
1988		/* No support, device is always at D0 */
1989		result = PCI_POWERSTATE_D0;
1990	}
1991	return (result);
1992}
1993
1994/*
1995 * Some convenience functions for PCI device drivers.
1996 */
1997
1998static __inline void
1999pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2000{
2001	uint16_t	command;
2002
2003	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2004	command |= bit;
2005	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2006}
2007
2008static __inline void
2009pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2010{
2011	uint16_t	command;
2012
2013	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2014	command &= ~bit;
2015	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2016}
2017
2018int
2019pci_enable_busmaster_method(device_t dev, device_t child)
2020{
2021	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2022	return (0);
2023}
2024
2025int
2026pci_disable_busmaster_method(device_t dev, device_t child)
2027{
2028	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2029	return (0);
2030}
2031
2032int
2033pci_enable_io_method(device_t dev, device_t child, int space)
2034{
2035	uint16_t command;
2036	uint16_t bit;
2037	char *error;
2038
2039	bit = 0;
2040	error = NULL;
2041
2042	switch(space) {
2043	case SYS_RES_IOPORT:
2044		bit = PCIM_CMD_PORTEN;
2045		error = "port";
2046		break;
2047	case SYS_RES_MEMORY:
2048		bit = PCIM_CMD_MEMEN;
2049		error = "memory";
2050		break;
2051	default:
2052		return (EINVAL);
2053	}
2054	pci_set_command_bit(dev, child, bit);
2055	/* Some devices seem to need a brief stall here, what do to? */
2056	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2057	if (command & bit)
2058		return (0);
2059	device_printf(child, "failed to enable %s mapping!\n", error);
2060	return (ENXIO);
2061}
2062
2063int
2064pci_disable_io_method(device_t dev, device_t child, int space)
2065{
2066	uint16_t command;
2067	uint16_t bit;
2068	char *error;
2069
2070	bit = 0;
2071	error = NULL;
2072
2073	switch(space) {
2074	case SYS_RES_IOPORT:
2075		bit = PCIM_CMD_PORTEN;
2076		error = "port";
2077		break;
2078	case SYS_RES_MEMORY:
2079		bit = PCIM_CMD_MEMEN;
2080		error = "memory";
2081		break;
2082	default:
2083		return (EINVAL);
2084	}
2085	pci_clear_command_bit(dev, child, bit);
2086	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2087	if (command & bit) {
2088		device_printf(child, "failed to disable %s mapping!\n", error);
2089		return (ENXIO);
2090	}
2091	return (0);
2092}
2093
2094/*
2095 * New style pci driver.  Parent device is either a pci-host-bridge or a
2096 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2097 */
2098
2099void
2100pci_print_verbose(struct pci_devinfo *dinfo)
2101{
2102
2103	if (bootverbose) {
2104		pcicfgregs *cfg = &dinfo->cfg;
2105
2106		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2107		    cfg->vendor, cfg->device, cfg->revid);
2108		printf("\tbus=%d, slot=%d, func=%d\n",
2109		    cfg->bus, cfg->slot, cfg->func);
2110		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2111		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2112		    cfg->mfdev);
2113		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2114		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2115		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2116		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2117		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2118		if (cfg->intpin > 0)
2119			printf("\tintpin=%c, irq=%d\n",
2120			    cfg->intpin +'a' -1, cfg->intline);
2121		if (cfg->pp.pp_cap) {
2122			uint16_t status;
2123
2124			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2125			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2126			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2127			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2128			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2129			    status & PCIM_PSTAT_DMASK);
2130		}
2131		if (cfg->msi.msi_location) {
2132			int ctrl;
2133
2134			ctrl = cfg->msi.msi_ctrl;
2135			printf("\tMSI supports %d message%s%s%s\n",
2136			    cfg->msi.msi_msgnum,
2137			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2138			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2139			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2140		}
2141		if (cfg->msix.msix_location) {
2142			printf("\tMSI-X supports %d message%s ",
2143			    cfg->msix.msix_msgnum,
2144			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2145			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2146				printf("in map 0x%x\n",
2147				    cfg->msix.msix_table_bar);
2148			else
2149				printf("in maps 0x%x and 0x%x\n",
2150				    cfg->msix.msix_table_bar,
2151				    cfg->msix.msix_pba_bar);
2152		}
2153	}
2154}
2155
2156static int
2157pci_porten(device_t pcib, int b, int s, int f)
2158{
2159	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2160		& PCIM_CMD_PORTEN) != 0;
2161}
2162
2163static int
2164pci_memen(device_t pcib, int b, int s, int f)
2165{
2166	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2167		& PCIM_CMD_MEMEN) != 0;
2168}
2169
2170/*
2171 * Add a resource based on a pci map register. Return 1 if the map
2172 * register is a 32bit map register or 2 if it is a 64bit register.
2173 */
2174static int
2175pci_add_map(device_t pcib, device_t bus, device_t dev,
2176    int b, int s, int f, int reg, struct resource_list *rl, int force,
2177    int prefetch)
2178{
2179	uint32_t map;
2180	pci_addr_t base;
2181	pci_addr_t start, end, count;
2182	uint8_t ln2size;
2183	uint8_t ln2range;
2184	uint32_t testval;
2185	uint16_t cmd;
2186	int type;
2187	int barlen;
2188	struct resource *res;
2189
2190	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2191	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2192	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2193	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2194
2195	if (PCI_BAR_MEM(map))
2196		type = SYS_RES_MEMORY;
2197	else
2198		type = SYS_RES_IOPORT;
2199	ln2size = pci_mapsize(testval);
2200	ln2range = pci_maprange(testval);
2201	base = pci_mapbase(map);
2202	barlen = ln2range == 64 ? 2 : 1;
2203
2204	/*
2205	 * For I/O registers, if bottom bit is set, and the next bit up
2206	 * isn't clear, we know we have a BAR that doesn't conform to the
2207	 * spec, so ignore it.  Also, sanity check the size of the data
2208	 * areas to the type of memory involved.  Memory must be at least
2209	 * 16 bytes in size, while I/O ranges must be at least 4.
2210	 */
2211	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2212		return (barlen);
2213	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2214	    (type == SYS_RES_IOPORT && ln2size < 2))
2215		return (barlen);
2216
2217	if (ln2range == 64)
2218		/* Read the other half of a 64bit map register */
2219		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2220	if (bootverbose) {
2221		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2222		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2223		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2224			printf(", port disabled\n");
2225		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2226			printf(", memory disabled\n");
2227		else
2228			printf(", enabled\n");
2229	}
2230
2231	/*
2232	 * If base is 0, then we have problems.  It is best to ignore
2233	 * such entries for the moment.  These will be allocated later if
2234	 * the driver specifically requests them.  However, some
2235	 * removable busses look better when all resources are allocated,
2236	 * so allow '0' to be overriden.
2237	 *
2238	 * Similarly treat maps whose values is the same as the test value
2239	 * read back.  These maps have had all f's written to them by the
2240	 * BIOS in an attempt to disable the resources.
2241	 */
2242	if (!force && (base == 0 || map == testval))
2243		return (barlen);
2244	if ((u_long)base != base) {
2245		device_printf(bus,
2246		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2247		return (barlen);
2248	}
2249
2250	/*
2251	 * This code theoretically does the right thing, but has
2252	 * undesirable side effects in some cases where peripherals
2253	 * respond oddly to having these bits enabled.  Let the user
2254	 * be able to turn them off (since pci_enable_io_modes is 1 by
2255	 * default).
2256	 */
2257	if (pci_enable_io_modes) {
2258		/* Turn on resources that have been left off by a lazy BIOS */
2259		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2260			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2261			cmd |= PCIM_CMD_PORTEN;
2262			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2263		}
2264		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2265			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2266			cmd |= PCIM_CMD_MEMEN;
2267			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2268		}
2269	} else {
2270		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2271			return (barlen);
2272		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2273			return (barlen);
2274	}
2275
2276	count = 1 << ln2size;
2277	if (base == 0 || base == pci_mapbase(testval)) {
2278		start = 0;	/* Let the parent deside */
2279		end = ~0ULL;
2280	} else {
2281		start = base;
2282		end = base + (1 << ln2size) - 1;
2283	}
2284	resource_list_add(rl, type, reg, start, end, count);
2285
2286	/*
2287	 * Not quite sure what to do on failure of allocating the resource
2288	 * since I can postulate several right answers.
2289	 */
2290	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2291	    prefetch ? RF_PREFETCHABLE : 0);
2292	if (res == NULL)
2293		return (barlen);
2294	start = rman_get_start(res);
2295	if ((u_long)start != start) {
2296		/* Wait a minute!  this platform can't do this address. */
2297		device_printf(bus,
2298		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2299		    b, s, f, reg, (uintmax_t)start);
2300		resource_list_release(rl, bus, dev, type, reg, res);
2301		return (barlen);
2302	}
2303	pci_write_config(dev, reg, start, 4);
2304	if (ln2range == 64)
2305		pci_write_config(dev, reg + 4, start >> 32, 4);
2306	return (barlen);
2307}
2308
2309/*
2310 * For ATA devices we need to decide early what addressing mode to use.
2311 * Legacy demands that the primary and secondary ATA ports sits on the
2312 * same addresses that old ISA hardware did. This dictates that we use
2313 * those addresses and ignore the BAR's if we cannot set PCI native
2314 * addressing mode.
2315 */
2316static void
2317pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2318    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2319{
2320	int rid, type, progif;
2321#if 0
2322	/* if this device supports PCI native addressing use it */
2323	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2324	if ((progif & 0x8a) == 0x8a) {
2325		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2326		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2327			printf("Trying ATA native PCI addressing mode\n");
2328			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2329		}
2330	}
2331#endif
2332	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2333	type = SYS_RES_IOPORT;
2334	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2335		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2336		    prefetchmask & (1 << 0));
2337		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2338		    prefetchmask & (1 << 1));
2339	} else {
2340		rid = PCIR_BAR(0);
2341		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2342		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2343		    0);
2344		rid = PCIR_BAR(1);
2345		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2346		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2347		    0);
2348	}
2349	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2350		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2351		    prefetchmask & (1 << 2));
2352		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2353		    prefetchmask & (1 << 3));
2354	} else {
2355		rid = PCIR_BAR(2);
2356		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2357		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2358		    0);
2359		rid = PCIR_BAR(3);
2360		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2361		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2362		    0);
2363	}
2364	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2365	    prefetchmask & (1 << 4));
2366	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2367	    prefetchmask & (1 << 5));
2368}
2369
2370static void
2371pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2372{
2373	struct pci_devinfo *dinfo = device_get_ivars(dev);
2374	pcicfgregs *cfg = &dinfo->cfg;
2375	char tunable_name[64];
2376	int irq;
2377
2378	/* Has to have an intpin to have an interrupt. */
2379	if (cfg->intpin == 0)
2380		return;
2381
2382	/* Let the user override the IRQ with a tunable. */
2383	irq = PCI_INVALID_IRQ;
2384	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2385	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2386	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2387		irq = PCI_INVALID_IRQ;
2388
2389	/*
2390	 * If we didn't get an IRQ via the tunable, then we either use the
2391	 * IRQ value in the intline register or we ask the bus to route an
2392	 * interrupt for us.  If force_route is true, then we only use the
2393	 * value in the intline register if the bus was unable to assign an
2394	 * IRQ.
2395	 */
2396	if (!PCI_INTERRUPT_VALID(irq)) {
2397		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2398			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2399		if (!PCI_INTERRUPT_VALID(irq))
2400			irq = cfg->intline;
2401	}
2402
2403	/* If after all that we don't have an IRQ, just bail. */
2404	if (!PCI_INTERRUPT_VALID(irq))
2405		return;
2406
2407	/* Update the config register if it changed. */
2408	if (irq != cfg->intline) {
2409		cfg->intline = irq;
2410		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2411	}
2412
2413	/* Add this IRQ as rid 0 interrupt resource. */
2414	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2415}
2416
2417void
2418pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2419{
2420	device_t pcib;
2421	struct pci_devinfo *dinfo = device_get_ivars(dev);
2422	pcicfgregs *cfg = &dinfo->cfg;
2423	struct resource_list *rl = &dinfo->resources;
2424	struct pci_quirk *q;
2425	int b, i, f, s;
2426
2427	pcib = device_get_parent(bus);
2428
2429	b = cfg->bus;
2430	s = cfg->slot;
2431	f = cfg->func;
2432
2433	/* ATA devices needs special map treatment */
2434	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2435	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2436	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2437	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2438	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2439		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2440	else
2441		for (i = 0; i < cfg->nummaps;)
2442			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2443			    rl, force, prefetchmask & (1 << i));
2444
2445	/*
2446	 * Add additional, quirked resources.
2447	 */
2448	for (q = &pci_quirks[0]; q->devid; q++) {
2449		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2450		    && q->type == PCI_QUIRK_MAP_REG)
2451			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2452			  force, 0);
2453	}
2454
2455	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2456#ifdef __PCI_REROUTE_INTERRUPT
2457		/*
2458		 * Try to re-route interrupts. Sometimes the BIOS or
2459		 * firmware may leave bogus values in these registers.
2460		 * If the re-route fails, then just stick with what we
2461		 * have.
2462		 */
2463		pci_assign_interrupt(bus, dev, 1);
2464#else
2465		pci_assign_interrupt(bus, dev, 0);
2466#endif
2467	}
2468}
2469
2470void
2471pci_add_children(device_t dev, int busno, size_t dinfo_size)
2472{
2473#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2474	device_t pcib = device_get_parent(dev);
2475	struct pci_devinfo *dinfo;
2476	int maxslots;
2477	int s, f, pcifunchigh;
2478	uint8_t hdrtype;
2479
2480	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2481	    ("dinfo_size too small"));
2482	maxslots = PCIB_MAXSLOTS(pcib);
2483	for (s = 0; s <= maxslots; s++) {
2484		pcifunchigh = 0;
2485		f = 0;
2486		DELAY(1);
2487		hdrtype = REG(PCIR_HDRTYPE, 1);
2488		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2489			continue;
2490		if (hdrtype & PCIM_MFDEV)
2491			pcifunchigh = PCI_FUNCMAX;
2492		for (f = 0; f <= pcifunchigh; f++) {
2493			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2494			if (dinfo != NULL) {
2495				pci_add_child(dev, dinfo);
2496			}
2497		}
2498	}
2499#undef REG
2500}
2501
2502void
2503pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2504{
2505	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2506	device_set_ivars(dinfo->cfg.dev, dinfo);
2507	resource_list_init(&dinfo->resources);
2508	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2509	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2510	pci_print_verbose(dinfo);
2511	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2512}
2513
2514static int
2515pci_probe(device_t dev)
2516{
2517
2518	device_set_desc(dev, "PCI bus");
2519
2520	/* Allow other subclasses to override this driver. */
2521	return (-1000);
2522}
2523
2524static int
2525pci_attach(device_t dev)
2526{
2527	int busno;
2528
2529	/*
2530	 * Since there can be multiple independantly numbered PCI
2531	 * busses on systems with multiple PCI domains, we can't use
2532	 * the unit number to decide which bus we are probing. We ask
2533	 * the parent pcib what our bus number is.
2534	 */
2535	busno = pcib_get_bus(dev);
2536	if (bootverbose)
2537		device_printf(dev, "physical bus=%d\n", busno);
2538
2539	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2540
2541	return (bus_generic_attach(dev));
2542}
2543
2544int
2545pci_suspend(device_t dev)
2546{
2547	int dstate, error, i, numdevs;
2548	device_t acpi_dev, child, *devlist;
2549	struct pci_devinfo *dinfo;
2550
2551	/*
2552	 * Save the PCI configuration space for each child and set the
2553	 * device in the appropriate power state for this sleep state.
2554	 */
2555	acpi_dev = NULL;
2556	if (pci_do_power_resume)
2557		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2558	device_get_children(dev, &devlist, &numdevs);
2559	for (i = 0; i < numdevs; i++) {
2560		child = devlist[i];
2561		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2562		pci_cfg_save(child, dinfo, 0);
2563	}
2564
2565	/* Suspend devices before potentially powering them down. */
2566	error = bus_generic_suspend(dev);
2567	if (error) {
2568		free(devlist, M_TEMP);
2569		return (error);
2570	}
2571
2572	/*
2573	 * Always set the device to D3.  If ACPI suggests a different
2574	 * power state, use it instead.  If ACPI is not present, the
2575	 * firmware is responsible for managing device power.  Skip
2576	 * children who aren't attached since they are powered down
2577	 * separately.  Only manage type 0 devices for now.
2578	 */
2579	for (i = 0; acpi_dev && i < numdevs; i++) {
2580		child = devlist[i];
2581		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2582		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2583			dstate = PCI_POWERSTATE_D3;
2584			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2585			pci_set_powerstate(child, dstate);
2586		}
2587	}
2588	free(devlist, M_TEMP);
2589	return (0);
2590}
2591
2592int
2593pci_resume(device_t dev)
2594{
2595	int i, numdevs;
2596	device_t acpi_dev, child, *devlist;
2597	struct pci_devinfo *dinfo;
2598
2599	/*
2600	 * Set each child to D0 and restore its PCI configuration space.
2601	 */
2602	acpi_dev = NULL;
2603	if (pci_do_power_resume)
2604		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2605	device_get_children(dev, &devlist, &numdevs);
2606	for (i = 0; i < numdevs; i++) {
2607		/*
2608		 * Notify ACPI we're going to D0 but ignore the result.  If
2609		 * ACPI is not present, the firmware is responsible for
2610		 * managing device power.  Only manage type 0 devices for now.
2611		 */
2612		child = devlist[i];
2613		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2614		if (acpi_dev && device_is_attached(child) &&
2615		    dinfo->cfg.hdrtype == 0) {
2616			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2617			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2618		}
2619
2620		/* Now the device is powered up, restore its config space. */
2621		pci_cfg_restore(child, dinfo);
2622	}
2623	free(devlist, M_TEMP);
2624	return (bus_generic_resume(dev));
2625}
2626
2627static void
2628pci_load_vendor_data(void)
2629{
2630	caddr_t vendordata, info;
2631
2632	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2633		info = preload_search_info(vendordata, MODINFO_ADDR);
2634		pci_vendordata = *(char **)info;
2635		info = preload_search_info(vendordata, MODINFO_SIZE);
2636		pci_vendordata_size = *(size_t *)info;
2637		/* terminate the database */
2638		pci_vendordata[pci_vendordata_size] = '\n';
2639	}
2640}
2641
2642void
2643pci_driver_added(device_t dev, driver_t *driver)
2644{
2645	int numdevs;
2646	device_t *devlist;
2647	device_t child;
2648	struct pci_devinfo *dinfo;
2649	int i;
2650
2651	if (bootverbose)
2652		device_printf(dev, "driver added\n");
2653	DEVICE_IDENTIFY(driver, dev);
2654	device_get_children(dev, &devlist, &numdevs);
2655	for (i = 0; i < numdevs; i++) {
2656		child = devlist[i];
2657		if (device_get_state(child) != DS_NOTPRESENT)
2658			continue;
2659		dinfo = device_get_ivars(child);
2660		pci_print_verbose(dinfo);
2661		if (bootverbose)
2662			printf("pci%d:%d:%d: reprobing on driver added\n",
2663			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2664		pci_cfg_restore(child, dinfo);
2665		if (device_probe_and_attach(child) != 0)
2666			pci_cfg_save(child, dinfo, 1);
2667	}
2668	free(devlist, M_TEMP);
2669}
2670
2671int
2672pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
2673    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
2674{
2675	struct pci_devinfo *dinfo;
2676	struct msix_table_entry *mte;
2677	struct msix_vector *mv;
2678	uint64_t addr;
2679	uint32_t data;
2680	void *cookie;
2681	int error, rid;
2682
2683	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
2684	    arg, &cookie);
2685	if (error)
2686		return (error);
2687
2688	/*
2689	 * If this is a direct child, check to see if the interrupt is
2690	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
2691	 * us the address and data register values.  If we fail for some
2692	 * reason, teardown the interrupt handler.
2693	 */
2694	rid = rman_get_rid(irq);
2695	if (device_get_parent(child) == dev && rid > 0) {
2696		dinfo = device_get_ivars(child);
2697		if (dinfo->cfg.msi.msi_alloc > 0) {
2698			if (dinfo->cfg.msi.msi_addr == 0) {
2699				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
2700			    ("MSI has handlers, but vectors not mapped"));
2701				error = PCIB_MAP_MSI(device_get_parent(dev),
2702				    child, rman_get_start(irq), &addr, &data);
2703				if (error)
2704					goto bad;
2705				dinfo->cfg.msi.msi_addr = addr;
2706				dinfo->cfg.msi.msi_data = data;
2707				pci_enable_msi(child, addr, data);
2708			}
2709			dinfo->cfg.msi.msi_handlers++;
2710		} else {
2711			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2712			    ("No MSI or MSI-X interrupts allocated"));
2713			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2714			    ("MSI-X index too high"));
2715			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2716			KASSERT(mte->mte_vector != 0, ("no message vector"));
2717			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
2718			KASSERT(mv->mv_irq == rman_get_start(irq),
2719			    ("IRQ mismatch"));
2720			if (mv->mv_address == 0) {
2721				KASSERT(mte->mte_handlers == 0,
2722		    ("MSI-X table entry has handlers, but vector not mapped"));
2723				error = PCIB_MAP_MSI(device_get_parent(dev),
2724				    child, rman_get_start(irq), &addr, &data);
2725				if (error)
2726					goto bad;
2727				mv->mv_address = addr;
2728				mv->mv_data = data;
2729			}
2730			if (mte->mte_handlers == 0) {
2731				pci_enable_msix(child, rid - 1, mv->mv_address,
2732				    mv->mv_data);
2733				pci_unmask_msix(child, rid - 1);
2734			}
2735			mte->mte_handlers++;
2736		}
2737	bad:
2738		if (error) {
2739			(void)bus_generic_teardown_intr(dev, child, irq,
2740			    cookie);
2741			return (error);
2742		}
2743	}
2744	*cookiep = cookie;
2745	return (0);
2746}
2747
2748int
2749pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2750    void *cookie)
2751{
2752	struct msix_table_entry *mte;
2753	struct resource_list_entry *rle;
2754	struct pci_devinfo *dinfo;
2755	int error, rid;
2756
2757	/*
2758	 * If this is a direct child, check to see if the interrupt is
2759	 * MSI or MSI-X.  If so, decrement the appropriate handlers
2760	 * count and mask the MSI-X message, or disable MSI messages
2761	 * if the count drops to 0.
2762	 */
2763	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2764		return (EINVAL);
2765	rid = rman_get_rid(irq);
2766	if (device_get_parent(child) == dev && rid > 0) {
2767		dinfo = device_get_ivars(child);
2768		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2769		if (rle->res != irq)
2770			return (EINVAL);
2771		if (dinfo->cfg.msi.msi_alloc > 0) {
2772			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2773			    ("MSI-X index too high"));
2774			if (dinfo->cfg.msi.msi_handlers == 0)
2775				return (EINVAL);
2776			dinfo->cfg.msi.msi_handlers--;
2777			if (dinfo->cfg.msi.msi_handlers == 0)
2778				pci_disable_msi(child);
2779		} else {
2780			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2781			    ("No MSI or MSI-X interrupts allocated"));
2782			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2783			    ("MSI-X index too high"));
2784			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2785			if (mte->mte_handlers == 0)
2786				return (EINVAL);
2787			mte->mte_handlers--;
2788			if (mte->mte_handlers == 0)
2789				pci_mask_msix(child, rid - 1);
2790		}
2791	}
2792	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2793	if (device_get_parent(child) == dev && rid > 0)
2794		KASSERT(error == 0,
2795		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2796	return (error);
2797}
2798
2799int
2800pci_print_child(device_t dev, device_t child)
2801{
2802	struct pci_devinfo *dinfo;
2803	struct resource_list *rl;
2804	int retval = 0;
2805
2806	dinfo = device_get_ivars(child);
2807	rl = &dinfo->resources;
2808
2809	retval += bus_print_child_header(dev, child);
2810
2811	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2812	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2813	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2814	if (device_get_flags(dev))
2815		retval += printf(" flags %#x", device_get_flags(dev));
2816
2817	retval += printf(" at device %d.%d", pci_get_slot(child),
2818	    pci_get_function(child));
2819
2820	retval += bus_print_child_footer(dev, child);
2821
2822	return (retval);
2823}
2824
2825static struct
2826{
2827	int	class;
2828	int	subclass;
2829	char	*desc;
2830} pci_nomatch_tab[] = {
2831	{PCIC_OLD,		-1,			"old"},
2832	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
2833	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
2834	{PCIC_STORAGE,		-1,			"mass storage"},
2835	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
2836	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
2837	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
2838	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
2839	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
2840	{PCIC_NETWORK,		-1,			"network"},
2841	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
2842	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
2843	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
2844	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
2845	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
2846	{PCIC_DISPLAY,		-1,			"display"},
2847	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
2848	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
2849	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
2850	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
2851	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
2852	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
2853	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
2854	{PCIC_MEMORY,		-1,			"memory"},
2855	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
2856	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
2857	{PCIC_BRIDGE,		-1,			"bridge"},
2858	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
2859	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
2860	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
2861	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
2862	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
2863	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
2864	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
2865	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
2866	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
2867	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
2868	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
2869	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
2870	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
2871	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
2872	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
2873	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
2874	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
2875	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
2876	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
2877	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
2878	{PCIC_INPUTDEV,		-1,			"input device"},
2879	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
2880	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
2881	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
2882	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
2883	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
2884	{PCIC_DOCKING,		-1,			"docking station"},
2885	{PCIC_PROCESSOR,	-1,			"processor"},
2886	{PCIC_SERIALBUS,	-1,			"serial bus"},
2887	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
2888	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
2889	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
2890	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
2891	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
2892	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
2893	{PCIC_WIRELESS,		-1,			"wireless controller"},
2894	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
2895	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
2896	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
2897	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
2898	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
2899	{PCIC_SATCOM,		-1,			"satellite communication"},
2900	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
2901	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
2902	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
2903	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
2904	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
2905	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
2906	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
2907	{PCIC_DASP,		-1,			"dasp"},
2908	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
2909	{0, 0,		NULL}
2910};
2911
2912void
2913pci_probe_nomatch(device_t dev, device_t child)
2914{
2915	int	i;
2916	char	*cp, *scp, *device;
2917
2918	/*
2919	 * Look for a listing for this device in a loaded device database.
2920	 */
2921	if ((device = pci_describe_device(child)) != NULL) {
2922		device_printf(dev, "<%s>", device);
2923		free(device, M_DEVBUF);
2924	} else {
2925		/*
2926		 * Scan the class/subclass descriptions for a general
2927		 * description.
2928		 */
2929		cp = "unknown";
2930		scp = NULL;
2931		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2932			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2933				if (pci_nomatch_tab[i].subclass == -1) {
2934					cp = pci_nomatch_tab[i].desc;
2935				} else if (pci_nomatch_tab[i].subclass ==
2936				    pci_get_subclass(child)) {
2937					scp = pci_nomatch_tab[i].desc;
2938				}
2939			}
2940		}
2941		device_printf(dev, "<%s%s%s>",
2942		    cp ? cp : "",
2943		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2944		    scp ? scp : "");
2945	}
2946	printf(" at device %d.%d (no driver attached)\n",
2947	    pci_get_slot(child), pci_get_function(child));
2948	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
2949	return;
2950}
2951
2952/*
2953 * Parse the PCI device database, if loaded, and return a pointer to a
2954 * description of the device.
2955 *
2956 * The database is flat text formatted as follows:
2957 *
2958 * Any line not in a valid format is ignored.
2959 * Lines are terminated with newline '\n' characters.
2960 *
2961 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2962 * the vendor name.
2963 *
2964 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2965 * - devices cannot be listed without a corresponding VENDOR line.
2966 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2967 * another TAB, then the device name.
2968 */
2969
2970/*
2971 * Assuming (ptr) points to the beginning of a line in the database,
2972 * return the vendor or device and description of the next entry.
2973 * The value of (vendor) or (device) inappropriate for the entry type
2974 * is set to -1.  Returns nonzero at the end of the database.
2975 *
2976 * Note that this is slightly unrobust in the face of corrupt data;
2977 * we attempt to safeguard against this by spamming the end of the
2978 * database with a newline when we initialise.
2979 */
2980static int
2981pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2982{
2983	char	*cp = *ptr;
2984	int	left;
2985
2986	*device = -1;
2987	*vendor = -1;
2988	**desc = '\0';
2989	for (;;) {
2990		left = pci_vendordata_size - (cp - pci_vendordata);
2991		if (left <= 0) {
2992			*ptr = cp;
2993			return(1);
2994		}
2995
2996		/* vendor entry? */
2997		if (*cp != '\t' &&
2998		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2999			break;
3000		/* device entry? */
3001		if (*cp == '\t' &&
3002		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3003			break;
3004
3005		/* skip to next line */
3006		while (*cp != '\n' && left > 0) {
3007			cp++;
3008			left--;
3009		}
3010		if (*cp == '\n') {
3011			cp++;
3012			left--;
3013		}
3014	}
3015	/* skip to next line */
3016	while (*cp != '\n' && left > 0) {
3017		cp++;
3018		left--;
3019	}
3020	if (*cp == '\n' && left > 0)
3021		cp++;
3022	*ptr = cp;
3023	return(0);
3024}
3025
3026static char *
3027pci_describe_device(device_t dev)
3028{
3029	int	vendor, device;
3030	char	*desc, *vp, *dp, *line;
3031
3032	desc = vp = dp = NULL;
3033
3034	/*
3035	 * If we have no vendor data, we can't do anything.
3036	 */
3037	if (pci_vendordata == NULL)
3038		goto out;
3039
3040	/*
3041	 * Scan the vendor data looking for this device
3042	 */
3043	line = pci_vendordata;
3044	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3045		goto out;
3046	for (;;) {
3047		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3048			goto out;
3049		if (vendor == pci_get_vendor(dev))
3050			break;
3051	}
3052	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3053		goto out;
3054	for (;;) {
3055		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3056			*dp = 0;
3057			break;
3058		}
3059		if (vendor != -1) {
3060			*dp = 0;
3061			break;
3062		}
3063		if (device == pci_get_device(dev))
3064			break;
3065	}
3066	if (dp[0] == '\0')
3067		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3068	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3069	    NULL)
3070		sprintf(desc, "%s, %s", vp, dp);
3071 out:
3072	if (vp != NULL)
3073		free(vp, M_DEVBUF);
3074	if (dp != NULL)
3075		free(dp, M_DEVBUF);
3076	return(desc);
3077}
3078
3079int
3080pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3081{
3082	struct pci_devinfo *dinfo;
3083	pcicfgregs *cfg;
3084
3085	dinfo = device_get_ivars(child);
3086	cfg = &dinfo->cfg;
3087
3088	switch (which) {
3089	case PCI_IVAR_ETHADDR:
3090		/*
3091		 * The generic accessor doesn't deal with failure, so
3092		 * we set the return value, then return an error.
3093		 */
3094		*((uint8_t **) result) = NULL;
3095		return (EINVAL);
3096	case PCI_IVAR_SUBVENDOR:
3097		*result = cfg->subvendor;
3098		break;
3099	case PCI_IVAR_SUBDEVICE:
3100		*result = cfg->subdevice;
3101		break;
3102	case PCI_IVAR_VENDOR:
3103		*result = cfg->vendor;
3104		break;
3105	case PCI_IVAR_DEVICE:
3106		*result = cfg->device;
3107		break;
3108	case PCI_IVAR_DEVID:
3109		*result = (cfg->device << 16) | cfg->vendor;
3110		break;
3111	case PCI_IVAR_CLASS:
3112		*result = cfg->baseclass;
3113		break;
3114	case PCI_IVAR_SUBCLASS:
3115		*result = cfg->subclass;
3116		break;
3117	case PCI_IVAR_PROGIF:
3118		*result = cfg->progif;
3119		break;
3120	case PCI_IVAR_REVID:
3121		*result = cfg->revid;
3122		break;
3123	case PCI_IVAR_INTPIN:
3124		*result = cfg->intpin;
3125		break;
3126	case PCI_IVAR_IRQ:
3127		*result = cfg->intline;
3128		break;
3129	case PCI_IVAR_BUS:
3130		*result = cfg->bus;
3131		break;
3132	case PCI_IVAR_SLOT:
3133		*result = cfg->slot;
3134		break;
3135	case PCI_IVAR_FUNCTION:
3136		*result = cfg->func;
3137		break;
3138	case PCI_IVAR_CMDREG:
3139		*result = cfg->cmdreg;
3140		break;
3141	case PCI_IVAR_CACHELNSZ:
3142		*result = cfg->cachelnsz;
3143		break;
3144	case PCI_IVAR_MINGNT:
3145		*result = cfg->mingnt;
3146		break;
3147	case PCI_IVAR_MAXLAT:
3148		*result = cfg->maxlat;
3149		break;
3150	case PCI_IVAR_LATTIMER:
3151		*result = cfg->lattimer;
3152		break;
3153	default:
3154		return (ENOENT);
3155	}
3156	return (0);
3157}
3158
3159int
3160pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3161{
3162	struct pci_devinfo *dinfo;
3163
3164	dinfo = device_get_ivars(child);
3165
3166	switch (which) {
3167	case PCI_IVAR_INTPIN:
3168		dinfo->cfg.intpin = value;
3169		return (0);
3170	case PCI_IVAR_ETHADDR:
3171	case PCI_IVAR_SUBVENDOR:
3172	case PCI_IVAR_SUBDEVICE:
3173	case PCI_IVAR_VENDOR:
3174	case PCI_IVAR_DEVICE:
3175	case PCI_IVAR_DEVID:
3176	case PCI_IVAR_CLASS:
3177	case PCI_IVAR_SUBCLASS:
3178	case PCI_IVAR_PROGIF:
3179	case PCI_IVAR_REVID:
3180	case PCI_IVAR_IRQ:
3181	case PCI_IVAR_BUS:
3182	case PCI_IVAR_SLOT:
3183	case PCI_IVAR_FUNCTION:
3184		return (EINVAL);	/* disallow for now */
3185
3186	default:
3187		return (ENOENT);
3188	}
3189}
3190
3191
3192#include "opt_ddb.h"
3193#ifdef DDB
3194#include <ddb/ddb.h>
3195#include <sys/cons.h>
3196
3197/*
3198 * List resources based on pci map registers, used for within ddb
3199 */
3200
3201DB_SHOW_COMMAND(pciregs, db_pci_dump)
3202{
3203	struct pci_devinfo *dinfo;
3204	struct devlist *devlist_head;
3205	struct pci_conf *p;
3206	const char *name;
3207	int i, error, none_count;
3208
3209	none_count = 0;
3210	/* get the head of the device queue */
3211	devlist_head = &pci_devq;
3212
3213	/*
3214	 * Go through the list of devices and print out devices
3215	 */
3216	for (error = 0, i = 0,
3217	     dinfo = STAILQ_FIRST(devlist_head);
3218	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3219	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3220
3221		/* Populate pd_name and pd_unit */
3222		name = NULL;
3223		if (dinfo->cfg.dev)
3224			name = device_get_name(dinfo->cfg.dev);
3225
3226		p = &dinfo->conf;
3227		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
3228			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3229			(name && *name) ? name : "none",
3230			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3231			none_count++,
3232			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3233			p->pc_sel.pc_func, (p->pc_class << 16) |
3234			(p->pc_subclass << 8) | p->pc_progif,
3235			(p->pc_subdevice << 16) | p->pc_subvendor,
3236			(p->pc_device << 16) | p->pc_vendor,
3237			p->pc_revid, p->pc_hdr);
3238	}
3239}
3240#endif /* DDB */
3241
3242static struct resource *
3243pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3244    u_long start, u_long end, u_long count, u_int flags)
3245{
3246	struct pci_devinfo *dinfo = device_get_ivars(child);
3247	struct resource_list *rl = &dinfo->resources;
3248	struct resource_list_entry *rle;
3249	struct resource *res;
3250	pci_addr_t map, testval;
3251	int mapsize;
3252
3253	/*
3254	 * Weed out the bogons, and figure out how large the BAR/map
3255	 * is.  Bars that read back 0 here are bogus and unimplemented.
3256	 * Note: atapci in legacy mode are special and handled elsewhere
3257	 * in the code.  If you have a atapci device in legacy mode and
3258	 * it fails here, that other code is broken.
3259	 */
3260	res = NULL;
3261	map = pci_read_config(child, *rid, 4);
3262	pci_write_config(child, *rid, 0xffffffff, 4);
3263	testval = pci_read_config(child, *rid, 4);
3264	if (pci_maprange(testval) == 64)
3265		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3266	if (pci_mapbase(testval) == 0)
3267		goto out;
3268
3269	/*
3270	 * Restore the original value of the BAR.  We may have reprogrammed
3271	 * the BAR of the low-level console device and when booting verbose,
3272	 * we need the console device addressable.
3273	 */
3274	pci_write_config(child, *rid, map, 4);
3275
3276	if (PCI_BAR_MEM(testval)) {
3277		if (type != SYS_RES_MEMORY) {
3278			if (bootverbose)
3279				device_printf(dev,
3280				    "child %s requested type %d for rid %#x,"
3281				    " but the BAR says it is an memio\n",
3282				    device_get_nameunit(child), type, *rid);
3283			goto out;
3284		}
3285	} else {
3286		if (type != SYS_RES_IOPORT) {
3287			if (bootverbose)
3288				device_printf(dev,
3289				    "child %s requested type %d for rid %#x,"
3290				    " but the BAR says it is an ioport\n",
3291				    device_get_nameunit(child), type, *rid);
3292			goto out;
3293		}
3294	}
3295	/*
3296	 * For real BARs, we need to override the size that
3297	 * the driver requests, because that's what the BAR
3298	 * actually uses and we would otherwise have a
3299	 * situation where we might allocate the excess to
3300	 * another driver, which won't work.
3301	 */
3302	mapsize = pci_mapsize(testval);
3303	count = 1UL << mapsize;
3304	if (RF_ALIGNMENT(flags) < mapsize)
3305		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3306
3307	/*
3308	 * Allocate enough resource, and then write back the
3309	 * appropriate bar for that resource.
3310	 */
3311	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3312	    start, end, count, flags);
3313	if (res == NULL) {
3314		device_printf(child,
3315		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3316		    count, *rid, type, start, end);
3317		goto out;
3318	}
3319	resource_list_add(rl, type, *rid, start, end, count);
3320	rle = resource_list_find(rl, type, *rid);
3321	if (rle == NULL)
3322		panic("pci_alloc_map: unexpectedly can't find resource.");
3323	rle->res = res;
3324	rle->start = rman_get_start(res);
3325	rle->end = rman_get_end(res);
3326	rle->count = count;
3327	if (bootverbose)
3328		device_printf(child,
3329		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3330		    count, *rid, type, rman_get_start(res));
3331	map = rman_get_start(res);
3332out:;
3333	pci_write_config(child, *rid, map, 4);
3334	if (pci_maprange(testval) == 64)
3335		pci_write_config(child, *rid + 4, map >> 32, 4);
3336	return (res);
3337}
3338
3339
3340struct resource *
3341pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3342		   u_long start, u_long end, u_long count, u_int flags)
3343{
3344	struct pci_devinfo *dinfo = device_get_ivars(child);
3345	struct resource_list *rl = &dinfo->resources;
3346	struct resource_list_entry *rle;
3347	pcicfgregs *cfg = &dinfo->cfg;
3348
3349	/*
3350	 * Perform lazy resource allocation
3351	 */
3352	if (device_get_parent(child) == dev) {
3353		switch (type) {
3354		case SYS_RES_IRQ:
3355			/*
3356			 * Can't alloc legacy interrupt once MSI messages
3357			 * have been allocated.
3358			 */
3359			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3360			    cfg->msix.msix_alloc > 0))
3361				return (NULL);
3362			/*
3363			 * If the child device doesn't have an
3364			 * interrupt routed and is deserving of an
3365			 * interrupt, try to assign it one.
3366			 */
3367			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3368			    (cfg->intpin != 0))
3369				pci_assign_interrupt(dev, child, 0);
3370			break;
3371		case SYS_RES_IOPORT:
3372		case SYS_RES_MEMORY:
3373			if (*rid < PCIR_BAR(cfg->nummaps)) {
3374				/*
3375				 * Enable the I/O mode.  We should
3376				 * also be assigning resources too
3377				 * when none are present.  The
3378				 * resource_list_alloc kind of sorta does
3379				 * this...
3380				 */
3381				if (PCI_ENABLE_IO(dev, child, type))
3382					return (NULL);
3383			}
3384			rle = resource_list_find(rl, type, *rid);
3385			if (rle == NULL)
3386				return (pci_alloc_map(dev, child, type, rid,
3387				    start, end, count, flags));
3388			break;
3389		}
3390		/*
3391		 * If we've already allocated the resource, then
3392		 * return it now.  But first we may need to activate
3393		 * it, since we don't allocate the resource as active
3394		 * above.  Normally this would be done down in the
3395		 * nexus, but since we short-circuit that path we have
3396		 * to do its job here.  Not sure if we should free the
3397		 * resource if it fails to activate.
3398		 */
3399		rle = resource_list_find(rl, type, *rid);
3400		if (rle != NULL && rle->res != NULL) {
3401			if (bootverbose)
3402				device_printf(child,
3403			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3404				    rman_get_size(rle->res), *rid, type,
3405				    rman_get_start(rle->res));
3406			if ((flags & RF_ACTIVE) &&
3407			    bus_generic_activate_resource(dev, child, type,
3408			    *rid, rle->res) != 0)
3409				return (NULL);
3410			return (rle->res);
3411		}
3412	}
3413	return (resource_list_alloc(rl, dev, child, type, rid,
3414	    start, end, count, flags));
3415}
3416
3417void
3418pci_delete_resource(device_t dev, device_t child, int type, int rid)
3419{
3420	struct pci_devinfo *dinfo;
3421	struct resource_list *rl;
3422	struct resource_list_entry *rle;
3423
3424	if (device_get_parent(child) != dev)
3425		return;
3426
3427	dinfo = device_get_ivars(child);
3428	rl = &dinfo->resources;
3429	rle = resource_list_find(rl, type, rid);
3430	if (rle) {
3431		if (rle->res) {
3432			if (rman_get_device(rle->res) != dev ||
3433			    rman_get_flags(rle->res) & RF_ACTIVE) {
3434				device_printf(dev, "delete_resource: "
3435				    "Resource still owned by child, oops. "
3436				    "(type=%d, rid=%d, addr=%lx)\n",
3437				    rle->type, rle->rid,
3438				    rman_get_start(rle->res));
3439				return;
3440			}
3441			bus_release_resource(dev, type, rid, rle->res);
3442		}
3443		resource_list_delete(rl, type, rid);
3444	}
3445	/*
3446	 * Why do we turn off the PCI configuration BAR when we delete a
3447	 * resource? -- imp
3448	 */
3449	pci_write_config(child, rid, 0, 4);
3450	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3451}
3452
3453struct resource_list *
3454pci_get_resource_list (device_t dev, device_t child)
3455{
3456	struct pci_devinfo *dinfo = device_get_ivars(child);
3457
3458	return (&dinfo->resources);
3459}
3460
3461uint32_t
3462pci_read_config_method(device_t dev, device_t child, int reg, int width)
3463{
3464	struct pci_devinfo *dinfo = device_get_ivars(child);
3465	pcicfgregs *cfg = &dinfo->cfg;
3466
3467	return (PCIB_READ_CONFIG(device_get_parent(dev),
3468	    cfg->bus, cfg->slot, cfg->func, reg, width));
3469}
3470
3471void
3472pci_write_config_method(device_t dev, device_t child, int reg,
3473    uint32_t val, int width)
3474{
3475	struct pci_devinfo *dinfo = device_get_ivars(child);
3476	pcicfgregs *cfg = &dinfo->cfg;
3477
3478	PCIB_WRITE_CONFIG(device_get_parent(dev),
3479	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3480}
3481
3482int
3483pci_child_location_str_method(device_t dev, device_t child, char *buf,
3484    size_t buflen)
3485{
3486
3487	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3488	    pci_get_function(child));
3489	return (0);
3490}
3491
3492int
3493pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3494    size_t buflen)
3495{
3496	struct pci_devinfo *dinfo;
3497	pcicfgregs *cfg;
3498
3499	dinfo = device_get_ivars(child);
3500	cfg = &dinfo->cfg;
3501	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3502	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3503	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3504	    cfg->progif);
3505	return (0);
3506}
3507
3508int
3509pci_assign_interrupt_method(device_t dev, device_t child)
3510{
3511	struct pci_devinfo *dinfo = device_get_ivars(child);
3512	pcicfgregs *cfg = &dinfo->cfg;
3513
3514	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3515	    cfg->intpin));
3516}
3517
3518static int
3519pci_modevent(module_t mod, int what, void *arg)
3520{
3521	static struct cdev *pci_cdev;
3522
3523	switch (what) {
3524	case MOD_LOAD:
3525		STAILQ_INIT(&pci_devq);
3526		pci_generation = 0;
3527		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3528		    "pci");
3529		pci_load_vendor_data();
3530		break;
3531
3532	case MOD_UNLOAD:
3533		destroy_dev(pci_cdev);
3534		break;
3535	}
3536
3537	return (0);
3538}
3539
3540void
3541pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3542{
3543	int i;
3544
3545	/*
3546	 * Only do header type 0 devices.  Type 1 devices are bridges,
3547	 * which we know need special treatment.  Type 2 devices are
3548	 * cardbus bridges which also require special treatment.
3549	 * Other types are unknown, and we err on the side of safety
3550	 * by ignoring them.
3551	 */
3552	if (dinfo->cfg.hdrtype != 0)
3553		return;
3554
3555	/*
3556	 * Restore the device to full power mode.  We must do this
3557	 * before we restore the registers because moving from D3 to
3558	 * D0 will cause the chip's BARs and some other registers to
3559	 * be reset to some unknown power on reset values.  Cut down
3560	 * the noise on boot by doing nothing if we are already in
3561	 * state D0.
3562	 */
3563	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3564		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3565	}
3566	for (i = 0; i < dinfo->cfg.nummaps; i++)
3567		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3568	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3569	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3570	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3571	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3572	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3573	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3574	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3575	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3576	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3577	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3578
3579	/* Restore MSI and MSI-X configurations if they are present. */
3580	if (dinfo->cfg.msi.msi_location != 0)
3581		pci_resume_msi(dev);
3582	if (dinfo->cfg.msix.msix_location != 0)
3583		pci_resume_msix(dev);
3584}
3585
3586void
3587pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3588{
3589	int i;
3590	uint32_t cls;
3591	int ps;
3592
3593	/*
3594	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3595	 * we know need special treatment.  Type 2 devices are cardbus bridges
3596	 * which also require special treatment.  Other types are unknown, and
3597	 * we err on the side of safety by ignoring them.  Powering down
3598	 * bridges should not be undertaken lightly.
3599	 */
3600	if (dinfo->cfg.hdrtype != 0)
3601		return;
3602	for (i = 0; i < dinfo->cfg.nummaps; i++)
3603		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3604	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3605
3606	/*
3607	 * Some drivers apparently write to these registers w/o updating our
3608	 * cached copy.  No harm happens if we update the copy, so do so here
3609	 * so we can restore them.  The COMMAND register is modified by the
3610	 * bus w/o updating the cache.  This should represent the normally
3611	 * writable portion of the 'defined' part of type 0 headers.  In
3612	 * theory we also need to save/restore the PCI capability structures
3613	 * we know about, but apart from power we don't know any that are
3614	 * writable.
3615	 */
3616	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3617	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3618	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3619	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3620	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3621	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3622	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3623	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3624	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3625	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3626	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3627	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3628	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3629	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3630	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3631
3632	/*
3633	 * don't set the state for display devices, base peripherals and
3634	 * memory devices since bad things happen when they are powered down.
3635	 * We should (a) have drivers that can easily detach and (b) use
3636	 * generic drivers for these devices so that some device actually
3637	 * attaches.  We need to make sure that when we implement (a) we don't
3638	 * power the device down on a reattach.
3639	 */
3640	cls = pci_get_class(dev);
3641	if (!setstate)
3642		return;
3643	switch (pci_do_power_nodriver)
3644	{
3645		case 0:		/* NO powerdown at all */
3646			return;
3647		case 1:		/* Conservative about what to power down */
3648			if (cls == PCIC_STORAGE)
3649				return;
3650			/*FALLTHROUGH*/
3651		case 2:		/* Agressive about what to power down */
3652			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3653			    cls == PCIC_BASEPERIPH)
3654				return;
3655			/*FALLTHROUGH*/
3656		case 3:		/* Power down everything */
3657			break;
3658	}
3659	/*
3660	 * PCI spec says we can only go into D3 state from D0 state.
3661	 * Transition from D[12] into D0 before going to D3 state.
3662	 */
3663	ps = pci_get_powerstate(dev);
3664	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3665		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3666	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3667		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3668}
3669