/* pci.c revision 173678 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 173678 2007-11-16 20:49:34Z jkim $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
/*
 * Prototypes for file-local helpers: BAR decoding (pci_map*), device
 * enumeration/attach, vendor-data parsing, VPD access, and MSI/MSI-X
 * register programming.  Definitions appear below.
 */
static uint32_t		pci_mapbase(unsigned mapreg);
static const char	*pci_maptype(unsigned mapreg);
static int		pci_mapsize(unsigned testval);
static int		pci_maprange(unsigned mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
114
/*
 * newbus method table for the pci driver: the standard device and bus
 * interfaces plus the PCI-specific kobj interface (pci_if.h).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Attach the pci driver under pcib and register the module. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
172
/* Vendor/device description database loaded by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/*
 * Per-device quirk record, matched on the combined 32-bit
 * device-ID/vendor-ID word; arg1/arg2 are type-specific.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
185
/* Quirk table; terminated by an all-zero entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
220
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every enumerated PCI function (see pci_read_device()). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever the list changes */
uint32_t pci_numdevs = 0;
/* Set during capability parsing when a PCI-e root port / PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;
230
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/*
 * hw.pci.enable_io_modes (tunable and sysctl, default 1): enable I/O and
 * memory decoding for devices whose firmware left the bits off.
 */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");
241
242static int pci_do_power_nodriver = 0;
243TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
244SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
245    &pci_do_power_nodriver, 0,
246  "Place a function into D3 state when no driver attaches to it.  0 means\n\
247disable.  1 means conservatively place devices into D3 state.  2 means\n\
248agressively place devices into D3 state.  3 means put absolutely everything\n\
249in D3 state.");
250
/* hw.pci.do_power_resume (default 1): power devices D3 -> D0 on resume. */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.enable_msi (default 1): allow drivers to use MSI. */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix (default 1): allow drivers to use MSI-X. */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* hw.pci.honor_msi_blacklist (read-only, default 1): obey pci_quirks MSI entries. */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271
272/* Find a device_t by bus/slot/function in domain 0 */
273
274device_t
275pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
276{
277
278	return (pci_find_dbsf(0, bus, slot, func));
279}
280
281/* Find a device_t by domain/bus/slot/function */
282
283device_t
284pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285{
286	struct pci_devinfo *dinfo;
287
288	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289		if ((dinfo->cfg.domain == domain) &&
290		    (dinfo->cfg.bus == bus) &&
291		    (dinfo->cfg.slot == slot) &&
292		    (dinfo->cfg.func == func)) {
293			return (dinfo->cfg.dev);
294		}
295	}
296
297	return (NULL);
298}
299
300/* Find a device_t by vendor/device ID */
301
302device_t
303pci_find_device(uint16_t vendor, uint16_t device)
304{
305	struct pci_devinfo *dinfo;
306
307	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308		if ((dinfo->cfg.vendor == vendor) &&
309		    (dinfo->cfg.device == device)) {
310			return (dinfo->cfg.dev);
311		}
312	}
313
314	return (NULL);
315}
316
317/* return base address of memory or port map */
318
319static uint32_t
320pci_mapbase(uint32_t mapreg)
321{
322
323	if (PCI_BAR_MEM(mapreg))
324		return (mapreg & PCIM_BAR_MEM_BASE);
325	else
326		return (mapreg & PCIM_BAR_IO_BASE);
327}
328
329/* return map type of memory or port map */
330
331static const char *
332pci_maptype(unsigned mapreg)
333{
334
335	if (PCI_BAR_IO(mapreg))
336		return ("I/O Port");
337	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338		return ("Prefetchable Memory");
339	return ("Memory");
340}
341
342/* return log2 of map size decoded for memory or port map */
343
344static int
345pci_mapsize(uint32_t testval)
346{
347	int ln2size;
348
349	testval = pci_mapbase(testval);
350	ln2size = 0;
351	if (testval != 0) {
352		while ((testval & 1) == 0)
353		{
354			ln2size++;
355			testval >>= 1;
356		}
357	}
358	return (ln2size);
359}
360
361/* return log2 of address range supported by map register */
362
363static int
364pci_maprange(unsigned mapreg)
365{
366	int ln2range = 0;
367
368	if (PCI_BAR_IO(mapreg))
369		ln2range = 32;
370	else
371		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372		case PCIM_BAR_MEM_32:
373			ln2range = 32;
374			break;
375		case PCIM_BAR_MEM_1MB:
376			ln2range = 20;
377			break;
378		case PCIM_BAR_MEM_64:
379			ln2range = 64;
380			break;
381		}
382	return (ln2range);
383}
384
385/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386
387static void
388pci_fixancient(pcicfgregs *cfg)
389{
390	if (cfg->hdrtype != 0)
391		return;
392
393	/* PCI to PCI bridges use header type 1 */
394	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395		cfg->hdrtype = 1;
396}
397
/*
 * Extract header-type specific config data: subsystem vendor/device IDs
 * (for types 0 and 2) and the number of base address registers each
 * layout provides.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subsystem registers here */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
421
/*
 * Read the configuration header of function d:b:s:f into a freshly
 * allocated pci_devinfo of 'size' bytes ('size' lets callers embed
 * pci_devinfo inside a larger structure), link it onto the global
 * device queue, and mirror the parsed header into the pciio conf
 * record.  Returns NULL when no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones from the vendor/device word means no device present. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed header into the pciio conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
496
497static void
498pci_read_extcap(device_t pcib, pcicfgregs *cfg)
499{
500#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
501#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
502#if defined(__i386__) || defined(__amd64__)
503	uint64_t addr;
504#endif
505	uint32_t val;
506	int	ptr, nextptr, ptrptr;
507
508	switch (cfg->hdrtype & PCIM_HDRTYPE) {
509	case 0:
510	case 1:
511		ptrptr = PCIR_CAP_PTR;
512		break;
513	case 2:
514		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
515		break;
516	default:
517		return;		/* no extended capabilities support */
518	}
519	nextptr = REG(ptrptr, 1);	/* sanity check? */
520
521	/*
522	 * Read capability entries.
523	 */
524	while (nextptr != 0) {
525		/* Sanity check */
526		if (nextptr > 255) {
527			printf("illegal PCI extended capability offset %d\n",
528			    nextptr);
529			return;
530		}
531		/* Find the next entry */
532		ptr = nextptr;
533		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
534
535		/* Process this entry */
536		switch (REG(ptr + PCICAP_ID, 1)) {
537		case PCIY_PMG:		/* PCI power management */
538			if (cfg->pp.pp_cap == 0) {
539				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
540				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
541				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
542				if ((nextptr - ptr) > PCIR_POWER_DATA)
543					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
544			}
545			break;
546#if defined(__i386__) || defined(__amd64__)
547		case PCIY_HT:		/* HyperTransport */
548			/* Determine HT-specific capability type. */
549			val = REG(ptr + PCIR_HT_COMMAND, 2);
550			switch (val & PCIM_HTCMD_CAP_MASK) {
551			case PCIM_HTCAP_MSI_MAPPING:
552				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
553					/* Sanity check the mapping window. */
554					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
555					    4);
556					addr <<= 32;
557					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
558					    4);
559					if (addr != MSI_INTEL_ADDR_BASE)
560						device_printf(pcib,
561	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
562						    cfg->domain, cfg->bus,
563						    cfg->slot, cfg->func,
564						    (long long)addr);
565				}
566
567				/* Enable MSI -> HT mapping. */
568				val |= PCIM_HTCMD_MSI_ENABLE;
569				WREG(ptr + PCIR_HT_COMMAND, val, 2);
570				break;
571			}
572			break;
573#endif
574		case PCIY_MSI:		/* PCI MSI */
575			cfg->msi.msi_location = ptr;
576			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
577			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
578						     PCIM_MSICTRL_MMC_MASK)>>1);
579			break;
580		case PCIY_MSIX:		/* PCI MSI-X */
581			cfg->msix.msix_location = ptr;
582			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
583			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
584			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
585			val = REG(ptr + PCIR_MSIX_TABLE, 4);
586			cfg->msix.msix_table_bar = PCIR_BAR(val &
587			    PCIM_MSIX_BIR_MASK);
588			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
589			val = REG(ptr + PCIR_MSIX_PBA, 4);
590			cfg->msix.msix_pba_bar = PCIR_BAR(val &
591			    PCIM_MSIX_BIR_MASK);
592			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
593			break;
594		case PCIY_VPD:		/* PCI Vital Product Data */
595			cfg->vpd.vpd_reg = ptr;
596			break;
597		case PCIY_SUBVENDOR:
598			/* Should always be true. */
599			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
600				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
601				cfg->subvendor = val & 0xffff;
602				cfg->subdevice = val >> 16;
603			}
604			break;
605		case PCIY_PCIX:		/* PCI-X */
606			/*
607			 * Assume we have a PCI-X chipset if we have
608			 * at least one PCI-PCI bridge with a PCI-X
609			 * capability.  Note that some systems with
610			 * PCI-express or HT chipsets might match on
611			 * this check as well.
612			 */
613			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
614				pcix_chipset = 1;
615			break;
616		case PCIY_EXPRESS:	/* PCI-express */
617			/*
618			 * Assume we have a PCI-express chipset if we have
619			 * at least one PCI-express root port.
620			 */
621			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
622			if ((val & PCIM_EXP_FLAGS_TYPE) ==
623			    PCIM_EXP_TYPE_ROOT_PORT)
624				pcie_chipset = 1;
625			break;
626		default:
627			break;
628		}
629	}
630/* REG and WREG use carry through to next functions */
631}
632
633/*
634 * PCI Vital Product Data
635 */
636
637#define	PCI_VPD_TIMEOUT		1000000
638
/*
 * Read one 4-byte-aligned word of VPD at offset 'reg' into *data.
 * Starts the read by writing the VPD address register, then polls the
 * flag bit (0x8000), which the device sets when the data register holds
 * the requested word.  Returns ENXIO if the device never responds
 * within PCI_VPD_TIMEOUT iterations.  Relies on the REG/WREG macros
 * still in scope from pci_read_extcap() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Wait for the flag bit to go high (data ready). */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
657
#if 0
/*
 * Write one 4-byte-aligned word of VPD.  Mirror image of
 * pci_read_vpd_reg(): the flag bit is written high to start the write
 * and polled until the device clears it.  Currently unused; kept under
 * #if 0 for reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Wait for the flag bit to drop (write complete). */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
679
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last word fetched, LE-decoded */
	int		bytesinval;	/* bytes of 'val' not yet consumed */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running sum for the RV checksum */
};
688
689static int
690vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
691{
692	uint32_t reg;
693	uint8_t byte;
694
695	if (vrs->bytesinval == 0) {
696		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
697			return (ENXIO);
698		vrs->val = le32toh(reg);
699		vrs->off += 4;
700		byte = vrs->val & 0xff;
701		vrs->bytesinval = 3;
702	} else {
703		vrs->val = vrs->val >> 8;
704		byte = vrs->val & 0xff;
705		vrs->bytesinval--;
706	}
707
708	vrs->cksum += byte;
709	*data = byte;
710	return (0);
711}
712
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array, and the read/write
 * (VPD-W) keyword array.  Implemented as a state machine driven one
 * byte at a time by vpd_nextbyte():
 *
 *   state 0: resource tag (large tags have a 16-bit length)
 *   state 1: identifier string bytes
 *   state 2/3: VPD-R keyword header / value bytes
 *   state 5/6: VPD-W keyword header / value bytes
 *   state -1: normal termination; -2: I/O error
 *
 * The RV keyword carries a checksum byte; if the running sum does not
 * come out to zero the read-only data is discarded.  On I/O error all
 * partially built arrays are freed.  vpd_cached is set regardless so
 * the (possibly failed) parse is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is at most 0x7f words; reject bogus lengths. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* RV keyword: validate the running checksum once. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes; NOTE(review): no transition ever sets state 4 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
992
993int
994pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
995{
996	struct pci_devinfo *dinfo = device_get_ivars(child);
997	pcicfgregs *cfg = &dinfo->cfg;
998
999	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1000		pci_read_vpd(device_get_parent(dev), cfg);
1001
1002	*identptr = cfg->vpd.vpd_ident;
1003
1004	if (*identptr == NULL)
1005		return (ENXIO);
1006
1007	return (0);
1008}
1009
1010int
1011pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1012	const char **vptr)
1013{
1014	struct pci_devinfo *dinfo = device_get_ivars(child);
1015	pcicfgregs *cfg = &dinfo->cfg;
1016	int i;
1017
1018	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1019		pci_read_vpd(device_get_parent(dev), cfg);
1020
1021	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1022		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1023		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1024			*vptr = cfg->vpd.vpd_ros[i].value;
1025		}
1026
1027	if (i != cfg->vpd.vpd_rocnt)
1028		return (0);
1029
1030	*vptr = NULL;
1031	return (ENXIO);
1032}
1033
1034/*
1035 * Return the offset in configuration space of the requested extended
1036 * capability entry or 0 if the specified capability was not found.
1037 */
int
pci_find_extcap_method(device_t dev, device_t child, int capability,
    int *capreg)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	u_int32_t status;
	u_int8_t ptr;

	/*
	 * Check the CAP_LIST bit of the PCI status register first.
	 * Devices without a capability list have nothing to search.
	 */
	status = pci_read_config(child, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (ENXIO);

	/*
	 * Determine the start pointer of the capabilities list.  The
	 * register holding the first capability pointer depends on the
	 * configuration header type.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:		/* type 0: standard device header */
	case 1:		/* type 1: PCI-PCI bridge header */
		ptr = PCIR_CAP_PTR;
		break;
	case 2:		/* type 2: CardBus bridge header */
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		/* XXX: panic? */
		return (ENXIO);		/* no extended capabilities support */
	}
	ptr = pci_read_config(child, ptr, 1);

	/*
	 * Traverse the capabilities list until the requested capability
	 * ID is found or the list ends (next pointer of 0).
	 */
	while (ptr != 0) {
		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}
		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
	}

	return (ENOENT);
}
1085
1086/*
1087 * Support for MSI-X message interrupts.
1088 */
1089void
1090pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1091{
1092	struct pci_devinfo *dinfo = device_get_ivars(dev);
1093	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1094	uint32_t offset;
1095
1096	KASSERT(msix->msix_table_len > index, ("bogus index"));
1097	offset = msix->msix_table_offset + index * 16;
1098	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1099	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1100	bus_write_4(msix->msix_table_res, offset + 8, data);
1101}
1102
/*
 * Set the Mask bit in the Vector Control word of MSI-X table entry
 * 'index'.  The write is skipped if the entry is already masked.
 */
void
pci_mask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	/*
	 * Bounded by msix_msgnum (not msix_table_len) since
	 * pci_alloc_msix_method() masks every message the device
	 * advertises, not just the allocated ones.
	 */
	KASSERT(msix->msix_msgnum > index, ("bogus index"));
	/* Vector control is the last dword of the 16-byte entry. */
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
		val |= PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}
1118
/*
 * Clear the Mask bit in the Vector Control word of MSI-X table entry
 * 'index'.  The write is skipped if the entry is already unmasked.
 */
void
pci_unmask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	/* Unlike pci_mask_msix(), only allocated entries are valid here. */
	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Vector control is the last dword of the 16-byte entry. */
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (val & PCIM_MSIX_VCTRL_MASK) {
		val &= ~PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}
1134
1135int
1136pci_pending_msix(device_t dev, u_int index)
1137{
1138	struct pci_devinfo *dinfo = device_get_ivars(dev);
1139	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1140	uint32_t offset, bit;
1141
1142	KASSERT(msix->msix_table_len > index, ("bogus index"));
1143	offset = msix->msix_pba_offset + (index / 32) * 4;
1144	bit = 1 << index % 32;
1145	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1146}
1147
1148/*
1149 * Restore MSI-X registers and table during resume.  If MSI-X is
1150 * enabled then walk the virtual table to restore the actual MSI-X
1151 * table.
1152 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/*
	 * Restore the saved message control word unconditionally so the
	 * MSI-X enable bit returns to its pre-suspend state.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1180
1181/*
1182 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1183 * returned in *count.  After this function returns, each message will be
1184 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1185 */
1186int
1187pci_alloc_msix_method(device_t dev, device_t child, int *count)
1188{
1189	struct pci_devinfo *dinfo = device_get_ivars(child);
1190	pcicfgregs *cfg = &dinfo->cfg;
1191	struct resource_list_entry *rle;
1192	int actual, error, i, irq, max;
1193
1194	/* Don't let count == 0 get us into trouble. */
1195	if (*count == 0)
1196		return (EINVAL);
1197
1198	/* If rid 0 is allocated, then fail. */
1199	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1200	if (rle != NULL && rle->res != NULL)
1201		return (ENXIO);
1202
1203	/* Already have allocated messages? */
1204	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1205		return (ENXIO);
1206
1207	/* If MSI is blacklisted for this system, fail. */
1208	if (pci_msi_blacklisted())
1209		return (ENXIO);
1210
1211	/* MSI-X capability present? */
1212	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1213		return (ENODEV);
1214
1215	/* Make sure the appropriate BARs are mapped. */
1216	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1217	    cfg->msix.msix_table_bar);
1218	if (rle == NULL || rle->res == NULL ||
1219	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1220		return (ENXIO);
1221	cfg->msix.msix_table_res = rle->res;
1222	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1223		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1224		    cfg->msix.msix_pba_bar);
1225		if (rle == NULL || rle->res == NULL ||
1226		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1227			return (ENXIO);
1228	}
1229	cfg->msix.msix_pba_res = rle->res;
1230
1231	if (bootverbose)
1232		device_printf(child,
1233		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1234		    *count, cfg->msix.msix_msgnum);
1235	max = min(*count, cfg->msix.msix_msgnum);
1236	for (i = 0; i < max; i++) {
1237		/* Allocate a message. */
1238		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1239		if (error)
1240			break;
1241		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1242		    irq, 1);
1243	}
1244	actual = i;
1245
1246	if (bootverbose) {
1247		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1248		if (actual == 1)
1249			device_printf(child, "using IRQ %lu for MSI-X\n",
1250			    rle->start);
1251		else {
1252			int run;
1253
1254			/*
1255			 * Be fancy and try to print contiguous runs of
1256			 * IRQ values as ranges.  'irq' is the previous IRQ.
1257			 * 'run' is true if we are in a range.
1258			 */
1259			device_printf(child, "using IRQs %lu", rle->start);
1260			irq = rle->start;
1261			run = 0;
1262			for (i = 1; i < actual; i++) {
1263				rle = resource_list_find(&dinfo->resources,
1264				    SYS_RES_IRQ, i + 1);
1265
1266				/* Still in a run? */
1267				if (rle->start == irq + 1) {
1268					run = 1;
1269					irq++;
1270					continue;
1271				}
1272
1273				/* Finish previous range. */
1274				if (run) {
1275					printf("-%d", irq);
1276					run = 0;
1277				}
1278
1279				/* Start new range. */
1280				printf(",%lu", rle->start);
1281				irq = rle->start;
1282			}
1283
1284			/* Unfinished range? */
1285			if (run)
1286				printf("-%d", irq);
1287			printf(" for MSI-X\n");
1288		}
1289	}
1290
1291	/* Mask all vectors. */
1292	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1293		pci_mask_msix(child, i);
1294
1295	/* Allocate and initialize vector data and virtual table. */
1296	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1297	    M_DEVBUF, M_WAITOK | M_ZERO);
1298	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1299	    M_DEVBUF, M_WAITOK | M_ZERO);
1300	for (i = 0; i < actual; i++) {
1301		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1302		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1303		cfg->msix.msix_table[i].mte_vector = i + 1;
1304	}
1305
1306	/* Update control register to enable MSI-X. */
1307	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1308	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1309	    cfg->msix.msix_ctrl, 2);
1310
1311	/* Update counts of alloc'd messages. */
1312	cfg->msix.msix_alloc = actual;
1313	cfg->msix.msix_table_len = actual;
1314	*count = actual;
1315	return (0);
1316}
1317
1318/*
1319 * By default, pci_alloc_msix() will assign the allocated IRQ
1320 * resources consecutively to the first N messages in the MSI-X table.
1321 * However, device drivers may want to use different layouts if they
1322 * either receive fewer messages than they asked for, or they wish to
1323 * populate the MSI-X table sparsely.  This method allows the driver
1324 * to specify what layout it wants.  It must be called after a
1325 * successful pci_alloc_msix() but before any of the associated
1326 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1327 *
1328 * The 'vectors' array contains 'count' message vectors.  The array
1329 * maps directly to the MSI-X table in that index 0 in the array
1330 * specifies the vector for the first message in the MSI-X table, etc.
1331 * The vector value in each array index can either be 0 to indicate
1332 * that no vector should be assigned to a message slot, or it can be a
1333 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1335 * vector (IRQ) to be used for the corresponding message.
1336 *
1337 * On successful return, each message with a non-zero vector will have
1338 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1339 * 1.  Additionally, if any of the IRQs allocated via the previous
1340 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1341 * will be freed back to the system automatically.
1342 *
1343 * For example, suppose a driver has a MSI-X table with 6 messages and
1344 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1345 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1346 * C.  After the call to pci_alloc_msix(), the device will be setup to
1347 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1349 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1350 * be freed back to the system.  This device will also have valid
1351 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1352 *
1353 * In any case, the SYS_RES_IRQ rid X will always map to the message
1354 * at MSI-X table index X - 1 and will only be valid if a vector is
1355 * assigned to that table entry.
1356 */
1357int
1358pci_remap_msix_method(device_t dev, device_t child, int count,
1359    const u_int *vectors)
1360{
1361	struct pci_devinfo *dinfo = device_get_ivars(child);
1362	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1363	struct resource_list_entry *rle;
1364	int i, irq, j, *used;
1365
1366	/*
1367	 * Have to have at least one message in the table but the
1368	 * table can't be bigger than the actual MSI-X table in the
1369	 * device.
1370	 */
1371	if (count == 0 || count > msix->msix_msgnum)
1372		return (EINVAL);
1373
1374	/* Sanity check the vectors. */
1375	for (i = 0; i < count; i++)
1376		if (vectors[i] > msix->msix_alloc)
1377			return (EINVAL);
1378
1379	/*
1380	 * Make sure there aren't any holes in the vectors to be used.
1381	 * It's a big pain to support it, and it doesn't really make
1382	 * sense anyway.  Also, at least one vector must be used.
1383	 */
1384	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1385	    M_ZERO);
1386	for (i = 0; i < count; i++)
1387		if (vectors[i] != 0)
1388			used[vectors[i] - 1] = 1;
1389	for (i = 0; i < msix->msix_alloc - 1; i++)
1390		if (used[i] == 0 && used[i + 1] == 1) {
1391			free(used, M_DEVBUF);
1392			return (EINVAL);
1393		}
1394	if (used[0] != 1) {
1395		free(used, M_DEVBUF);
1396		return (EINVAL);
1397	}
1398
1399	/* Make sure none of the resources are allocated. */
1400	for (i = 0; i < msix->msix_table_len; i++) {
1401		if (msix->msix_table[i].mte_vector == 0)
1402			continue;
1403		if (msix->msix_table[i].mte_handlers > 0)
1404			return (EBUSY);
1405		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1406		KASSERT(rle != NULL, ("missing resource"));
1407		if (rle->res != NULL)
1408			return (EBUSY);
1409	}
1410
1411	/* Free the existing resource list entries. */
1412	for (i = 0; i < msix->msix_table_len; i++) {
1413		if (msix->msix_table[i].mte_vector == 0)
1414			continue;
1415		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1416	}
1417
1418	/*
1419	 * Build the new virtual table keeping track of which vectors are
1420	 * used.
1421	 */
1422	free(msix->msix_table, M_DEVBUF);
1423	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1424	    M_DEVBUF, M_WAITOK | M_ZERO);
1425	for (i = 0; i < count; i++)
1426		msix->msix_table[i].mte_vector = vectors[i];
1427	msix->msix_table_len = count;
1428
1429	/* Free any unused IRQs and resize the vectors array if necessary. */
1430	j = msix->msix_alloc - 1;
1431	if (used[j] == 0) {
1432		struct msix_vector *vec;
1433
1434		while (used[j] == 0) {
1435			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1436			    msix->msix_vectors[j].mv_irq);
1437			j--;
1438		}
1439		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1440		    M_WAITOK);
1441		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1442		    (j + 1));
1443		free(msix->msix_vectors, M_DEVBUF);
1444		msix->msix_vectors = vec;
1445		msix->msix_alloc = j + 1;
1446	}
1447	free(used, M_DEVBUF);
1448
1449	/* Map the IRQs onto the rids. */
1450	for (i = 0; i < count; i++) {
1451		if (vectors[i] == 0)
1452			continue;
1453		irq = msix->msix_vectors[vectors[i]].mv_irq;
1454		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1455		    irq, 1);
1456	}
1457
1458	if (bootverbose) {
1459		device_printf(child, "Remapped MSI-X IRQs as: ");
1460		for (i = 0; i < count; i++) {
1461			if (i != 0)
1462				printf(", ");
1463			if (vectors[i] == 0)
1464				printf("---");
1465			else
1466				printf("%d",
1467				    msix->msix_vectors[vectors[i]].mv_irq);
1468		}
1469		printf("\n");
1470	}
1471
1472	return (0);
1473}
1474
/*
 * Release all allocated MSI-X messages for 'child': disable MSI-X in
 * the message control register, delete the SYS_RES_IRQ resource list
 * entries, and hand the IRQs back to the parent bridge.  Returns
 * ENODEV if nothing is allocated and EBUSY if any message still has a
 * handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	/*
	 * NOTE(review): msix_table is left dangling after this free;
	 * setting msix_table_len to 0 guards the loops above, but
	 * consider NULLing the pointer as well.
	 */
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1521
1522/*
1523 * Return the max supported MSI-X messages this device supports.
1524 * Basically, assuming the MD code can alloc messages, this function
1525 * should return the maximum value that pci_alloc_msix() can return.
1526 * Thus, it is subject to the tunables, etc.
1527 */
1528int
1529pci_msix_count_method(device_t dev, device_t child)
1530{
1531	struct pci_devinfo *dinfo = device_get_ivars(child);
1532	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1533
1534	if (pci_do_msix && msix->msix_location != 0)
1535		return (msix->msix_msgnum);
1536	return (0);
1537}
1538
1539/*
1540 * Support for MSI message signalled interrupts.
1541 */
1542void
1543pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1544{
1545	struct pci_devinfo *dinfo = device_get_ivars(dev);
1546	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1547
1548	/* Write data and address values. */
1549	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1550	    address & 0xffffffff, 4);
1551	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1552		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1553		    address >> 32, 4);
1554		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1555		    data, 2);
1556	} else
1557		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1558		    2);
1559
1560	/* Enable MSI in the control register. */
1561	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1562	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1563	    2);
1564}
1565
1566void
1567pci_disable_msi(device_t dev)
1568{
1569	struct pci_devinfo *dinfo = device_get_ivars(dev);
1570	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1571
1572	/* Disable MSI in the control register. */
1573	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1574	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1575	    2);
1576}
1577
1578/*
1579 * Restore MSI registers during resume.  If MSI is enabled then
1580 * restore the data and address registers in addition to the control
1581 * register.
1582 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Reprogram address/data only if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capability: high word, then shifted data. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/*
	 * Restore the saved control word unconditionally so the enable
	 * bit returns to its pre-suspend state.
	 */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1608
/*
 * Request an updated address/data pair for an allocated MSI or MSI-X
 * IRQ from the parent bridge and reprogram the device to match.
 * Returns ENOENT if 'irq' is not one of this device's messages.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable MSI while the registers change. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Reprogram every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): a matched and reprogrammed MSI-X IRQ still
		 * falls through to this ENOENT return -- confirm whether
		 * callers depend on the return value in that case.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1684
1685/*
1686 * Returns true if the specified device is blacklisted because MSI
1687 * doesn't work.
1688 */
1689int
1690pci_msi_device_blacklisted(device_t dev)
1691{
1692	struct pci_quirk *q;
1693
1694	if (!pci_honor_msi_blacklist)
1695		return (0);
1696
1697	for (q = &pci_quirks[0]; q->devid; q++) {
1698		if (q->devid == pci_get_devid(dev) &&
1699		    q->type == PCI_QUIRK_DISABLE_MSI)
1700			return (1);
1701	}
1702	return (0);
1703}
1704
1705/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1707 * we just check for blacklisted chipsets as represented by the
1708 * host-PCI bridge at device 0:0:0.  In the future, it may become
1709 * necessary to check other system attributes, such as the kenv values
1710 * that give the motherboard manufacturer and model number.
1711 */
1712static int
1713pci_msi_blacklisted(void)
1714{
1715	device_t dev;
1716
1717	if (!pci_honor_msi_blacklist)
1718		return (0);
1719
1720	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1721	if (!(pcie_chipset || pcix_chipset))
1722		return (1);
1723
1724	dev = pci_find_bsf(0, 0, 0);
1725	if (dev != NULL)
1726		return (pci_msi_device_blacklisted(dev));
1727	return (0);
1728}
1729
1730/*
1731 * Attempt to allocate *count MSI messages.  The actual number allocated is
1732 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1734 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/*
	 * MSI requires power of 2 number of messages.  Note that a
	 * non-power-of-2 request fails outright rather than being
	 * rounded down, so callers must pass 1, 2, 4, 8, 16, or 32.
	 */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* Encode log2(actual) in the Multiple Message Enable field. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1853
1854/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  ENODEV means no MSI-X messages were
	 * allocated, so fall through to plain MSI; any other result
	 * (success or EBUSY) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the bridge release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1902
1903/*
1904 * Return the max supported MSI messages this device supports.
1905 * Basically, assuming the MD code can alloc messages, this function
1906 * should return the maximum value that pci_alloc_msi() can return.
1907 * Thus, it is subject to the tunables, etc.
1908 */
1909int
1910pci_msi_count_method(device_t dev, device_t child)
1911{
1912	struct pci_devinfo *dinfo = device_get_ivars(child);
1913	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1914
1915	if (pci_do_msi && msi->msi_location != 0)
1916		return (msi->msi_msgnum);
1917	return (0);
1918}
1919
1920/* free pcicfgregs structure and all depending data structures */
1921
1922int
1923pci_freecfg(struct pci_devinfo *dinfo)
1924{
1925	struct devlist *devlist_head;
1926	int i;
1927
1928	devlist_head = &pci_devq;
1929
1930	if (dinfo->cfg.vpd.vpd_reg) {
1931		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1932		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1933			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1934		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1935		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1936			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1937		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1938	}
1939	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1940	free(dinfo, M_DEVBUF);
1941
1942	/* increment the generation count */
1943	pci_generation++;
1944
1945	/* we're losing one device */
1946	pci_numdevs--;
1947	return (0);
1948}
1949
1950/*
 * PCI power management
1952 */
1953int
1954pci_set_powerstate_method(device_t dev, device_t child, int state)
1955{
1956	struct pci_devinfo *dinfo = device_get_ivars(child);
1957	pcicfgregs *cfg = &dinfo->cfg;
1958	uint16_t status;
1959	int result, oldstate, highest, delay;
1960
1961	if (cfg->pp.pp_cap == 0)
1962		return (EOPNOTSUPP);
1963
1964	/*
1965	 * Optimize a no state change request away.  While it would be OK to
1966	 * write to the hardware in theory, some devices have shown odd
1967	 * behavior when going from D3 -> D3.
1968	 */
1969	oldstate = pci_get_powerstate(child);
1970	if (oldstate == state)
1971		return (0);
1972
1973	/*
1974	 * The PCI power management specification states that after a state
1975	 * transition between PCI power states, system software must
1976	 * guarantee a minimal delay before the function accesses the device.
1977	 * Compute the worst case delay that we need to guarantee before we
1978	 * access the device.  Many devices will be responsive much more
1979	 * quickly than this delay, but there are some that don't respond
1980	 * instantly to state changes.  Transitions to/from D3 state require
1981	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1982	 * is done below with DELAY rather than a sleeper function because
1983	 * this function can be called from contexts where we cannot sleep.
1984	 */
1985	highest = (oldstate > state) ? oldstate : state;
1986	if (highest == PCI_POWERSTATE_D3)
1987	    delay = 10000;
1988	else if (highest == PCI_POWERSTATE_D2)
1989	    delay = 200;
1990	else
1991	    delay = 0;
1992	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1993	    & ~PCIM_PSTAT_DMASK;
1994	result = 0;
1995	switch (state) {
1996	case PCI_POWERSTATE_D0:
1997		status |= PCIM_PSTAT_D0;
1998		break;
1999	case PCI_POWERSTATE_D1:
2000		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2001			return (EOPNOTSUPP);
2002		status |= PCIM_PSTAT_D1;
2003		break;
2004	case PCI_POWERSTATE_D2:
2005		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2006			return (EOPNOTSUPP);
2007		status |= PCIM_PSTAT_D2;
2008		break;
2009	case PCI_POWERSTATE_D3:
2010		status |= PCIM_PSTAT_D3;
2011		break;
2012	default:
2013		return (EINVAL);
2014	}
2015
2016	if (bootverbose)
2017		printf(
2018		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2019		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2020		    dinfo->cfg.func, oldstate, state);
2021
2022	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2023	if (delay)
2024		DELAY(delay);
2025	return (0);
2026}
2027
2028int
2029pci_get_powerstate_method(device_t dev, device_t child)
2030{
2031	struct pci_devinfo *dinfo = device_get_ivars(child);
2032	pcicfgregs *cfg = &dinfo->cfg;
2033	uint16_t status;
2034	int result;
2035
2036	if (cfg->pp.pp_cap != 0) {
2037		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2038		switch (status & PCIM_PSTAT_DMASK) {
2039		case PCIM_PSTAT_D0:
2040			result = PCI_POWERSTATE_D0;
2041			break;
2042		case PCIM_PSTAT_D1:
2043			result = PCI_POWERSTATE_D1;
2044			break;
2045		case PCIM_PSTAT_D2:
2046			result = PCI_POWERSTATE_D2;
2047			break;
2048		case PCIM_PSTAT_D3:
2049			result = PCI_POWERSTATE_D3;
2050			break;
2051		default:
2052			result = PCI_POWERSTATE_UNKNOWN;
2053			break;
2054		}
2055	} else {
2056		/* No support, device is always at D0 */
2057		result = PCI_POWERSTATE_D0;
2058	}
2059	return (result);
2060}
2061
2062/*
2063 * Some convenience functions for PCI device drivers.
2064 */
2065
2066static __inline void
2067pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2068{
2069	uint16_t	command;
2070
2071	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2072	command |= bit;
2073	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2074}
2075
2076static __inline void
2077pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2078{
2079	uint16_t	command;
2080
2081	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2082	command &= ~bit;
2083	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2084}
2085
2086int
2087pci_enable_busmaster_method(device_t dev, device_t child)
2088{
2089	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2090	return (0);
2091}
2092
2093int
2094pci_disable_busmaster_method(device_t dev, device_t child)
2095{
2096	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2097	return (0);
2098}
2099
2100int
2101pci_enable_io_method(device_t dev, device_t child, int space)
2102{
2103	uint16_t command;
2104	uint16_t bit;
2105	char *error;
2106
2107	bit = 0;
2108	error = NULL;
2109
2110	switch(space) {
2111	case SYS_RES_IOPORT:
2112		bit = PCIM_CMD_PORTEN;
2113		error = "port";
2114		break;
2115	case SYS_RES_MEMORY:
2116		bit = PCIM_CMD_MEMEN;
2117		error = "memory";
2118		break;
2119	default:
2120		return (EINVAL);
2121	}
2122	pci_set_command_bit(dev, child, bit);
2123	/* Some devices seem to need a brief stall here, what do to? */
2124	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2125	if (command & bit)
2126		return (0);
2127	device_printf(child, "failed to enable %s mapping!\n", error);
2128	return (ENXIO);
2129}
2130
2131int
2132pci_disable_io_method(device_t dev, device_t child, int space)
2133{
2134	uint16_t command;
2135	uint16_t bit;
2136	char *error;
2137
2138	bit = 0;
2139	error = NULL;
2140
2141	switch(space) {
2142	case SYS_RES_IOPORT:
2143		bit = PCIM_CMD_PORTEN;
2144		error = "port";
2145		break;
2146	case SYS_RES_MEMORY:
2147		bit = PCIM_CMD_MEMEN;
2148		error = "memory";
2149		break;
2150	default:
2151		return (EINVAL);
2152	}
2153	pci_clear_command_bit(dev, child, bit);
2154	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2155	if (command & bit) {
2156		device_printf(child, "failed to disable %s mapping!\n", error);
2157		return (ENXIO);
2158	}
2159	return (0);
2160}
2161
2162/*
2163 * New style pci driver.  Parent device is either a pci-host-bridge or a
2164 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2165 */
2166
/*
 * Dump the interesting parts of a device's config header to the
 * console.  Only does anything when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power-management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2223
2224static int
2225pci_porten(device_t pcib, int b, int s, int f)
2226{
2227	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2228		& PCIM_CMD_PORTEN) != 0;
2229}
2230
2231static int
2232pci_memen(device_t pcib, int b, int s, int f)
2233{
2234	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2235		& PCIM_CMD_MEMEN) != 0;
2236}
2237
2238/*
2239 * Add a resource based on a pci map register. Return 1 if the map
2240 * register is a 32bit map register or 2 if it is a 64bit register.
2241 */
2242static int
2243pci_add_map(device_t pcib, device_t bus, device_t dev,
2244    int b, int s, int f, int reg, struct resource_list *rl, int force,
2245    int prefetch)
2246{
2247	uint32_t map;
2248	pci_addr_t base;
2249	pci_addr_t start, end, count;
2250	uint8_t ln2size;
2251	uint8_t ln2range;
2252	uint32_t testval;
2253	uint16_t cmd;
2254	int type;
2255	int barlen;
2256	struct resource *res;
2257
2258	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2259	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2260	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2261	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2262
2263	if (PCI_BAR_MEM(map))
2264		type = SYS_RES_MEMORY;
2265	else
2266		type = SYS_RES_IOPORT;
2267	ln2size = pci_mapsize(testval);
2268	ln2range = pci_maprange(testval);
2269	base = pci_mapbase(map);
2270	barlen = ln2range == 64 ? 2 : 1;
2271
2272	/*
2273	 * For I/O registers, if bottom bit is set, and the next bit up
2274	 * isn't clear, we know we have a BAR that doesn't conform to the
2275	 * spec, so ignore it.  Also, sanity check the size of the data
2276	 * areas to the type of memory involved.  Memory must be at least
2277	 * 16 bytes in size, while I/O ranges must be at least 4.
2278	 */
2279	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2280		return (barlen);
2281	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2282	    (type == SYS_RES_IOPORT && ln2size < 2))
2283		return (barlen);
2284
2285	if (ln2range == 64)
2286		/* Read the other half of a 64bit map register */
2287		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2288	if (bootverbose) {
2289		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2290		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2291		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2292			printf(", port disabled\n");
2293		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2294			printf(", memory disabled\n");
2295		else
2296			printf(", enabled\n");
2297	}
2298
2299	/*
2300	 * If base is 0, then we have problems.  It is best to ignore
2301	 * such entries for the moment.  These will be allocated later if
2302	 * the driver specifically requests them.  However, some
2303	 * removable busses look better when all resources are allocated,
2304	 * so allow '0' to be overriden.
2305	 *
2306	 * Similarly treat maps whose values is the same as the test value
2307	 * read back.  These maps have had all f's written to them by the
2308	 * BIOS in an attempt to disable the resources.
2309	 */
2310	if (!force && (base == 0 || map == testval))
2311		return (barlen);
2312	if ((u_long)base != base) {
2313		device_printf(bus,
2314		    "pci%d:%d:%d:%d bar %#x too many address bits",
2315		    pci_get_domain(dev), b, s, f, reg);
2316		return (barlen);
2317	}
2318
2319	/*
2320	 * This code theoretically does the right thing, but has
2321	 * undesirable side effects in some cases where peripherals
2322	 * respond oddly to having these bits enabled.  Let the user
2323	 * be able to turn them off (since pci_enable_io_modes is 1 by
2324	 * default).
2325	 */
2326	if (pci_enable_io_modes) {
2327		/* Turn on resources that have been left off by a lazy BIOS */
2328		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2329			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2330			cmd |= PCIM_CMD_PORTEN;
2331			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2332		}
2333		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2334			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2335			cmd |= PCIM_CMD_MEMEN;
2336			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2337		}
2338	} else {
2339		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2340			return (barlen);
2341		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2342			return (barlen);
2343	}
2344
2345	count = 1 << ln2size;
2346	if (base == 0 || base == pci_mapbase(testval)) {
2347		start = 0;	/* Let the parent deside */
2348		end = ~0ULL;
2349	} else {
2350		start = base;
2351		end = base + (1 << ln2size) - 1;
2352	}
2353	resource_list_add(rl, type, reg, start, end, count);
2354
2355	/*
2356	 * Not quite sure what to do on failure of allocating the resource
2357	 * since I can postulate several right answers.
2358	 */
2359	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2360	    prefetch ? RF_PREFETCHABLE : 0);
2361	if (res == NULL)
2362		return (barlen);
2363	start = rman_get_start(res);
2364	if ((u_long)start != start) {
2365		/* Wait a minute!  this platform can't do this address. */
2366		device_printf(bus,
2367		    "pci%d:%d.%d.%x bar %#x start %#jx, too many bits.",
2368		    pci_get_domain(dev), b, s, f, reg, (uintmax_t)start);
2369		resource_list_release(rl, bus, dev, type, reg, res);
2370		return (barlen);
2371	}
2372	pci_write_config(dev, reg, start, 4);
2373	if (ln2range == 64)
2374		pci_write_config(dev, reg + 4, start >> 32, 4);
2375	return (barlen);
2376}
2377
2378/*
2379 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
2381 * same addresses that old ISA hardware did. This dictates that we use
2382 * those addresses and ignore the BAR's if we cannot set PCI native
2383 * addressing mode.
2384 */
2385static void
2386pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2387    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2388{
2389	int rid, type, progif;
2390#if 0
2391	/* if this device supports PCI native addressing use it */
2392	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2393	if ((progif & 0x8a) == 0x8a) {
2394		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2395		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2396			printf("Trying ATA native PCI addressing mode\n");
2397			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2398		}
2399	}
2400#endif
2401	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2402	type = SYS_RES_IOPORT;
2403	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2404		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2405		    prefetchmask & (1 << 0));
2406		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2407		    prefetchmask & (1 << 1));
2408	} else {
2409		rid = PCIR_BAR(0);
2410		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2411		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2412		    0);
2413		rid = PCIR_BAR(1);
2414		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2415		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2416		    0);
2417	}
2418	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2419		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2420		    prefetchmask & (1 << 2));
2421		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2422		    prefetchmask & (1 << 3));
2423	} else {
2424		rid = PCIR_BAR(2);
2425		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2426		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2427		    0);
2428		rid = PCIR_BAR(3);
2429		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2430		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2431		    0);
2432	}
2433	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2434	    prefetchmask & (1 << 4));
2435	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2436	    prefetchmask & (1 << 5));
2437}
2438
/*
 * Pick a legacy INTx IRQ for 'dev' and record it as the rid 0 IRQ
 * resource.  The IRQ comes from, in priority order: a user tunable,
 * the parent bus's interrupt routing (when asked or when intline is
 * bogus), or the existing intline register value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2486
/*
 * Walk the device's BARs and enter them on its resource list, apply
 * any quirk resources, and route its legacy interrupt if it has one.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices need special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 depending on BAR width. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2539
/*
 * Scan all slots and functions on bus 'busno' and create a child
 * device for every PCI function found.  dinfo_size allows subclassed
 * busses to embed struct pci_devinfo in a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): brief stall before the config read -- TODO confirm why. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an invalid header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Only probe functions 1..7 on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2572
/*
 * Create a device_t for a discovered PCI function and hook up its
 * per-device state.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot config space, then re-apply it to power the device up. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2584
/*
 * Probe method for the generic PCI bus driver.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2594
/*
 * Attach method: enumerate all functions on this bus and attach them
 * as children.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2616
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, then (when power management is enabled) place them in a
 * low-power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2664
/*
 * Bus resume method: power each child back to D0 (when power
 * management is enabled), restore its config space, then resume the
 * children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2699
2700static void
2701pci_load_vendor_data(void)
2702{
2703	caddr_t vendordata, info;
2704
2705	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2706		info = preload_search_info(vendordata, MODINFO_ADDR);
2707		pci_vendordata = *(char **)info;
2708		info = preload_search_info(vendordata, MODINFO_SIZE);
2709		pci_vendordata_size = *(size_t *)info;
2710		/* terminate the database */
2711		pci_vendordata[pci_vendordata_size] = '\n';
2712	}
2713}
2714
/*
 * Called when a new driver is added to the bus: re-probe any children
 * that do not yet have a driver so the newcomer gets a shot at them.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		/* Power the device up before probing it... */
		pci_cfg_restore(child, dinfo);
		/* ...and back down again if nothing attached. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2744
/*
 * Bus setup_intr method.  After the generic setup succeeds, direct
 * children using MSI or MSI-X additionally get their message address
 * and data programmed (via the parent bridge) the first time a handler
 * is attached to a given vector.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
	 * us the address and data register values.  If we fail for some
	 * reason, teardown the interrupt handler.
	 */
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first use only. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2821
/*
 * Bus teardown_intr method: undo pci_setup_intr(), disabling MSI or
 * masking the MSI-X entry once its last handler is removed.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, decrement the appropriate handlers
	 * count and mask the MSI-X message, or disable MSI messages
	 * if the count drops to 0.
	 */
	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/* NOTE(review): assumes rle != NULL for an active IRQ rid -- verify. */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last MSI handler gone: turn MSI off entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler for this entry: mask it. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (device_get_parent(child) == dev && rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
2872
2873int
2874pci_print_child(device_t dev, device_t child)
2875{
2876	struct pci_devinfo *dinfo;
2877	struct resource_list *rl;
2878	int retval = 0;
2879
2880	dinfo = device_get_ivars(child);
2881	rl = &dinfo->resources;
2882
2883	retval += bus_print_child_header(dev, child);
2884
2885	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2886	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2887	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2888	if (device_get_flags(dev))
2889		retval += printf(" flags %#x", device_get_flags(dev));
2890
2891	retval += printf(" at device %d.%d", pci_get_slot(child),
2892	    pci_get_function(child));
2893
2894	retval += bus_print_child_footer(dev, child);
2895
2896	return (retval);
2897}
2898
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() when
 * no driver attaches and the vendor database has no entry.  A subclass
 * of -1 is the catch-all description for that class.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2985
2986void
2987pci_probe_nomatch(device_t dev, device_t child)
2988{
2989	int	i;
2990	char	*cp, *scp, *device;
2991
2992	/*
2993	 * Look for a listing for this device in a loaded device database.
2994	 */
2995	if ((device = pci_describe_device(child)) != NULL) {
2996		device_printf(dev, "<%s>", device);
2997		free(device, M_DEVBUF);
2998	} else {
2999		/*
3000		 * Scan the class/subclass descriptions for a general
3001		 * description.
3002		 */
3003		cp = "unknown";
3004		scp = NULL;
3005		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3006			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3007				if (pci_nomatch_tab[i].subclass == -1) {
3008					cp = pci_nomatch_tab[i].desc;
3009				} else if (pci_nomatch_tab[i].subclass ==
3010				    pci_get_subclass(child)) {
3011					scp = pci_nomatch_tab[i].desc;
3012				}
3013			}
3014		}
3015		device_printf(dev, "<%s%s%s>",
3016		    cp ? cp : "",
3017		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3018		    scp ? scp : "");
3019	}
3020	printf(" at device %d.%d (no driver attached)\n",
3021	    pci_get_slot(child), pci_get_function(child));
3022	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3023	return;
3024}
3025
3026/*
3027 * Parse the PCI device database, if loaded, and return a pointer to a
3028 * description of the device.
3029 *
3030 * The database is flat text formatted as follows:
3031 *
3032 * Any line not in a valid format is ignored.
3033 * Lines are terminated with newline '\n' characters.
3034 *
3035 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3036 * the vendor name.
3037 *
3038 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3039 * - devices cannot be listed without a corresponding VENDOR line.
3040 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3041 * another TAB, then the device name.
3042 */
3043
3044/*
3045 * Assuming (ptr) points to the beginning of a line in the database,
3046 * return the vendor or device and description of the next entry.
3047 * The value of (vendor) or (device) inappropriate for the entry type
3048 * is set to -1.  Returns nonzero at the end of the database.
3049 *
3050 * Note that this is slightly unrobust in the face of corrupt data;
3051 * we attempt to safeguard against this by spamming the end of the
3052 * database with a newline when we initialise.
3053 */
3054static int
3055pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3056{
3057	char	*cp = *ptr;
3058	int	left;
3059
3060	*device = -1;
3061	*vendor = -1;
3062	**desc = '\0';
3063	for (;;) {
3064		left = pci_vendordata_size - (cp - pci_vendordata);
3065		if (left <= 0) {
3066			*ptr = cp;
3067			return(1);
3068		}
3069
3070		/* vendor entry? */
3071		if (*cp != '\t' &&
3072		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3073			break;
3074		/* device entry? */
3075		if (*cp == '\t' &&
3076		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3077			break;
3078
3079		/* skip to next line */
3080		while (*cp != '\n' && left > 0) {
3081			cp++;
3082			left--;
3083		}
3084		if (*cp == '\n') {
3085			cp++;
3086			left--;
3087		}
3088	}
3089	/* skip to next line */
3090	while (*cp != '\n' && left > 0) {
3091		cp++;
3092		left--;
3093	}
3094	if (*cp == '\n' && left > 0)
3095		cp++;
3096	*ptr = cp;
3097	return(0);
3098}
3099
3100static char *
3101pci_describe_device(device_t dev)
3102{
3103	int	vendor, device;
3104	char	*desc, *vp, *dp, *line;
3105
3106	desc = vp = dp = NULL;
3107
3108	/*
3109	 * If we have no vendor data, we can't do anything.
3110	 */
3111	if (pci_vendordata == NULL)
3112		goto out;
3113
3114	/*
3115	 * Scan the vendor data looking for this device
3116	 */
3117	line = pci_vendordata;
3118	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3119		goto out;
3120	for (;;) {
3121		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3122			goto out;
3123		if (vendor == pci_get_vendor(dev))
3124			break;
3125	}
3126	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3127		goto out;
3128	for (;;) {
3129		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3130			*dp = 0;
3131			break;
3132		}
3133		if (vendor != -1) {
3134			*dp = 0;
3135			break;
3136		}
3137		if (device == pci_get_device(dev))
3138			break;
3139	}
3140	if (dp[0] == '\0')
3141		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3142	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3143	    NULL)
3144		sprintf(desc, "%s, %s", vp, dp);
3145 out:
3146	if (vp != NULL)
3147		free(vp, M_DEVBUF);
3148	if (dp != NULL)
3149		free(dp, M_DEVBUF);
3150	return(desc);
3151}
3152
3153int
3154pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3155{
3156	struct pci_devinfo *dinfo;
3157	pcicfgregs *cfg;
3158
3159	dinfo = device_get_ivars(child);
3160	cfg = &dinfo->cfg;
3161
3162	switch (which) {
3163	case PCI_IVAR_ETHADDR:
3164		/*
3165		 * The generic accessor doesn't deal with failure, so
3166		 * we set the return value, then return an error.
3167		 */
3168		*((uint8_t **) result) = NULL;
3169		return (EINVAL);
3170	case PCI_IVAR_SUBVENDOR:
3171		*result = cfg->subvendor;
3172		break;
3173	case PCI_IVAR_SUBDEVICE:
3174		*result = cfg->subdevice;
3175		break;
3176	case PCI_IVAR_VENDOR:
3177		*result = cfg->vendor;
3178		break;
3179	case PCI_IVAR_DEVICE:
3180		*result = cfg->device;
3181		break;
3182	case PCI_IVAR_DEVID:
3183		*result = (cfg->device << 16) | cfg->vendor;
3184		break;
3185	case PCI_IVAR_CLASS:
3186		*result = cfg->baseclass;
3187		break;
3188	case PCI_IVAR_SUBCLASS:
3189		*result = cfg->subclass;
3190		break;
3191	case PCI_IVAR_PROGIF:
3192		*result = cfg->progif;
3193		break;
3194	case PCI_IVAR_REVID:
3195		*result = cfg->revid;
3196		break;
3197	case PCI_IVAR_INTPIN:
3198		*result = cfg->intpin;
3199		break;
3200	case PCI_IVAR_IRQ:
3201		*result = cfg->intline;
3202		break;
3203	case PCI_IVAR_DOMAIN:
3204		*result = cfg->domain;
3205		break;
3206	case PCI_IVAR_BUS:
3207		*result = cfg->bus;
3208		break;
3209	case PCI_IVAR_SLOT:
3210		*result = cfg->slot;
3211		break;
3212	case PCI_IVAR_FUNCTION:
3213		*result = cfg->func;
3214		break;
3215	case PCI_IVAR_CMDREG:
3216		*result = cfg->cmdreg;
3217		break;
3218	case PCI_IVAR_CACHELNSZ:
3219		*result = cfg->cachelnsz;
3220		break;
3221	case PCI_IVAR_MINGNT:
3222		*result = cfg->mingnt;
3223		break;
3224	case PCI_IVAR_MAXLAT:
3225		*result = cfg->maxlat;
3226		break;
3227	case PCI_IVAR_LATTIMER:
3228		*result = cfg->lattimer;
3229		break;
3230	default:
3231		return (ENOENT);
3232	}
3233	return (0);
3234}
3235
3236int
3237pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3238{
3239	struct pci_devinfo *dinfo;
3240
3241	dinfo = device_get_ivars(child);
3242
3243	switch (which) {
3244	case PCI_IVAR_INTPIN:
3245		dinfo->cfg.intpin = value;
3246		return (0);
3247	case PCI_IVAR_ETHADDR:
3248	case PCI_IVAR_SUBVENDOR:
3249	case PCI_IVAR_SUBDEVICE:
3250	case PCI_IVAR_VENDOR:
3251	case PCI_IVAR_DEVICE:
3252	case PCI_IVAR_DEVID:
3253	case PCI_IVAR_CLASS:
3254	case PCI_IVAR_SUBCLASS:
3255	case PCI_IVAR_PROGIF:
3256	case PCI_IVAR_REVID:
3257	case PCI_IVAR_IRQ:
3258	case PCI_IVAR_DOMAIN:
3259	case PCI_IVAR_BUS:
3260	case PCI_IVAR_SLOT:
3261	case PCI_IVAR_FUNCTION:
3262		return (EINVAL);	/* disallow for now */
3263
3264	default:
3265		return (ENOENT);
3266	}
3267}
3268
3269
3270#include "opt_ddb.h"
3271#ifdef DDB
3272#include <ddb/ddb.h>
3273#include <sys/cons.h>
3274
3275/*
3276 * List resources based on pci map registers, used for within ddb
3277 */
3278
/*
 * "show pciregs" DDB command: walk the global PCI device list and print
 * a one-line config-space summary (class/card/chip IDs, revision and
 * header type) for every enumerated device.  Stops early if the DDB
 * pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Use the attached driver's name if the device has one. */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices are labelled "none" with a running count. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3318#endif /* DDB */
3319
/*
 * Lazily allocate a resource for a PCI BAR: size the BAR by writing
 * all-ones and reading the mask back, allocate a suitably sized and
 * aligned range from the parent bus, program the BAR with the assigned
 * base and record the allocation on the child's resource list.
 * Returns NULL on any failure (bogus BAR, type mismatch, allocation
 * failure); on all exits the BAR is left programmed with either its
 * original or newly assigned value.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit memory BAR also occupies the following register. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation on the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the BAR (both halves for 64-bit) on every exit path. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3416
3417
/*
 * Bus resource allocation entry point.  For our own children this
 * performs lazy allocation: IRQs may be routed on first request (unless
 * MSI/MSI-X is already in use), BAR-backed I/O and memory ranges are
 * sized and programmed on demand via pci_alloc_map(), and previously
 * reserved entries are handed back (activating them if RF_ACTIVE was
 * requested).  Other requests fall through to the resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				/* First request for this BAR: size and map it. */
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3494
3495void
3496pci_delete_resource(device_t dev, device_t child, int type, int rid)
3497{
3498	struct pci_devinfo *dinfo;
3499	struct resource_list *rl;
3500	struct resource_list_entry *rle;
3501
3502	if (device_get_parent(child) != dev)
3503		return;
3504
3505	dinfo = device_get_ivars(child);
3506	rl = &dinfo->resources;
3507	rle = resource_list_find(rl, type, rid);
3508	if (rle) {
3509		if (rle->res) {
3510			if (rman_get_device(rle->res) != dev ||
3511			    rman_get_flags(rle->res) & RF_ACTIVE) {
3512				device_printf(dev, "delete_resource: "
3513				    "Resource still owned by child, oops. "
3514				    "(type=%d, rid=%d, addr=%lx)\n",
3515				    rle->type, rle->rid,
3516				    rman_get_start(rle->res));
3517				return;
3518			}
3519			bus_release_resource(dev, type, rid, rle->res);
3520		}
3521		resource_list_delete(rl, type, rid);
3522	}
3523	/*
3524	 * Why do we turn off the PCI configuration BAR when we delete a
3525	 * resource? -- imp
3526	 */
3527	pci_write_config(child, rid, 0, 4);
3528	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3529}
3530
3531struct resource_list *
3532pci_get_resource_list (device_t dev, device_t child)
3533{
3534	struct pci_devinfo *dinfo = device_get_ivars(child);
3535
3536	return (&dinfo->resources);
3537}
3538
3539uint32_t
3540pci_read_config_method(device_t dev, device_t child, int reg, int width)
3541{
3542	struct pci_devinfo *dinfo = device_get_ivars(child);
3543	pcicfgregs *cfg = &dinfo->cfg;
3544
3545	return (PCIB_READ_CONFIG(device_get_parent(dev),
3546	    cfg->bus, cfg->slot, cfg->func, reg, width));
3547}
3548
3549void
3550pci_write_config_method(device_t dev, device_t child, int reg,
3551    uint32_t val, int width)
3552{
3553	struct pci_devinfo *dinfo = device_get_ivars(child);
3554	pcicfgregs *cfg = &dinfo->cfg;
3555
3556	PCIB_WRITE_CONFIG(device_get_parent(dev),
3557	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3558}
3559
3560int
3561pci_child_location_str_method(device_t dev, device_t child, char *buf,
3562    size_t buflen)
3563{
3564
3565	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3566	    pci_get_function(child));
3567	return (0);
3568}
3569
3570int
3571pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3572    size_t buflen)
3573{
3574	struct pci_devinfo *dinfo;
3575	pcicfgregs *cfg;
3576
3577	dinfo = device_get_ivars(child);
3578	cfg = &dinfo->cfg;
3579	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3580	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3581	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3582	    cfg->progif);
3583	return (0);
3584}
3585
3586int
3587pci_assign_interrupt_method(device_t dev, device_t child)
3588{
3589	struct pci_devinfo *dinfo = device_get_ivars(child);
3590	pcicfgregs *cfg = &dinfo->cfg;
3591
3592	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3593	    cfg->intpin));
3594}
3595
3596static int
3597pci_modevent(module_t mod, int what, void *arg)
3598{
3599	static struct cdev *pci_cdev;
3600
3601	switch (what) {
3602	case MOD_LOAD:
3603		STAILQ_INIT(&pci_devq);
3604		pci_generation = 0;
3605		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3606		    "pci");
3607		pci_load_vendor_data();
3608		break;
3609
3610	case MOD_UNLOAD:
3611		destroy_dev(pci_cdev);
3612		break;
3613	}
3614
3615	return (0);
3616}
3617
/*
 * Restore a device's config-space registers from the copy cached in
 * 'dinfo' by pci_cfg_save(), e.g. across suspend/resume or a power
 * state change.  Only plain (header type 0) functions are handled.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the BARs and expansion ROM address cached in dinfo. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the writable portion of the type 0 header. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3663
/*
 * Save the writable type 0 config-space registers of 'dev' into the
 * cached copy in 'dinfo' (for later pci_cfg_restore()), and, when
 * 'setstate' is nonzero, power the device down according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	/* Cache the BARs and the expansion ROM address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* The cases below intentionally fall through to widen the policy. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3747