pci.c revision 175875
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 175875 2008-02-01 20:31:09Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static const char	*pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg, uint32_t *data);
99#if 0
100static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static void		pci_disable_msi(device_t dev);
105static void		pci_enable_msi(device_t dev, uint64_t address,
106			    uint16_t data);
107static void		pci_enable_msix(device_t dev, u_int index,
108			    uint64_t address, uint32_t data);
109static void		pci_mask_msix(device_t dev, u_int index);
110static void		pci_unmask_msix(device_t dev, u_int index);
111static int		pci_msi_blacklisted(void);
112static void		pci_resume_msi(device_t dev);
113static void		pci_resume_msix(device_t dev);
114
115static device_method_t pci_methods[] = {
116	/* Device interface */
117	DEVMETHOD(device_probe,		pci_probe),
118	DEVMETHOD(device_attach,	pci_attach),
119	DEVMETHOD(device_detach,	bus_generic_detach),
120	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
121	DEVMETHOD(device_suspend,	pci_suspend),
122	DEVMETHOD(device_resume,	pci_resume),
123
124	/* Bus interface */
125	DEVMETHOD(bus_print_child,	pci_print_child),
126	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
127	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
128	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
129	DEVMETHOD(bus_driver_added,	pci_driver_added),
130	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
131	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
132
133	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
134	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
135	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
136	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
137	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
138	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
139	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
140	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
141	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
142	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
143
144	/* PCI interface */
145	DEVMETHOD(pci_read_config,	pci_read_config_method),
146	DEVMETHOD(pci_write_config,	pci_write_config_method),
147	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
148	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
149	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
150	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
151	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
152	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
153	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
154	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
155	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
156	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
157	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
158	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
159	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
160	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
161	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
162	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
163
164	{ 0, 0 }
165};
166
167DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
168
169static devclass_t pci_devclass;
170DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
171MODULE_VERSION(pci, 1);
172
173static char	*pci_vendordata;
174static size_t	pci_vendordata_size;
175
176
/* Values for the `type' member of struct pci_quirk. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */

/*
 * One entry of the device quirk table: a vendor/device ID, the kind of
 * quirk that applies to it, and two quirk-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;		/* Vendor/device of the card */
	int	type;		/* One of the PCI_QUIRK_* values */
	int	arg1;		/* Quirk-specific argument */
	int	arg2;		/* Quirk-specific argument */
};
185
186struct pci_quirk pci_quirks[] = {
187	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
188	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
189	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
190	/* As does the Serverworks OSB4 (the SMBus mapping register) */
191	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
192
193	/*
194	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
195	 * or the CMIC-SL (AKA ServerWorks GC_LE).
196	 */
197	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
198	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
199
200	/*
201	 * MSI doesn't work on earlier Intel chipsets including
202	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
203	 */
204	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
205	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
206	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
207	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
209	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
210	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
211
212	/*
213	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
214	 * bridge.
215	 */
216	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217
218	{ 0 }
219};
220
221/* map register information */
222#define	PCI_MAPMEM	0x01	/* memory map */
223#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
224#define	PCI_MAPPORT	0x04	/* port map */
225
226struct devlist pci_devq;
227uint32_t pci_generation;
228uint32_t pci_numdevs = 0;
229static int pcie_chipset, pcix_chipset;
230
231/* sysctl vars */
232SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
233
234static int pci_enable_io_modes = 1;
235TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
236SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
237    &pci_enable_io_modes, 1,
238    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
239enable these bits correctly.  We'd like to do this all the time, but there\n\
240are some peripherals that this causes problems with.");
241
242static int pci_do_power_nodriver = 0;
243TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
244SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
245    &pci_do_power_nodriver, 0,
246  "Place a function into D3 state when no driver attaches to it.  0 means\n\
247disable.  1 means conservatively place devices into D3 state.  2 means\n\
248agressively place devices into D3 state.  3 means put absolutely everything\n\
249in D3 state.");
250
251static int pci_do_power_resume = 1;
252TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
253SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
254    &pci_do_power_resume, 1,
255  "Transition from D3 -> D0 on resume.");
256
257static int pci_do_msi = 1;
258TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
259SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
260    "Enable support for MSI interrupts");
261
262static int pci_do_msix = 1;
263TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
264SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
265    "Enable support for MSI-X interrupts");
266
267static int pci_honor_msi_blacklist = 1;
268TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
269SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
270    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271
272/* Find a device_t by bus/slot/function in domain 0 */
273
274device_t
275pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
276{
277
278	return (pci_find_dbsf(0, bus, slot, func));
279}
280
281/* Find a device_t by domain/bus/slot/function */
282
283device_t
284pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285{
286	struct pci_devinfo *dinfo;
287
288	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289		if ((dinfo->cfg.domain == domain) &&
290		    (dinfo->cfg.bus == bus) &&
291		    (dinfo->cfg.slot == slot) &&
292		    (dinfo->cfg.func == func)) {
293			return (dinfo->cfg.dev);
294		}
295	}
296
297	return (NULL);
298}
299
300/* Find a device_t by vendor/device ID */
301
302device_t
303pci_find_device(uint16_t vendor, uint16_t device)
304{
305	struct pci_devinfo *dinfo;
306
307	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308		if ((dinfo->cfg.vendor == vendor) &&
309		    (dinfo->cfg.device == device)) {
310			return (dinfo->cfg.dev);
311		}
312	}
313
314	return (NULL);
315}
316
317/* return base address of memory or port map */
318
319static uint32_t
320pci_mapbase(uint32_t mapreg)
321{
322
323	if (PCI_BAR_MEM(mapreg))
324		return (mapreg & PCIM_BAR_MEM_BASE);
325	else
326		return (mapreg & PCIM_BAR_IO_BASE);
327}
328
329/* return map type of memory or port map */
330
331static const char *
332pci_maptype(unsigned mapreg)
333{
334
335	if (PCI_BAR_IO(mapreg))
336		return ("I/O Port");
337	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338		return ("Prefetchable Memory");
339	return ("Memory");
340}
341
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * testval is reduced to its base-address bits with pci_mapbase(); the
 * result is the position of the lowest set bit of that value, or 0 when
 * no base-address bit is set.
 */
static int
pci_mapsize(uint32_t testval)
{
	uint32_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* base is non-zero here, so the loop is guaranteed to terminate. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
360
361/* return log2 of address range supported by map register */
362
363static int
364pci_maprange(unsigned mapreg)
365{
366	int ln2range = 0;
367
368	if (PCI_BAR_IO(mapreg))
369		ln2range = 32;
370	else
371		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372		case PCIM_BAR_MEM_32:
373			ln2range = 32;
374			break;
375		case PCIM_BAR_MEM_1MB:
376			ln2range = 20;
377			break;
378		case PCIM_BAR_MEM_64:
379			ln2range = 64;
380			break;
381		}
382	return (ln2range);
383}
384
385/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386
387static void
388pci_fixancient(pcicfgregs *cfg)
389{
390	if (cfg->hdrtype != 0)
391		return;
392
393	/* PCI to PCI bridges use header type 1 */
394	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395		cfg->hdrtype = 1;
396}
397
398/* extract header type specific config data */
399
400static void
401pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
402{
403#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
404	switch (cfg->hdrtype) {
405	case 0:
406		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
407		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
408		cfg->nummaps	    = PCI_MAXMAPS_0;
409		break;
410	case 1:
411		cfg->nummaps	    = PCI_MAXMAPS_1;
412		break;
413	case 2:
414		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
415		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
416		cfg->nummaps	    = PCI_MAXMAPS_2;
417		break;
418	}
419#undef REG
420}
421
422/* read configuration header into pcicfgregs structure */
423struct pci_devinfo *
424pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
425{
426#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
427	pcicfgregs *cfg = NULL;
428	struct pci_devinfo *devlist_entry;
429	struct devlist *devlist_head;
430
431	devlist_head = &pci_devq;
432
433	devlist_entry = NULL;
434
435	if (REG(PCIR_DEVVENDOR, 4) != -1) {
436		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
437		if (devlist_entry == NULL)
438			return (NULL);
439
440		cfg = &devlist_entry->cfg;
441
442		cfg->domain		= d;
443		cfg->bus		= b;
444		cfg->slot		= s;
445		cfg->func		= f;
446		cfg->vendor		= REG(PCIR_VENDOR, 2);
447		cfg->device		= REG(PCIR_DEVICE, 2);
448		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
449		cfg->statreg		= REG(PCIR_STATUS, 2);
450		cfg->baseclass		= REG(PCIR_CLASS, 1);
451		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
452		cfg->progif		= REG(PCIR_PROGIF, 1);
453		cfg->revid		= REG(PCIR_REVID, 1);
454		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
455		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
456		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
457		cfg->intpin		= REG(PCIR_INTPIN, 1);
458		cfg->intline		= REG(PCIR_INTLINE, 1);
459
460		cfg->mingnt		= REG(PCIR_MINGNT, 1);
461		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
462
463		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
464		cfg->hdrtype		&= ~PCIM_MFDEV;
465
466		pci_fixancient(cfg);
467		pci_hdrtypedata(pcib, b, s, f, cfg);
468
469		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
470			pci_read_extcap(pcib, cfg);
471
472		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
473
474		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
475		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
476		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
477		devlist_entry->conf.pc_sel.pc_func = cfg->func;
478		devlist_entry->conf.pc_hdr = cfg->hdrtype;
479
480		devlist_entry->conf.pc_subvendor = cfg->subvendor;
481		devlist_entry->conf.pc_subdevice = cfg->subdevice;
482		devlist_entry->conf.pc_vendor = cfg->vendor;
483		devlist_entry->conf.pc_device = cfg->device;
484
485		devlist_entry->conf.pc_class = cfg->baseclass;
486		devlist_entry->conf.pc_subclass = cfg->subclass;
487		devlist_entry->conf.pc_progif = cfg->progif;
488		devlist_entry->conf.pc_revid = cfg->revid;
489
490		pci_numdevs++;
491		pci_generation++;
492	}
493	return (devlist_entry);
494#undef REG
495}
496
497static void
498pci_read_extcap(device_t pcib, pcicfgregs *cfg)
499{
500#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
501#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
502#if defined(__i386__) || defined(__amd64__)
503	uint64_t addr;
504#endif
505	uint32_t val;
506	int	ptr, nextptr, ptrptr;
507
508	switch (cfg->hdrtype & PCIM_HDRTYPE) {
509	case 0:
510	case 1:
511		ptrptr = PCIR_CAP_PTR;
512		break;
513	case 2:
514		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
515		break;
516	default:
517		return;		/* no extended capabilities support */
518	}
519	nextptr = REG(ptrptr, 1);	/* sanity check? */
520
521	/*
522	 * Read capability entries.
523	 */
524	while (nextptr != 0) {
525		/* Sanity check */
526		if (nextptr > 255) {
527			printf("illegal PCI extended capability offset %d\n",
528			    nextptr);
529			return;
530		}
531		/* Find the next entry */
532		ptr = nextptr;
533		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
534
535		/* Process this entry */
536		switch (REG(ptr + PCICAP_ID, 1)) {
537		case PCIY_PMG:		/* PCI power management */
538			if (cfg->pp.pp_cap == 0) {
539				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
540				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
541				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
542				if ((nextptr - ptr) > PCIR_POWER_DATA)
543					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
544			}
545			break;
546#if defined(__i386__) || defined(__amd64__)
547		case PCIY_HT:		/* HyperTransport */
548			/* Determine HT-specific capability type. */
549			val = REG(ptr + PCIR_HT_COMMAND, 2);
550			switch (val & PCIM_HTCMD_CAP_MASK) {
551			case PCIM_HTCAP_MSI_MAPPING:
552				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
553					/* Sanity check the mapping window. */
554					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
555					    4);
556					addr <<= 32;
557					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
558					    4);
559					if (addr != MSI_INTEL_ADDR_BASE)
560						device_printf(pcib,
561	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
562						    cfg->domain, cfg->bus,
563						    cfg->slot, cfg->func,
564						    (long long)addr);
565				}
566
567				/* Enable MSI -> HT mapping. */
568				val |= PCIM_HTCMD_MSI_ENABLE;
569				WREG(ptr + PCIR_HT_COMMAND, val, 2);
570				break;
571			}
572			break;
573#endif
574		case PCIY_MSI:		/* PCI MSI */
575			cfg->msi.msi_location = ptr;
576			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
577			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
578						     PCIM_MSICTRL_MMC_MASK)>>1);
579			break;
580		case PCIY_MSIX:		/* PCI MSI-X */
581			cfg->msix.msix_location = ptr;
582			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
583			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
584			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
585			val = REG(ptr + PCIR_MSIX_TABLE, 4);
586			cfg->msix.msix_table_bar = PCIR_BAR(val &
587			    PCIM_MSIX_BIR_MASK);
588			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
589			val = REG(ptr + PCIR_MSIX_PBA, 4);
590			cfg->msix.msix_pba_bar = PCIR_BAR(val &
591			    PCIM_MSIX_BIR_MASK);
592			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
593			break;
594		case PCIY_VPD:		/* PCI Vital Product Data */
595			cfg->vpd.vpd_reg = ptr;
596			break;
597		case PCIY_SUBVENDOR:
598			/* Should always be true. */
599			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
600				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
601				cfg->subvendor = val & 0xffff;
602				cfg->subdevice = val >> 16;
603			}
604			break;
605		case PCIY_PCIX:		/* PCI-X */
606			/*
607			 * Assume we have a PCI-X chipset if we have
608			 * at least one PCI-PCI bridge with a PCI-X
609			 * capability.  Note that some systems with
610			 * PCI-express or HT chipsets might match on
611			 * this check as well.
612			 */
613			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
614				pcix_chipset = 1;
615			break;
616		case PCIY_EXPRESS:	/* PCI-express */
617			/*
618			 * Assume we have a PCI-express chipset if we have
619			 * at least one PCI-express device.
620			 */
621			pcie_chipset = 1;
622			break;
623		default:
624			break;
625		}
626	}
627/* REG and WREG use carry through to next functions */
628}
629
630/*
631 * PCI Vital Product Data
632 */
633
634#define	PCI_VPD_TIMEOUT		1000000
635
636static int
637pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
638{
639	int count = PCI_VPD_TIMEOUT;
640
641	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
642
643	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
644
645	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
646		if (--count < 0)
647			return (ENXIO);
648		DELAY(1);	/* limit looping */
649	}
650	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
651
652	return (0);
653}
654
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * Stores `data' in the VPD data register, writes the 4-byte-aligned VPD
 * address `reg' with bit 0x8000 set to the VPD address register, and polls
 * until that bit reads back clear.  Returns 0 on success, or ENXIO if the
 * bit is still set after PCI_VPD_TIMEOUT polls.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int tries;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	for (tries = PCI_VPD_TIMEOUT;
	    (REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000; ) {
		if (--tries < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
674
675#undef PCI_VPD_TIMEOUT
676
677struct vpd_readstate {
678	device_t	pcib;
679	pcicfgregs	*cfg;
680	uint32_t	val;
681	int		bytesinval;
682	int		off;
683	uint8_t		cksum;
684};
685
686static int
687vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
688{
689	uint32_t reg;
690	uint8_t byte;
691
692	if (vrs->bytesinval == 0) {
693		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
694			return (ENXIO);
695		vrs->val = le32toh(reg);
696		vrs->off += 4;
697		byte = vrs->val & 0xff;
698		vrs->bytesinval = 3;
699	} else {
700		vrs->val = vrs->val >> 8;
701		byte = vrs->val & 0xff;
702		vrs->bytesinval--;
703	}
704
705	vrs->cksum += byte;
706	*data = byte;
707	return (0);
708}
709
710static void
711pci_read_vpd(device_t pcib, pcicfgregs *cfg)
712{
713	struct vpd_readstate vrs;
714	int state;
715	int name;
716	int remain;
717	int i;
718	int alloc, off;		/* alloc/off for RO/W arrays */
719	int cksumvalid;
720	int dflen;
721	uint8_t byte;
722	uint8_t byte2;
723
724	/* init vpd reader */
725	vrs.bytesinval = 0;
726	vrs.off = 0;
727	vrs.pcib = pcib;
728	vrs.cfg = cfg;
729	vrs.cksum = 0;
730
731	state = 0;
732	name = remain = i = 0;	/* shut up stupid gcc */
733	alloc = off = 0;	/* shut up stupid gcc */
734	dflen = 0;		/* shut up stupid gcc */
735	cksumvalid = -1;
736	while (state >= 0) {
737		if (vpd_nextbyte(&vrs, &byte)) {
738			state = -2;
739			break;
740		}
741#if 0
742		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
743		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
744		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
745#endif
746		switch (state) {
747		case 0:		/* item name */
748			if (byte & 0x80) {
749				if (vpd_nextbyte(&vrs, &byte2)) {
750					state = -2;
751					break;
752				}
753				remain = byte2;
754				if (vpd_nextbyte(&vrs, &byte2)) {
755					state = -2;
756					break;
757				}
758				remain |= byte2 << 8;
759				if (remain > (0x7f*4 - vrs.off)) {
760					state = -1;
761					printf(
762			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
763					    cfg->domain, cfg->bus, cfg->slot,
764					    cfg->func, remain);
765				}
766				name = byte & 0x7f;
767			} else {
768				remain = byte & 0x7;
769				name = (byte >> 3) & 0xf;
770			}
771			switch (name) {
772			case 0x2:	/* String */
773				cfg->vpd.vpd_ident = malloc(remain + 1,
774				    M_DEVBUF, M_WAITOK);
775				i = 0;
776				state = 1;
777				break;
778			case 0xf:	/* End */
779				state = -1;
780				break;
781			case 0x10:	/* VPD-R */
782				alloc = 8;
783				off = 0;
784				cfg->vpd.vpd_ros = malloc(alloc *
785				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
786				    M_WAITOK | M_ZERO);
787				state = 2;
788				break;
789			case 0x11:	/* VPD-W */
790				alloc = 8;
791				off = 0;
792				cfg->vpd.vpd_w = malloc(alloc *
793				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
794				    M_WAITOK | M_ZERO);
795				state = 5;
796				break;
797			default:	/* Invalid data, abort */
798				state = -1;
799				break;
800			}
801			break;
802
803		case 1:	/* Identifier String */
804			cfg->vpd.vpd_ident[i++] = byte;
805			remain--;
806			if (remain == 0)  {
807				cfg->vpd.vpd_ident[i] = '\0';
808				state = 0;
809			}
810			break;
811
812		case 2:	/* VPD-R Keyword Header */
813			if (off == alloc) {
814				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
815				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
816				    M_DEVBUF, M_WAITOK | M_ZERO);
817			}
818			cfg->vpd.vpd_ros[off].keyword[0] = byte;
819			if (vpd_nextbyte(&vrs, &byte2)) {
820				state = -2;
821				break;
822			}
823			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
824			if (vpd_nextbyte(&vrs, &byte2)) {
825				state = -2;
826				break;
827			}
828			dflen = byte2;
829			if (dflen == 0 &&
830			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
831			    2) == 0) {
832				/*
833				 * if this happens, we can't trust the rest
834				 * of the VPD.
835				 */
836				printf(
837				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
838				    cfg->domain, cfg->bus, cfg->slot,
839				    cfg->func, dflen);
840				cksumvalid = 0;
841				state = -1;
842				break;
843			} else if (dflen == 0) {
844				cfg->vpd.vpd_ros[off].value = malloc(1 *
845				    sizeof(*cfg->vpd.vpd_ros[off].value),
846				    M_DEVBUF, M_WAITOK);
847				cfg->vpd.vpd_ros[off].value[0] = '\x00';
848			} else
849				cfg->vpd.vpd_ros[off].value = malloc(
850				    (dflen + 1) *
851				    sizeof(*cfg->vpd.vpd_ros[off].value),
852				    M_DEVBUF, M_WAITOK);
853			remain -= 3;
854			i = 0;
855			/* keep in sync w/ state 3's transistions */
856			if (dflen == 0 && remain == 0)
857				state = 0;
858			else if (dflen == 0)
859				state = 2;
860			else
861				state = 3;
862			break;
863
864		case 3:	/* VPD-R Keyword Value */
865			cfg->vpd.vpd_ros[off].value[i++] = byte;
866			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
867			    "RV", 2) == 0 && cksumvalid == -1) {
868				if (vrs.cksum == 0)
869					cksumvalid = 1;
870				else {
871					if (bootverbose)
872						printf(
873				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
874						    cfg->domain, cfg->bus,
875						    cfg->slot, cfg->func,
876						    vrs.cksum);
877					cksumvalid = 0;
878					state = -1;
879					break;
880				}
881			}
882			dflen--;
883			remain--;
884			/* keep in sync w/ state 2's transistions */
885			if (dflen == 0)
886				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
887			if (dflen == 0 && remain == 0) {
888				cfg->vpd.vpd_rocnt = off;
889				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
890				    off * sizeof(*cfg->vpd.vpd_ros),
891				    M_DEVBUF, M_WAITOK | M_ZERO);
892				state = 0;
893			} else if (dflen == 0)
894				state = 2;
895			break;
896
897		case 4:
898			remain--;
899			if (remain == 0)
900				state = 0;
901			break;
902
903		case 5:	/* VPD-W Keyword Header */
904			if (off == alloc) {
905				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
906				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
907				    M_DEVBUF, M_WAITOK | M_ZERO);
908			}
909			cfg->vpd.vpd_w[off].keyword[0] = byte;
910			if (vpd_nextbyte(&vrs, &byte2)) {
911				state = -2;
912				break;
913			}
914			cfg->vpd.vpd_w[off].keyword[1] = byte2;
915			if (vpd_nextbyte(&vrs, &byte2)) {
916				state = -2;
917				break;
918			}
919			cfg->vpd.vpd_w[off].len = dflen = byte2;
920			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
921			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
922			    sizeof(*cfg->vpd.vpd_w[off].value),
923			    M_DEVBUF, M_WAITOK);
924			remain -= 3;
925			i = 0;
926			/* keep in sync w/ state 6's transistions */
927			if (dflen == 0 && remain == 0)
928				state = 0;
929			else if (dflen == 0)
930				state = 5;
931			else
932				state = 6;
933			break;
934
935		case 6:	/* VPD-W Keyword Value */
936			cfg->vpd.vpd_w[off].value[i++] = byte;
937			dflen--;
938			remain--;
939			/* keep in sync w/ state 5's transistions */
940			if (dflen == 0)
941				cfg->vpd.vpd_w[off++].value[i++] = '\0';
942			if (dflen == 0 && remain == 0) {
943				cfg->vpd.vpd_wcnt = off;
944				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
945				    off * sizeof(*cfg->vpd.vpd_w),
946				    M_DEVBUF, M_WAITOK | M_ZERO);
947				state = 0;
948			} else if (dflen == 0)
949				state = 5;
950			break;
951
952		default:
953			printf("pci%d:%d:%d:%d: invalid state: %d\n",
954			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
955			    state);
956			state = -1;
957			break;
958		}
959	}
960
961	if (cksumvalid == 0 || state < -1) {
962		/* read-only data bad, clean up */
963		if (cfg->vpd.vpd_ros != NULL) {
964			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
965				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
966			free(cfg->vpd.vpd_ros, M_DEVBUF);
967			cfg->vpd.vpd_ros = NULL;
968		}
969	}
970	if (state < -1) {
971		/* I/O error, clean up */
972		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
973		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
974		if (cfg->vpd.vpd_ident != NULL) {
975			free(cfg->vpd.vpd_ident, M_DEVBUF);
976			cfg->vpd.vpd_ident = NULL;
977		}
978		if (cfg->vpd.vpd_w != NULL) {
979			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
980				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
981			free(cfg->vpd.vpd_w, M_DEVBUF);
982			cfg->vpd.vpd_w = NULL;
983		}
984	}
985	cfg->vpd.vpd_cached = 1;
986#undef REG
987#undef WREG
988}
989
990int
991pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
992{
993	struct pci_devinfo *dinfo = device_get_ivars(child);
994	pcicfgregs *cfg = &dinfo->cfg;
995
996	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
997		pci_read_vpd(device_get_parent(dev), cfg);
998
999	*identptr = cfg->vpd.vpd_ident;
1000
1001	if (*identptr == NULL)
1002		return (ENXIO);
1003
1004	return (0);
1005}
1006
1007int
1008pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1009	const char **vptr)
1010{
1011	struct pci_devinfo *dinfo = device_get_ivars(child);
1012	pcicfgregs *cfg = &dinfo->cfg;
1013	int i;
1014
1015	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1016		pci_read_vpd(device_get_parent(dev), cfg);
1017
1018	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1019		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1020		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1021			*vptr = cfg->vpd.vpd_ros[i].value;
1022		}
1023
1024	if (i != cfg->vpd.vpd_rocnt)
1025		return (0);
1026
1027	*vptr = NULL;
1028	return (ENXIO);
1029}
1030
1031/*
1032 * Return the offset in configuration space of the requested extended
1033 * capability entry or 0 if the specified capability was not found.
1034 */
1035int
1036pci_find_extcap_method(device_t dev, device_t child, int capability,
1037    int *capreg)
1038{
1039	struct pci_devinfo *dinfo = device_get_ivars(child);
1040	pcicfgregs *cfg = &dinfo->cfg;
1041	u_int32_t status;
1042	u_int8_t ptr;
1043
1044	/*
1045	 * Check the CAP_LIST bit of the PCI status register first.
1046	 */
1047	status = pci_read_config(child, PCIR_STATUS, 2);
1048	if (!(status & PCIM_STATUS_CAPPRESENT))
1049		return (ENXIO);
1050
1051	/*
1052	 * Determine the start pointer of the capabilities list.
1053	 */
1054	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1055	case 0:
1056	case 1:
1057		ptr = PCIR_CAP_PTR;
1058		break;
1059	case 2:
1060		ptr = PCIR_CAP_PTR_2;
1061		break;
1062	default:
1063		/* XXX: panic? */
1064		return (ENXIO);		/* no extended capabilities support */
1065	}
1066	ptr = pci_read_config(child, ptr, 1);
1067
1068	/*
1069	 * Traverse the capabilities list.
1070	 */
1071	while (ptr != 0) {
1072		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1073			if (capreg != NULL)
1074				*capreg = ptr;
1075			return (0);
1076		}
1077		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1078	}
1079
1080	return (ENOENT);
1081}
1082
1083/*
1084 * Support for MSI-X message interrupts.
1085 */
1086void
1087pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1088{
1089	struct pci_devinfo *dinfo = device_get_ivars(dev);
1090	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1091	uint32_t offset;
1092
1093	KASSERT(msix->msix_table_len > index, ("bogus index"));
1094	offset = msix->msix_table_offset + index * 16;
1095	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1096	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1097	bus_write_4(msix->msix_table_res, offset + 8, data);
1098}
1099
1100void
1101pci_mask_msix(device_t dev, u_int index)
1102{
1103	struct pci_devinfo *dinfo = device_get_ivars(dev);
1104	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1105	uint32_t offset, val;
1106
1107	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1108	offset = msix->msix_table_offset + index * 16 + 12;
1109	val = bus_read_4(msix->msix_table_res, offset);
1110	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1111		val |= PCIM_MSIX_VCTRL_MASK;
1112		bus_write_4(msix->msix_table_res, offset, val);
1113	}
1114}
1115
1116void
1117pci_unmask_msix(device_t dev, u_int index)
1118{
1119	struct pci_devinfo *dinfo = device_get_ivars(dev);
1120	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1121	uint32_t offset, val;
1122
1123	KASSERT(msix->msix_table_len > index, ("bogus index"));
1124	offset = msix->msix_table_offset + index * 16 + 12;
1125	val = bus_read_4(msix->msix_table_res, offset);
1126	if (val & PCIM_MSIX_VCTRL_MASK) {
1127		val &= ~PCIM_MSIX_VCTRL_MASK;
1128		bus_write_4(msix->msix_table_res, offset, val);
1129	}
1130}
1131
1132int
1133pci_pending_msix(device_t dev, u_int index)
1134{
1135	struct pci_devinfo *dinfo = device_get_ivars(dev);
1136	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1137	uint32_t offset, bit;
1138
1139	KASSERT(msix->msix_table_len > index, ("bogus index"));
1140	offset = msix->msix_pba_offset + (index / 32) * 4;
1141	bit = 1 << index % 32;
1142	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1143}
1144
1145/*
1146 * Restore MSI-X registers and table during resume.  If MSI-X is
1147 * enabled then walk the virtual table to restore the actual MSI-X
1148 * table.
1149 */
1150static void
1151pci_resume_msix(device_t dev)
1152{
1153	struct pci_devinfo *dinfo = device_get_ivars(dev);
1154	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1155	struct msix_table_entry *mte;
1156	struct msix_vector *mv;
1157	int i;
1158
1159	if (msix->msix_alloc > 0) {
1160		/* First, mask all vectors. */
1161		for (i = 0; i < msix->msix_msgnum; i++)
1162			pci_mask_msix(dev, i);
1163
1164		/* Second, program any messages with at least one handler. */
1165		for (i = 0; i < msix->msix_table_len; i++) {
1166			mte = &msix->msix_table[i];
1167			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1168				continue;
1169			mv = &msix->msix_vectors[mte->mte_vector - 1];
1170			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1171			pci_unmask_msix(dev, i);
1172		}
1173	}
1174	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1175	    msix->msix_ctrl, 2);
1176}
1177
1178/*
1179 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1180 * returned in *count.  After this function returns, each message will be
1181 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1182 */
1183int
1184pci_alloc_msix_method(device_t dev, device_t child, int *count)
1185{
1186	struct pci_devinfo *dinfo = device_get_ivars(child);
1187	pcicfgregs *cfg = &dinfo->cfg;
1188	struct resource_list_entry *rle;
1189	int actual, error, i, irq, max;
1190
1191	/* Don't let count == 0 get us into trouble. */
1192	if (*count == 0)
1193		return (EINVAL);
1194
1195	/* If rid 0 is allocated, then fail. */
1196	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1197	if (rle != NULL && rle->res != NULL)
1198		return (ENXIO);
1199
1200	/* Already have allocated messages? */
1201	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1202		return (ENXIO);
1203
1204	/* If MSI is blacklisted for this system, fail. */
1205	if (pci_msi_blacklisted())
1206		return (ENXIO);
1207
1208	/* MSI-X capability present? */
1209	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1210		return (ENODEV);
1211
1212	/* Make sure the appropriate BARs are mapped. */
1213	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1214	    cfg->msix.msix_table_bar);
1215	if (rle == NULL || rle->res == NULL ||
1216	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1217		return (ENXIO);
1218	cfg->msix.msix_table_res = rle->res;
1219	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1220		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1221		    cfg->msix.msix_pba_bar);
1222		if (rle == NULL || rle->res == NULL ||
1223		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1224			return (ENXIO);
1225	}
1226	cfg->msix.msix_pba_res = rle->res;
1227
1228	if (bootverbose)
1229		device_printf(child,
1230		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1231		    *count, cfg->msix.msix_msgnum);
1232	max = min(*count, cfg->msix.msix_msgnum);
1233	for (i = 0; i < max; i++) {
1234		/* Allocate a message. */
1235		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1236		if (error)
1237			break;
1238		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1239		    irq, 1);
1240	}
1241	actual = i;
1242
1243	if (bootverbose) {
1244		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1245		if (actual == 1)
1246			device_printf(child, "using IRQ %lu for MSI-X\n",
1247			    rle->start);
1248		else {
1249			int run;
1250
1251			/*
1252			 * Be fancy and try to print contiguous runs of
1253			 * IRQ values as ranges.  'irq' is the previous IRQ.
1254			 * 'run' is true if we are in a range.
1255			 */
1256			device_printf(child, "using IRQs %lu", rle->start);
1257			irq = rle->start;
1258			run = 0;
1259			for (i = 1; i < actual; i++) {
1260				rle = resource_list_find(&dinfo->resources,
1261				    SYS_RES_IRQ, i + 1);
1262
1263				/* Still in a run? */
1264				if (rle->start == irq + 1) {
1265					run = 1;
1266					irq++;
1267					continue;
1268				}
1269
1270				/* Finish previous range. */
1271				if (run) {
1272					printf("-%d", irq);
1273					run = 0;
1274				}
1275
1276				/* Start new range. */
1277				printf(",%lu", rle->start);
1278				irq = rle->start;
1279			}
1280
1281			/* Unfinished range? */
1282			if (run)
1283				printf("-%d", irq);
1284			printf(" for MSI-X\n");
1285		}
1286	}
1287
1288	/* Mask all vectors. */
1289	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1290		pci_mask_msix(child, i);
1291
1292	/* Allocate and initialize vector data and virtual table. */
1293	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1294	    M_DEVBUF, M_WAITOK | M_ZERO);
1295	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1296	    M_DEVBUF, M_WAITOK | M_ZERO);
1297	for (i = 0; i < actual; i++) {
1298		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1299		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1300		cfg->msix.msix_table[i].mte_vector = i + 1;
1301	}
1302
1303	/* Update control register to enable MSI-X. */
1304	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1305	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1306	    cfg->msix.msix_ctrl, 2);
1307
1308	/* Update counts of alloc'd messages. */
1309	cfg->msix.msix_alloc = actual;
1310	cfg->msix.msix_table_len = actual;
1311	*count = actual;
1312	return (0);
1313}
1314
1315/*
1316 * By default, pci_alloc_msix() will assign the allocated IRQ
1317 * resources consecutively to the first N messages in the MSI-X table.
1318 * However, device drivers may want to use different layouts if they
1319 * either receive fewer messages than they asked for, or they wish to
1320 * populate the MSI-X table sparsely.  This method allows the driver
1321 * to specify what layout it wants.  It must be called after a
1322 * successful pci_alloc_msix() but before any of the associated
1323 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1324 *
1325 * The 'vectors' array contains 'count' message vectors.  The array
1326 * maps directly to the MSI-X table in that index 0 in the array
1327 * specifies the vector for the first message in the MSI-X table, etc.
1328 * The vector value in each array index can either be 0 to indicate
1329 * that no vector should be assigned to a message slot, or it can be a
1330 * number from 1 to N (where N is the count returned from a
1331 * succcessful call to pci_alloc_msix()) to indicate which message
1332 * vector (IRQ) to be used for the corresponding message.
1333 *
1334 * On successful return, each message with a non-zero vector will have
1335 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1336 * 1.  Additionally, if any of the IRQs allocated via the previous
1337 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1338 * will be freed back to the system automatically.
1339 *
1340 * For example, suppose a driver has a MSI-X table with 6 messages and
1341 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1342 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1343 * C.  After the call to pci_alloc_msix(), the device will be setup to
1344 * have an MSI-X table of ABC--- (where - means no vector assigned).
1345 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1346 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1347 * be freed back to the system.  This device will also have valid
1348 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1349 *
1350 * In any case, the SYS_RES_IRQ rid X will always map to the message
1351 * at MSI-X table index X - 1 and will only be valid if a vector is
1352 * assigned to that table entry.
1353 */
1354int
1355pci_remap_msix_method(device_t dev, device_t child, int count,
1356    const u_int *vectors)
1357{
1358	struct pci_devinfo *dinfo = device_get_ivars(child);
1359	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1360	struct resource_list_entry *rle;
1361	int i, irq, j, *used;
1362
1363	/*
1364	 * Have to have at least one message in the table but the
1365	 * table can't be bigger than the actual MSI-X table in the
1366	 * device.
1367	 */
1368	if (count == 0 || count > msix->msix_msgnum)
1369		return (EINVAL);
1370
1371	/* Sanity check the vectors. */
1372	for (i = 0; i < count; i++)
1373		if (vectors[i] > msix->msix_alloc)
1374			return (EINVAL);
1375
1376	/*
1377	 * Make sure there aren't any holes in the vectors to be used.
1378	 * It's a big pain to support it, and it doesn't really make
1379	 * sense anyway.  Also, at least one vector must be used.
1380	 */
1381	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1382	    M_ZERO);
1383	for (i = 0; i < count; i++)
1384		if (vectors[i] != 0)
1385			used[vectors[i] - 1] = 1;
1386	for (i = 0; i < msix->msix_alloc - 1; i++)
1387		if (used[i] == 0 && used[i + 1] == 1) {
1388			free(used, M_DEVBUF);
1389			return (EINVAL);
1390		}
1391	if (used[0] != 1) {
1392		free(used, M_DEVBUF);
1393		return (EINVAL);
1394	}
1395
1396	/* Make sure none of the resources are allocated. */
1397	for (i = 0; i < msix->msix_table_len; i++) {
1398		if (msix->msix_table[i].mte_vector == 0)
1399			continue;
1400		if (msix->msix_table[i].mte_handlers > 0)
1401			return (EBUSY);
1402		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1403		KASSERT(rle != NULL, ("missing resource"));
1404		if (rle->res != NULL)
1405			return (EBUSY);
1406	}
1407
1408	/* Free the existing resource list entries. */
1409	for (i = 0; i < msix->msix_table_len; i++) {
1410		if (msix->msix_table[i].mte_vector == 0)
1411			continue;
1412		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1413	}
1414
1415	/*
1416	 * Build the new virtual table keeping track of which vectors are
1417	 * used.
1418	 */
1419	free(msix->msix_table, M_DEVBUF);
1420	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1421	    M_DEVBUF, M_WAITOK | M_ZERO);
1422	for (i = 0; i < count; i++)
1423		msix->msix_table[i].mte_vector = vectors[i];
1424	msix->msix_table_len = count;
1425
1426	/* Free any unused IRQs and resize the vectors array if necessary. */
1427	j = msix->msix_alloc - 1;
1428	if (used[j] == 0) {
1429		struct msix_vector *vec;
1430
1431		while (used[j] == 0) {
1432			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1433			    msix->msix_vectors[j].mv_irq);
1434			j--;
1435		}
1436		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1437		    M_WAITOK);
1438		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1439		    (j + 1));
1440		free(msix->msix_vectors, M_DEVBUF);
1441		msix->msix_vectors = vec;
1442		msix->msix_alloc = j + 1;
1443	}
1444	free(used, M_DEVBUF);
1445
1446	/* Map the IRQs onto the rids. */
1447	for (i = 0; i < count; i++) {
1448		if (vectors[i] == 0)
1449			continue;
1450		irq = msix->msix_vectors[vectors[i]].mv_irq;
1451		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1452		    irq, 1);
1453	}
1454
1455	if (bootverbose) {
1456		device_printf(child, "Remapped MSI-X IRQs as: ");
1457		for (i = 0; i < count; i++) {
1458			if (i != 0)
1459				printf(", ");
1460			if (vectors[i] == 0)
1461				printf("---");
1462			else
1463				printf("%d",
1464				    msix->msix_vectors[vectors[i]].mv_irq);
1465		}
1466		printf("\n");
1467	}
1468
1469	return (0);
1470}
1471
1472static int
1473pci_release_msix(device_t dev, device_t child)
1474{
1475	struct pci_devinfo *dinfo = device_get_ivars(child);
1476	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1477	struct resource_list_entry *rle;
1478	int i;
1479
1480	/* Do we have any messages to release? */
1481	if (msix->msix_alloc == 0)
1482		return (ENODEV);
1483
1484	/* Make sure none of the resources are allocated. */
1485	for (i = 0; i < msix->msix_table_len; i++) {
1486		if (msix->msix_table[i].mte_vector == 0)
1487			continue;
1488		if (msix->msix_table[i].mte_handlers > 0)
1489			return (EBUSY);
1490		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1491		KASSERT(rle != NULL, ("missing resource"));
1492		if (rle->res != NULL)
1493			return (EBUSY);
1494	}
1495
1496	/* Update control register to disable MSI-X. */
1497	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1498	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1499	    msix->msix_ctrl, 2);
1500
1501	/* Free the resource list entries. */
1502	for (i = 0; i < msix->msix_table_len; i++) {
1503		if (msix->msix_table[i].mte_vector == 0)
1504			continue;
1505		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1506	}
1507	free(msix->msix_table, M_DEVBUF);
1508	msix->msix_table_len = 0;
1509
1510	/* Release the IRQs. */
1511	for (i = 0; i < msix->msix_alloc; i++)
1512		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1513		    msix->msix_vectors[i].mv_irq);
1514	free(msix->msix_vectors, M_DEVBUF);
1515	msix->msix_alloc = 0;
1516	return (0);
1517}
1518
1519/*
1520 * Return the max supported MSI-X messages this device supports.
1521 * Basically, assuming the MD code can alloc messages, this function
1522 * should return the maximum value that pci_alloc_msix() can return.
1523 * Thus, it is subject to the tunables, etc.
1524 */
1525int
1526pci_msix_count_method(device_t dev, device_t child)
1527{
1528	struct pci_devinfo *dinfo = device_get_ivars(child);
1529	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1530
1531	if (pci_do_msix && msix->msix_location != 0)
1532		return (msix->msix_msgnum);
1533	return (0);
1534}
1535
1536/*
1537 * Support for MSI message signalled interrupts.
1538 */
1539void
1540pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1541{
1542	struct pci_devinfo *dinfo = device_get_ivars(dev);
1543	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1544
1545	/* Write data and address values. */
1546	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1547	    address & 0xffffffff, 4);
1548	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1549		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1550		    address >> 32, 4);
1551		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1552		    data, 2);
1553	} else
1554		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1555		    2);
1556
1557	/* Enable MSI in the control register. */
1558	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1559	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1560	    2);
1561}
1562
1563void
1564pci_disable_msi(device_t dev)
1565{
1566	struct pci_devinfo *dinfo = device_get_ivars(dev);
1567	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1568
1569	/* Disable MSI in the control register. */
1570	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1571	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1572	    2);
1573}
1574
1575/*
1576 * Restore MSI registers during resume.  If MSI is enabled then
1577 * restore the data and address registers in addition to the control
1578 * register.
1579 */
1580static void
1581pci_resume_msi(device_t dev)
1582{
1583	struct pci_devinfo *dinfo = device_get_ivars(dev);
1584	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1585	uint64_t address;
1586	uint16_t data;
1587
1588	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1589		address = msi->msi_addr;
1590		data = msi->msi_data;
1591		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1592		    address & 0xffffffff, 4);
1593		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1594			pci_write_config(dev, msi->msi_location +
1595			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1596			pci_write_config(dev, msi->msi_location +
1597			    PCIR_MSI_DATA_64BIT, data, 2);
1598		} else
1599			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1600			    data, 2);
1601	}
1602	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1603	    2);
1604}
1605
1606int
1607pci_remap_msi_irq(device_t dev, u_int irq)
1608{
1609	struct pci_devinfo *dinfo = device_get_ivars(dev);
1610	pcicfgregs *cfg = &dinfo->cfg;
1611	struct resource_list_entry *rle;
1612	struct msix_table_entry *mte;
1613	struct msix_vector *mv;
1614	device_t bus;
1615	uint64_t addr;
1616	uint32_t data;
1617	int error, i, j;
1618
1619	bus = device_get_parent(dev);
1620
1621	/*
1622	 * Handle MSI first.  We try to find this IRQ among our list
1623	 * of MSI IRQs.  If we find it, we request updated address and
1624	 * data registers and apply the results.
1625	 */
1626	if (cfg->msi.msi_alloc > 0) {
1627
1628		/* If we don't have any active handlers, nothing to do. */
1629		if (cfg->msi.msi_handlers == 0)
1630			return (0);
1631		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1632			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1633			    i + 1);
1634			if (rle->start == irq) {
1635				error = PCIB_MAP_MSI(device_get_parent(bus),
1636				    dev, irq, &addr, &data);
1637				if (error)
1638					return (error);
1639				pci_disable_msi(dev);
1640				dinfo->cfg.msi.msi_addr = addr;
1641				dinfo->cfg.msi.msi_data = data;
1642				pci_enable_msi(dev, addr, data);
1643				return (0);
1644			}
1645		}
1646		return (ENOENT);
1647	}
1648
1649	/*
1650	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1651	 * we request the updated mapping info.  If that works, we go
1652	 * through all the slots that use this IRQ and update them.
1653	 */
1654	if (cfg->msix.msix_alloc > 0) {
1655		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1656			mv = &cfg->msix.msix_vectors[i];
1657			if (mv->mv_irq == irq) {
1658				error = PCIB_MAP_MSI(device_get_parent(bus),
1659				    dev, irq, &addr, &data);
1660				if (error)
1661					return (error);
1662				mv->mv_address = addr;
1663				mv->mv_data = data;
1664				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1665					mte = &cfg->msix.msix_table[j];
1666					if (mte->mte_vector != i + 1)
1667						continue;
1668					if (mte->mte_handlers == 0)
1669						continue;
1670					pci_mask_msix(dev, j);
1671					pci_enable_msix(dev, j, addr, data);
1672					pci_unmask_msix(dev, j);
1673				}
1674			}
1675		}
1676		return (ENOENT);
1677	}
1678
1679	return (ENOENT);
1680}
1681
1682/*
1683 * Returns true if the specified device is blacklisted because MSI
1684 * doesn't work.
1685 */
1686int
1687pci_msi_device_blacklisted(device_t dev)
1688{
1689	struct pci_quirk *q;
1690
1691	if (!pci_honor_msi_blacklist)
1692		return (0);
1693
1694	for (q = &pci_quirks[0]; q->devid; q++) {
1695		if (q->devid == pci_get_devid(dev) &&
1696		    q->type == PCI_QUIRK_DISABLE_MSI)
1697			return (1);
1698	}
1699	return (0);
1700}
1701
1702/*
1703 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1704 * we just check for blacklisted chipsets as represented by the
1705 * host-PCI bridge at device 0:0:0.  In the future, it may become
1706 * necessary to check other system attributes, such as the kenv values
1707 * that give the motherboard manufacturer and model number.
1708 */
1709static int
1710pci_msi_blacklisted(void)
1711{
1712	device_t dev;
1713
1714	if (!pci_honor_msi_blacklist)
1715		return (0);
1716
1717	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1718	if (!(pcie_chipset || pcix_chipset))
1719		return (1);
1720
1721	dev = pci_find_bsf(0, 0, 0);
1722	if (dev != NULL)
1723		return (pci_msi_device_blacklisted(dev));
1724	return (0);
1725}
1726
1727/*
1728 * Attempt to allocate *count MSI messages.  The actual number allocated is
1729 * returned in *count.  After this function returns, each message will be
1730 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1731 */
1732int
1733pci_alloc_msi_method(device_t dev, device_t child, int *count)
1734{
1735	struct pci_devinfo *dinfo = device_get_ivars(child);
1736	pcicfgregs *cfg = &dinfo->cfg;
1737	struct resource_list_entry *rle;
1738	int actual, error, i, irqs[32];
1739	uint16_t ctrl;
1740
1741	/* Don't let count == 0 get us into trouble. */
1742	if (*count == 0)
1743		return (EINVAL);
1744
1745	/* If rid 0 is allocated, then fail. */
1746	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1747	if (rle != NULL && rle->res != NULL)
1748		return (ENXIO);
1749
1750	/* Already have allocated messages? */
1751	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1752		return (ENXIO);
1753
1754	/* If MSI is blacklisted for this system, fail. */
1755	if (pci_msi_blacklisted())
1756		return (ENXIO);
1757
1758	/* MSI capability present? */
1759	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1760		return (ENODEV);
1761
1762	if (bootverbose)
1763		device_printf(child,
1764		    "attempting to allocate %d MSI vectors (%d supported)\n",
1765		    *count, cfg->msi.msi_msgnum);
1766
1767	/* Don't ask for more than the device supports. */
1768	actual = min(*count, cfg->msi.msi_msgnum);
1769
1770	/* Don't ask for more than 32 messages. */
1771	actual = min(actual, 32);
1772
1773	/* MSI requires power of 2 number of messages. */
1774	if (!powerof2(actual))
1775		return (EINVAL);
1776
1777	for (;;) {
1778		/* Try to allocate N messages. */
1779		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1780		    cfg->msi.msi_msgnum, irqs);
1781		if (error == 0)
1782			break;
1783		if (actual == 1)
1784			return (error);
1785
1786		/* Try N / 2. */
1787		actual >>= 1;
1788	}
1789
1790	/*
1791	 * We now have N actual messages mapped onto SYS_RES_IRQ
1792	 * resources in the irqs[] array, so add new resources
1793	 * starting at rid 1.
1794	 */
1795	for (i = 0; i < actual; i++)
1796		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1797		    irqs[i], irqs[i], 1);
1798
1799	if (bootverbose) {
1800		if (actual == 1)
1801			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1802		else {
1803			int run;
1804
1805			/*
1806			 * Be fancy and try to print contiguous runs
1807			 * of IRQ values as ranges.  'run' is true if
1808			 * we are in a range.
1809			 */
1810			device_printf(child, "using IRQs %d", irqs[0]);
1811			run = 0;
1812			for (i = 1; i < actual; i++) {
1813
1814				/* Still in a run? */
1815				if (irqs[i] == irqs[i - 1] + 1) {
1816					run = 1;
1817					continue;
1818				}
1819
1820				/* Finish previous range. */
1821				if (run) {
1822					printf("-%d", irqs[i - 1]);
1823					run = 0;
1824				}
1825
1826				/* Start new range. */
1827				printf(",%d", irqs[i]);
1828			}
1829
1830			/* Unfinished range? */
1831			if (run)
1832				printf("-%d", irqs[actual - 1]);
1833			printf(" for MSI\n");
1834		}
1835	}
1836
1837	/* Update control register with actual count. */
1838	ctrl = cfg->msi.msi_ctrl;
1839	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1840	ctrl |= (ffs(actual) - 1) << 4;
1841	cfg->msi.msi_ctrl = ctrl;
1842	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1843
1844	/* Update counts of alloc'd messages. */
1845	cfg->msi.msi_alloc = actual;
1846	cfg->msi.msi_handlers = 0;
1847	*count = actual;
1848	return (0);
1849}
1850
1851/* Release the MSI messages associated with this device. */
1852int
1853pci_release_msi_method(device_t dev, device_t child)
1854{
1855	struct pci_devinfo *dinfo = device_get_ivars(child);
1856	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1857	struct resource_list_entry *rle;
1858	int error, i, irqs[32];
1859
1860	/* Try MSI-X first. */
1861	error = pci_release_msix(dev, child);
1862	if (error != ENODEV)
1863		return (error);
1864
1865	/* Do we have any messages to release? */
1866	if (msi->msi_alloc == 0)
1867		return (ENODEV);
1868	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1869
1870	/* Make sure none of the resources are allocated. */
1871	if (msi->msi_handlers > 0)
1872		return (EBUSY);
1873	for (i = 0; i < msi->msi_alloc; i++) {
1874		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1875		KASSERT(rle != NULL, ("missing MSI resource"));
1876		if (rle->res != NULL)
1877			return (EBUSY);
1878		irqs[i] = rle->start;
1879	}
1880
1881	/* Update control register with 0 count. */
1882	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1883	    ("%s: MSI still enabled", __func__));
1884	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1885	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1886	    msi->msi_ctrl, 2);
1887
1888	/* Release the messages. */
1889	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1890	for (i = 0; i < msi->msi_alloc; i++)
1891		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1892
1893	/* Update alloc count. */
1894	msi->msi_alloc = 0;
1895	msi->msi_addr = 0;
1896	msi->msi_data = 0;
1897	return (0);
1898}
1899
1900/*
1901 * Return the max supported MSI messages this device supports.
1902 * Basically, assuming the MD code can alloc messages, this function
1903 * should return the maximum value that pci_alloc_msi() can return.
1904 * Thus, it is subject to the tunables, etc.
1905 */
1906int
1907pci_msi_count_method(device_t dev, device_t child)
1908{
1909	struct pci_devinfo *dinfo = device_get_ivars(child);
1910	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1911
1912	if (pci_do_msi && msi->msi_location != 0)
1913		return (msi->msi_msgnum);
1914	return (0);
1915}
1916
1917/* free pcicfgregs structure and all depending data structures */
1918
1919int
1920pci_freecfg(struct pci_devinfo *dinfo)
1921{
1922	struct devlist *devlist_head;
1923	int i;
1924
1925	devlist_head = &pci_devq;
1926
1927	if (dinfo->cfg.vpd.vpd_reg) {
1928		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1929		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1930			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1931		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1932		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1933			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1934		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1935	}
1936	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1937	free(dinfo, M_DEVBUF);
1938
1939	/* increment the generation count */
1940	pci_generation++;
1941
1942	/* we're losing one device */
1943	pci_numdevs--;
1944	return (0);
1945}
1946
1947/*
 * PCI power management
1949 */
1950int
1951pci_set_powerstate_method(device_t dev, device_t child, int state)
1952{
1953	struct pci_devinfo *dinfo = device_get_ivars(child);
1954	pcicfgregs *cfg = &dinfo->cfg;
1955	uint16_t status;
1956	int result, oldstate, highest, delay;
1957
1958	if (cfg->pp.pp_cap == 0)
1959		return (EOPNOTSUPP);
1960
1961	/*
1962	 * Optimize a no state change request away.  While it would be OK to
1963	 * write to the hardware in theory, some devices have shown odd
1964	 * behavior when going from D3 -> D3.
1965	 */
1966	oldstate = pci_get_powerstate(child);
1967	if (oldstate == state)
1968		return (0);
1969
1970	/*
1971	 * The PCI power management specification states that after a state
1972	 * transition between PCI power states, system software must
1973	 * guarantee a minimal delay before the function accesses the device.
1974	 * Compute the worst case delay that we need to guarantee before we
1975	 * access the device.  Many devices will be responsive much more
1976	 * quickly than this delay, but there are some that don't respond
1977	 * instantly to state changes.  Transitions to/from D3 state require
1978	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1979	 * is done below with DELAY rather than a sleeper function because
1980	 * this function can be called from contexts where we cannot sleep.
1981	 */
1982	highest = (oldstate > state) ? oldstate : state;
1983	if (highest == PCI_POWERSTATE_D3)
1984	    delay = 10000;
1985	else if (highest == PCI_POWERSTATE_D2)
1986	    delay = 200;
1987	else
1988	    delay = 0;
1989	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1990	    & ~PCIM_PSTAT_DMASK;
1991	result = 0;
1992	switch (state) {
1993	case PCI_POWERSTATE_D0:
1994		status |= PCIM_PSTAT_D0;
1995		break;
1996	case PCI_POWERSTATE_D1:
1997		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1998			return (EOPNOTSUPP);
1999		status |= PCIM_PSTAT_D1;
2000		break;
2001	case PCI_POWERSTATE_D2:
2002		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2003			return (EOPNOTSUPP);
2004		status |= PCIM_PSTAT_D2;
2005		break;
2006	case PCI_POWERSTATE_D3:
2007		status |= PCIM_PSTAT_D3;
2008		break;
2009	default:
2010		return (EINVAL);
2011	}
2012
2013	if (bootverbose)
2014		printf(
2015		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2016		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2017		    dinfo->cfg.func, oldstate, state);
2018
2019	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2020	if (delay)
2021		DELAY(delay);
2022	return (0);
2023}
2024
2025int
2026pci_get_powerstate_method(device_t dev, device_t child)
2027{
2028	struct pci_devinfo *dinfo = device_get_ivars(child);
2029	pcicfgregs *cfg = &dinfo->cfg;
2030	uint16_t status;
2031	int result;
2032
2033	if (cfg->pp.pp_cap != 0) {
2034		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2035		switch (status & PCIM_PSTAT_DMASK) {
2036		case PCIM_PSTAT_D0:
2037			result = PCI_POWERSTATE_D0;
2038			break;
2039		case PCIM_PSTAT_D1:
2040			result = PCI_POWERSTATE_D1;
2041			break;
2042		case PCIM_PSTAT_D2:
2043			result = PCI_POWERSTATE_D2;
2044			break;
2045		case PCIM_PSTAT_D3:
2046			result = PCI_POWERSTATE_D3;
2047			break;
2048		default:
2049			result = PCI_POWERSTATE_UNKNOWN;
2050			break;
2051		}
2052	} else {
2053		/* No support, device is always at D0 */
2054		result = PCI_POWERSTATE_D0;
2055	}
2056	return (result);
2057}
2058
2059/*
2060 * Some convenience functions for PCI device drivers.
2061 */
2062
2063static __inline void
2064pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2065{
2066	uint16_t	command;
2067
2068	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2069	command |= bit;
2070	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2071}
2072
2073static __inline void
2074pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2075{
2076	uint16_t	command;
2077
2078	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2079	command &= ~bit;
2080	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2081}
2082
2083int
2084pci_enable_busmaster_method(device_t dev, device_t child)
2085{
2086	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2087	return (0);
2088}
2089
2090int
2091pci_disable_busmaster_method(device_t dev, device_t child)
2092{
2093	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2094	return (0);
2095}
2096
2097int
2098pci_enable_io_method(device_t dev, device_t child, int space)
2099{
2100	uint16_t command;
2101	uint16_t bit;
2102	char *error;
2103
2104	bit = 0;
2105	error = NULL;
2106
2107	switch(space) {
2108	case SYS_RES_IOPORT:
2109		bit = PCIM_CMD_PORTEN;
2110		error = "port";
2111		break;
2112	case SYS_RES_MEMORY:
2113		bit = PCIM_CMD_MEMEN;
2114		error = "memory";
2115		break;
2116	default:
2117		return (EINVAL);
2118	}
2119	pci_set_command_bit(dev, child, bit);
2120	/* Some devices seem to need a brief stall here, what do to? */
2121	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2122	if (command & bit)
2123		return (0);
2124	device_printf(child, "failed to enable %s mapping!\n", error);
2125	return (ENXIO);
2126}
2127
2128int
2129pci_disable_io_method(device_t dev, device_t child, int space)
2130{
2131	uint16_t command;
2132	uint16_t bit;
2133	char *error;
2134
2135	bit = 0;
2136	error = NULL;
2137
2138	switch(space) {
2139	case SYS_RES_IOPORT:
2140		bit = PCIM_CMD_PORTEN;
2141		error = "port";
2142		break;
2143	case SYS_RES_MEMORY:
2144		bit = PCIM_CMD_MEMEN;
2145		error = "memory";
2146		break;
2147	default:
2148		return (EINVAL);
2149	}
2150	pci_clear_command_bit(dev, child, bit);
2151	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2152	if (command & bit) {
2153		device_printf(child, "failed to disable %s mapping!\n", error);
2154		return (ENXIO);
2155	}
2156	return (0);
2157}
2158
2159/*
2160 * New style pci driver.  Parent device is either a pci-host-bridge or a
2161 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2162 */
2163
2164void
2165pci_print_verbose(struct pci_devinfo *dinfo)
2166{
2167
2168	if (bootverbose) {
2169		pcicfgregs *cfg = &dinfo->cfg;
2170
2171		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2172		    cfg->vendor, cfg->device, cfg->revid);
2173		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2174		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2175		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2176		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2177		    cfg->mfdev);
2178		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2179		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2180		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2181		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2182		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2183		if (cfg->intpin > 0)
2184			printf("\tintpin=%c, irq=%d\n",
2185			    cfg->intpin +'a' -1, cfg->intline);
2186		if (cfg->pp.pp_cap) {
2187			uint16_t status;
2188
2189			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2190			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2191			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2192			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2193			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2194			    status & PCIM_PSTAT_DMASK);
2195		}
2196		if (cfg->msi.msi_location) {
2197			int ctrl;
2198
2199			ctrl = cfg->msi.msi_ctrl;
2200			printf("\tMSI supports %d message%s%s%s\n",
2201			    cfg->msi.msi_msgnum,
2202			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2203			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2204			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2205		}
2206		if (cfg->msix.msix_location) {
2207			printf("\tMSI-X supports %d message%s ",
2208			    cfg->msix.msix_msgnum,
2209			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2210			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2211				printf("in map 0x%x\n",
2212				    cfg->msix.msix_table_bar);
2213			else
2214				printf("in maps 0x%x and 0x%x\n",
2215				    cfg->msix.msix_table_bar,
2216				    cfg->msix.msix_pba_bar);
2217		}
2218	}
2219}
2220
2221static int
2222pci_porten(device_t pcib, int b, int s, int f)
2223{
2224	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2225		& PCIM_CMD_PORTEN) != 0;
2226}
2227
2228static int
2229pci_memen(device_t pcib, int b, int s, int f)
2230{
2231	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2232		& PCIM_CMD_MEMEN) != 0;
2233}
2234
2235/*
2236 * Add a resource based on a pci map register. Return 1 if the map
2237 * register is a 32bit map register or 2 if it is a 64bit register.
2238 */
2239static int
2240pci_add_map(device_t pcib, device_t bus, device_t dev,
2241    int b, int s, int f, int reg, struct resource_list *rl, int force,
2242    int prefetch)
2243{
2244	uint32_t map;
2245	pci_addr_t base;
2246	pci_addr_t start, end, count;
2247	uint8_t ln2size;
2248	uint8_t ln2range;
2249	uint32_t testval;
2250	uint16_t cmd;
2251	int type;
2252	int barlen;
2253	struct resource *res;
2254
2255	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2256	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2257	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2258	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2259
2260	if (PCI_BAR_MEM(map))
2261		type = SYS_RES_MEMORY;
2262	else
2263		type = SYS_RES_IOPORT;
2264	ln2size = pci_mapsize(testval);
2265	ln2range = pci_maprange(testval);
2266	base = pci_mapbase(map);
2267	barlen = ln2range == 64 ? 2 : 1;
2268
2269	/*
2270	 * For I/O registers, if bottom bit is set, and the next bit up
2271	 * isn't clear, we know we have a BAR that doesn't conform to the
2272	 * spec, so ignore it.  Also, sanity check the size of the data
2273	 * areas to the type of memory involved.  Memory must be at least
2274	 * 16 bytes in size, while I/O ranges must be at least 4.
2275	 */
2276	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2277		return (barlen);
2278	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2279	    (type == SYS_RES_IOPORT && ln2size < 2))
2280		return (barlen);
2281
2282	if (ln2range == 64)
2283		/* Read the other half of a 64bit map register */
2284		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2285	if (bootverbose) {
2286		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2287		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2288		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2289			printf(", port disabled\n");
2290		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2291			printf(", memory disabled\n");
2292		else
2293			printf(", enabled\n");
2294	}
2295
2296	/*
2297	 * If base is 0, then we have problems.  It is best to ignore
2298	 * such entries for the moment.  These will be allocated later if
2299	 * the driver specifically requests them.  However, some
2300	 * removable busses look better when all resources are allocated,
2301	 * so allow '0' to be overriden.
2302	 *
2303	 * Similarly treat maps whose values is the same as the test value
2304	 * read back.  These maps have had all f's written to them by the
2305	 * BIOS in an attempt to disable the resources.
2306	 */
2307	if (!force && (base == 0 || map == testval))
2308		return (barlen);
2309	if ((u_long)base != base) {
2310		device_printf(bus,
2311		    "pci%d:%d:%d:%d bar %#x too many address bits",
2312		    pci_get_domain(dev), b, s, f, reg);
2313		return (barlen);
2314	}
2315
2316	/*
2317	 * This code theoretically does the right thing, but has
2318	 * undesirable side effects in some cases where peripherals
2319	 * respond oddly to having these bits enabled.  Let the user
2320	 * be able to turn them off (since pci_enable_io_modes is 1 by
2321	 * default).
2322	 */
2323	if (pci_enable_io_modes) {
2324		/* Turn on resources that have been left off by a lazy BIOS */
2325		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2326			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2327			cmd |= PCIM_CMD_PORTEN;
2328			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2329		}
2330		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2331			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2332			cmd |= PCIM_CMD_MEMEN;
2333			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2334		}
2335	} else {
2336		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2337			return (barlen);
2338		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2339			return (barlen);
2340	}
2341
2342	count = 1 << ln2size;
2343	if (base == 0 || base == pci_mapbase(testval)) {
2344		start = 0;	/* Let the parent deside */
2345		end = ~0ULL;
2346	} else {
2347		start = base;
2348		end = base + (1 << ln2size) - 1;
2349	}
2350	resource_list_add(rl, type, reg, start, end, count);
2351
2352	/*
2353	 * Not quite sure what to do on failure of allocating the resource
2354	 * since I can postulate several right answers.
2355	 */
2356	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2357	    prefetch ? RF_PREFETCHABLE : 0);
2358	if (res == NULL)
2359		return (barlen);
2360	start = rman_get_start(res);
2361	if ((u_long)start != start) {
2362		/* Wait a minute!  this platform can't do this address. */
2363		device_printf(bus,
2364		    "pci%d:%d.%d.%x bar %#x start %#jx, too many bits.",
2365		    pci_get_domain(dev), b, s, f, reg, (uintmax_t)start);
2366		resource_list_release(rl, bus, dev, type, reg, res);
2367		return (barlen);
2368	}
2369	pci_write_config(dev, reg, start, 4);
2370	if (ln2range == 64)
2371		pci_write_config(dev, reg + 4, start >> 32, 4);
2372	return (barlen);
2373}
2374
/*
 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy mode demands that the primary and secondary ATA ports sit at the
 * same addresses that old ISA hardware used.  This dictates that we use
 * those addresses and ignore the BARs if we cannot set PCI native
 * addressing mode.
 *
 * For each channel the programming interface byte selects between the
 * channel's two BARs (added through pci_add_map()) and a fixed pair of
 * legacy I/O port ranges.  BARs 4 and 5 are always added through
 * pci_add_map().  Bit n of prefetchmask is passed as the prefetch flag
 * for BAR n.
 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	/* The legacy ranges below are all I/O port resources. */
	type = SYS_RES_IOPORT;
	/* Primary channel: BARs 0 and 1, or legacy ports 0x1f0-0x1f7/0x3f6. */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* The results of these allocations are not checked. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	/* Secondary channel: BARs 2 and 3, or legacy ports 0x170-0x177/0x376. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* The results of these allocations are not checked. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/* BARs 4 and 5 are handled like ordinary maps in either mode. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2435
2436static void
2437pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2438{
2439	struct pci_devinfo *dinfo = device_get_ivars(dev);
2440	pcicfgregs *cfg = &dinfo->cfg;
2441	char tunable_name[64];
2442	int irq;
2443
2444	/* Has to have an intpin to have an interrupt. */
2445	if (cfg->intpin == 0)
2446		return;
2447
2448	/* Let the user override the IRQ with a tunable. */
2449	irq = PCI_INVALID_IRQ;
2450	snprintf(tunable_name, sizeof(tunable_name),
2451	    "hw.pci%d.%d.%d.INT%c.irq",
2452	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2453	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2454		irq = PCI_INVALID_IRQ;
2455
2456	/*
2457	 * If we didn't get an IRQ via the tunable, then we either use the
2458	 * IRQ value in the intline register or we ask the bus to route an
2459	 * interrupt for us.  If force_route is true, then we only use the
2460	 * value in the intline register if the bus was unable to assign an
2461	 * IRQ.
2462	 */
2463	if (!PCI_INTERRUPT_VALID(irq)) {
2464		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2465			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2466		if (!PCI_INTERRUPT_VALID(irq))
2467			irq = cfg->intline;
2468	}
2469
2470	/* If after all that we don't have an IRQ, just bail. */
2471	if (!PCI_INTERRUPT_VALID(irq))
2472		return;
2473
2474	/* Update the config register if it changed. */
2475	if (irq != cfg->intline) {
2476		cfg->intline = irq;
2477		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2478	}
2479
2480	/* Add this IRQ as rid 0 interrupt resource. */
2481	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2482}
2483
2484void
2485pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2486{
2487	device_t pcib;
2488	struct pci_devinfo *dinfo = device_get_ivars(dev);
2489	pcicfgregs *cfg = &dinfo->cfg;
2490	struct resource_list *rl = &dinfo->resources;
2491	struct pci_quirk *q;
2492	int b, i, f, s;
2493
2494	pcib = device_get_parent(bus);
2495
2496	b = cfg->bus;
2497	s = cfg->slot;
2498	f = cfg->func;
2499
2500	/* ATA devices needs special map treatment */
2501	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2502	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2503	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2504	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2505	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2506		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2507	else
2508		for (i = 0; i < cfg->nummaps;)
2509			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2510			    rl, force, prefetchmask & (1 << i));
2511
2512	/*
2513	 * Add additional, quirked resources.
2514	 */
2515	for (q = &pci_quirks[0]; q->devid; q++) {
2516		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2517		    && q->type == PCI_QUIRK_MAP_REG)
2518			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2519			  force, 0);
2520	}
2521
2522	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2523#ifdef __PCI_REROUTE_INTERRUPT
2524		/*
2525		 * Try to re-route interrupts. Sometimes the BIOS or
2526		 * firmware may leave bogus values in these registers.
2527		 * If the re-route fails, then just stick with what we
2528		 * have.
2529		 */
2530		pci_assign_interrupt(bus, dev, 1);
2531#else
2532		pci_assign_interrupt(bus, dev, 0);
2533#endif
2534	}
2535}
2536
2537void
2538pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2539{
2540#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2541	device_t pcib = device_get_parent(dev);
2542	struct pci_devinfo *dinfo;
2543	int maxslots;
2544	int s, f, pcifunchigh;
2545	uint8_t hdrtype;
2546
2547	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2548	    ("dinfo_size too small"));
2549	maxslots = PCIB_MAXSLOTS(pcib);
2550	for (s = 0; s <= maxslots; s++) {
2551		pcifunchigh = 0;
2552		f = 0;
2553		DELAY(1);
2554		hdrtype = REG(PCIR_HDRTYPE, 1);
2555		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2556			continue;
2557		if (hdrtype & PCIM_MFDEV)
2558			pcifunchigh = PCI_FUNCMAX;
2559		for (f = 0; f <= pcifunchigh; f++) {
2560			dinfo = pci_read_device(pcib, domain, busno, s, f,
2561			    dinfo_size);
2562			if (dinfo != NULL) {
2563				pci_add_child(dev, dinfo);
2564			}
2565		}
2566	}
2567#undef REG
2568}
2569
2570void
2571pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2572{
2573	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2574	device_set_ivars(dinfo->cfg.dev, dinfo);
2575	resource_list_init(&dinfo->resources);
2576	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2577	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2578	pci_print_verbose(dinfo);
2579	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2580}
2581
2582static int
2583pci_probe(device_t dev)
2584{
2585
2586	device_set_desc(dev, "PCI bus");
2587
2588	/* Allow other subclasses to override this driver. */
2589	return (-1000);
2590}
2591
2592static int
2593pci_attach(device_t dev)
2594{
2595	int busno, domain;
2596
2597	/*
2598	 * Since there can be multiple independantly numbered PCI
2599	 * busses on systems with multiple PCI domains, we can't use
2600	 * the unit number to decide which bus we are probing. We ask
2601	 * the parent pcib what our domain and bus numbers are.
2602	 */
2603	domain = pcib_get_domain(dev);
2604	busno = pcib_get_bus(dev);
2605	if (bootverbose)
2606		device_printf(dev, "domain=%d, physical bus=%d\n",
2607		    domain, busno);
2608
2609	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2610
2611	return (bus_generic_attach(dev));
2612}
2613
2614int
2615pci_suspend(device_t dev)
2616{
2617	int dstate, error, i, numdevs;
2618	device_t acpi_dev, child, *devlist;
2619	struct pci_devinfo *dinfo;
2620
2621	/*
2622	 * Save the PCI configuration space for each child and set the
2623	 * device in the appropriate power state for this sleep state.
2624	 */
2625	acpi_dev = NULL;
2626	if (pci_do_power_resume)
2627		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2628	device_get_children(dev, &devlist, &numdevs);
2629	for (i = 0; i < numdevs; i++) {
2630		child = devlist[i];
2631		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2632		pci_cfg_save(child, dinfo, 0);
2633	}
2634
2635	/* Suspend devices before potentially powering them down. */
2636	error = bus_generic_suspend(dev);
2637	if (error) {
2638		free(devlist, M_TEMP);
2639		return (error);
2640	}
2641
2642	/*
2643	 * Always set the device to D3.  If ACPI suggests a different
2644	 * power state, use it instead.  If ACPI is not present, the
2645	 * firmware is responsible for managing device power.  Skip
2646	 * children who aren't attached since they are powered down
2647	 * separately.  Only manage type 0 devices for now.
2648	 */
2649	for (i = 0; acpi_dev && i < numdevs; i++) {
2650		child = devlist[i];
2651		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2652		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2653			dstate = PCI_POWERSTATE_D3;
2654			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2655			pci_set_powerstate(child, dstate);
2656		}
2657	}
2658	free(devlist, M_TEMP);
2659	return (0);
2660}
2661
2662int
2663pci_resume(device_t dev)
2664{
2665	int i, numdevs;
2666	device_t acpi_dev, child, *devlist;
2667	struct pci_devinfo *dinfo;
2668
2669	/*
2670	 * Set each child to D0 and restore its PCI configuration space.
2671	 */
2672	acpi_dev = NULL;
2673	if (pci_do_power_resume)
2674		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2675	device_get_children(dev, &devlist, &numdevs);
2676	for (i = 0; i < numdevs; i++) {
2677		/*
2678		 * Notify ACPI we're going to D0 but ignore the result.  If
2679		 * ACPI is not present, the firmware is responsible for
2680		 * managing device power.  Only manage type 0 devices for now.
2681		 */
2682		child = devlist[i];
2683		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2684		if (acpi_dev && device_is_attached(child) &&
2685		    dinfo->cfg.hdrtype == 0) {
2686			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2687			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2688		}
2689
2690		/* Now the device is powered up, restore its config space. */
2691		pci_cfg_restore(child, dinfo);
2692	}
2693	free(devlist, M_TEMP);
2694	return (bus_generic_resume(dev));
2695}
2696
2697static void
2698pci_load_vendor_data(void)
2699{
2700	caddr_t vendordata, info;
2701
2702	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2703		info = preload_search_info(vendordata, MODINFO_ADDR);
2704		pci_vendordata = *(char **)info;
2705		info = preload_search_info(vendordata, MODINFO_SIZE);
2706		pci_vendordata_size = *(size_t *)info;
2707		/* terminate the database */
2708		pci_vendordata[pci_vendordata_size] = '\n';
2709	}
2710}
2711
2712void
2713pci_driver_added(device_t dev, driver_t *driver)
2714{
2715	int numdevs;
2716	device_t *devlist;
2717	device_t child;
2718	struct pci_devinfo *dinfo;
2719	int i;
2720
2721	if (bootverbose)
2722		device_printf(dev, "driver added\n");
2723	DEVICE_IDENTIFY(driver, dev);
2724	device_get_children(dev, &devlist, &numdevs);
2725	for (i = 0; i < numdevs; i++) {
2726		child = devlist[i];
2727		if (device_get_state(child) != DS_NOTPRESENT)
2728			continue;
2729		dinfo = device_get_ivars(child);
2730		pci_print_verbose(dinfo);
2731		if (bootverbose)
2732			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
2733			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2734			    dinfo->cfg.func);
2735		pci_cfg_restore(child, dinfo);
2736		if (device_probe_and_attach(child) != 0)
2737			pci_cfg_save(child, dinfo, 1);
2738	}
2739	free(devlist, M_TEMP);
2740}
2741
/*
 * Bus method: install an interrupt handler for a child.
 *
 * The handler is first installed through bus_generic_setup_intr().  For a
 * direct child using a resource id greater than 0, the MSI or MSI-X
 * message is then mapped and enabled on first use and the matching
 * handler count is incremented.
 *
 * Returns 0 and stores the handler cookie in *cookiep on success.  On
 * failure an error is returned and *cookiep is not written; if the
 * failure came from PCIB_MAP_MSI() the generic handler is torn down
 * again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
	 * us the address and data register values.  If we fail for some
	 * reason, teardown the interrupt handler.
	 */
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map and enable only for the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid is a 1-based index into msix_table. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector only if it has no address yet. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler on this entry: program and unmask it. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}
	bad:
		/* error is still 0 here unless PCIB_MAP_MSI() failed. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2818
2819int
2820pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2821    void *cookie)
2822{
2823	struct msix_table_entry *mte;
2824	struct resource_list_entry *rle;
2825	struct pci_devinfo *dinfo;
2826	int error, rid;
2827
2828	/*
2829	 * If this is a direct child, check to see if the interrupt is
2830	 * MSI or MSI-X.  If so, decrement the appropriate handlers
2831	 * count and mask the MSI-X message, or disable MSI messages
2832	 * if the count drops to 0.
2833	 */
2834	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2835		return (EINVAL);
2836	rid = rman_get_rid(irq);
2837	if (device_get_parent(child) == dev && rid > 0) {
2838		dinfo = device_get_ivars(child);
2839		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2840		if (rle->res != irq)
2841			return (EINVAL);
2842		if (dinfo->cfg.msi.msi_alloc > 0) {
2843			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2844			    ("MSI-X index too high"));
2845			if (dinfo->cfg.msi.msi_handlers == 0)
2846				return (EINVAL);
2847			dinfo->cfg.msi.msi_handlers--;
2848			if (dinfo->cfg.msi.msi_handlers == 0)
2849				pci_disable_msi(child);
2850		} else {
2851			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2852			    ("No MSI or MSI-X interrupts allocated"));
2853			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2854			    ("MSI-X index too high"));
2855			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2856			if (mte->mte_handlers == 0)
2857				return (EINVAL);
2858			mte->mte_handlers--;
2859			if (mte->mte_handlers == 0)
2860				pci_mask_msix(child, rid - 1);
2861		}
2862	}
2863	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2864	if (device_get_parent(child) == dev && rid > 0)
2865		KASSERT(error == 0,
2866		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2867	return (error);
2868}
2869
2870int
2871pci_print_child(device_t dev, device_t child)
2872{
2873	struct pci_devinfo *dinfo;
2874	struct resource_list *rl;
2875	int retval = 0;
2876
2877	dinfo = device_get_ivars(child);
2878	rl = &dinfo->resources;
2879
2880	retval += bus_print_child_header(dev, child);
2881
2882	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2883	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2884	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2885	if (device_get_flags(dev))
2886		retval += printf(" flags %#x", device_get_flags(dev));
2887
2888	retval += printf(" at device %d.%d", pci_get_slot(child),
2889	    pci_get_function(child));
2890
2891	retval += bus_print_child_footer(dev, child);
2892
2893	return (retval);
2894}
2895
/*
 * Generic descriptions for devices that no driver claimed, keyed by PCI
 * class and subclass and consulted by pci_probe_nomatch().
 *
 * An entry with subclass -1 describes the whole class; the other entries
 * describe one subclass within that class.  The table is terminated by
 * an entry whose desc is NULL.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	/* Terminator: a NULL desc ends the table. */
	{0, 0,		NULL}
};
2982
2983void
2984pci_probe_nomatch(device_t dev, device_t child)
2985{
2986	int	i;
2987	char	*cp, *scp, *device;
2988
2989	/*
2990	 * Look for a listing for this device in a loaded device database.
2991	 */
2992	if ((device = pci_describe_device(child)) != NULL) {
2993		device_printf(dev, "<%s>", device);
2994		free(device, M_DEVBUF);
2995	} else {
2996		/*
2997		 * Scan the class/subclass descriptions for a general
2998		 * description.
2999		 */
3000		cp = "unknown";
3001		scp = NULL;
3002		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3003			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3004				if (pci_nomatch_tab[i].subclass == -1) {
3005					cp = pci_nomatch_tab[i].desc;
3006				} else if (pci_nomatch_tab[i].subclass ==
3007				    pci_get_subclass(child)) {
3008					scp = pci_nomatch_tab[i].desc;
3009				}
3010			}
3011		}
3012		device_printf(dev, "<%s%s%s>",
3013		    cp ? cp : "",
3014		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3015		    scp ? scp : "");
3016	}
3017	printf(" at device %d.%d (no driver attached)\n",
3018	    pci_get_slot(child), pci_get_function(child));
3019	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3020	return;
3021}
3022
3023/*
3024 * Parse the PCI device database, if loaded, and return a pointer to a
3025 * description of the device.
3026 *
3027 * The database is flat text formatted as follows:
3028 *
3029 * Any line not in a valid format is ignored.
3030 * Lines are terminated with newline '\n' characters.
3031 *
3032 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3033 * the vendor name.
3034 *
3035 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3036 * - devices cannot be listed without a corresponding VENDOR line.
3037 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3038 * another TAB, then the device name.
3039 */
3040
3041/*
3042 * Assuming (ptr) points to the beginning of a line in the database,
3043 * return the vendor or device and description of the next entry.
3044 * The value of (vendor) or (device) inappropriate for the entry type
3045 * is set to -1.  Returns nonzero at the end of the database.
3046 *
3047 * Note that this is slightly unrobust in the face of corrupt data;
3048 * we attempt to safeguard against this by spamming the end of the
3049 * database with a newline when we initialise.
3050 */
3051static int
3052pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3053{
3054	char	*cp = *ptr;
3055	int	left;
3056
3057	*device = -1;
3058	*vendor = -1;
3059	**desc = '\0';
3060	for (;;) {
3061		left = pci_vendordata_size - (cp - pci_vendordata);
3062		if (left <= 0) {
3063			*ptr = cp;
3064			return(1);
3065		}
3066
3067		/* vendor entry? */
3068		if (*cp != '\t' &&
3069		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3070			break;
3071		/* device entry? */
3072		if (*cp == '\t' &&
3073		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3074			break;
3075
3076		/* skip to next line */
3077		while (*cp != '\n' && left > 0) {
3078			cp++;
3079			left--;
3080		}
3081		if (*cp == '\n') {
3082			cp++;
3083			left--;
3084		}
3085	}
3086	/* skip to next line */
3087	while (*cp != '\n' && left > 0) {
3088		cp++;
3089		left--;
3090	}
3091	if (*cp == '\n' && left > 0)
3092		cp++;
3093	*ptr = cp;
3094	return(0);
3095}
3096
3097static char *
3098pci_describe_device(device_t dev)
3099{
3100	int	vendor, device;
3101	char	*desc, *vp, *dp, *line;
3102
3103	desc = vp = dp = NULL;
3104
3105	/*
3106	 * If we have no vendor data, we can't do anything.
3107	 */
3108	if (pci_vendordata == NULL)
3109		goto out;
3110
3111	/*
3112	 * Scan the vendor data looking for this device
3113	 */
3114	line = pci_vendordata;
3115	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3116		goto out;
3117	for (;;) {
3118		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3119			goto out;
3120		if (vendor == pci_get_vendor(dev))
3121			break;
3122	}
3123	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3124		goto out;
3125	for (;;) {
3126		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3127			*dp = 0;
3128			break;
3129		}
3130		if (vendor != -1) {
3131			*dp = 0;
3132			break;
3133		}
3134		if (device == pci_get_device(dev))
3135			break;
3136	}
3137	if (dp[0] == '\0')
3138		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3139	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3140	    NULL)
3141		sprintf(desc, "%s, %s", vp, dp);
3142 out:
3143	if (vp != NULL)
3144		free(vp, M_DEVBUF);
3145	if (dp != NULL)
3146		free(dp, M_DEVBUF);
3147	return(desc);
3148}
3149
3150int
3151pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3152{
3153	struct pci_devinfo *dinfo;
3154	pcicfgregs *cfg;
3155
3156	dinfo = device_get_ivars(child);
3157	cfg = &dinfo->cfg;
3158
3159	switch (which) {
3160	case PCI_IVAR_ETHADDR:
3161		/*
3162		 * The generic accessor doesn't deal with failure, so
3163		 * we set the return value, then return an error.
3164		 */
3165		*((uint8_t **) result) = NULL;
3166		return (EINVAL);
3167	case PCI_IVAR_SUBVENDOR:
3168		*result = cfg->subvendor;
3169		break;
3170	case PCI_IVAR_SUBDEVICE:
3171		*result = cfg->subdevice;
3172		break;
3173	case PCI_IVAR_VENDOR:
3174		*result = cfg->vendor;
3175		break;
3176	case PCI_IVAR_DEVICE:
3177		*result = cfg->device;
3178		break;
3179	case PCI_IVAR_DEVID:
3180		*result = (cfg->device << 16) | cfg->vendor;
3181		break;
3182	case PCI_IVAR_CLASS:
3183		*result = cfg->baseclass;
3184		break;
3185	case PCI_IVAR_SUBCLASS:
3186		*result = cfg->subclass;
3187		break;
3188	case PCI_IVAR_PROGIF:
3189		*result = cfg->progif;
3190		break;
3191	case PCI_IVAR_REVID:
3192		*result = cfg->revid;
3193		break;
3194	case PCI_IVAR_INTPIN:
3195		*result = cfg->intpin;
3196		break;
3197	case PCI_IVAR_IRQ:
3198		*result = cfg->intline;
3199		break;
3200	case PCI_IVAR_DOMAIN:
3201		*result = cfg->domain;
3202		break;
3203	case PCI_IVAR_BUS:
3204		*result = cfg->bus;
3205		break;
3206	case PCI_IVAR_SLOT:
3207		*result = cfg->slot;
3208		break;
3209	case PCI_IVAR_FUNCTION:
3210		*result = cfg->func;
3211		break;
3212	case PCI_IVAR_CMDREG:
3213		*result = cfg->cmdreg;
3214		break;
3215	case PCI_IVAR_CACHELNSZ:
3216		*result = cfg->cachelnsz;
3217		break;
3218	case PCI_IVAR_MINGNT:
3219		*result = cfg->mingnt;
3220		break;
3221	case PCI_IVAR_MAXLAT:
3222		*result = cfg->maxlat;
3223		break;
3224	case PCI_IVAR_LATTIMER:
3225		*result = cfg->lattimer;
3226		break;
3227	default:
3228		return (ENOENT);
3229	}
3230	return (0);
3231}
3232
3233int
3234pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3235{
3236	struct pci_devinfo *dinfo;
3237
3238	dinfo = device_get_ivars(child);
3239
3240	switch (which) {
3241	case PCI_IVAR_INTPIN:
3242		dinfo->cfg.intpin = value;
3243		return (0);
3244	case PCI_IVAR_ETHADDR:
3245	case PCI_IVAR_SUBVENDOR:
3246	case PCI_IVAR_SUBDEVICE:
3247	case PCI_IVAR_VENDOR:
3248	case PCI_IVAR_DEVICE:
3249	case PCI_IVAR_DEVID:
3250	case PCI_IVAR_CLASS:
3251	case PCI_IVAR_SUBCLASS:
3252	case PCI_IVAR_PROGIF:
3253	case PCI_IVAR_REVID:
3254	case PCI_IVAR_IRQ:
3255	case PCI_IVAR_DOMAIN:
3256	case PCI_IVAR_BUS:
3257	case PCI_IVAR_SLOT:
3258	case PCI_IVAR_FUNCTION:
3259		return (EINVAL);	/* disallow for now */
3260
3261	default:
3262		return (ENOENT);
3263	}
3264}
3265
3266
3267#include "opt_ddb.h"
3268#ifdef DDB
3269#include <ddb/ddb.h>
3270#include <sys/cons.h>
3271
/*
 * List the devices on the PCI device queue together with their
 * identification registers, for use from within ddb.
 */
3275
3276DB_SHOW_COMMAND(pciregs, db_pci_dump)
3277{
3278	struct pci_devinfo *dinfo;
3279	struct devlist *devlist_head;
3280	struct pci_conf *p;
3281	const char *name;
3282	int i, error, none_count;
3283
3284	none_count = 0;
3285	/* get the head of the device queue */
3286	devlist_head = &pci_devq;
3287
3288	/*
3289	 * Go through the list of devices and print out devices
3290	 */
3291	for (error = 0, i = 0,
3292	     dinfo = STAILQ_FIRST(devlist_head);
3293	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3294	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3295
3296		/* Populate pd_name and pd_unit */
3297		name = NULL;
3298		if (dinfo->cfg.dev)
3299			name = device_get_name(dinfo->cfg.dev);
3300
3301		p = &dinfo->conf;
3302		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3303			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3304			(name && *name) ? name : "none",
3305			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3306			none_count++,
3307			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3308			p->pc_sel.pc_func, (p->pc_class << 16) |
3309			(p->pc_subclass << 8) | p->pc_progif,
3310			(p->pc_subdevice << 16) | p->pc_subvendor,
3311			(p->pc_device << 16) | p->pc_vendor,
3312			p->pc_revid, p->pc_hdr);
3313	}
3314}
3315#endif /* DDB */
3316
3317static struct resource *
3318pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3319    u_long start, u_long end, u_long count, u_int flags)
3320{
3321	struct pci_devinfo *dinfo = device_get_ivars(child);
3322	struct resource_list *rl = &dinfo->resources;
3323	struct resource_list_entry *rle;
3324	struct resource *res;
3325	pci_addr_t map, testval;
3326	int mapsize;
3327
3328	/*
3329	 * Weed out the bogons, and figure out how large the BAR/map
3330	 * is.  Bars that read back 0 here are bogus and unimplemented.
3331	 * Note: atapci in legacy mode are special and handled elsewhere
3332	 * in the code.  If you have a atapci device in legacy mode and
3333	 * it fails here, that other code is broken.
3334	 */
3335	res = NULL;
3336	map = pci_read_config(child, *rid, 4);
3337	pci_write_config(child, *rid, 0xffffffff, 4);
3338	testval = pci_read_config(child, *rid, 4);
3339	if (pci_maprange(testval) == 64)
3340		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3341	if (pci_mapbase(testval) == 0)
3342		goto out;
3343
3344	/*
3345	 * Restore the original value of the BAR.  We may have reprogrammed
3346	 * the BAR of the low-level console device and when booting verbose,
3347	 * we need the console device addressable.
3348	 */
3349	pci_write_config(child, *rid, map, 4);
3350
3351	if (PCI_BAR_MEM(testval)) {
3352		if (type != SYS_RES_MEMORY) {
3353			if (bootverbose)
3354				device_printf(dev,
3355				    "child %s requested type %d for rid %#x,"
3356				    " but the BAR says it is an memio\n",
3357				    device_get_nameunit(child), type, *rid);
3358			goto out;
3359		}
3360	} else {
3361		if (type != SYS_RES_IOPORT) {
3362			if (bootverbose)
3363				device_printf(dev,
3364				    "child %s requested type %d for rid %#x,"
3365				    " but the BAR says it is an ioport\n",
3366				    device_get_nameunit(child), type, *rid);
3367			goto out;
3368		}
3369	}
3370	/*
3371	 * For real BARs, we need to override the size that
3372	 * the driver requests, because that's what the BAR
3373	 * actually uses and we would otherwise have a
3374	 * situation where we might allocate the excess to
3375	 * another driver, which won't work.
3376	 */
3377	mapsize = pci_mapsize(testval);
3378	count = 1UL << mapsize;
3379	if (RF_ALIGNMENT(flags) < mapsize)
3380		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3381
3382	/*
3383	 * Allocate enough resource, and then write back the
3384	 * appropriate bar for that resource.
3385	 */
3386	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3387	    start, end, count, flags);
3388	if (res == NULL) {
3389		device_printf(child,
3390		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3391		    count, *rid, type, start, end);
3392		goto out;
3393	}
3394	resource_list_add(rl, type, *rid, start, end, count);
3395	rle = resource_list_find(rl, type, *rid);
3396	if (rle == NULL)
3397		panic("pci_alloc_map: unexpectedly can't find resource.");
3398	rle->res = res;
3399	rle->start = rman_get_start(res);
3400	rle->end = rman_get_end(res);
3401	rle->count = count;
3402	if (bootverbose)
3403		device_printf(child,
3404		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3405		    count, *rid, type, rman_get_start(res));
3406	map = rman_get_start(res);
3407out:;
3408	pci_write_config(child, *rid, map, 4);
3409	if (pci_maprange(testval) == 64)
3410		pci_write_config(child, *rid + 4, map >> 32, 4);
3411	return (res);
3412}
3413
3414
3415struct resource *
3416pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3417		   u_long start, u_long end, u_long count, u_int flags)
3418{
3419	struct pci_devinfo *dinfo = device_get_ivars(child);
3420	struct resource_list *rl = &dinfo->resources;
3421	struct resource_list_entry *rle;
3422	pcicfgregs *cfg = &dinfo->cfg;
3423
3424	/*
3425	 * Perform lazy resource allocation
3426	 */
3427	if (device_get_parent(child) == dev) {
3428		switch (type) {
3429		case SYS_RES_IRQ:
3430			/*
3431			 * Can't alloc legacy interrupt once MSI messages
3432			 * have been allocated.
3433			 */
3434			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3435			    cfg->msix.msix_alloc > 0))
3436				return (NULL);
3437			/*
3438			 * If the child device doesn't have an
3439			 * interrupt routed and is deserving of an
3440			 * interrupt, try to assign it one.
3441			 */
3442			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3443			    (cfg->intpin != 0))
3444				pci_assign_interrupt(dev, child, 0);
3445			break;
3446		case SYS_RES_IOPORT:
3447		case SYS_RES_MEMORY:
3448			if (*rid < PCIR_BAR(cfg->nummaps)) {
3449				/*
3450				 * Enable the I/O mode.  We should
3451				 * also be assigning resources too
3452				 * when none are present.  The
3453				 * resource_list_alloc kind of sorta does
3454				 * this...
3455				 */
3456				if (PCI_ENABLE_IO(dev, child, type))
3457					return (NULL);
3458			}
3459			rle = resource_list_find(rl, type, *rid);
3460			if (rle == NULL)
3461				return (pci_alloc_map(dev, child, type, rid,
3462				    start, end, count, flags));
3463			break;
3464		}
3465		/*
3466		 * If we've already allocated the resource, then
3467		 * return it now.  But first we may need to activate
3468		 * it, since we don't allocate the resource as active
3469		 * above.  Normally this would be done down in the
3470		 * nexus, but since we short-circuit that path we have
3471		 * to do its job here.  Not sure if we should free the
3472		 * resource if it fails to activate.
3473		 */
3474		rle = resource_list_find(rl, type, *rid);
3475		if (rle != NULL && rle->res != NULL) {
3476			if (bootverbose)
3477				device_printf(child,
3478			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3479				    rman_get_size(rle->res), *rid, type,
3480				    rman_get_start(rle->res));
3481			if ((flags & RF_ACTIVE) &&
3482			    bus_generic_activate_resource(dev, child, type,
3483			    *rid, rle->res) != 0)
3484				return (NULL);
3485			return (rle->res);
3486		}
3487	}
3488	return (resource_list_alloc(rl, dev, child, type, rid,
3489	    start, end, count, flags));
3490}
3491
3492void
3493pci_delete_resource(device_t dev, device_t child, int type, int rid)
3494{
3495	struct pci_devinfo *dinfo;
3496	struct resource_list *rl;
3497	struct resource_list_entry *rle;
3498
3499	if (device_get_parent(child) != dev)
3500		return;
3501
3502	dinfo = device_get_ivars(child);
3503	rl = &dinfo->resources;
3504	rle = resource_list_find(rl, type, rid);
3505	if (rle) {
3506		if (rle->res) {
3507			if (rman_get_device(rle->res) != dev ||
3508			    rman_get_flags(rle->res) & RF_ACTIVE) {
3509				device_printf(dev, "delete_resource: "
3510				    "Resource still owned by child, oops. "
3511				    "(type=%d, rid=%d, addr=%lx)\n",
3512				    rle->type, rle->rid,
3513				    rman_get_start(rle->res));
3514				return;
3515			}
3516			bus_release_resource(dev, type, rid, rle->res);
3517		}
3518		resource_list_delete(rl, type, rid);
3519	}
3520	/*
3521	 * Why do we turn off the PCI configuration BAR when we delete a
3522	 * resource? -- imp
3523	 */
3524	pci_write_config(child, rid, 0, 4);
3525	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3526}
3527
3528struct resource_list *
3529pci_get_resource_list (device_t dev, device_t child)
3530{
3531	struct pci_devinfo *dinfo = device_get_ivars(child);
3532
3533	return (&dinfo->resources);
3534}
3535
3536uint32_t
3537pci_read_config_method(device_t dev, device_t child, int reg, int width)
3538{
3539	struct pci_devinfo *dinfo = device_get_ivars(child);
3540	pcicfgregs *cfg = &dinfo->cfg;
3541
3542	return (PCIB_READ_CONFIG(device_get_parent(dev),
3543	    cfg->bus, cfg->slot, cfg->func, reg, width));
3544}
3545
3546void
3547pci_write_config_method(device_t dev, device_t child, int reg,
3548    uint32_t val, int width)
3549{
3550	struct pci_devinfo *dinfo = device_get_ivars(child);
3551	pcicfgregs *cfg = &dinfo->cfg;
3552
3553	PCIB_WRITE_CONFIG(device_get_parent(dev),
3554	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3555}
3556
3557int
3558pci_child_location_str_method(device_t dev, device_t child, char *buf,
3559    size_t buflen)
3560{
3561
3562	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3563	    pci_get_function(child));
3564	return (0);
3565}
3566
3567int
3568pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3569    size_t buflen)
3570{
3571	struct pci_devinfo *dinfo;
3572	pcicfgregs *cfg;
3573
3574	dinfo = device_get_ivars(child);
3575	cfg = &dinfo->cfg;
3576	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3577	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3578	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3579	    cfg->progif);
3580	return (0);
3581}
3582
3583int
3584pci_assign_interrupt_method(device_t dev, device_t child)
3585{
3586	struct pci_devinfo *dinfo = device_get_ivars(child);
3587	pcicfgregs *cfg = &dinfo->cfg;
3588
3589	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3590	    cfg->intpin));
3591}
3592
3593static int
3594pci_modevent(module_t mod, int what, void *arg)
3595{
3596	static struct cdev *pci_cdev;
3597
3598	switch (what) {
3599	case MOD_LOAD:
3600		STAILQ_INIT(&pci_devq);
3601		pci_generation = 0;
3602		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3603		    "pci");
3604		pci_load_vendor_data();
3605		break;
3606
3607	case MOD_UNLOAD:
3608		destroy_dev(pci_cdev);
3609		break;
3610	}
3611
3612	return (0);
3613}
3614
3615void
3616pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3617{
3618	int i;
3619
3620	/*
3621	 * Only do header type 0 devices.  Type 1 devices are bridges,
3622	 * which we know need special treatment.  Type 2 devices are
3623	 * cardbus bridges which also require special treatment.
3624	 * Other types are unknown, and we err on the side of safety
3625	 * by ignoring them.
3626	 */
3627	if (dinfo->cfg.hdrtype != 0)
3628		return;
3629
3630	/*
3631	 * Restore the device to full power mode.  We must do this
3632	 * before we restore the registers because moving from D3 to
3633	 * D0 will cause the chip's BARs and some other registers to
3634	 * be reset to some unknown power on reset values.  Cut down
3635	 * the noise on boot by doing nothing if we are already in
3636	 * state D0.
3637	 */
3638	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3639		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3640	}
3641	for (i = 0; i < dinfo->cfg.nummaps; i++)
3642		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3643	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3644	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3645	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3646	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3647	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3648	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3649	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3650	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3651	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3652	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3653
3654	/* Restore MSI and MSI-X configurations if they are present. */
3655	if (dinfo->cfg.msi.msi_location != 0)
3656		pci_resume_msi(dev);
3657	if (dinfo->cfg.msix.msix_location != 0)
3658		pci_resume_msix(dev);
3659}
3660
3661void
3662pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3663{
3664	int i;
3665	uint32_t cls;
3666	int ps;
3667
3668	/*
3669	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3670	 * we know need special treatment.  Type 2 devices are cardbus bridges
3671	 * which also require special treatment.  Other types are unknown, and
3672	 * we err on the side of safety by ignoring them.  Powering down
3673	 * bridges should not be undertaken lightly.
3674	 */
3675	if (dinfo->cfg.hdrtype != 0)
3676		return;
3677	for (i = 0; i < dinfo->cfg.nummaps; i++)
3678		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3679	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3680
3681	/*
3682	 * Some drivers apparently write to these registers w/o updating our
3683	 * cached copy.  No harm happens if we update the copy, so do so here
3684	 * so we can restore them.  The COMMAND register is modified by the
3685	 * bus w/o updating the cache.  This should represent the normally
3686	 * writable portion of the 'defined' part of type 0 headers.  In
3687	 * theory we also need to save/restore the PCI capability structures
3688	 * we know about, but apart from power we don't know any that are
3689	 * writable.
3690	 */
3691	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3692	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3693	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3694	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3695	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3696	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3697	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3698	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3699	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3700	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3701	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3702	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3703	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3704	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3705	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3706
3707	/*
3708	 * don't set the state for display devices, base peripherals and
3709	 * memory devices since bad things happen when they are powered down.
3710	 * We should (a) have drivers that can easily detach and (b) use
3711	 * generic drivers for these devices so that some device actually
3712	 * attaches.  We need to make sure that when we implement (a) we don't
3713	 * power the device down on a reattach.
3714	 */
3715	cls = pci_get_class(dev);
3716	if (!setstate)
3717		return;
3718	switch (pci_do_power_nodriver)
3719	{
3720		case 0:		/* NO powerdown at all */
3721			return;
3722		case 1:		/* Conservative about what to power down */
3723			if (cls == PCIC_STORAGE)
3724				return;
3725			/*FALLTHROUGH*/
3726		case 2:		/* Agressive about what to power down */
3727			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3728			    cls == PCIC_BASEPERIPH)
3729				return;
3730			/*FALLTHROUGH*/
3731		case 3:		/* Power down everything */
3732			break;
3733	}
3734	/*
3735	 * PCI spec says we can only go into D3 state from D0 state.
3736	 * Transition from D[12] into D0 before going to D3 state.
3737	 */
3738	ps = pci_get_powerstate(dev);
3739	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3740		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3741	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3742		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3743}
3744