/* pci.c revision 193306 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 193306 2009-06-02 12:35:04Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include "pcib_if.h"
66#include "pci_if.h"
67
68#ifdef __HAVE_ACPI
69#include <contrib/dev/acpica/acpi.h>
70#include "acpi_if.h"
71#else
72#define	ACPI_PWR_FOR_SLEEP(x, y, z)
73#endif
74
/*
 * Forward declarations for the helpers private to this file: BAR map
 * register decoding, device probe/attach, the vendor-data description
 * machinery, capability/VPD parsing, and the MSI/MSI-X plumbing used
 * by the PCI interface methods below.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
115
/*
 * Kernel object method table for the pci bus driver, grouped by
 * interface (device_if, bus_if, pci_if).  Entries not listed here fall
 * back to the kobj defaults.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* terminator */
	{ 0, 0 }
};
167
/* Declare the pci driver class and attach it below pcib bridges. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor description database; see pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
176
177
/*
 * Description of a device needing special treatment.  devid packs the
 * device ID in the upper 16 bits and the vendor ID in the lower 16
 * bits (e.g. 0x71138086 = device 0x7113, vendor 0x8086/Intel).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
186
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* end-of-table terminator */
	{ 0 }
};
221
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;		/* list of all enumerated devices */
uint32_t pci_generation;		/* bumped when pci_devq changes */
uint32_t pci_numdevs = 0;		/* number of entries on pci_devq */
/* Set while scanning capabilities in pci_read_extcap(). */
static int pcie_chipset, pcix_chipset;
231
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/*
 * Each knob below is exposed both as a loader tunable (TUNABLE_INT)
 * and as a run-time sysctl under hw.pci.
 */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Read-only at run time: consulted by pci_msi_blacklisted(). */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
272
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper around pci_find_dbsf() for domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
281
282/* Find a device_t by domain/bus/slot/function */
283
284device_t
285pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
286{
287	struct pci_devinfo *dinfo;
288
289	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
290		if ((dinfo->cfg.domain == domain) &&
291		    (dinfo->cfg.bus == bus) &&
292		    (dinfo->cfg.slot == slot) &&
293		    (dinfo->cfg.func == func)) {
294			return (dinfo->cfg.dev);
295		}
296	}
297
298	return (NULL);
299}
300
301/* Find a device_t by vendor/device ID */
302
303device_t
304pci_find_device(uint16_t vendor, uint16_t device)
305{
306	struct pci_devinfo *dinfo;
307
308	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
309		if ((dinfo->cfg.vendor == vendor) &&
310		    (dinfo->cfg.device == device)) {
311			return (dinfo->cfg.dev);
312		}
313	}
314
315	return (NULL);
316}
317
318static int
319pci_printf(pcicfgregs *cfg, const char *fmt, ...)
320{
321	va_list ap;
322	int retval;
323
324	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
325	    cfg->func);
326	va_start(ap, fmt);
327	retval += vprintf(fmt, ap);
328	va_end(ap);
329	return (retval);
330}
331
332/* return base address of memory or port map */
333
334static pci_addr_t
335pci_mapbase(uint64_t mapreg)
336{
337
338	if (PCI_BAR_MEM(mapreg))
339		return (mapreg & PCIM_BAR_MEM_BASE);
340	else
341		return (mapreg & PCIM_BAR_IO_BASE);
342}
343
344/* return map type of memory or port map */
345
346static const char *
347pci_maptype(uint64_t mapreg)
348{
349
350	if (PCI_BAR_IO(mapreg))
351		return ("I/O Port");
352	if (mapreg & PCIM_BAR_MEM_PREFETCH)
353		return ("Prefetchable Memory");
354	return ("Memory");
355}
356
357/* return log2 of map size decoded for memory or port map */
358
359static int
360pci_mapsize(uint64_t testval)
361{
362	int ln2size;
363
364	testval = pci_mapbase(testval);
365	ln2size = 0;
366	if (testval != 0) {
367		while ((testval & 1) == 0)
368		{
369			ln2size++;
370			testval >>= 1;
371		}
372	}
373	return (ln2size);
374}
375
376/* return log2 of address range supported by map register */
377
378static int
379pci_maprange(uint64_t mapreg)
380{
381	int ln2range = 0;
382
383	if (PCI_BAR_IO(mapreg))
384		ln2range = 32;
385	else
386		switch (mapreg & PCIM_BAR_MEM_TYPE) {
387		case PCIM_BAR_MEM_32:
388			ln2range = 32;
389			break;
390		case PCIM_BAR_MEM_1MB:
391			ln2range = 20;
392			break;
393		case PCIM_BAR_MEM_64:
394			ln2range = 64;
395			break;
396		}
397	return (ln2range);
398}
399
400/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
401
402static void
403pci_fixancient(pcicfgregs *cfg)
404{
405	if (cfg->hdrtype != 0)
406		return;
407
408	/* PCI to PCI bridges use header type 1 */
409	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
410		cfg->hdrtype = 1;
411}
412
413/* extract header type specific config data */
414
/*
 * Extract header-type specific config data into cfg: the subvendor and
 * subdevice IDs (whose register offsets differ between type 0 and
 * type 2 headers) and the number of BARs the header provides.  Type 1
 * (bridge) headers carry no subvendor/subdevice registers here.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
436
437/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID read means no function is present at d:b:s:f. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/*
		 * NOTE(review): M_WAITOK allocations do not return NULL,
		 * so the check below is belt-and-braces.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard config header into cfg. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device has one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
511
/*
 * Walk the device's PCI capability list and record the location and
 * salient contents of the capabilities this driver cares about: power
 * management, HyperTransport MSI mapping (x86 only), MSI, MSI-X, VPD,
 * bridge subvendor, PCI-X and PCI-express.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The head-pointer register location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
645
646/*
647 * PCI Vital Product Data
648 */
649
650#define	PCI_VPD_TIMEOUT		1000000
651
/*
 * Read one 32-bit aligned word of VPD data through the VPD capability
 * registers (REG/WREG carry through from pci_read_extcap() above).
 * Writing the address with bit 15 clear starts a read; the device sets
 * bit 15 of the address register when the data register is valid.
 * Returns 0 on success or ENXIO if the device never signals completion.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Poll the flag bit; bound the wait with PCI_VPD_TIMEOUT. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
670
#if 0
/*
 * Write one 32-bit aligned word of VPD data: load the data register,
 * then write the address with bit 15 set to start the write; the
 * device clears bit 15 when the write has completed.  Currently
 * compiled out (no callers).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
690
691#undef PCI_VPD_TIMEOUT
692
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge to issue config cycles on */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
701
702static int
703vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
704{
705	uint32_t reg;
706	uint8_t byte;
707
708	if (vrs->bytesinval == 0) {
709		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
710			return (ENXIO);
711		vrs->val = le32toh(reg);
712		vrs->off += 4;
713		byte = vrs->val & 0xff;
714		vrs->bytesinval = 3;
715	} else {
716		vrs->val = vrs->val >> 8;
717		byte = vrs->val & 0xff;
718		vrs->bytesinval--;
719	}
720
721	vrs->cksum += byte;
722	*data = byte;
723	return (0);
724}
725
/*
 * Parse the device's VPD (Vital Product Data) into cfg->vpd: the
 * identifier string, the read-only (VPD-R) keyword/value pairs, and
 * the read/write (VPD-W) keyword descriptors.  Implemented as a
 * byte-driven state machine; a negative state terminates parsing
 * (-1 = finished or bad data, -2 = register read error).  On failure
 * the partially-built arrays are freed.  cfg->vpd.vpd_cached is set
 * unconditionally so the (possibly failed) read is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		/*
		 * States: 0 = resource header, 1 = ident string,
		 * 2/3 = VPD-R keyword header/value, 4 = skip bytes,
		 * 5/6 = VPD-W keyword header/value.
		 */
		switch (state) {
		case 0:		/* item name */
			/* Bit 7 set = large resource with 16-bit length. */
			if (byte & 0x80) {
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum: the running sum of all bytes so far
			 * must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1005
1006int
1007pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1008{
1009	struct pci_devinfo *dinfo = device_get_ivars(child);
1010	pcicfgregs *cfg = &dinfo->cfg;
1011
1012	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1013		pci_read_vpd(device_get_parent(dev), cfg);
1014
1015	*identptr = cfg->vpd.vpd_ident;
1016
1017	if (*identptr == NULL)
1018		return (ENXIO);
1019
1020	return (0);
1021}
1022
1023int
1024pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1025	const char **vptr)
1026{
1027	struct pci_devinfo *dinfo = device_get_ivars(child);
1028	pcicfgregs *cfg = &dinfo->cfg;
1029	int i;
1030
1031	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1032		pci_read_vpd(device_get_parent(dev), cfg);
1033
1034	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1035		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1036		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1037			*vptr = cfg->vpd.vpd_ros[i].value;
1038		}
1039
1040	if (i != cfg->vpd.vpd_rocnt)
1041		return (0);
1042
1043	*vptr = NULL;
1044	return (ENXIO);
1045}
1046
1047/*
1048 * Find the requested extended capability and return the offset in
1049 * configuration space via the pointer provided. The function returns
1050 * 0 on success and error code otherwise.
1051 */
1052int
1053pci_find_extcap_method(device_t dev, device_t child, int capability,
1054    int *capreg)
1055{
1056	struct pci_devinfo *dinfo = device_get_ivars(child);
1057	pcicfgregs *cfg = &dinfo->cfg;
1058	u_int32_t status;
1059	u_int8_t ptr;
1060
1061	/*
1062	 * Check the CAP_LIST bit of the PCI status register first.
1063	 */
1064	status = pci_read_config(child, PCIR_STATUS, 2);
1065	if (!(status & PCIM_STATUS_CAPPRESENT))
1066		return (ENXIO);
1067
1068	/*
1069	 * Determine the start pointer of the capabilities list.
1070	 */
1071	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1072	case 0:
1073	case 1:
1074		ptr = PCIR_CAP_PTR;
1075		break;
1076	case 2:
1077		ptr = PCIR_CAP_PTR_2;
1078		break;
1079	default:
1080		/* XXX: panic? */
1081		return (ENXIO);		/* no extended capabilities support */
1082	}
1083	ptr = pci_read_config(child, ptr, 1);
1084
1085	/*
1086	 * Traverse the capabilities list.
1087	 */
1088	while (ptr != 0) {
1089		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1090			if (capreg != NULL)
1091				*capreg = ptr;
1092			return (0);
1093		}
1094		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1095	}
1096
1097	return (ENOENT);
1098}
1099
1100/*
1101 * Support for MSI-X message interrupts.
1102 */
/*
 * Program MSI-X table entry 'index' with the given message address and
 * data.  Each table entry is 16 bytes: address low dword, address high
 * dword, message data, and vector control.  The vector-control word at
 * offset 12 is not touched here; masking/unmasking is done separately.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1119
1120void
1121pci_mask_msix(device_t dev, u_int index)
1122{
1123	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125	uint32_t offset, val;
1126
1127	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1128	offset = msix->msix_table_offset + index * 16 + 12;
1129	val = bus_read_4(msix->msix_table_res, offset);
1130	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1131		val |= PCIM_MSIX_VCTRL_MASK;
1132		bus_write_4(msix->msix_table_res, offset, val);
1133	}
1134}
1135
1136void
1137pci_unmask_msix(device_t dev, u_int index)
1138{
1139	struct pci_devinfo *dinfo = device_get_ivars(dev);
1140	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1141	uint32_t offset, val;
1142
1143	KASSERT(msix->msix_table_len > index, ("bogus index"));
1144	offset = msix->msix_table_offset + index * 16 + 12;
1145	val = bus_read_4(msix->msix_table_res, offset);
1146	if (val & PCIM_MSIX_VCTRL_MASK) {
1147		val &= ~PCIM_MSIX_VCTRL_MASK;
1148		bus_write_4(msix->msix_table_res, offset, val);
1149	}
1150}
1151
1152int
1153pci_pending_msix(device_t dev, u_int index)
1154{
1155	struct pci_devinfo *dinfo = device_get_ivars(dev);
1156	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1157	uint32_t offset, bit;
1158
1159	KASSERT(msix->msix_table_len > index, ("bogus index"));
1160	offset = msix->msix_pba_offset + (index / 32) * 4;
1161	bit = 1 << index % 32;
1162	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1163}
1164
1165/*
1166 * Restore MSI-X registers and table during resume.  If MSI-X is
1167 * enabled then walk the virtual table to restore the actual MSI-X
1168 * table.
1169 */
1170static void
1171pci_resume_msix(device_t dev)
1172{
1173	struct pci_devinfo *dinfo = device_get_ivars(dev);
1174	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1175	struct msix_table_entry *mte;
1176	struct msix_vector *mv;
1177	int i;
1178
1179	if (msix->msix_alloc > 0) {
1180		/* First, mask all vectors. */
1181		for (i = 0; i < msix->msix_msgnum; i++)
1182			pci_mask_msix(dev, i);
1183
1184		/* Second, program any messages with at least one handler. */
1185		for (i = 0; i < msix->msix_table_len; i++) {
1186			mte = &msix->msix_table[i];
1187			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1188				continue;
1189			mv = &msix->msix_vectors[mte->mte_vector - 1];
1190			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1191			pci_unmask_msix(dev, i);
1192		}
1193	}
1194	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1195	    msix->msix_ctrl, 2);
1196}
1197
1198/*
1199 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1200 * returned in *count.  After this function returns, each message will be
1201 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1202 */
1203int
1204pci_alloc_msix_method(device_t dev, device_t child, int *count)
1205{
1206	struct pci_devinfo *dinfo = device_get_ivars(child);
1207	pcicfgregs *cfg = &dinfo->cfg;
1208	struct resource_list_entry *rle;
1209	int actual, error, i, irq, max;
1210
1211	/* Don't let count == 0 get us into trouble. */
1212	if (*count == 0)
1213		return (EINVAL);
1214
1215	/* If rid 0 is allocated, then fail. */
1216	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1217	if (rle != NULL && rle->res != NULL)
1218		return (ENXIO);
1219
1220	/* Already have allocated messages? */
1221	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1222		return (ENXIO);
1223
1224	/* If MSI is blacklisted for this system, fail. */
1225	if (pci_msi_blacklisted())
1226		return (ENXIO);
1227
1228	/* MSI-X capability present? */
1229	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1230		return (ENODEV);
1231
1232	/* Make sure the appropriate BARs are mapped. */
1233	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1234	    cfg->msix.msix_table_bar);
1235	if (rle == NULL || rle->res == NULL ||
1236	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1237		return (ENXIO);
1238	cfg->msix.msix_table_res = rle->res;
1239	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1240		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1241		    cfg->msix.msix_pba_bar);
1242		if (rle == NULL || rle->res == NULL ||
1243		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1244			return (ENXIO);
1245	}
1246	cfg->msix.msix_pba_res = rle->res;
1247
1248	if (bootverbose)
1249		device_printf(child,
1250		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1251		    *count, cfg->msix.msix_msgnum);
1252	max = min(*count, cfg->msix.msix_msgnum);
1253	for (i = 0; i < max; i++) {
1254		/* Allocate a message. */
1255		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1256		if (error)
1257			break;
1258		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1259		    irq, 1);
1260	}
1261	actual = i;
1262
1263	if (bootverbose) {
1264		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1265		if (actual == 1)
1266			device_printf(child, "using IRQ %lu for MSI-X\n",
1267			    rle->start);
1268		else {
1269			int run;
1270
1271			/*
1272			 * Be fancy and try to print contiguous runs of
1273			 * IRQ values as ranges.  'irq' is the previous IRQ.
1274			 * 'run' is true if we are in a range.
1275			 */
1276			device_printf(child, "using IRQs %lu", rle->start);
1277			irq = rle->start;
1278			run = 0;
1279			for (i = 1; i < actual; i++) {
1280				rle = resource_list_find(&dinfo->resources,
1281				    SYS_RES_IRQ, i + 1);
1282
1283				/* Still in a run? */
1284				if (rle->start == irq + 1) {
1285					run = 1;
1286					irq++;
1287					continue;
1288				}
1289
1290				/* Finish previous range. */
1291				if (run) {
1292					printf("-%d", irq);
1293					run = 0;
1294				}
1295
1296				/* Start new range. */
1297				printf(",%lu", rle->start);
1298				irq = rle->start;
1299			}
1300
1301			/* Unfinished range? */
1302			if (run)
1303				printf("-%d", irq);
1304			printf(" for MSI-X\n");
1305		}
1306	}
1307
1308	/* Mask all vectors. */
1309	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1310		pci_mask_msix(child, i);
1311
1312	/* Allocate and initialize vector data and virtual table. */
1313	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1314	    M_DEVBUF, M_WAITOK | M_ZERO);
1315	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1316	    M_DEVBUF, M_WAITOK | M_ZERO);
1317	for (i = 0; i < actual; i++) {
1318		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1319		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1320		cfg->msix.msix_table[i].mte_vector = i + 1;
1321	}
1322
1323	/* Update control register to enable MSI-X. */
1324	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1325	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1326	    cfg->msix.msix_ctrl, 2);
1327
1328	/* Update counts of alloc'd messages. */
1329	cfg->msix.msix_alloc = actual;
1330	cfg->msix.msix_table_len = actual;
1331	*count = actual;
1332	return (0);
1333}
1334
1335/*
1336 * By default, pci_alloc_msix() will assign the allocated IRQ
1337 * resources consecutively to the first N messages in the MSI-X table.
1338 * However, device drivers may want to use different layouts if they
1339 * either receive fewer messages than they asked for, or they wish to
1340 * populate the MSI-X table sparsely.  This method allows the driver
1341 * to specify what layout it wants.  It must be called after a
1342 * successful pci_alloc_msix() but before any of the associated
1343 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1344 *
1345 * The 'vectors' array contains 'count' message vectors.  The array
1346 * maps directly to the MSI-X table in that index 0 in the array
1347 * specifies the vector for the first message in the MSI-X table, etc.
1348 * The vector value in each array index can either be 0 to indicate
1349 * that no vector should be assigned to a message slot, or it can be a
1350 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1352 * vector (IRQ) to be used for the corresponding message.
1353 *
1354 * On successful return, each message with a non-zero vector will have
1355 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1356 * 1.  Additionally, if any of the IRQs allocated via the previous
1357 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1358 * will be freed back to the system automatically.
1359 *
1360 * For example, suppose a driver has a MSI-X table with 6 messages and
1361 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1362 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1363 * C.  After the call to pci_alloc_msix(), the device will be setup to
1364 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1366 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1367 * be freed back to the system.  This device will also have valid
1368 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1369 *
1370 * In any case, the SYS_RES_IRQ rid X will always map to the message
1371 * at MSI-X table index X - 1 and will only be valid if a vector is
1372 * assigned to that table entry.
1373 */
1374int
1375pci_remap_msix_method(device_t dev, device_t child, int count,
1376    const u_int *vectors)
1377{
1378	struct pci_devinfo *dinfo = device_get_ivars(child);
1379	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1380	struct resource_list_entry *rle;
1381	int i, irq, j, *used;
1382
1383	/*
1384	 * Have to have at least one message in the table but the
1385	 * table can't be bigger than the actual MSI-X table in the
1386	 * device.
1387	 */
1388	if (count == 0 || count > msix->msix_msgnum)
1389		return (EINVAL);
1390
1391	/* Sanity check the vectors. */
1392	for (i = 0; i < count; i++)
1393		if (vectors[i] > msix->msix_alloc)
1394			return (EINVAL);
1395
1396	/*
1397	 * Make sure there aren't any holes in the vectors to be used.
1398	 * It's a big pain to support it, and it doesn't really make
1399	 * sense anyway.  Also, at least one vector must be used.
1400	 */
1401	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1402	    M_ZERO);
1403	for (i = 0; i < count; i++)
1404		if (vectors[i] != 0)
1405			used[vectors[i] - 1] = 1;
1406	for (i = 0; i < msix->msix_alloc - 1; i++)
1407		if (used[i] == 0 && used[i + 1] == 1) {
1408			free(used, M_DEVBUF);
1409			return (EINVAL);
1410		}
1411	if (used[0] != 1) {
1412		free(used, M_DEVBUF);
1413		return (EINVAL);
1414	}
1415
1416	/* Make sure none of the resources are allocated. */
1417	for (i = 0; i < msix->msix_table_len; i++) {
1418		if (msix->msix_table[i].mte_vector == 0)
1419			continue;
1420		if (msix->msix_table[i].mte_handlers > 0)
1421			return (EBUSY);
1422		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1423		KASSERT(rle != NULL, ("missing resource"));
1424		if (rle->res != NULL)
1425			return (EBUSY);
1426	}
1427
1428	/* Free the existing resource list entries. */
1429	for (i = 0; i < msix->msix_table_len; i++) {
1430		if (msix->msix_table[i].mte_vector == 0)
1431			continue;
1432		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1433	}
1434
1435	/*
1436	 * Build the new virtual table keeping track of which vectors are
1437	 * used.
1438	 */
1439	free(msix->msix_table, M_DEVBUF);
1440	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1441	    M_DEVBUF, M_WAITOK | M_ZERO);
1442	for (i = 0; i < count; i++)
1443		msix->msix_table[i].mte_vector = vectors[i];
1444	msix->msix_table_len = count;
1445
1446	/* Free any unused IRQs and resize the vectors array if necessary. */
1447	j = msix->msix_alloc - 1;
1448	if (used[j] == 0) {
1449		struct msix_vector *vec;
1450
1451		while (used[j] == 0) {
1452			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1453			    msix->msix_vectors[j].mv_irq);
1454			j--;
1455		}
1456		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1457		    M_WAITOK);
1458		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1459		    (j + 1));
1460		free(msix->msix_vectors, M_DEVBUF);
1461		msix->msix_vectors = vec;
1462		msix->msix_alloc = j + 1;
1463	}
1464	free(used, M_DEVBUF);
1465
1466	/* Map the IRQs onto the rids. */
1467	for (i = 0; i < count; i++) {
1468		if (vectors[i] == 0)
1469			continue;
1470		irq = msix->msix_vectors[vectors[i]].mv_irq;
1471		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1472		    irq, 1);
1473	}
1474
1475	if (bootverbose) {
1476		device_printf(child, "Remapped MSI-X IRQs as: ");
1477		for (i = 0; i < count; i++) {
1478			if (i != 0)
1479				printf(", ");
1480			if (vectors[i] == 0)
1481				printf("---");
1482			else
1483				printf("%d",
1484				    msix->msix_vectors[vectors[i]].mv_irq);
1485		}
1486		printf("\n");
1487	}
1488
1489	return (0);
1490}
1491
/*
 * Release all allocated MSI-X messages for 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Returns ENODEV if no
 * MSI-X messages are allocated and EBUSY if any message still has a
 * handler attached or its IRQ resource allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1538
1539/*
1540 * Return the max supported MSI-X messages this device supports.
1541 * Basically, assuming the MD code can alloc messages, this function
1542 * should return the maximum value that pci_alloc_msix() can return.
1543 * Thus, it is subject to the tunables, etc.
1544 */
1545int
1546pci_msix_count_method(device_t dev, device_t child)
1547{
1548	struct pci_devinfo *dinfo = device_get_ivars(child);
1549	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1550
1551	if (pci_do_msix && msix->msix_location != 0)
1552		return (msix->msix_msgnum);
1553	return (0);
1554}
1555
1556/*
1557 * HyperTransport MSI mapping control
1558 */
/*
 * Enable or disable the HyperTransport MSI mapping capability for
 * 'dev'.  A non-zero 'addr' enables the mapping if it is not already
 * enabled and the address falls in the same 1MB-aligned window as the
 * mapping base (compared via the upper bits, addr >> 20).  A zero
 * 'addr' disables an enabled mapping.  No-op if the device has no HT
 * MSI mapping capability.
 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	if (!ht->ht_msimap)
		return;

	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1583
1584/*
1585 * Support for MSI message signalled interrupts.
1586 */
/*
 * Program the MSI capability's address and data registers for 'dev'
 * and then set the enable bit in the MSI control register.  The data
 * register location differs between the 32-bit and 64-bit capability
 * layouts, so the 64BIT control bit selects which offsets are used.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1613
1614void
1615pci_disable_msi(device_t dev)
1616{
1617	struct pci_devinfo *dinfo = device_get_ivars(dev);
1618	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1619
1620	/* Disable MSI -> HT mapping. */
1621	pci_ht_map_msi(dev, 0);
1622
1623	/* Disable MSI in the control register. */
1624	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1625	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1626	    2);
1627}
1628
1629/*
1630 * Restore MSI registers during resume.  If MSI is enabled then
1631 * restore the data and address registers in addition to the control
1632 * register.
1633 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* 64-bit layout places the data register at a higher offset. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control register value in all cases. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1659
/*
 * Re-fetch the address/data pair for IRQ 'irq' from the parent bridge
 * (via PCIB_MAP_MSI) and reprogram the device's MSI registers or the
 * MSI-X table entries that use that IRQ.  Returns 0 on success, ENOENT
 * when 'irq' is not one of this device's MSI/MSI-X IRQs, or the error
 * from PCIB_MAP_MSI.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable while updating, then re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1735
1736/*
1737 * Returns true if the specified device is blacklisted because MSI
1738 * doesn't work.
1739 */
1740int
1741pci_msi_device_blacklisted(device_t dev)
1742{
1743	struct pci_quirk *q;
1744
1745	if (!pci_honor_msi_blacklist)
1746		return (0);
1747
1748	for (q = &pci_quirks[0]; q->devid; q++) {
1749		if (q->devid == pci_get_devid(dev) &&
1750		    q->type == PCI_QUIRK_DISABLE_MSI)
1751			return (1);
1752	}
1753	return (0);
1754}
1755
1756/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1758 * we just check for blacklisted chipsets as represented by the
1759 * host-PCI bridge at device 0:0:0.  In the future, it may become
1760 * necessary to check other system attributes, such as the kenv values
1761 * that give the motherboard manufacturer and model number.
1762 */
1763static int
1764pci_msi_blacklisted(void)
1765{
1766	device_t dev;
1767
1768	if (!pci_honor_msi_blacklist)
1769		return (0);
1770
1771	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1772	if (!(pcie_chipset || pcix_chipset))
1773		return (1);
1774
1775	dev = pci_find_bsf(0, 0, 0);
1776	if (dev != NULL)
1777		return (pci_msi_device_blacklisted(dev));
1778	return (0);
1779}
1780
1781/*
1782 * Attempt to allocate *count MSI messages.  The actual number allocated is
1783 * returned in *count.  After this function returns, each message will be
1784 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1785 */
/*
 * Allocate up to *count MSI messages for 'child', falling back by
 * powers of two until the parent bridge grants a set.  On success the
 * granted count is written back to *count, the MSI control register's
 * multiple-message-enable field is programmed, and the IRQs appear as
 * SYS_RES_IRQ resources at rids 1..*count.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes the count as a power-of-two exponent at bit 4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1904
1905/* Release the MSI messages associated with this device. */
/*
 * Release the MSI (or MSI-X) messages associated with this device.
 * Returns ENODEV when nothing is allocated and EBUSY when any message
 * still has a handler or an allocated IRQ resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the release call below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1953
1954/*
1955 * Return the max supported MSI messages this device supports.
1956 * Basically, assuming the MD code can alloc messages, this function
1957 * should return the maximum value that pci_alloc_msi() can return.
1958 * Thus, it is subject to the tunables, etc.
1959 */
1960int
1961pci_msi_count_method(device_t dev, device_t child)
1962{
1963	struct pci_devinfo *dinfo = device_get_ivars(child);
1964	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1965
1966	if (pci_do_msi && msi->msi_location != 0)
1967		return (msi->msi_msgnum);
1968	return (0);
1969}
1970
1971/* free pcicfgregs structure and all depending data structures */
1972
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Only devices with a VPD capability have VPD strings to free. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2000
2001/*
 * PCI power management
2003 */
2004int
2005pci_set_powerstate_method(device_t dev, device_t child, int state)
2006{
2007	struct pci_devinfo *dinfo = device_get_ivars(child);
2008	pcicfgregs *cfg = &dinfo->cfg;
2009	uint16_t status;
2010	int result, oldstate, highest, delay;
2011
2012	if (cfg->pp.pp_cap == 0)
2013		return (EOPNOTSUPP);
2014
2015	/*
2016	 * Optimize a no state change request away.  While it would be OK to
2017	 * write to the hardware in theory, some devices have shown odd
2018	 * behavior when going from D3 -> D3.
2019	 */
2020	oldstate = pci_get_powerstate(child);
2021	if (oldstate == state)
2022		return (0);
2023
2024	/*
2025	 * The PCI power management specification states that after a state
2026	 * transition between PCI power states, system software must
2027	 * guarantee a minimal delay before the function accesses the device.
2028	 * Compute the worst case delay that we need to guarantee before we
2029	 * access the device.  Many devices will be responsive much more
2030	 * quickly than this delay, but there are some that don't respond
2031	 * instantly to state changes.  Transitions to/from D3 state require
2032	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2033	 * is done below with DELAY rather than a sleeper function because
2034	 * this function can be called from contexts where we cannot sleep.
2035	 */
2036	highest = (oldstate > state) ? oldstate : state;
2037	if (highest == PCI_POWERSTATE_D3)
2038	    delay = 10000;
2039	else if (highest == PCI_POWERSTATE_D2)
2040	    delay = 200;
2041	else
2042	    delay = 0;
2043	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2044	    & ~PCIM_PSTAT_DMASK;
2045	result = 0;
2046	switch (state) {
2047	case PCI_POWERSTATE_D0:
2048		status |= PCIM_PSTAT_D0;
2049		break;
2050	case PCI_POWERSTATE_D1:
2051		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2052			return (EOPNOTSUPP);
2053		status |= PCIM_PSTAT_D1;
2054		break;
2055	case PCI_POWERSTATE_D2:
2056		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2057			return (EOPNOTSUPP);
2058		status |= PCIM_PSTAT_D2;
2059		break;
2060	case PCI_POWERSTATE_D3:
2061		status |= PCIM_PSTAT_D3;
2062		break;
2063	default:
2064		return (EINVAL);
2065	}
2066
2067	if (bootverbose)
2068		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2069		    state);
2070
2071	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2072	if (delay)
2073		DELAY(delay);
2074	return (0);
2075}
2076
2077int
2078pci_get_powerstate_method(device_t dev, device_t child)
2079{
2080	struct pci_devinfo *dinfo = device_get_ivars(child);
2081	pcicfgregs *cfg = &dinfo->cfg;
2082	uint16_t status;
2083	int result;
2084
2085	if (cfg->pp.pp_cap != 0) {
2086		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2087		switch (status & PCIM_PSTAT_DMASK) {
2088		case PCIM_PSTAT_D0:
2089			result = PCI_POWERSTATE_D0;
2090			break;
2091		case PCIM_PSTAT_D1:
2092			result = PCI_POWERSTATE_D1;
2093			break;
2094		case PCIM_PSTAT_D2:
2095			result = PCI_POWERSTATE_D2;
2096			break;
2097		case PCIM_PSTAT_D3:
2098			result = PCI_POWERSTATE_D3;
2099			break;
2100		default:
2101			result = PCI_POWERSTATE_UNKNOWN;
2102			break;
2103		}
2104	} else {
2105		/* No support, device is always at D0 */
2106		result = PCI_POWERSTATE_D0;
2107	}
2108	return (result);
2109}
2110
2111/*
2112 * Some convenience functions for PCI device drivers.
2113 */
2114
2115static __inline void
2116pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2117{
2118	uint16_t	command;
2119
2120	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2121	command |= bit;
2122	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2123}
2124
2125static __inline void
2126pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2127{
2128	uint16_t	command;
2129
2130	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2131	command &= ~bit;
2132	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2133}
2134
/*
 * Enable PCI bus mastering so the device may initiate DMA.
 * Always succeeds.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2141
/*
 * Disable PCI bus mastering, preventing further DMA by the device.
 * Always succeeds.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2148
2149int
2150pci_enable_io_method(device_t dev, device_t child, int space)
2151{
2152	uint16_t command;
2153	uint16_t bit;
2154	char *error;
2155
2156	bit = 0;
2157	error = NULL;
2158
2159	switch(space) {
2160	case SYS_RES_IOPORT:
2161		bit = PCIM_CMD_PORTEN;
2162		error = "port";
2163		break;
2164	case SYS_RES_MEMORY:
2165		bit = PCIM_CMD_MEMEN;
2166		error = "memory";
2167		break;
2168	default:
2169		return (EINVAL);
2170	}
2171	pci_set_command_bit(dev, child, bit);
2172	/* Some devices seem to need a brief stall here, what do to? */
2173	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2174	if (command & bit)
2175		return (0);
2176	device_printf(child, "failed to enable %s mapping!\n", error);
2177	return (ENXIO);
2178}
2179
2180int
2181pci_disable_io_method(device_t dev, device_t child, int space)
2182{
2183	uint16_t command;
2184	uint16_t bit;
2185	char *error;
2186
2187	bit = 0;
2188	error = NULL;
2189
2190	switch(space) {
2191	case SYS_RES_IOPORT:
2192		bit = PCIM_CMD_PORTEN;
2193		error = "port";
2194		break;
2195	case SYS_RES_MEMORY:
2196		bit = PCIM_CMD_MEMEN;
2197		error = "memory";
2198		break;
2199	default:
2200		return (EINVAL);
2201	}
2202	pci_clear_command_bit(dev, child, bit);
2203	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2204	if (command & bit) {
2205		device_printf(child, "failed to disable %s mapping!\n", error);
2206		return (ENXIO);
2207	}
2208	return (0);
2209}
2210
2211/*
2212 * New style pci driver.  Parent device is either a pci-host-bridge or a
2213 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2214 */
2215
/*
 * Dump the interesting parts of a device's cached config header to the
 * console: IDs, location, class, command/status, latency/grant timing,
 * interrupt routing, and any power-management, MSI, or MSI-X
 * capabilities.  Only emits output when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based (INTA=1); 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read status so the current D-state is fresh. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2272
2273static int
2274pci_porten(device_t dev)
2275{
2276	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2277}
2278
2279static int
2280pci_memen(device_t dev)
2281{
2282	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2283}
2284
/*
 * Read a BAR's programmed value and probe its size.  On return *mapp
 * holds the BAR's current contents (both dwords for a 64-bit BAR) and
 * *testvalp holds the value read back after writing all 1's; the BAR's
 * size is derived from the latter by the caller.  Decoding is disabled
 * around the sizing writes and the original BAR value is restored
 * before returning, so the device stays usable.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR consumes the next register as the high dword. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2331
2332static void
2333pci_write_bar(device_t dev, int reg, pci_addr_t base)
2334{
2335	pci_addr_t map;
2336	int ln2range;
2337
2338	map = pci_read_config(dev, reg, 4);
2339	ln2range = pci_maprange(map);
2340	pci_write_config(dev, reg, base, 4);
2341	if (ln2range == 64)
2342		pci_write_config(dev, reg + 4, base >> 32, 4);
2343}
2344
2345/*
2346 * Add a resource based on a pci map register. Return 1 if the map
2347 * register is a 32bit map register or 2 if it is a 64bit register.
2348 */
2349static int
2350pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2351    int force, int prefetch)
2352{
2353	pci_addr_t base, map, testval;
2354	pci_addr_t start, end, count;
2355	int barlen, maprange, mapsize, type;
2356	uint16_t cmd;
2357	struct resource *res;
2358
2359	pci_read_bar(dev, reg, &map, &testval);
2360	if (PCI_BAR_MEM(map)) {
2361		type = SYS_RES_MEMORY;
2362		if (map & PCIM_BAR_MEM_PREFETCH)
2363			prefetch = 1;
2364	} else
2365		type = SYS_RES_IOPORT;
2366	mapsize = pci_mapsize(testval);
2367	base = pci_mapbase(map);
2368	maprange = pci_maprange(map);
2369	barlen = maprange == 64 ? 2 : 1;
2370
2371	/*
2372	 * For I/O registers, if bottom bit is set, and the next bit up
2373	 * isn't clear, we know we have a BAR that doesn't conform to the
2374	 * spec, so ignore it.  Also, sanity check the size of the data
2375	 * areas to the type of memory involved.  Memory must be at least
2376	 * 16 bytes in size, while I/O ranges must be at least 4.
2377	 */
2378	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2379		return (barlen);
2380	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2381	    (type == SYS_RES_IOPORT && mapsize < 2))
2382		return (barlen);
2383
2384	if (bootverbose) {
2385		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2386		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2387		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2388			printf(", port disabled\n");
2389		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2390			printf(", memory disabled\n");
2391		else
2392			printf(", enabled\n");
2393	}
2394
2395	/*
2396	 * If base is 0, then we have problems.  It is best to ignore
2397	 * such entries for the moment.  These will be allocated later if
2398	 * the driver specifically requests them.  However, some
2399	 * removable busses look better when all resources are allocated,
2400	 * so allow '0' to be overriden.
2401	 *
2402	 * Similarly treat maps whose values is the same as the test value
2403	 * read back.  These maps have had all f's written to them by the
2404	 * BIOS in an attempt to disable the resources.
2405	 */
2406	if (!force && (base == 0 || map == testval))
2407		return (barlen);
2408	if ((u_long)base != base) {
2409		device_printf(bus,
2410		    "pci%d:%d:%d:%d bar %#x too many address bits",
2411		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2412		    pci_get_function(dev), reg);
2413		return (barlen);
2414	}
2415
2416	/*
2417	 * This code theoretically does the right thing, but has
2418	 * undesirable side effects in some cases where peripherals
2419	 * respond oddly to having these bits enabled.  Let the user
2420	 * be able to turn them off (since pci_enable_io_modes is 1 by
2421	 * default).
2422	 */
2423	if (pci_enable_io_modes) {
2424		/* Turn on resources that have been left off by a lazy BIOS */
2425		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2426			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2427			cmd |= PCIM_CMD_PORTEN;
2428			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2429		}
2430		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2431			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2432			cmd |= PCIM_CMD_MEMEN;
2433			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2434		}
2435	} else {
2436		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2437			return (barlen);
2438		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2439			return (barlen);
2440	}
2441
2442	count = 1 << mapsize;
2443	if (base == 0 || base == pci_mapbase(testval)) {
2444		start = 0;	/* Let the parent decide. */
2445		end = ~0ULL;
2446	} else {
2447		start = base;
2448		end = base + (1 << mapsize) - 1;
2449	}
2450	resource_list_add(rl, type, reg, start, end, count);
2451
2452	/*
2453	 * Try to allocate the resource for this BAR from our parent
2454	 * so that this resource range is already reserved.  The
2455	 * driver for this device will later inherit this resource in
2456	 * pci_alloc_resource().
2457	 */
2458	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2459	    prefetch ? RF_PREFETCHABLE : 0);
2460	if (res == NULL) {
2461		/*
2462		 * If the allocation fails, clear the BAR and delete
2463		 * the resource list entry to force
2464		 * pci_alloc_resource() to allocate resources from the
2465		 * parent.
2466		 */
2467		resource_list_delete(rl, type, reg);
2468		start = 0;
2469	} else {
2470		start = rman_get_start(res);
2471		rman_set_device(res, bus);
2472	}
2473	pci_write_bar(dev, reg, start);
2474	return (barlen);
2475}
2476
2477/*
2478 * For ATA devices we need to decide early what addressing mode to use.
2479 * Legacy demands that the primary and secondary ATA ports sits on the
2480 * same addresses that old ISA hardware did. This dictates that we use
2481 * those addresses and ignore the BAR's if we cannot set PCI native
2482 * addressing mode.
2483 */
2484static void
2485pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2486    uint32_t prefetchmask)
2487{
2488	struct resource *r;
2489	int rid, type, progif;
2490#if 0
2491	/* if this device supports PCI native addressing use it */
2492	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2493	if ((progif & 0x8a) == 0x8a) {
2494		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2495		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2496			printf("Trying ATA native PCI addressing mode\n");
2497			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2498		}
2499	}
2500#endif
2501	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2502	type = SYS_RES_IOPORT;
2503	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2504		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2505		    prefetchmask & (1 << 0));
2506		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2507		    prefetchmask & (1 << 1));
2508	} else {
2509		rid = PCIR_BAR(0);
2510		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2511		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2512		    8, 0);
2513		rman_set_device(r, bus);
2514		rid = PCIR_BAR(1);
2515		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2516		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2517		    1, 0);
2518		rman_set_device(r, bus);
2519	}
2520	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2521		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2522		    prefetchmask & (1 << 2));
2523		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2524		    prefetchmask & (1 << 3));
2525	} else {
2526		rid = PCIR_BAR(2);
2527		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2528		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2529		    8, 0);
2530		rman_set_device(r, bus);
2531		rid = PCIR_BAR(3);
2532		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2533		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2534		    1, 0);
2535		rman_set_device(r, bus);
2536	}
2537	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2538	    prefetchmask & (1 << 4));
2539	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2540	    prefetchmask & (1 << 5));
2541}
2542
/*
 * Determine the IRQ for a device's INTx pin and record it as the rid 0
 * interrupt resource.  Precedence: a user "hw.pci...INTc.irq" tunable,
 * then (depending on force_route) either the bus's interrupt routing
 * or the BIOS-programmed intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the valid IRQ range (1-254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2590
/*
 * Populate the device's resource list from its BARs, any quirk-defined
 * extra map registers, and its INTx interrupt.  'force' and
 * 'prefetchmask' are passed through to pci_add_map() per BAR.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2635
/*
 * Scan every slot/function on the given bus, read each present
 * device's config header, and add it as a child of 'dev'.
 * 'dinfo_size' lets subclasses allocate a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* NOTE: REG() implicitly reads the local variables 's' and 'f'. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		/* f must be 0 here: the REG() below probes function 0. */
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices advertise up to 8 functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2668
/*
 * Create a new-bus child for an already-probed PCI device and set up
 * its ivars and resource list.  The cfg_save/cfg_restore pair snapshots
 * the device's config registers and then reapplies them; presumably
 * this also brings the device to a known power state before resources
 * are added — NOTE(review): confirm against pci_cfg_save/restore.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2680
/*
 * Probe method for the generic PCI bus driver: always matches, but at
 * generic priority so subclassed bus drivers can take precedence.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2690
/*
 * Attach method: discover our domain/bus numbers from the parent
 * bridge, enumerate all children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2710
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, then (if ACPI power management is in use) place each
 * attached type 0 child in the ACPI-suggested sleep state (D3 by
 * default).  Returns 0 or the first error from the generic suspend.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2759
/*
 * Bus resume method: power each child back up to D0 (notifying ACPI
 * when present), restore its saved config space, then resume the
 * children via the generic resume.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2795
2796static void
2797pci_load_vendor_data(void)
2798{
2799	caddr_t vendordata, info;
2800
2801	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2802		info = preload_search_info(vendordata, MODINFO_ADDR);
2803		pci_vendordata = *(char **)info;
2804		info = preload_search_info(vendordata, MODINFO_SIZE);
2805		pci_vendordata_size = *(size_t *)info;
2806		/* terminate the database */
2807		pci_vendordata[pci_vendordata_size] = '\n';
2808	}
2809}
2810
/*
 * Called when a new PCI driver is registered: give it a chance to
 * identify devices, then re-probe every unattached child.  Config
 * space is restored before probing (the device may have been powered
 * down) and saved again if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2839
/*
 * Bus setup_intr method.  After the generic setup succeeds, handle the
 * PCI-specific bookkeeping for direct children: rid 0 means classic
 * INTx (make sure the INTx-disable bit is clear); any other rid is an
 * MSI or MSI-X message, which must be mapped through the parent bridge
 * and programmed into the device, with per-vector handler counts kept
 * in dinfo.  On a mapping failure the interrupt handler is torn down
 * again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* First handler on this device maps the MSI vectors. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* 'bad' falls through here; 'error' selects the cleanup. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2929
/*
 * Bus teardown_intr method, the inverse of pci_setup_intr().  For
 * direct children: rid 0 masks INTx; other rids decrement the MSI or
 * MSI-X handler count and disable/mask the message when it drops to 0.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * presumably a resource active for this rid always has a
		 * list entry — confirm before relying on it.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
2988
/*
 * Bus print_child method: print the standard child header plus the
 * device's port/memory/IRQ resources, flags, and slot.function
 * location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3014
/*
 * Table of generic descriptions for PCI class/subclass codes, used by
 * pci_probe_nomatch() to describe devices with no attached driver.  An
 * entry with subclass == -1 names the class as a whole; an entry with a
 * specific subclass refines it.  The table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3106
3107void
3108pci_probe_nomatch(device_t dev, device_t child)
3109{
3110	int	i;
3111	char	*cp, *scp, *device;
3112
3113	/*
3114	 * Look for a listing for this device in a loaded device database.
3115	 */
3116	if ((device = pci_describe_device(child)) != NULL) {
3117		device_printf(dev, "<%s>", device);
3118		free(device, M_DEVBUF);
3119	} else {
3120		/*
3121		 * Scan the class/subclass descriptions for a general
3122		 * description.
3123		 */
3124		cp = "unknown";
3125		scp = NULL;
3126		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3127			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3128				if (pci_nomatch_tab[i].subclass == -1) {
3129					cp = pci_nomatch_tab[i].desc;
3130				} else if (pci_nomatch_tab[i].subclass ==
3131				    pci_get_subclass(child)) {
3132					scp = pci_nomatch_tab[i].desc;
3133				}
3134			}
3135		}
3136		device_printf(dev, "<%s%s%s>",
3137		    cp ? cp : "",
3138		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3139		    scp ? scp : "");
3140	}
3141	printf(" at device %d.%d (no driver attached)\n",
3142	    pci_get_slot(child), pci_get_function(child));
3143	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3144	return;
3145}
3146
3147/*
3148 * Parse the PCI device database, if loaded, and return a pointer to a
3149 * description of the device.
3150 *
3151 * The database is flat text formatted as follows:
3152 *
3153 * Any line not in a valid format is ignored.
3154 * Lines are terminated with newline '\n' characters.
3155 *
3156 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3157 * the vendor name.
3158 *
3159 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3160 * - devices cannot be listed without a corresponding VENDOR line.
3161 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3162 * another TAB, then the device name.
3163 */
3164
3165/*
3166 * Assuming (ptr) points to the beginning of a line in the database,
3167 * return the vendor or device and description of the next entry.
3168 * The value of (vendor) or (device) inappropriate for the entry type
3169 * is set to -1.  Returns nonzero at the end of the database.
3170 *
 * Note that this is not fully robust in the face of corrupt data; we
 * attempt to safeguard against this by appending a newline to the end
 * of the database when we initialise.
3174 */
3175static int
3176pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3177{
3178	char	*cp = *ptr;
3179	int	left;
3180
3181	*device = -1;
3182	*vendor = -1;
3183	**desc = '\0';
3184	for (;;) {
3185		left = pci_vendordata_size - (cp - pci_vendordata);
3186		if (left <= 0) {
3187			*ptr = cp;
3188			return(1);
3189		}
3190
3191		/* vendor entry? */
3192		if (*cp != '\t' &&
3193		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3194			break;
3195		/* device entry? */
3196		if (*cp == '\t' &&
3197		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3198			break;
3199
3200		/* skip to next line */
3201		while (*cp != '\n' && left > 0) {
3202			cp++;
3203			left--;
3204		}
3205		if (*cp == '\n') {
3206			cp++;
3207			left--;
3208		}
3209	}
3210	/* skip to next line */
3211	while (*cp != '\n' && left > 0) {
3212		cp++;
3213		left--;
3214	}
3215	if (*cp == '\n' && left > 0)
3216		cp++;
3217	*ptr = cp;
3218	return(0);
3219}
3220
/*
 * Look up "dev" in the loaded vendor database and return a malloc'd
 * "<vendor>, <device>" description string, or NULL if no database is
 * loaded, the vendor is not listed, or allocation fails.  A device ID
 * with no database entry is rendered as "0x%x".  The caller frees the
 * returned string with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/*
	 * NOTE(review): these 80-byte buffers are filled by the sscanf()
	 * scansets in pci_describe_parse_line(); the scanset width must
	 * stay strictly less than 80 so the terminating NUL fits.
	 */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/* Scan device entries until the next vendor line or end of data. */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device entry matched: fall back to the raw hex device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+3" covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3273
3274int
3275pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3276{
3277	struct pci_devinfo *dinfo;
3278	pcicfgregs *cfg;
3279
3280	dinfo = device_get_ivars(child);
3281	cfg = &dinfo->cfg;
3282
3283	switch (which) {
3284	case PCI_IVAR_ETHADDR:
3285		/*
3286		 * The generic accessor doesn't deal with failure, so
3287		 * we set the return value, then return an error.
3288		 */
3289		*((uint8_t **) result) = NULL;
3290		return (EINVAL);
3291	case PCI_IVAR_SUBVENDOR:
3292		*result = cfg->subvendor;
3293		break;
3294	case PCI_IVAR_SUBDEVICE:
3295		*result = cfg->subdevice;
3296		break;
3297	case PCI_IVAR_VENDOR:
3298		*result = cfg->vendor;
3299		break;
3300	case PCI_IVAR_DEVICE:
3301		*result = cfg->device;
3302		break;
3303	case PCI_IVAR_DEVID:
3304		*result = (cfg->device << 16) | cfg->vendor;
3305		break;
3306	case PCI_IVAR_CLASS:
3307		*result = cfg->baseclass;
3308		break;
3309	case PCI_IVAR_SUBCLASS:
3310		*result = cfg->subclass;
3311		break;
3312	case PCI_IVAR_PROGIF:
3313		*result = cfg->progif;
3314		break;
3315	case PCI_IVAR_REVID:
3316		*result = cfg->revid;
3317		break;
3318	case PCI_IVAR_INTPIN:
3319		*result = cfg->intpin;
3320		break;
3321	case PCI_IVAR_IRQ:
3322		*result = cfg->intline;
3323		break;
3324	case PCI_IVAR_DOMAIN:
3325		*result = cfg->domain;
3326		break;
3327	case PCI_IVAR_BUS:
3328		*result = cfg->bus;
3329		break;
3330	case PCI_IVAR_SLOT:
3331		*result = cfg->slot;
3332		break;
3333	case PCI_IVAR_FUNCTION:
3334		*result = cfg->func;
3335		break;
3336	case PCI_IVAR_CMDREG:
3337		*result = cfg->cmdreg;
3338		break;
3339	case PCI_IVAR_CACHELNSZ:
3340		*result = cfg->cachelnsz;
3341		break;
3342	case PCI_IVAR_MINGNT:
3343		*result = cfg->mingnt;
3344		break;
3345	case PCI_IVAR_MAXLAT:
3346		*result = cfg->maxlat;
3347		break;
3348	case PCI_IVAR_LATTIMER:
3349		*result = cfg->lattimer;
3350		break;
3351	default:
3352		return (ENOENT);
3353	}
3354	return (0);
3355}
3356
3357int
3358pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3359{
3360	struct pci_devinfo *dinfo;
3361
3362	dinfo = device_get_ivars(child);
3363
3364	switch (which) {
3365	case PCI_IVAR_INTPIN:
3366		dinfo->cfg.intpin = value;
3367		return (0);
3368	case PCI_IVAR_ETHADDR:
3369	case PCI_IVAR_SUBVENDOR:
3370	case PCI_IVAR_SUBDEVICE:
3371	case PCI_IVAR_VENDOR:
3372	case PCI_IVAR_DEVICE:
3373	case PCI_IVAR_DEVID:
3374	case PCI_IVAR_CLASS:
3375	case PCI_IVAR_SUBCLASS:
3376	case PCI_IVAR_PROGIF:
3377	case PCI_IVAR_REVID:
3378	case PCI_IVAR_IRQ:
3379	case PCI_IVAR_DOMAIN:
3380	case PCI_IVAR_BUS:
3381	case PCI_IVAR_SLOT:
3382	case PCI_IVAR_FUNCTION:
3383		return (EINVAL);	/* disallow for now */
3384
3385	default:
3386		return (ENOENT);
3387	}
3388}
3389
3390
3391#include "opt_ddb.h"
3392#ifdef DDB
3393#include <ddb/ddb.h>
3394#include <sys/cons.h>
3395
3396/*
3397 * List resources based on pci map registers, used for within ddb
3398 */
3399
/*
 * "show pciregs" DDB command: walk the global pci_devq list and print
 * one line per device giving its driver name and unit (or "none<N>"
 * for unattached devices), its domain:bus:slot:function selector, the
 * combined class code, subsystem and chip IDs, revision and header
 * type.  Stops early if the DDB pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3439#endif /* DDB */
3440
/*
 * Lazily allocate the backing resource for a BAR on first request.
 *
 * Sizes the BAR by probing it (pci_read_bar), rejects requests whose
 * resource type does not match what the BAR decodes, allocates a
 * suitably sized and aligned range from the parent bus, records it in
 * the child's resource list, and programs the BAR with the address that
 * was assigned.  The returned resource is inactive and owned by the bus
 * (its rman device is set to "dev"); NULL is returned on any failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The requested type must agree with the BAR's own type bit. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* The bus retains ownership; track the allocation in the list. */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3530
3531
/*
 * Bus resource allocator for PCI children.
 *
 * Requests from grandchildren are passed straight up to the parent.
 * For direct children: IRQ 0 allocations are refused once MSI/MSI-X
 * messages are in use, and an interrupt is lazily routed for devices
 * with an interrupt pin but no valid line.  I/O and memory BARs are
 * lazily sized and reserved via pci_alloc_map(); a BAR still owned by
 * the bus is handed to the child (and activated if RF_ACTIVE was
 * requested, since bus-held resources are kept inactive).  Everything
 * else falls through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3603
3604int
3605pci_release_resource(device_t dev, device_t child, int type, int rid,
3606    struct resource *r)
3607{
3608	int error;
3609
3610	if (device_get_parent(child) != dev)
3611		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3612		    type, rid, r));
3613
3614	/*
3615	 * For BARs we don't actually want to release the resource.
3616	 * Instead, we deactivate the resource if needed and then give
3617	 * ownership of the BAR back to the bus.
3618	 */
3619	switch (type) {
3620	case SYS_RES_IOPORT:
3621	case SYS_RES_MEMORY:
3622		if (rman_get_device(r) != child)
3623			return (EINVAL);
3624		if (rman_get_flags(r) & RF_ACTIVE) {
3625			error = bus_deactivate_resource(child, type, rid, r);
3626			if (error)
3627				return (error);
3628		}
3629		rman_set_device(r, dev);
3630		return (0);
3631	}
3632	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3633}
3634
3635int
3636pci_activate_resource(device_t dev, device_t child, int type, int rid,
3637    struct resource *r)
3638{
3639	int error;
3640
3641	error = bus_generic_activate_resource(dev, child, type, rid, r);
3642	if (error)
3643		return (error);
3644
3645	/* Enable decoding in the command register when activating BARs. */
3646	if (device_get_parent(child) == dev) {
3647		switch (type) {
3648		case SYS_RES_IOPORT:
3649		case SYS_RES_MEMORY:
3650			error = PCI_ENABLE_IO(dev, child, type);
3651			break;
3652		}
3653	}
3654	return (error);
3655}
3656
/*
 * Remove a resource entry from a direct child's resource list.  The
 * entry is refused (with a diagnostic) if the child still owns or has
 * activated the underlying resource.  For a BAR-backed entry the BAR
 * is cleared first so the device stops decoding before the resource is
 * released back to the parent.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3698
3699struct resource_list *
3700pci_get_resource_list (device_t dev, device_t child)
3701{
3702	struct pci_devinfo *dinfo = device_get_ivars(child);
3703
3704	return (&dinfo->resources);
3705}
3706
3707uint32_t
3708pci_read_config_method(device_t dev, device_t child, int reg, int width)
3709{
3710	struct pci_devinfo *dinfo = device_get_ivars(child);
3711	pcicfgregs *cfg = &dinfo->cfg;
3712
3713	return (PCIB_READ_CONFIG(device_get_parent(dev),
3714	    cfg->bus, cfg->slot, cfg->func, reg, width));
3715}
3716
3717void
3718pci_write_config_method(device_t dev, device_t child, int reg,
3719    uint32_t val, int width)
3720{
3721	struct pci_devinfo *dinfo = device_get_ivars(child);
3722	pcicfgregs *cfg = &dinfo->cfg;
3723
3724	PCIB_WRITE_CONFIG(device_get_parent(dev),
3725	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3726}
3727
3728int
3729pci_child_location_str_method(device_t dev, device_t child, char *buf,
3730    size_t buflen)
3731{
3732
3733	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3734	    pci_get_function(child));
3735	return (0);
3736}
3737
3738int
3739pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3740    size_t buflen)
3741{
3742	struct pci_devinfo *dinfo;
3743	pcicfgregs *cfg;
3744
3745	dinfo = device_get_ivars(child);
3746	cfg = &dinfo->cfg;
3747	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3748	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3749	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3750	    cfg->progif);
3751	return (0);
3752}
3753
3754int
3755pci_assign_interrupt_method(device_t dev, device_t child)
3756{
3757	struct pci_devinfo *dinfo = device_get_ivars(child);
3758	pcicfgregs *cfg = &dinfo->cfg;
3759
3760	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3761	    cfg->intpin));
3762}
3763
3764static int
3765pci_modevent(module_t mod, int what, void *arg)
3766{
3767	static struct cdev *pci_cdev;
3768
3769	switch (what) {
3770	case MOD_LOAD:
3771		STAILQ_INIT(&pci_devq);
3772		pci_generation = 0;
3773		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3774		    "pci");
3775		pci_load_vendor_data();
3776		break;
3777
3778	case MOD_UNLOAD:
3779		destroy_dev(pci_cdev);
3780		break;
3781	}
3782
3783	return (0);
3784}
3785
/*
 * Restore a type 0 device's previously saved config registers (see
 * pci_cfg_save()): force the device to D0, write back the cached BARs
 * and writable header fields, and re-program any MSI/MSI-X state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3831
/*
 * Snapshot a type 0 device's writable config registers into dinfo so
 * they can be restored later by pci_cfg_restore().  If setstate is
 * non-zero, the device may additionally be placed into D3 according to
 * the pci_do_power_nodriver policy (0 = never, 1 = conservative,
 * 2 = aggressive, 3 = always), going through D0 first as the PCI spec
 * requires.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3915