pci.c revision 190668
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 190668 2009-04-03 10:15:00Z stas $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
/*
 * Forward declarations for the file-local helpers used by this driver.
 */

/* Decoding of base address ("map") register values. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

/* Bus probing, attachment and resource setup. */
static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);

/* Configuration header, capability list and VPD readers. */
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
/* Disabled together with its definition further down in this file. */
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);

/* MSI and MSI-X helpers. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
114
/*
 * Method table for the pci driver.  Each DEVMETHOD() entry pairs an
 * interface method name with the function that implements it; the table
 * is terminated by the { 0, 0 } entry.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: resource management */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* End-of-table marker. */
	{ 0, 0 }
};
166
/* Driver class "pci", built from the pci_methods table above. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/*
 * Register the driver with "pcib" as its parent bus name and
 * pci_modevent() as the module event handler.
 */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Vendor description data and its size.
 * NOTE(review): presumably filled in by pci_load_vendor_data(); the
 * assignment is not visible in this part of the file -- confirm.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175
176
/*
 * One entry of the pci_quirks[] table: identifies a device and names the
 * workaround that applies to it.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* Which quirk applies: one of PCI_QUIRK_* below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	/*
	 * Quirk-specific arguments.  The PCI_QUIRK_MAP_REG entries in
	 * pci_quirks[] put the config-space offset of the extra map
	 * register in arg1; the PCI_QUIRK_DISABLE_MSI entries leave both
	 * arguments 0.
	 */
	int	arg1;
	int	arg2;
};
185
/*
 * Table of known device quirks.  The final all-zero entry marks the end
 * of the table.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* End-of-table marker. */
	{ 0 }
};
220
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/*
 * Global list of device entries; pci_read_device() appends one entry for
 * every function it finds.
 */
struct devlist pci_devq;
/* Both counters are incremented each time pci_read_device() adds a device. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_extcap() when a PCI-express capability, or a PCI-X
 * capability on a PCI-PCI bridge, is seen.
 */
static int pcie_chipset, pcix_chipset;
230
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes: loader tunable and read-write sysctl, default 1. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* hw.pci.do_power_nodriver: loader tunable and read-write sysctl, default 0. */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* hw.pci.do_power_resume: loader tunable and read-write sysctl, default 1. */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.enable_msi: loader tunable and read-write sysctl, default 1. */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix: loader tunable and read-write sysctl, default 1. */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/*
 * hw.pci.honor_msi_blacklist: loader tunable, default 1.  The sysctl is
 * declared CTLFLAG_RD, i.e. read-only once the system is running.
 */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271
272/* Find a device_t by bus/slot/function in domain 0 */
273
274device_t
275pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
276{
277
278	return (pci_find_dbsf(0, bus, slot, func));
279}
280
281/* Find a device_t by domain/bus/slot/function */
282
283device_t
284pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285{
286	struct pci_devinfo *dinfo;
287
288	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289		if ((dinfo->cfg.domain == domain) &&
290		    (dinfo->cfg.bus == bus) &&
291		    (dinfo->cfg.slot == slot) &&
292		    (dinfo->cfg.func == func)) {
293			return (dinfo->cfg.dev);
294		}
295	}
296
297	return (NULL);
298}
299
300/* Find a device_t by vendor/device ID */
301
302device_t
303pci_find_device(uint16_t vendor, uint16_t device)
304{
305	struct pci_devinfo *dinfo;
306
307	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308		if ((dinfo->cfg.vendor == vendor) &&
309		    (dinfo->cfg.device == device)) {
310			return (dinfo->cfg.dev);
311		}
312	}
313
314	return (NULL);
315}
316
317/* return base address of memory or port map */
318
319static pci_addr_t
320pci_mapbase(uint64_t mapreg)
321{
322
323	if (PCI_BAR_MEM(mapreg))
324		return (mapreg & PCIM_BAR_MEM_BASE);
325	else
326		return (mapreg & PCIM_BAR_IO_BASE);
327}
328
329/* return map type of memory or port map */
330
331static const char *
332pci_maptype(uint64_t mapreg)
333{
334
335	if (PCI_BAR_IO(mapreg))
336		return ("I/O Port");
337	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338		return ("Prefetchable Memory");
339	return ("Memory");
340}
341
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * testval is reduced to its base-address bits with pci_mapbase(); the
 * result is the number of trailing zero bits of that base.  A base of 0
 * yields 0.
 */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count how far the lowest set bit is from bit 0. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
360
361/* return log2 of address range supported by map register */
362
363static int
364pci_maprange(uint64_t mapreg)
365{
366	int ln2range = 0;
367
368	if (PCI_BAR_IO(mapreg))
369		ln2range = 32;
370	else
371		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372		case PCIM_BAR_MEM_32:
373			ln2range = 32;
374			break;
375		case PCIM_BAR_MEM_1MB:
376			ln2range = 20;
377			break;
378		case PCIM_BAR_MEM_64:
379			ln2range = 64;
380			break;
381		}
382	return (ln2range);
383}
384
385/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386
387static void
388pci_fixancient(pcicfgregs *cfg)
389{
390	if (cfg->hdrtype != 0)
391		return;
392
393	/* PCI to PCI bridges use header type 1 */
394	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395		cfg->hdrtype = 1;
396}
397
398/* extract header type specific config data */
399
400static void
401pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
402{
403#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
404	switch (cfg->hdrtype) {
405	case 0:
406		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
407		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
408		cfg->nummaps	    = PCI_MAXMAPS_0;
409		break;
410	case 1:
411		cfg->nummaps	    = PCI_MAXMAPS_1;
412		break;
413	case 2:
414		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
415		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
416		cfg->nummaps	    = PCI_MAXMAPS_2;
417		break;
418	}
419#undef REG
420}
421
422/* read configuration header into pcicfgregs structure */
423struct pci_devinfo *
424pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
425{
426#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
427	pcicfgregs *cfg = NULL;
428	struct pci_devinfo *devlist_entry;
429	struct devlist *devlist_head;
430
431	devlist_head = &pci_devq;
432
433	devlist_entry = NULL;
434
435	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
436		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
437		if (devlist_entry == NULL)
438			return (NULL);
439
440		cfg = &devlist_entry->cfg;
441
442		cfg->domain		= d;
443		cfg->bus		= b;
444		cfg->slot		= s;
445		cfg->func		= f;
446		cfg->vendor		= REG(PCIR_VENDOR, 2);
447		cfg->device		= REG(PCIR_DEVICE, 2);
448		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
449		cfg->statreg		= REG(PCIR_STATUS, 2);
450		cfg->baseclass		= REG(PCIR_CLASS, 1);
451		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
452		cfg->progif		= REG(PCIR_PROGIF, 1);
453		cfg->revid		= REG(PCIR_REVID, 1);
454		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
455		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
456		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
457		cfg->intpin		= REG(PCIR_INTPIN, 1);
458		cfg->intline		= REG(PCIR_INTLINE, 1);
459
460		cfg->mingnt		= REG(PCIR_MINGNT, 1);
461		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
462
463		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
464		cfg->hdrtype		&= ~PCIM_MFDEV;
465
466		pci_fixancient(cfg);
467		pci_hdrtypedata(pcib, b, s, f, cfg);
468
469		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
470			pci_read_extcap(pcib, cfg);
471
472		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
473
474		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
475		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
476		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
477		devlist_entry->conf.pc_sel.pc_func = cfg->func;
478		devlist_entry->conf.pc_hdr = cfg->hdrtype;
479
480		devlist_entry->conf.pc_subvendor = cfg->subvendor;
481		devlist_entry->conf.pc_subdevice = cfg->subdevice;
482		devlist_entry->conf.pc_vendor = cfg->vendor;
483		devlist_entry->conf.pc_device = cfg->device;
484
485		devlist_entry->conf.pc_class = cfg->baseclass;
486		devlist_entry->conf.pc_subclass = cfg->subclass;
487		devlist_entry->conf.pc_progif = cfg->progif;
488		devlist_entry->conf.pc_revid = cfg->revid;
489
490		pci_numdevs++;
491		pci_generation++;
492	}
493	return (devlist_entry);
494#undef REG
495}
496
497static void
498pci_read_extcap(device_t pcib, pcicfgregs *cfg)
499{
500#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
501#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
502#if defined(__i386__) || defined(__amd64__)
503	uint64_t addr;
504#endif
505	uint32_t val;
506	int	ptr, nextptr, ptrptr;
507
508	switch (cfg->hdrtype & PCIM_HDRTYPE) {
509	case 0:
510	case 1:
511		ptrptr = PCIR_CAP_PTR;
512		break;
513	case 2:
514		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
515		break;
516	default:
517		return;		/* no extended capabilities support */
518	}
519	nextptr = REG(ptrptr, 1);	/* sanity check? */
520
521	/*
522	 * Read capability entries.
523	 */
524	while (nextptr != 0) {
525		/* Sanity check */
526		if (nextptr > 255) {
527			printf("illegal PCI extended capability offset %d\n",
528			    nextptr);
529			return;
530		}
531		/* Find the next entry */
532		ptr = nextptr;
533		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
534
535		/* Process this entry */
536		switch (REG(ptr + PCICAP_ID, 1)) {
537		case PCIY_PMG:		/* PCI power management */
538			if (cfg->pp.pp_cap == 0) {
539				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
540				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
541				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
542				if ((nextptr - ptr) > PCIR_POWER_DATA)
543					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
544			}
545			break;
546#if defined(__i386__) || defined(__amd64__)
547		case PCIY_HT:		/* HyperTransport */
548			/* Determine HT-specific capability type. */
549			val = REG(ptr + PCIR_HT_COMMAND, 2);
550			switch (val & PCIM_HTCMD_CAP_MASK) {
551			case PCIM_HTCAP_MSI_MAPPING:
552				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
553					/* Sanity check the mapping window. */
554					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
555					    4);
556					addr <<= 32;
557					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
558					    4);
559					if (addr != MSI_INTEL_ADDR_BASE)
560						device_printf(pcib,
561	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
562						    cfg->domain, cfg->bus,
563						    cfg->slot, cfg->func,
564						    (long long)addr);
565				} else
566					addr = MSI_INTEL_ADDR_BASE;
567
568				cfg->ht.ht_msimap = ptr;
569				cfg->ht.ht_msictrl = val;
570				cfg->ht.ht_msiaddr = addr;
571				break;
572			}
573			break;
574#endif
575		case PCIY_MSI:		/* PCI MSI */
576			cfg->msi.msi_location = ptr;
577			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
578			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
579						     PCIM_MSICTRL_MMC_MASK)>>1);
580			break;
581		case PCIY_MSIX:		/* PCI MSI-X */
582			cfg->msix.msix_location = ptr;
583			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
584			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
585			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
586			val = REG(ptr + PCIR_MSIX_TABLE, 4);
587			cfg->msix.msix_table_bar = PCIR_BAR(val &
588			    PCIM_MSIX_BIR_MASK);
589			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
590			val = REG(ptr + PCIR_MSIX_PBA, 4);
591			cfg->msix.msix_pba_bar = PCIR_BAR(val &
592			    PCIM_MSIX_BIR_MASK);
593			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
594			break;
595		case PCIY_VPD:		/* PCI Vital Product Data */
596			cfg->vpd.vpd_reg = ptr;
597			break;
598		case PCIY_SUBVENDOR:
599			/* Should always be true. */
600			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
601				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
602				cfg->subvendor = val & 0xffff;
603				cfg->subdevice = val >> 16;
604			}
605			break;
606		case PCIY_PCIX:		/* PCI-X */
607			/*
608			 * Assume we have a PCI-X chipset if we have
609			 * at least one PCI-PCI bridge with a PCI-X
610			 * capability.  Note that some systems with
611			 * PCI-express or HT chipsets might match on
612			 * this check as well.
613			 */
614			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
615				pcix_chipset = 1;
616			break;
617		case PCIY_EXPRESS:	/* PCI-express */
618			/*
619			 * Assume we have a PCI-express chipset if we have
620			 * at least one PCI-express device.
621			 */
622			pcie_chipset = 1;
623			break;
624		default:
625			break;
626		}
627	}
628/* REG and WREG use carry through to next functions */
629}
630
631/*
632 * PCI Vital Product Data
633 */
634
635#define	PCI_VPD_TIMEOUT		1000000
636
637static int
638pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
639{
640	int count = PCI_VPD_TIMEOUT;
641
642	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
643
644	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
645
646	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
647		if (--count < 0)
648			return (ENXIO);
649		DELAY(1);	/* limit looping */
650	}
651	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
652
653	return (0);
654}
655
#if 0
/*
 * Write one 32-bit word of VPD (currently compiled out).
 *
 * The data register is loaded first, then the 4-byte aligned offset is
 * written to the address register with bit 0x8000 set.  The address
 * register is polled until that bit reads back clear; ENXIO is returned
 * if that does not happen within PCI_VPD_TIMEOUT polls, 0 otherwise.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

/* The timeout is only meaningful to the VPD register accessors above. */
#undef PCI_VPD_TIMEOUT
677
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while reading the device 32 bits at a time.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge passed to pci_read_vpd_reg() */
	pcicfgregs	*cfg;		/* function whose VPD is being read */
	uint32_t	val;		/* last word read; shifted right as bytes are consumed */
	int		bytesinval;	/* bytes of val not yet handed out */
	int		off;		/* VPD offset of the next word to read */
	uint8_t		cksum;		/* running 8-bit sum of all bytes returned */
};
686
687static int
688vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
689{
690	uint32_t reg;
691	uint8_t byte;
692
693	if (vrs->bytesinval == 0) {
694		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
695			return (ENXIO);
696		vrs->val = le32toh(reg);
697		vrs->off += 4;
698		byte = vrs->val & 0xff;
699		vrs->bytesinval = 3;
700	} else {
701		vrs->val = vrs->val >> 8;
702		byte = vrs->val & 0xff;
703		vrs->bytesinval--;
704	}
705
706	vrs->cksum += byte;
707	*data = byte;
708	return (0);
709}
710
711static void
712pci_read_vpd(device_t pcib, pcicfgregs *cfg)
713{
714	struct vpd_readstate vrs;
715	int state;
716	int name;
717	int remain;
718	int i;
719	int alloc, off;		/* alloc/off for RO/W arrays */
720	int cksumvalid;
721	int dflen;
722	uint8_t byte;
723	uint8_t byte2;
724
725	/* init vpd reader */
726	vrs.bytesinval = 0;
727	vrs.off = 0;
728	vrs.pcib = pcib;
729	vrs.cfg = cfg;
730	vrs.cksum = 0;
731
732	state = 0;
733	name = remain = i = 0;	/* shut up stupid gcc */
734	alloc = off = 0;	/* shut up stupid gcc */
735	dflen = 0;		/* shut up stupid gcc */
736	cksumvalid = -1;
737	while (state >= 0) {
738		if (vpd_nextbyte(&vrs, &byte)) {
739			state = -2;
740			break;
741		}
742#if 0
743		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
744		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
745		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
746#endif
747		switch (state) {
748		case 0:		/* item name */
749			if (byte & 0x80) {
750				if (vpd_nextbyte(&vrs, &byte2)) {
751					state = -2;
752					break;
753				}
754				remain = byte2;
755				if (vpd_nextbyte(&vrs, &byte2)) {
756					state = -2;
757					break;
758				}
759				remain |= byte2 << 8;
760				if (remain > (0x7f*4 - vrs.off)) {
761					state = -1;
762					printf(
763			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
764					    cfg->domain, cfg->bus, cfg->slot,
765					    cfg->func, remain);
766				}
767				name = byte & 0x7f;
768			} else {
769				remain = byte & 0x7;
770				name = (byte >> 3) & 0xf;
771			}
772			switch (name) {
773			case 0x2:	/* String */
774				cfg->vpd.vpd_ident = malloc(remain + 1,
775				    M_DEVBUF, M_WAITOK);
776				i = 0;
777				state = 1;
778				break;
779			case 0xf:	/* End */
780				state = -1;
781				break;
782			case 0x10:	/* VPD-R */
783				alloc = 8;
784				off = 0;
785				cfg->vpd.vpd_ros = malloc(alloc *
786				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
787				    M_WAITOK | M_ZERO);
788				state = 2;
789				break;
790			case 0x11:	/* VPD-W */
791				alloc = 8;
792				off = 0;
793				cfg->vpd.vpd_w = malloc(alloc *
794				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
795				    M_WAITOK | M_ZERO);
796				state = 5;
797				break;
798			default:	/* Invalid data, abort */
799				state = -1;
800				break;
801			}
802			break;
803
804		case 1:	/* Identifier String */
805			cfg->vpd.vpd_ident[i++] = byte;
806			remain--;
807			if (remain == 0)  {
808				cfg->vpd.vpd_ident[i] = '\0';
809				state = 0;
810			}
811			break;
812
813		case 2:	/* VPD-R Keyword Header */
814			if (off == alloc) {
815				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
816				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
817				    M_DEVBUF, M_WAITOK | M_ZERO);
818			}
819			cfg->vpd.vpd_ros[off].keyword[0] = byte;
820			if (vpd_nextbyte(&vrs, &byte2)) {
821				state = -2;
822				break;
823			}
824			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
825			if (vpd_nextbyte(&vrs, &byte2)) {
826				state = -2;
827				break;
828			}
829			dflen = byte2;
830			if (dflen == 0 &&
831			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
832			    2) == 0) {
833				/*
834				 * if this happens, we can't trust the rest
835				 * of the VPD.
836				 */
837				printf(
838				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
839				    cfg->domain, cfg->bus, cfg->slot,
840				    cfg->func, dflen);
841				cksumvalid = 0;
842				state = -1;
843				break;
844			} else if (dflen == 0) {
845				cfg->vpd.vpd_ros[off].value = malloc(1 *
846				    sizeof(*cfg->vpd.vpd_ros[off].value),
847				    M_DEVBUF, M_WAITOK);
848				cfg->vpd.vpd_ros[off].value[0] = '\x00';
849			} else
850				cfg->vpd.vpd_ros[off].value = malloc(
851				    (dflen + 1) *
852				    sizeof(*cfg->vpd.vpd_ros[off].value),
853				    M_DEVBUF, M_WAITOK);
854			remain -= 3;
855			i = 0;
856			/* keep in sync w/ state 3's transistions */
857			if (dflen == 0 && remain == 0)
858				state = 0;
859			else if (dflen == 0)
860				state = 2;
861			else
862				state = 3;
863			break;
864
865		case 3:	/* VPD-R Keyword Value */
866			cfg->vpd.vpd_ros[off].value[i++] = byte;
867			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
868			    "RV", 2) == 0 && cksumvalid == -1) {
869				if (vrs.cksum == 0)
870					cksumvalid = 1;
871				else {
872					if (bootverbose)
873						printf(
874				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
875						    cfg->domain, cfg->bus,
876						    cfg->slot, cfg->func,
877						    vrs.cksum);
878					cksumvalid = 0;
879					state = -1;
880					break;
881				}
882			}
883			dflen--;
884			remain--;
885			/* keep in sync w/ state 2's transistions */
886			if (dflen == 0)
887				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
888			if (dflen == 0 && remain == 0) {
889				cfg->vpd.vpd_rocnt = off;
890				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
891				    off * sizeof(*cfg->vpd.vpd_ros),
892				    M_DEVBUF, M_WAITOK | M_ZERO);
893				state = 0;
894			} else if (dflen == 0)
895				state = 2;
896			break;
897
898		case 4:
899			remain--;
900			if (remain == 0)
901				state = 0;
902			break;
903
904		case 5:	/* VPD-W Keyword Header */
905			if (off == alloc) {
906				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
907				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
908				    M_DEVBUF, M_WAITOK | M_ZERO);
909			}
910			cfg->vpd.vpd_w[off].keyword[0] = byte;
911			if (vpd_nextbyte(&vrs, &byte2)) {
912				state = -2;
913				break;
914			}
915			cfg->vpd.vpd_w[off].keyword[1] = byte2;
916			if (vpd_nextbyte(&vrs, &byte2)) {
917				state = -2;
918				break;
919			}
920			cfg->vpd.vpd_w[off].len = dflen = byte2;
921			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
922			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
923			    sizeof(*cfg->vpd.vpd_w[off].value),
924			    M_DEVBUF, M_WAITOK);
925			remain -= 3;
926			i = 0;
927			/* keep in sync w/ state 6's transistions */
928			if (dflen == 0 && remain == 0)
929				state = 0;
930			else if (dflen == 0)
931				state = 5;
932			else
933				state = 6;
934			break;
935
936		case 6:	/* VPD-W Keyword Value */
937			cfg->vpd.vpd_w[off].value[i++] = byte;
938			dflen--;
939			remain--;
940			/* keep in sync w/ state 5's transistions */
941			if (dflen == 0)
942				cfg->vpd.vpd_w[off++].value[i++] = '\0';
943			if (dflen == 0 && remain == 0) {
944				cfg->vpd.vpd_wcnt = off;
945				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
946				    off * sizeof(*cfg->vpd.vpd_w),
947				    M_DEVBUF, M_WAITOK | M_ZERO);
948				state = 0;
949			} else if (dflen == 0)
950				state = 5;
951			break;
952
953		default:
954			printf("pci%d:%d:%d:%d: invalid state: %d\n",
955			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
956			    state);
957			state = -1;
958			break;
959		}
960	}
961
962	if (cksumvalid == 0 || state < -1) {
963		/* read-only data bad, clean up */
964		if (cfg->vpd.vpd_ros != NULL) {
965			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
966				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
967			free(cfg->vpd.vpd_ros, M_DEVBUF);
968			cfg->vpd.vpd_ros = NULL;
969		}
970	}
971	if (state < -1) {
972		/* I/O error, clean up */
973		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
974		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
975		if (cfg->vpd.vpd_ident != NULL) {
976			free(cfg->vpd.vpd_ident, M_DEVBUF);
977			cfg->vpd.vpd_ident = NULL;
978		}
979		if (cfg->vpd.vpd_w != NULL) {
980			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
981				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
982			free(cfg->vpd.vpd_w, M_DEVBUF);
983			cfg->vpd.vpd_w = NULL;
984		}
985	}
986	cfg->vpd.vpd_cached = 1;
987#undef REG
988#undef WREG
989}
990
991int
992pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
993{
994	struct pci_devinfo *dinfo = device_get_ivars(child);
995	pcicfgregs *cfg = &dinfo->cfg;
996
997	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
998		pci_read_vpd(device_get_parent(dev), cfg);
999
1000	*identptr = cfg->vpd.vpd_ident;
1001
1002	if (*identptr == NULL)
1003		return (ENXIO);
1004
1005	return (0);
1006}
1007
1008int
1009pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1010	const char **vptr)
1011{
1012	struct pci_devinfo *dinfo = device_get_ivars(child);
1013	pcicfgregs *cfg = &dinfo->cfg;
1014	int i;
1015
1016	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1017		pci_read_vpd(device_get_parent(dev), cfg);
1018
1019	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1020		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1021		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1022			*vptr = cfg->vpd.vpd_ros[i].value;
1023		}
1024
1025	if (i != cfg->vpd.vpd_rocnt)
1026		return (0);
1027
1028	*vptr = NULL;
1029	return (ENXIO);
1030}
1031
1032/*
1033 * Find the requested extended capability and return the offset in
1034 * configuration space via the pointer provided. The function returns
1035 * 0 on success and error code otherwise.
1036*/
1037int
1038pci_find_extcap_method(device_t dev, device_t child, int capability,
1039    int *capreg)
1040{
1041	struct pci_devinfo *dinfo = device_get_ivars(child);
1042	pcicfgregs *cfg = &dinfo->cfg;
1043	u_int32_t status;
1044	u_int8_t ptr;
1045
1046	/*
1047	 * Check the CAP_LIST bit of the PCI status register first.
1048	 */
1049	status = pci_read_config(child, PCIR_STATUS, 2);
1050	if (!(status & PCIM_STATUS_CAPPRESENT))
1051		return (ENXIO);
1052
1053	/*
1054	 * Determine the start pointer of the capabilities list.
1055	 */
1056	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1057	case 0:
1058	case 1:
1059		ptr = PCIR_CAP_PTR;
1060		break;
1061	case 2:
1062		ptr = PCIR_CAP_PTR_2;
1063		break;
1064	default:
1065		/* XXX: panic? */
1066		return (ENXIO);		/* no extended capabilities support */
1067	}
1068	ptr = pci_read_config(child, ptr, 1);
1069
1070	/*
1071	 * Traverse the capabilities list.
1072	 */
1073	while (ptr != 0) {
1074		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1075			if (capreg != NULL)
1076				*capreg = ptr;
1077			return (0);
1078		}
1079		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1080	}
1081
1082	return (ENOENT);
1083}
1084
1085/*
1086 * Support for MSI-X message interrupts.
1087 */
1088void
1089pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1090{
1091	struct pci_devinfo *dinfo = device_get_ivars(dev);
1092	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1093	uint32_t offset;
1094
1095	KASSERT(msix->msix_table_len > index, ("bogus index"));
1096	offset = msix->msix_table_offset + index * 16;
1097	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1098	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1099	bus_write_4(msix->msix_table_res, offset + 8, data);
1100
1101	/* Enable MSI -> HT mapping. */
1102	pci_ht_map_msi(dev, address);
1103}
1104
1105void
1106pci_mask_msix(device_t dev, u_int index)
1107{
1108	struct pci_devinfo *dinfo = device_get_ivars(dev);
1109	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1110	uint32_t offset, val;
1111
1112	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1113	offset = msix->msix_table_offset + index * 16 + 12;
1114	val = bus_read_4(msix->msix_table_res, offset);
1115	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1116		val |= PCIM_MSIX_VCTRL_MASK;
1117		bus_write_4(msix->msix_table_res, offset, val);
1118	}
1119}
1120
1121void
1122pci_unmask_msix(device_t dev, u_int index)
1123{
1124	struct pci_devinfo *dinfo = device_get_ivars(dev);
1125	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1126	uint32_t offset, val;
1127
1128	KASSERT(msix->msix_table_len > index, ("bogus index"));
1129	offset = msix->msix_table_offset + index * 16 + 12;
1130	val = bus_read_4(msix->msix_table_res, offset);
1131	if (val & PCIM_MSIX_VCTRL_MASK) {
1132		val &= ~PCIM_MSIX_VCTRL_MASK;
1133		bus_write_4(msix->msix_table_res, offset, val);
1134	}
1135}
1136
1137int
1138pci_pending_msix(device_t dev, u_int index)
1139{
1140	struct pci_devinfo *dinfo = device_get_ivars(dev);
1141	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1142	uint32_t offset, bit;
1143
1144	KASSERT(msix->msix_table_len > index, ("bogus index"));
1145	offset = msix->msix_pba_offset + (index / 32) * 4;
1146	bit = 1 << index % 32;
1147	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1148}
1149
1150/*
1151 * Restore MSI-X registers and table during resume.  If MSI-X is
1152 * enabled then walk the virtual table to restore the actual MSI-X
1153 * table.
1154 */
1155static void
1156pci_resume_msix(device_t dev)
1157{
1158	struct pci_devinfo *dinfo = device_get_ivars(dev);
1159	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1160	struct msix_table_entry *mte;
1161	struct msix_vector *mv;
1162	int i;
1163
1164	if (msix->msix_alloc > 0) {
1165		/* First, mask all vectors. */
1166		for (i = 0; i < msix->msix_msgnum; i++)
1167			pci_mask_msix(dev, i);
1168
1169		/* Second, program any messages with at least one handler. */
1170		for (i = 0; i < msix->msix_table_len; i++) {
1171			mte = &msix->msix_table[i];
1172			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1173				continue;
1174			mv = &msix->msix_vectors[mte->mte_vector - 1];
1175			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1176			pci_unmask_msix(dev, i);
1177		}
1178	}
1179	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1180	    msix->msix_ctrl, 2);
1181}
1182
1183/*
1184 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1185 * returned in *count.  After this function returns, each message will be
1186 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1187 */
1188int
1189pci_alloc_msix_method(device_t dev, device_t child, int *count)
1190{
1191	struct pci_devinfo *dinfo = device_get_ivars(child);
1192	pcicfgregs *cfg = &dinfo->cfg;
1193	struct resource_list_entry *rle;
1194	int actual, error, i, irq, max;
1195
1196	/* Don't let count == 0 get us into trouble. */
1197	if (*count == 0)
1198		return (EINVAL);
1199
1200	/* If rid 0 is allocated, then fail. */
1201	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1202	if (rle != NULL && rle->res != NULL)
1203		return (ENXIO);
1204
1205	/* Already have allocated messages? */
1206	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1207		return (ENXIO);
1208
1209	/* If MSI is blacklisted for this system, fail. */
1210	if (pci_msi_blacklisted())
1211		return (ENXIO);
1212
1213	/* MSI-X capability present? */
1214	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1215		return (ENODEV);
1216
1217	/* Make sure the appropriate BARs are mapped. */
1218	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1219	    cfg->msix.msix_table_bar);
1220	if (rle == NULL || rle->res == NULL ||
1221	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1222		return (ENXIO);
1223	cfg->msix.msix_table_res = rle->res;
1224	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1225		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1226		    cfg->msix.msix_pba_bar);
1227		if (rle == NULL || rle->res == NULL ||
1228		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1229			return (ENXIO);
1230	}
1231	cfg->msix.msix_pba_res = rle->res;
1232
1233	if (bootverbose)
1234		device_printf(child,
1235		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1236		    *count, cfg->msix.msix_msgnum);
1237	max = min(*count, cfg->msix.msix_msgnum);
1238	for (i = 0; i < max; i++) {
1239		/* Allocate a message. */
1240		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1241		if (error)
1242			break;
1243		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1244		    irq, 1);
1245	}
1246	actual = i;
1247
1248	if (bootverbose) {
1249		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1250		if (actual == 1)
1251			device_printf(child, "using IRQ %lu for MSI-X\n",
1252			    rle->start);
1253		else {
1254			int run;
1255
1256			/*
1257			 * Be fancy and try to print contiguous runs of
1258			 * IRQ values as ranges.  'irq' is the previous IRQ.
1259			 * 'run' is true if we are in a range.
1260			 */
1261			device_printf(child, "using IRQs %lu", rle->start);
1262			irq = rle->start;
1263			run = 0;
1264			for (i = 1; i < actual; i++) {
1265				rle = resource_list_find(&dinfo->resources,
1266				    SYS_RES_IRQ, i + 1);
1267
1268				/* Still in a run? */
1269				if (rle->start == irq + 1) {
1270					run = 1;
1271					irq++;
1272					continue;
1273				}
1274
1275				/* Finish previous range. */
1276				if (run) {
1277					printf("-%d", irq);
1278					run = 0;
1279				}
1280
1281				/* Start new range. */
1282				printf(",%lu", rle->start);
1283				irq = rle->start;
1284			}
1285
1286			/* Unfinished range? */
1287			if (run)
1288				printf("-%d", irq);
1289			printf(" for MSI-X\n");
1290		}
1291	}
1292
1293	/* Mask all vectors. */
1294	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1295		pci_mask_msix(child, i);
1296
1297	/* Allocate and initialize vector data and virtual table. */
1298	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1299	    M_DEVBUF, M_WAITOK | M_ZERO);
1300	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1301	    M_DEVBUF, M_WAITOK | M_ZERO);
1302	for (i = 0; i < actual; i++) {
1303		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1304		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1305		cfg->msix.msix_table[i].mte_vector = i + 1;
1306	}
1307
1308	/* Update control register to enable MSI-X. */
1309	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1310	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1311	    cfg->msix.msix_ctrl, 2);
1312
1313	/* Update counts of alloc'd messages. */
1314	cfg->msix.msix_alloc = actual;
1315	cfg->msix.msix_table_len = actual;
1316	*count = actual;
1317	return (0);
1318}
1319
1320/*
1321 * By default, pci_alloc_msix() will assign the allocated IRQ
1322 * resources consecutively to the first N messages in the MSI-X table.
1323 * However, device drivers may want to use different layouts if they
1324 * either receive fewer messages than they asked for, or they wish to
1325 * populate the MSI-X table sparsely.  This method allows the driver
1326 * to specify what layout it wants.  It must be called after a
1327 * successful pci_alloc_msix() but before any of the associated
1328 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1329 *
1330 * The 'vectors' array contains 'count' message vectors.  The array
1331 * maps directly to the MSI-X table in that index 0 in the array
1332 * specifies the vector for the first message in the MSI-X table, etc.
1333 * The vector value in each array index can either be 0 to indicate
1334 * that no vector should be assigned to a message slot, or it can be a
1335 * number from 1 to N (where N is the count returned from a
1336 * succcessful call to pci_alloc_msix()) to indicate which message
1337 * vector (IRQ) to be used for the corresponding message.
1338 *
1339 * On successful return, each message with a non-zero vector will have
1340 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1341 * 1.  Additionally, if any of the IRQs allocated via the previous
1342 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1343 * will be freed back to the system automatically.
1344 *
1345 * For example, suppose a driver has a MSI-X table with 6 messages and
1346 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1347 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1348 * C.  After the call to pci_alloc_msix(), the device will be setup to
1349 * have an MSI-X table of ABC--- (where - means no vector assigned).
1350 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1351 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1352 * be freed back to the system.  This device will also have valid
1353 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1354 *
1355 * In any case, the SYS_RES_IRQ rid X will always map to the message
1356 * at MSI-X table index X - 1 and will only be valid if a vector is
1357 * assigned to that table entry.
1358 */
1359int
1360pci_remap_msix_method(device_t dev, device_t child, int count,
1361    const u_int *vectors)
1362{
1363	struct pci_devinfo *dinfo = device_get_ivars(child);
1364	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1365	struct resource_list_entry *rle;
1366	int i, irq, j, *used;
1367
1368	/*
1369	 * Have to have at least one message in the table but the
1370	 * table can't be bigger than the actual MSI-X table in the
1371	 * device.
1372	 */
1373	if (count == 0 || count > msix->msix_msgnum)
1374		return (EINVAL);
1375
1376	/* Sanity check the vectors. */
1377	for (i = 0; i < count; i++)
1378		if (vectors[i] > msix->msix_alloc)
1379			return (EINVAL);
1380
1381	/*
1382	 * Make sure there aren't any holes in the vectors to be used.
1383	 * It's a big pain to support it, and it doesn't really make
1384	 * sense anyway.  Also, at least one vector must be used.
1385	 */
1386	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1387	    M_ZERO);
1388	for (i = 0; i < count; i++)
1389		if (vectors[i] != 0)
1390			used[vectors[i] - 1] = 1;
1391	for (i = 0; i < msix->msix_alloc - 1; i++)
1392		if (used[i] == 0 && used[i + 1] == 1) {
1393			free(used, M_DEVBUF);
1394			return (EINVAL);
1395		}
1396	if (used[0] != 1) {
1397		free(used, M_DEVBUF);
1398		return (EINVAL);
1399	}
1400
1401	/* Make sure none of the resources are allocated. */
1402	for (i = 0; i < msix->msix_table_len; i++) {
1403		if (msix->msix_table[i].mte_vector == 0)
1404			continue;
1405		if (msix->msix_table[i].mte_handlers > 0)
1406			return (EBUSY);
1407		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1408		KASSERT(rle != NULL, ("missing resource"));
1409		if (rle->res != NULL)
1410			return (EBUSY);
1411	}
1412
1413	/* Free the existing resource list entries. */
1414	for (i = 0; i < msix->msix_table_len; i++) {
1415		if (msix->msix_table[i].mte_vector == 0)
1416			continue;
1417		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1418	}
1419
1420	/*
1421	 * Build the new virtual table keeping track of which vectors are
1422	 * used.
1423	 */
1424	free(msix->msix_table, M_DEVBUF);
1425	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1426	    M_DEVBUF, M_WAITOK | M_ZERO);
1427	for (i = 0; i < count; i++)
1428		msix->msix_table[i].mte_vector = vectors[i];
1429	msix->msix_table_len = count;
1430
1431	/* Free any unused IRQs and resize the vectors array if necessary. */
1432	j = msix->msix_alloc - 1;
1433	if (used[j] == 0) {
1434		struct msix_vector *vec;
1435
1436		while (used[j] == 0) {
1437			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1438			    msix->msix_vectors[j].mv_irq);
1439			j--;
1440		}
1441		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1442		    M_WAITOK);
1443		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1444		    (j + 1));
1445		free(msix->msix_vectors, M_DEVBUF);
1446		msix->msix_vectors = vec;
1447		msix->msix_alloc = j + 1;
1448	}
1449	free(used, M_DEVBUF);
1450
1451	/* Map the IRQs onto the rids. */
1452	for (i = 0; i < count; i++) {
1453		if (vectors[i] == 0)
1454			continue;
1455		irq = msix->msix_vectors[vectors[i]].mv_irq;
1456		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1457		    irq, 1);
1458	}
1459
1460	if (bootverbose) {
1461		device_printf(child, "Remapped MSI-X IRQs as: ");
1462		for (i = 0; i < count; i++) {
1463			if (i != 0)
1464				printf(", ");
1465			if (vectors[i] == 0)
1466				printf("---");
1467			else
1468				printf("%d",
1469				    msix->msix_vectors[vectors[i]].mv_irq);
1470		}
1471		printf("\n");
1472	}
1473
1474	return (0);
1475}
1476
1477static int
1478pci_release_msix(device_t dev, device_t child)
1479{
1480	struct pci_devinfo *dinfo = device_get_ivars(child);
1481	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1482	struct resource_list_entry *rle;
1483	int i;
1484
1485	/* Do we have any messages to release? */
1486	if (msix->msix_alloc == 0)
1487		return (ENODEV);
1488
1489	/* Make sure none of the resources are allocated. */
1490	for (i = 0; i < msix->msix_table_len; i++) {
1491		if (msix->msix_table[i].mte_vector == 0)
1492			continue;
1493		if (msix->msix_table[i].mte_handlers > 0)
1494			return (EBUSY);
1495		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1496		KASSERT(rle != NULL, ("missing resource"));
1497		if (rle->res != NULL)
1498			return (EBUSY);
1499	}
1500
1501	/* Update control register to disable MSI-X. */
1502	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1503	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1504	    msix->msix_ctrl, 2);
1505
1506	/* Free the resource list entries. */
1507	for (i = 0; i < msix->msix_table_len; i++) {
1508		if (msix->msix_table[i].mte_vector == 0)
1509			continue;
1510		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1511	}
1512	free(msix->msix_table, M_DEVBUF);
1513	msix->msix_table_len = 0;
1514
1515	/* Release the IRQs. */
1516	for (i = 0; i < msix->msix_alloc; i++)
1517		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1518		    msix->msix_vectors[i].mv_irq);
1519	free(msix->msix_vectors, M_DEVBUF);
1520	msix->msix_alloc = 0;
1521	return (0);
1522}
1523
1524/*
1525 * Return the max supported MSI-X messages this device supports.
1526 * Basically, assuming the MD code can alloc messages, this function
1527 * should return the maximum value that pci_alloc_msix() can return.
1528 * Thus, it is subject to the tunables, etc.
1529 */
1530int
1531pci_msix_count_method(device_t dev, device_t child)
1532{
1533	struct pci_devinfo *dinfo = device_get_ivars(child);
1534	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1535
1536	if (pci_do_msix && msix->msix_location != 0)
1537		return (msix->msix_msgnum);
1538	return (0);
1539}
1540
1541/*
1542 * HyperTransport MSI mapping control
1543 */
1544void
1545pci_ht_map_msi(device_t dev, uint64_t addr)
1546{
1547	struct pci_devinfo *dinfo = device_get_ivars(dev);
1548	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1549
1550	if (!ht->ht_msimap)
1551		return;
1552
1553	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1554	    ht->ht_msiaddr >> 20 == addr >> 20) {
1555		/* Enable MSI -> HT mapping. */
1556		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1557		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1558		    ht->ht_msictrl, 2);
1559	}
1560
1561	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1562		/* Disable MSI -> HT mapping. */
1563		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1564		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1565		    ht->ht_msictrl, 2);
1566	}
1567}
1568
1569/*
1570 * Support for MSI message signalled interrupts.
1571 */
1572void
1573pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1574{
1575	struct pci_devinfo *dinfo = device_get_ivars(dev);
1576	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1577
1578	/* Write data and address values. */
1579	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1580	    address & 0xffffffff, 4);
1581	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1582		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1583		    address >> 32, 4);
1584		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1585		    data, 2);
1586	} else
1587		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1588		    2);
1589
1590	/* Enable MSI in the control register. */
1591	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1592	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1593	    2);
1594
1595	/* Enable MSI -> HT mapping. */
1596	pci_ht_map_msi(dev, address);
1597}
1598
1599void
1600pci_disable_msi(device_t dev)
1601{
1602	struct pci_devinfo *dinfo = device_get_ivars(dev);
1603	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1604
1605	/* Disable MSI -> HT mapping. */
1606	pci_ht_map_msi(dev, 0);
1607
1608	/* Disable MSI in the control register. */
1609	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1610	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1611	    2);
1612}
1613
1614/*
1615 * Restore MSI registers during resume.  If MSI is enabled then
1616 * restore the data and address registers in addition to the control
1617 * register.
1618 */
1619static void
1620pci_resume_msi(device_t dev)
1621{
1622	struct pci_devinfo *dinfo = device_get_ivars(dev);
1623	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1624	uint64_t address;
1625	uint16_t data;
1626
1627	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1628		address = msi->msi_addr;
1629		data = msi->msi_data;
1630		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1631		    address & 0xffffffff, 4);
1632		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1633			pci_write_config(dev, msi->msi_location +
1634			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1635			pci_write_config(dev, msi->msi_location +
1636			    PCIR_MSI_DATA_64BIT, data, 2);
1637		} else
1638			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1639			    data, 2);
1640	}
1641	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1642	    2);
1643}
1644
1645int
1646pci_remap_msi_irq(device_t dev, u_int irq)
1647{
1648	struct pci_devinfo *dinfo = device_get_ivars(dev);
1649	pcicfgregs *cfg = &dinfo->cfg;
1650	struct resource_list_entry *rle;
1651	struct msix_table_entry *mte;
1652	struct msix_vector *mv;
1653	device_t bus;
1654	uint64_t addr;
1655	uint32_t data;
1656	int error, i, j;
1657
1658	bus = device_get_parent(dev);
1659
1660	/*
1661	 * Handle MSI first.  We try to find this IRQ among our list
1662	 * of MSI IRQs.  If we find it, we request updated address and
1663	 * data registers and apply the results.
1664	 */
1665	if (cfg->msi.msi_alloc > 0) {
1666
1667		/* If we don't have any active handlers, nothing to do. */
1668		if (cfg->msi.msi_handlers == 0)
1669			return (0);
1670		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1671			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1672			    i + 1);
1673			if (rle->start == irq) {
1674				error = PCIB_MAP_MSI(device_get_parent(bus),
1675				    dev, irq, &addr, &data);
1676				if (error)
1677					return (error);
1678				pci_disable_msi(dev);
1679				dinfo->cfg.msi.msi_addr = addr;
1680				dinfo->cfg.msi.msi_data = data;
1681				pci_enable_msi(dev, addr, data);
1682				return (0);
1683			}
1684		}
1685		return (ENOENT);
1686	}
1687
1688	/*
1689	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1690	 * we request the updated mapping info.  If that works, we go
1691	 * through all the slots that use this IRQ and update them.
1692	 */
1693	if (cfg->msix.msix_alloc > 0) {
1694		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1695			mv = &cfg->msix.msix_vectors[i];
1696			if (mv->mv_irq == irq) {
1697				error = PCIB_MAP_MSI(device_get_parent(bus),
1698				    dev, irq, &addr, &data);
1699				if (error)
1700					return (error);
1701				mv->mv_address = addr;
1702				mv->mv_data = data;
1703				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1704					mte = &cfg->msix.msix_table[j];
1705					if (mte->mte_vector != i + 1)
1706						continue;
1707					if (mte->mte_handlers == 0)
1708						continue;
1709					pci_mask_msix(dev, j);
1710					pci_enable_msix(dev, j, addr, data);
1711					pci_unmask_msix(dev, j);
1712				}
1713			}
1714		}
1715		return (ENOENT);
1716	}
1717
1718	return (ENOENT);
1719}
1720
1721/*
1722 * Returns true if the specified device is blacklisted because MSI
1723 * doesn't work.
1724 */
1725int
1726pci_msi_device_blacklisted(device_t dev)
1727{
1728	struct pci_quirk *q;
1729
1730	if (!pci_honor_msi_blacklist)
1731		return (0);
1732
1733	for (q = &pci_quirks[0]; q->devid; q++) {
1734		if (q->devid == pci_get_devid(dev) &&
1735		    q->type == PCI_QUIRK_DISABLE_MSI)
1736			return (1);
1737	}
1738	return (0);
1739}
1740
1741/*
1742 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1743 * we just check for blacklisted chipsets as represented by the
1744 * host-PCI bridge at device 0:0:0.  In the future, it may become
1745 * necessary to check other system attributes, such as the kenv values
1746 * that give the motherboard manufacturer and model number.
1747 */
1748static int
1749pci_msi_blacklisted(void)
1750{
1751	device_t dev;
1752
1753	if (!pci_honor_msi_blacklist)
1754		return (0);
1755
1756	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1757	if (!(pcie_chipset || pcix_chipset))
1758		return (1);
1759
1760	dev = pci_find_bsf(0, 0, 0);
1761	if (dev != NULL)
1762		return (pci_msi_device_blacklisted(dev));
1763	return (0);
1764}
1765
1766/*
1767 * Attempt to allocate *count MSI messages.  The actual number allocated is
1768 * returned in *count.  After this function returns, each message will be
1769 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1770 */
1771int
1772pci_alloc_msi_method(device_t dev, device_t child, int *count)
1773{
1774	struct pci_devinfo *dinfo = device_get_ivars(child);
1775	pcicfgregs *cfg = &dinfo->cfg;
1776	struct resource_list_entry *rle;
1777	int actual, error, i, irqs[32];
1778	uint16_t ctrl;
1779
1780	/* Don't let count == 0 get us into trouble. */
1781	if (*count == 0)
1782		return (EINVAL);
1783
1784	/* If rid 0 is allocated, then fail. */
1785	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1786	if (rle != NULL && rle->res != NULL)
1787		return (ENXIO);
1788
1789	/* Already have allocated messages? */
1790	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1791		return (ENXIO);
1792
1793	/* If MSI is blacklisted for this system, fail. */
1794	if (pci_msi_blacklisted())
1795		return (ENXIO);
1796
1797	/* MSI capability present? */
1798	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1799		return (ENODEV);
1800
1801	if (bootverbose)
1802		device_printf(child,
1803		    "attempting to allocate %d MSI vectors (%d supported)\n",
1804		    *count, cfg->msi.msi_msgnum);
1805
1806	/* Don't ask for more than the device supports. */
1807	actual = min(*count, cfg->msi.msi_msgnum);
1808
1809	/* Don't ask for more than 32 messages. */
1810	actual = min(actual, 32);
1811
1812	/* MSI requires power of 2 number of messages. */
1813	if (!powerof2(actual))
1814		return (EINVAL);
1815
1816	for (;;) {
1817		/* Try to allocate N messages. */
1818		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1819		    cfg->msi.msi_msgnum, irqs);
1820		if (error == 0)
1821			break;
1822		if (actual == 1)
1823			return (error);
1824
1825		/* Try N / 2. */
1826		actual >>= 1;
1827	}
1828
1829	/*
1830	 * We now have N actual messages mapped onto SYS_RES_IRQ
1831	 * resources in the irqs[] array, so add new resources
1832	 * starting at rid 1.
1833	 */
1834	for (i = 0; i < actual; i++)
1835		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1836		    irqs[i], irqs[i], 1);
1837
1838	if (bootverbose) {
1839		if (actual == 1)
1840			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1841		else {
1842			int run;
1843
1844			/*
1845			 * Be fancy and try to print contiguous runs
1846			 * of IRQ values as ranges.  'run' is true if
1847			 * we are in a range.
1848			 */
1849			device_printf(child, "using IRQs %d", irqs[0]);
1850			run = 0;
1851			for (i = 1; i < actual; i++) {
1852
1853				/* Still in a run? */
1854				if (irqs[i] == irqs[i - 1] + 1) {
1855					run = 1;
1856					continue;
1857				}
1858
1859				/* Finish previous range. */
1860				if (run) {
1861					printf("-%d", irqs[i - 1]);
1862					run = 0;
1863				}
1864
1865				/* Start new range. */
1866				printf(",%d", irqs[i]);
1867			}
1868
1869			/* Unfinished range? */
1870			if (run)
1871				printf("-%d", irqs[actual - 1]);
1872			printf(" for MSI\n");
1873		}
1874	}
1875
1876	/* Update control register with actual count. */
1877	ctrl = cfg->msi.msi_ctrl;
1878	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1879	ctrl |= (ffs(actual) - 1) << 4;
1880	cfg->msi.msi_ctrl = ctrl;
1881	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1882
1883	/* Update counts of alloc'd messages. */
1884	cfg->msi.msi_alloc = actual;
1885	cfg->msi.msi_handlers = 0;
1886	*count = actual;
1887	return (0);
1888}
1889
1890/* Release the MSI messages associated with this device. */
1891int
1892pci_release_msi_method(device_t dev, device_t child)
1893{
1894	struct pci_devinfo *dinfo = device_get_ivars(child);
1895	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1896	struct resource_list_entry *rle;
1897	int error, i, irqs[32];
1898
1899	/* Try MSI-X first. */
1900	error = pci_release_msix(dev, child);
1901	if (error != ENODEV)
1902		return (error);
1903
1904	/* Do we have any messages to release? */
1905	if (msi->msi_alloc == 0)
1906		return (ENODEV);
1907	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1908
1909	/* Make sure none of the resources are allocated. */
1910	if (msi->msi_handlers > 0)
1911		return (EBUSY);
1912	for (i = 0; i < msi->msi_alloc; i++) {
1913		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1914		KASSERT(rle != NULL, ("missing MSI resource"));
1915		if (rle->res != NULL)
1916			return (EBUSY);
1917		irqs[i] = rle->start;
1918	}
1919
1920	/* Update control register with 0 count. */
1921	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1922	    ("%s: MSI still enabled", __func__));
1923	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1924	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1925	    msi->msi_ctrl, 2);
1926
1927	/* Release the messages. */
1928	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1929	for (i = 0; i < msi->msi_alloc; i++)
1930		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1931
1932	/* Update alloc count. */
1933	msi->msi_alloc = 0;
1934	msi->msi_addr = 0;
1935	msi->msi_data = 0;
1936	return (0);
1937}
1938
1939/*
1940 * Return the max supported MSI messages this device supports.
1941 * Basically, assuming the MD code can alloc messages, this function
1942 * should return the maximum value that pci_alloc_msi() can return.
1943 * Thus, it is subject to the tunables, etc.
1944 */
1945int
1946pci_msi_count_method(device_t dev, device_t child)
1947{
1948	struct pci_devinfo *dinfo = device_get_ivars(child);
1949	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1950
1951	if (pci_do_msi && msi->msi_location != 0)
1952		return (msi->msi_msgnum);
1953	return (0);
1954}
1955
1956/* free pcicfgregs structure and all depending data structures */
1957
1958int
1959pci_freecfg(struct pci_devinfo *dinfo)
1960{
1961	struct devlist *devlist_head;
1962	int i;
1963
1964	devlist_head = &pci_devq;
1965
1966	if (dinfo->cfg.vpd.vpd_reg) {
1967		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1968		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1969			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1970		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1971		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1972			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1973		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1974	}
1975	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1976	free(dinfo, M_DEVBUF);
1977
1978	/* increment the generation count */
1979	pci_generation++;
1980
1981	/* we're losing one device */
1982	pci_numdevs--;
1983	return (0);
1984}
1985
1986/*
1987 * PCI power manangement
1988 */
1989int
1990pci_set_powerstate_method(device_t dev, device_t child, int state)
1991{
1992	struct pci_devinfo *dinfo = device_get_ivars(child);
1993	pcicfgregs *cfg = &dinfo->cfg;
1994	uint16_t status;
1995	int result, oldstate, highest, delay;
1996
1997	if (cfg->pp.pp_cap == 0)
1998		return (EOPNOTSUPP);
1999
2000	/*
2001	 * Optimize a no state change request away.  While it would be OK to
2002	 * write to the hardware in theory, some devices have shown odd
2003	 * behavior when going from D3 -> D3.
2004	 */
2005	oldstate = pci_get_powerstate(child);
2006	if (oldstate == state)
2007		return (0);
2008
2009	/*
2010	 * The PCI power management specification states that after a state
2011	 * transition between PCI power states, system software must
2012	 * guarantee a minimal delay before the function accesses the device.
2013	 * Compute the worst case delay that we need to guarantee before we
2014	 * access the device.  Many devices will be responsive much more
2015	 * quickly than this delay, but there are some that don't respond
2016	 * instantly to state changes.  Transitions to/from D3 state require
2017	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2018	 * is done below with DELAY rather than a sleeper function because
2019	 * this function can be called from contexts where we cannot sleep.
2020	 */
2021	highest = (oldstate > state) ? oldstate : state;
2022	if (highest == PCI_POWERSTATE_D3)
2023	    delay = 10000;
2024	else if (highest == PCI_POWERSTATE_D2)
2025	    delay = 200;
2026	else
2027	    delay = 0;
2028	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2029	    & ~PCIM_PSTAT_DMASK;
2030	result = 0;
2031	switch (state) {
2032	case PCI_POWERSTATE_D0:
2033		status |= PCIM_PSTAT_D0;
2034		break;
2035	case PCI_POWERSTATE_D1:
2036		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2037			return (EOPNOTSUPP);
2038		status |= PCIM_PSTAT_D1;
2039		break;
2040	case PCI_POWERSTATE_D2:
2041		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2042			return (EOPNOTSUPP);
2043		status |= PCIM_PSTAT_D2;
2044		break;
2045	case PCI_POWERSTATE_D3:
2046		status |= PCIM_PSTAT_D3;
2047		break;
2048	default:
2049		return (EINVAL);
2050	}
2051
2052	if (bootverbose)
2053		printf(
2054		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2055		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2056		    dinfo->cfg.func, oldstate, state);
2057
2058	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2059	if (delay)
2060		DELAY(delay);
2061	return (0);
2062}
2063
2064int
2065pci_get_powerstate_method(device_t dev, device_t child)
2066{
2067	struct pci_devinfo *dinfo = device_get_ivars(child);
2068	pcicfgregs *cfg = &dinfo->cfg;
2069	uint16_t status;
2070	int result;
2071
2072	if (cfg->pp.pp_cap != 0) {
2073		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2074		switch (status & PCIM_PSTAT_DMASK) {
2075		case PCIM_PSTAT_D0:
2076			result = PCI_POWERSTATE_D0;
2077			break;
2078		case PCIM_PSTAT_D1:
2079			result = PCI_POWERSTATE_D1;
2080			break;
2081		case PCIM_PSTAT_D2:
2082			result = PCI_POWERSTATE_D2;
2083			break;
2084		case PCIM_PSTAT_D3:
2085			result = PCI_POWERSTATE_D3;
2086			break;
2087		default:
2088			result = PCI_POWERSTATE_UNKNOWN;
2089			break;
2090		}
2091	} else {
2092		/* No support, device is always at D0 */
2093		result = PCI_POWERSTATE_D0;
2094	}
2095	return (result);
2096}
2097
2098/*
2099 * Some convenience functions for PCI device drivers.
2100 */
2101
2102static __inline void
2103pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2104{
2105	uint16_t	command;
2106
2107	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2108	command |= bit;
2109	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2110}
2111
2112static __inline void
2113pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2114{
2115	uint16_t	command;
2116
2117	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2118	command &= ~bit;
2119	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2120}
2121
2122int
2123pci_enable_busmaster_method(device_t dev, device_t child)
2124{
2125	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2126	return (0);
2127}
2128
2129int
2130pci_disable_busmaster_method(device_t dev, device_t child)
2131{
2132	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2133	return (0);
2134}
2135
2136int
2137pci_enable_io_method(device_t dev, device_t child, int space)
2138{
2139	uint16_t command;
2140	uint16_t bit;
2141	char *error;
2142
2143	bit = 0;
2144	error = NULL;
2145
2146	switch(space) {
2147	case SYS_RES_IOPORT:
2148		bit = PCIM_CMD_PORTEN;
2149		error = "port";
2150		break;
2151	case SYS_RES_MEMORY:
2152		bit = PCIM_CMD_MEMEN;
2153		error = "memory";
2154		break;
2155	default:
2156		return (EINVAL);
2157	}
2158	pci_set_command_bit(dev, child, bit);
2159	/* Some devices seem to need a brief stall here, what do to? */
2160	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2161	if (command & bit)
2162		return (0);
2163	device_printf(child, "failed to enable %s mapping!\n", error);
2164	return (ENXIO);
2165}
2166
2167int
2168pci_disable_io_method(device_t dev, device_t child, int space)
2169{
2170	uint16_t command;
2171	uint16_t bit;
2172	char *error;
2173
2174	bit = 0;
2175	error = NULL;
2176
2177	switch(space) {
2178	case SYS_RES_IOPORT:
2179		bit = PCIM_CMD_PORTEN;
2180		error = "port";
2181		break;
2182	case SYS_RES_MEMORY:
2183		bit = PCIM_CMD_MEMEN;
2184		error = "memory";
2185		break;
2186	default:
2187		return (EINVAL);
2188	}
2189	pci_clear_command_bit(dev, child, bit);
2190	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2191	if (command & bit) {
2192		device_printf(child, "failed to disable %s mapping!\n", error);
2193		return (ENXIO);
2194	}
2195	return (0);
2196}
2197
2198/*
2199 * New style pci driver.  Parent device is either a pci-host-bridge or a
2200 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2201 */
2202
2203void
2204pci_print_verbose(struct pci_devinfo *dinfo)
2205{
2206
2207	if (bootverbose) {
2208		pcicfgregs *cfg = &dinfo->cfg;
2209
2210		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2211		    cfg->vendor, cfg->device, cfg->revid);
2212		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2213		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2214		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2215		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2216		    cfg->mfdev);
2217		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2218		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2219		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2220		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2221		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2222		if (cfg->intpin > 0)
2223			printf("\tintpin=%c, irq=%d\n",
2224			    cfg->intpin +'a' -1, cfg->intline);
2225		if (cfg->pp.pp_cap) {
2226			uint16_t status;
2227
2228			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2229			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2230			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2231			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2232			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2233			    status & PCIM_PSTAT_DMASK);
2234		}
2235		if (cfg->msi.msi_location) {
2236			int ctrl;
2237
2238			ctrl = cfg->msi.msi_ctrl;
2239			printf("\tMSI supports %d message%s%s%s\n",
2240			    cfg->msi.msi_msgnum,
2241			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2242			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2243			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2244		}
2245		if (cfg->msix.msix_location) {
2246			printf("\tMSI-X supports %d message%s ",
2247			    cfg->msix.msix_msgnum,
2248			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2249			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2250				printf("in map 0x%x\n",
2251				    cfg->msix.msix_table_bar);
2252			else
2253				printf("in maps 0x%x and 0x%x\n",
2254				    cfg->msix.msix_table_bar,
2255				    cfg->msix.msix_pba_bar);
2256		}
2257	}
2258}
2259
2260static int
2261pci_porten(device_t pcib, int b, int s, int f)
2262{
2263	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2264		& PCIM_CMD_PORTEN) != 0;
2265}
2266
2267static int
2268pci_memen(device_t pcib, int b, int s, int f)
2269{
2270	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2271		& PCIM_CMD_MEMEN) != 0;
2272}
2273
2274/*
2275 * Add a resource based on a pci map register. Return 1 if the map
2276 * register is a 32bit map register or 2 if it is a 64bit register.
2277 */
2278static int
2279pci_add_map(device_t pcib, device_t bus, device_t dev,
2280    int b, int s, int f, int reg, struct resource_list *rl, int force,
2281    int prefetch)
2282{
2283	pci_addr_t base, map;
2284	pci_addr_t start, end, count;
2285	uint8_t ln2size;
2286	uint8_t ln2range;
2287	uint32_t testval;
2288	uint16_t cmd;
2289	int type;
2290	int barlen;
2291	struct resource *res;
2292
2293	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2294	ln2range = pci_maprange(map);
2295	if (ln2range == 64)
2296		map |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) <<
2297		    32;
2298
2299	/*
2300	 * Disable decoding via the command register before
2301	 * determining the BAR's length since we will be placing it in
2302	 * a weird state.
2303	 */
2304	cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2305	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND,
2306	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2307
2308	/*
2309	 * Determine the BAR's length by writing all 1's.  The bottom
2310	 * log_2(size) bits of the BAR will stick as 0 when we read
2311	 * the value back.
2312	 */
2313	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2314	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2315	if (ln2range == 64) {
2316		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, 0xffffffff, 4);
2317		testval |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4,
2318		    4) << 32;
2319	}
2320
2321	/* Restore the BAR and command register. */
2322	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2323	if (ln2range == 64)
2324		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, map >> 32, 4);
2325	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2326
2327	if (PCI_BAR_MEM(map)) {
2328		type = SYS_RES_MEMORY;
2329		if (map & PCIM_BAR_MEM_PREFETCH)
2330			prefetch = 1;
2331	} else
2332		type = SYS_RES_IOPORT;
2333	ln2size = pci_mapsize(testval);
2334	base = pci_mapbase(map);
2335	barlen = ln2range == 64 ? 2 : 1;
2336
2337	/*
2338	 * For I/O registers, if bottom bit is set, and the next bit up
2339	 * isn't clear, we know we have a BAR that doesn't conform to the
2340	 * spec, so ignore it.  Also, sanity check the size of the data
2341	 * areas to the type of memory involved.  Memory must be at least
2342	 * 16 bytes in size, while I/O ranges must be at least 4.
2343	 */
2344	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2345		return (barlen);
2346	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2347	    (type == SYS_RES_IOPORT && ln2size < 2))
2348		return (barlen);
2349
2350	if (bootverbose) {
2351		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2352		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2353		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2354			printf(", port disabled\n");
2355		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2356			printf(", memory disabled\n");
2357		else
2358			printf(", enabled\n");
2359	}
2360
2361	/*
2362	 * If base is 0, then we have problems.  It is best to ignore
2363	 * such entries for the moment.  These will be allocated later if
2364	 * the driver specifically requests them.  However, some
2365	 * removable busses look better when all resources are allocated,
2366	 * so allow '0' to be overriden.
2367	 *
2368	 * Similarly treat maps whose values is the same as the test value
2369	 * read back.  These maps have had all f's written to them by the
2370	 * BIOS in an attempt to disable the resources.
2371	 */
2372	if (!force && (base == 0 || map == testval))
2373		return (barlen);
2374	if ((u_long)base != base) {
2375		device_printf(bus,
2376		    "pci%d:%d:%d:%d bar %#x too many address bits",
2377		    pci_get_domain(dev), b, s, f, reg);
2378		return (barlen);
2379	}
2380
2381	/*
2382	 * This code theoretically does the right thing, but has
2383	 * undesirable side effects in some cases where peripherals
2384	 * respond oddly to having these bits enabled.  Let the user
2385	 * be able to turn them off (since pci_enable_io_modes is 1 by
2386	 * default).
2387	 */
2388	if (pci_enable_io_modes) {
2389		/* Turn on resources that have been left off by a lazy BIOS */
2390		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2391			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2392			cmd |= PCIM_CMD_PORTEN;
2393			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2394		}
2395		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2396			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2397			cmd |= PCIM_CMD_MEMEN;
2398			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2399		}
2400	} else {
2401		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2402			return (barlen);
2403		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2404			return (barlen);
2405	}
2406
2407	count = 1 << ln2size;
2408	if (base == 0 || base == pci_mapbase(testval)) {
2409		start = 0;	/* Let the parent decide. */
2410		end = ~0ULL;
2411	} else {
2412		start = base;
2413		end = base + (1 << ln2size) - 1;
2414	}
2415	resource_list_add(rl, type, reg, start, end, count);
2416
2417	/*
2418	 * Try to allocate the resource for this BAR from our parent
2419	 * so that this resource range is already reserved.  The
2420	 * driver for this device will later inherit this resource in
2421	 * pci_alloc_resource().
2422	 */
2423	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2424	    prefetch ? RF_PREFETCHABLE : 0);
2425	if (res == NULL) {
2426		/*
2427		 * If the allocation fails, clear the BAR and delete
2428		 * the resource list entry to force
2429		 * pci_alloc_resource() to allocate resources from the
2430		 * parent.
2431		 */
2432		resource_list_delete(rl, type, reg);
2433		start = 0;
2434	} else {
2435		start = rman_get_start(res);
2436		rman_set_device(res, bus);
2437	}
2438	pci_write_config(dev, reg, start, 4);
2439	if (ln2range == 64)
2440		pci_write_config(dev, reg + 4, start >> 32, 4);
2441	return (barlen);
2442}
2443
2444/*
2445 * For ATA devices we need to decide early what addressing mode to use.
2446 * Legacy demands that the primary and secondary ATA ports sits on the
2447 * same addresses that old ISA hardware did. This dictates that we use
2448 * those addresses and ignore the BAR's if we cannot set PCI native
2449 * addressing mode.
2450 */
2451static void
2452pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2453    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2454{
2455	struct resource *r;
2456	int rid, type, progif;
2457#if 0
2458	/* if this device supports PCI native addressing use it */
2459	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2460	if ((progif & 0x8a) == 0x8a) {
2461		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2462		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2463			printf("Trying ATA native PCI addressing mode\n");
2464			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2465		}
2466	}
2467#endif
2468	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2469	type = SYS_RES_IOPORT;
2470	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2471		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2472		    prefetchmask & (1 << 0));
2473		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2474		    prefetchmask & (1 << 1));
2475	} else {
2476		rid = PCIR_BAR(0);
2477		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2478		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2479		    8, 0);
2480		rman_set_device(r, bus);
2481		rid = PCIR_BAR(1);
2482		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2483		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2484		    1, 0);
2485		rman_set_device(r, bus);
2486	}
2487	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2488		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2489		    prefetchmask & (1 << 2));
2490		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2491		    prefetchmask & (1 << 3));
2492	} else {
2493		rid = PCIR_BAR(2);
2494		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2495		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2496		    8, 0);
2497		rman_set_device(r, bus);
2498		rid = PCIR_BAR(3);
2499		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2500		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2501		    1, 0);
2502		rman_set_device(r, bus);
2503	}
2504	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2505	    prefetchmask & (1 << 4));
2506	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2507	    prefetchmask & (1 << 5));
2508}
2509
2510static void
2511pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2512{
2513	struct pci_devinfo *dinfo = device_get_ivars(dev);
2514	pcicfgregs *cfg = &dinfo->cfg;
2515	char tunable_name[64];
2516	int irq;
2517
2518	/* Has to have an intpin to have an interrupt. */
2519	if (cfg->intpin == 0)
2520		return;
2521
2522	/* Let the user override the IRQ with a tunable. */
2523	irq = PCI_INVALID_IRQ;
2524	snprintf(tunable_name, sizeof(tunable_name),
2525	    "hw.pci%d.%d.%d.INT%c.irq",
2526	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2527	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2528		irq = PCI_INVALID_IRQ;
2529
2530	/*
2531	 * If we didn't get an IRQ via the tunable, then we either use the
2532	 * IRQ value in the intline register or we ask the bus to route an
2533	 * interrupt for us.  If force_route is true, then we only use the
2534	 * value in the intline register if the bus was unable to assign an
2535	 * IRQ.
2536	 */
2537	if (!PCI_INTERRUPT_VALID(irq)) {
2538		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2539			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2540		if (!PCI_INTERRUPT_VALID(irq))
2541			irq = cfg->intline;
2542	}
2543
2544	/* If after all that we don't have an IRQ, just bail. */
2545	if (!PCI_INTERRUPT_VALID(irq))
2546		return;
2547
2548	/* Update the config register if it changed. */
2549	if (irq != cfg->intline) {
2550		cfg->intline = irq;
2551		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2552	}
2553
2554	/* Add this IRQ as rid 0 interrupt resource. */
2555	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2556}
2557
2558void
2559pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2560{
2561	device_t pcib;
2562	struct pci_devinfo *dinfo = device_get_ivars(dev);
2563	pcicfgregs *cfg = &dinfo->cfg;
2564	struct resource_list *rl = &dinfo->resources;
2565	struct pci_quirk *q;
2566	int b, i, f, s;
2567
2568	pcib = device_get_parent(bus);
2569
2570	b = cfg->bus;
2571	s = cfg->slot;
2572	f = cfg->func;
2573
2574	/* ATA devices needs special map treatment */
2575	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2576	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2577	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2578	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2579	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2580		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2581	else
2582		for (i = 0; i < cfg->nummaps;)
2583			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2584			    rl, force, prefetchmask & (1 << i));
2585
2586	/*
2587	 * Add additional, quirked resources.
2588	 */
2589	for (q = &pci_quirks[0]; q->devid; q++) {
2590		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2591		    && q->type == PCI_QUIRK_MAP_REG)
2592			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2593			  force, 0);
2594	}
2595
2596	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2597#ifdef __PCI_REROUTE_INTERRUPT
2598		/*
2599		 * Try to re-route interrupts. Sometimes the BIOS or
2600		 * firmware may leave bogus values in these registers.
2601		 * If the re-route fails, then just stick with what we
2602		 * have.
2603		 */
2604		pci_assign_interrupt(bus, dev, 1);
2605#else
2606		pci_assign_interrupt(bus, dev, 0);
2607#endif
2608	}
2609}
2610
2611void
2612pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2613{
2614#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2615	device_t pcib = device_get_parent(dev);
2616	struct pci_devinfo *dinfo;
2617	int maxslots;
2618	int s, f, pcifunchigh;
2619	uint8_t hdrtype;
2620
2621	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2622	    ("dinfo_size too small"));
2623	maxslots = PCIB_MAXSLOTS(pcib);
2624	for (s = 0; s <= maxslots; s++) {
2625		pcifunchigh = 0;
2626		f = 0;
2627		DELAY(1);
2628		hdrtype = REG(PCIR_HDRTYPE, 1);
2629		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2630			continue;
2631		if (hdrtype & PCIM_MFDEV)
2632			pcifunchigh = PCI_FUNCMAX;
2633		for (f = 0; f <= pcifunchigh; f++) {
2634			dinfo = pci_read_device(pcib, domain, busno, s, f,
2635			    dinfo_size);
2636			if (dinfo != NULL) {
2637				pci_add_child(dev, dinfo);
2638			}
2639		}
2640	}
2641#undef REG
2642}
2643
2644void
2645pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2646{
2647	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2648	device_set_ivars(dinfo->cfg.dev, dinfo);
2649	resource_list_init(&dinfo->resources);
2650	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2651	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2652	pci_print_verbose(dinfo);
2653	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2654}
2655
2656static int
2657pci_probe(device_t dev)
2658{
2659
2660	device_set_desc(dev, "PCI bus");
2661
2662	/* Allow other subclasses to override this driver. */
2663	return (BUS_PROBE_GENERIC);
2664}
2665
2666static int
2667pci_attach(device_t dev)
2668{
2669	int busno, domain;
2670
2671	/*
2672	 * Since there can be multiple independantly numbered PCI
2673	 * busses on systems with multiple PCI domains, we can't use
2674	 * the unit number to decide which bus we are probing. We ask
2675	 * the parent pcib what our domain and bus numbers are.
2676	 */
2677	domain = pcib_get_domain(dev);
2678	busno = pcib_get_bus(dev);
2679	if (bootverbose)
2680		device_printf(dev, "domain=%d, physical bus=%d\n",
2681		    domain, busno);
2682	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2683	return (bus_generic_attach(dev));
2684}
2685
2686int
2687pci_suspend(device_t dev)
2688{
2689	int dstate, error, i, numdevs;
2690	device_t acpi_dev, child, *devlist;
2691	struct pci_devinfo *dinfo;
2692
2693	/*
2694	 * Save the PCI configuration space for each child and set the
2695	 * device in the appropriate power state for this sleep state.
2696	 */
2697	acpi_dev = NULL;
2698	if (pci_do_power_resume)
2699		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2700	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2701		return (error);
2702	for (i = 0; i < numdevs; i++) {
2703		child = devlist[i];
2704		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2705		pci_cfg_save(child, dinfo, 0);
2706	}
2707
2708	/* Suspend devices before potentially powering them down. */
2709	error = bus_generic_suspend(dev);
2710	if (error) {
2711		free(devlist, M_TEMP);
2712		return (error);
2713	}
2714
2715	/*
2716	 * Always set the device to D3.  If ACPI suggests a different
2717	 * power state, use it instead.  If ACPI is not present, the
2718	 * firmware is responsible for managing device power.  Skip
2719	 * children who aren't attached since they are powered down
2720	 * separately.  Only manage type 0 devices for now.
2721	 */
2722	for (i = 0; acpi_dev && i < numdevs; i++) {
2723		child = devlist[i];
2724		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2725		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2726			dstate = PCI_POWERSTATE_D3;
2727			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2728			pci_set_powerstate(child, dstate);
2729		}
2730	}
2731	free(devlist, M_TEMP);
2732	return (0);
2733}
2734
2735int
2736pci_resume(device_t dev)
2737{
2738	int i, numdevs, error;
2739	device_t acpi_dev, child, *devlist;
2740	struct pci_devinfo *dinfo;
2741
2742	/*
2743	 * Set each child to D0 and restore its PCI configuration space.
2744	 */
2745	acpi_dev = NULL;
2746	if (pci_do_power_resume)
2747		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2748	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2749		return (error);
2750	for (i = 0; i < numdevs; i++) {
2751		/*
2752		 * Notify ACPI we're going to D0 but ignore the result.  If
2753		 * ACPI is not present, the firmware is responsible for
2754		 * managing device power.  Only manage type 0 devices for now.
2755		 */
2756		child = devlist[i];
2757		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2758		if (acpi_dev && device_is_attached(child) &&
2759		    dinfo->cfg.hdrtype == 0) {
2760			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2761			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2762		}
2763
2764		/* Now the device is powered up, restore its config space. */
2765		pci_cfg_restore(child, dinfo);
2766	}
2767	free(devlist, M_TEMP);
2768	return (bus_generic_resume(dev));
2769}
2770
2771static void
2772pci_load_vendor_data(void)
2773{
2774	caddr_t vendordata, info;
2775
2776	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2777		info = preload_search_info(vendordata, MODINFO_ADDR);
2778		pci_vendordata = *(char **)info;
2779		info = preload_search_info(vendordata, MODINFO_SIZE);
2780		pci_vendordata_size = *(size_t *)info;
2781		/* terminate the database */
2782		pci_vendordata[pci_vendordata_size] = '\n';
2783	}
2784}
2785
2786void
2787pci_driver_added(device_t dev, driver_t *driver)
2788{
2789	int numdevs;
2790	device_t *devlist;
2791	device_t child;
2792	struct pci_devinfo *dinfo;
2793	int i;
2794
2795	if (bootverbose)
2796		device_printf(dev, "driver added\n");
2797	DEVICE_IDENTIFY(driver, dev);
2798	if (device_get_children(dev, &devlist, &numdevs) != 0)
2799		return;
2800	for (i = 0; i < numdevs; i++) {
2801		child = devlist[i];
2802		if (device_get_state(child) != DS_NOTPRESENT)
2803			continue;
2804		dinfo = device_get_ivars(child);
2805		pci_print_verbose(dinfo);
2806		if (bootverbose)
2807			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
2808			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2809			    dinfo->cfg.func);
2810		pci_cfg_restore(child, dinfo);
2811		if (device_probe_and_attach(child) != 0)
2812			pci_cfg_save(child, dinfo, 1);
2813	}
2814	free(devlist, M_TEMP);
2815}
2816
2817int
2818pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
2819    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
2820{
2821	struct pci_devinfo *dinfo;
2822	struct msix_table_entry *mte;
2823	struct msix_vector *mv;
2824	uint64_t addr;
2825	uint32_t data;
2826	void *cookie;
2827	int error, rid;
2828
2829	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
2830	    arg, &cookie);
2831	if (error)
2832		return (error);
2833
2834	/* If this is not a direct child, just bail out. */
2835	if (device_get_parent(child) != dev) {
2836		*cookiep = cookie;
2837		return(0);
2838	}
2839
2840	rid = rman_get_rid(irq);
2841	if (rid == 0) {
2842		/* Make sure that INTx is enabled */
2843		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
2844	} else {
2845		/*
2846		 * Check to see if the interrupt is MSI or MSI-X.
2847		 * Ask our parent to map the MSI and give
2848		 * us the address and data register values.
2849		 * If we fail for some reason, teardown the
2850		 * interrupt handler.
2851		 */
2852		dinfo = device_get_ivars(child);
2853		if (dinfo->cfg.msi.msi_alloc > 0) {
2854			if (dinfo->cfg.msi.msi_addr == 0) {
2855				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
2856			    ("MSI has handlers, but vectors not mapped"));
2857				error = PCIB_MAP_MSI(device_get_parent(dev),
2858				    child, rman_get_start(irq), &addr, &data);
2859				if (error)
2860					goto bad;
2861				dinfo->cfg.msi.msi_addr = addr;
2862				dinfo->cfg.msi.msi_data = data;
2863				pci_enable_msi(child, addr, data);
2864			}
2865			dinfo->cfg.msi.msi_handlers++;
2866		} else {
2867			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2868			    ("No MSI or MSI-X interrupts allocated"));
2869			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2870			    ("MSI-X index too high"));
2871			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2872			KASSERT(mte->mte_vector != 0, ("no message vector"));
2873			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
2874			KASSERT(mv->mv_irq == rman_get_start(irq),
2875			    ("IRQ mismatch"));
2876			if (mv->mv_address == 0) {
2877				KASSERT(mte->mte_handlers == 0,
2878		    ("MSI-X table entry has handlers, but vector not mapped"));
2879				error = PCIB_MAP_MSI(device_get_parent(dev),
2880				    child, rman_get_start(irq), &addr, &data);
2881				if (error)
2882					goto bad;
2883				mv->mv_address = addr;
2884				mv->mv_data = data;
2885			}
2886			if (mte->mte_handlers == 0) {
2887				pci_enable_msix(child, rid - 1, mv->mv_address,
2888				    mv->mv_data);
2889				pci_unmask_msix(child, rid - 1);
2890			}
2891			mte->mte_handlers++;
2892		}
2893
2894		/* Make sure that INTx is disabled if we are using MSI/MSIX */
2895		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2896	bad:
2897		if (error) {
2898			(void)bus_generic_teardown_intr(dev, child, irq,
2899			    cookie);
2900			return (error);
2901		}
2902	}
2903	*cookiep = cookie;
2904	return (0);
2905}
2906
2907int
2908pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2909    void *cookie)
2910{
2911	struct msix_table_entry *mte;
2912	struct resource_list_entry *rle;
2913	struct pci_devinfo *dinfo;
2914	int error, rid;
2915
2916	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2917		return (EINVAL);
2918
2919	/* If this isn't a direct child, just bail out */
2920	if (device_get_parent(child) != dev)
2921		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2922
2923	rid = rman_get_rid(irq);
2924	if (rid == 0) {
2925		/* Mask INTx */
2926		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2927	} else {
2928		/*
2929		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2930		 * decrement the appropriate handlers count and mask the
2931		 * MSI-X message, or disable MSI messages if the count
2932		 * drops to 0.
2933		 */
2934		dinfo = device_get_ivars(child);
2935		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2936		if (rle->res != irq)
2937			return (EINVAL);
2938		if (dinfo->cfg.msi.msi_alloc > 0) {
2939			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2940			    ("MSI-X index too high"));
2941			if (dinfo->cfg.msi.msi_handlers == 0)
2942				return (EINVAL);
2943			dinfo->cfg.msi.msi_handlers--;
2944			if (dinfo->cfg.msi.msi_handlers == 0)
2945				pci_disable_msi(child);
2946		} else {
2947			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2948			    ("No MSI or MSI-X interrupts allocated"));
2949			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2950			    ("MSI-X index too high"));
2951			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2952			if (mte->mte_handlers == 0)
2953				return (EINVAL);
2954			mte->mte_handlers--;
2955			if (mte->mte_handlers == 0)
2956				pci_mask_msix(child, rid - 1);
2957		}
2958	}
2959	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2960	if (rid > 0)
2961		KASSERT(error == 0,
2962		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2963	return (error);
2964}
2965
2966int
2967pci_print_child(device_t dev, device_t child)
2968{
2969	struct pci_devinfo *dinfo;
2970	struct resource_list *rl;
2971	int retval = 0;
2972
2973	dinfo = device_get_ivars(child);
2974	rl = &dinfo->resources;
2975
2976	retval += bus_print_child_header(dev, child);
2977
2978	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2979	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2980	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2981	if (device_get_flags(dev))
2982		retval += printf(" flags %#x", device_get_flags(dev));
2983
2984	retval += printf(" at device %d.%d", pci_get_slot(child),
2985	    pci_get_function(child));
2986
2987	retval += bus_print_child_footer(dev, child);
2988
2989	return (retval);
2990}
2991
2992static struct
2993{
2994	int	class;
2995	int	subclass;
2996	char	*desc;
2997} pci_nomatch_tab[] = {
2998	{PCIC_OLD,		-1,			"old"},
2999	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3000	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3001	{PCIC_STORAGE,		-1,			"mass storage"},
3002	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3003	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3004	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3005	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3006	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3007	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3008	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3009	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3010	{PCIC_NETWORK,		-1,			"network"},
3011	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3012	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3013	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3014	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3015	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3016	{PCIC_DISPLAY,		-1,			"display"},
3017	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3018	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3019	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3020	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3021	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3022	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3023	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3024	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3025	{PCIC_MEMORY,		-1,			"memory"},
3026	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3027	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3028	{PCIC_BRIDGE,		-1,			"bridge"},
3029	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3030	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3031	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3032	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3033	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3034	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3035	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3036	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3037	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3038	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3039	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3040	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3041	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3042	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3043	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3044	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3045	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3046	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3047	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3048	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3049	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3050	{PCIC_INPUTDEV,		-1,			"input device"},
3051	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3052	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3053	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3054	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3055	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3056	{PCIC_DOCKING,		-1,			"docking station"},
3057	{PCIC_PROCESSOR,	-1,			"processor"},
3058	{PCIC_SERIALBUS,	-1,			"serial bus"},
3059	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3060	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3061	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3062	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3063	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3064	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3065	{PCIC_WIRELESS,		-1,			"wireless controller"},
3066	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3067	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3068	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3069	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3070	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3071	{PCIC_SATCOM,		-1,			"satellite communication"},
3072	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3073	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3074	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3075	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3076	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3077	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3078	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3079	{PCIC_DASP,		-1,			"dasp"},
3080	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3081	{0, 0,		NULL}
3082};
3083
3084void
3085pci_probe_nomatch(device_t dev, device_t child)
3086{
3087	int	i;
3088	char	*cp, *scp, *device;
3089
3090	/*
3091	 * Look for a listing for this device in a loaded device database.
3092	 */
3093	if ((device = pci_describe_device(child)) != NULL) {
3094		device_printf(dev, "<%s>", device);
3095		free(device, M_DEVBUF);
3096	} else {
3097		/*
3098		 * Scan the class/subclass descriptions for a general
3099		 * description.
3100		 */
3101		cp = "unknown";
3102		scp = NULL;
3103		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3104			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3105				if (pci_nomatch_tab[i].subclass == -1) {
3106					cp = pci_nomatch_tab[i].desc;
3107				} else if (pci_nomatch_tab[i].subclass ==
3108				    pci_get_subclass(child)) {
3109					scp = pci_nomatch_tab[i].desc;
3110				}
3111			}
3112		}
3113		device_printf(dev, "<%s%s%s>",
3114		    cp ? cp : "",
3115		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3116		    scp ? scp : "");
3117	}
3118	printf(" at device %d.%d (no driver attached)\n",
3119	    pci_get_slot(child), pci_get_function(child));
3120	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3121	return;
3122}
3123
3124/*
3125 * Parse the PCI device database, if loaded, and return a pointer to a
3126 * description of the device.
3127 *
3128 * The database is flat text formatted as follows:
3129 *
3130 * Any line not in a valid format is ignored.
3131 * Lines are terminated with newline '\n' characters.
3132 *
3133 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3134 * the vendor name.
3135 *
3136 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3137 * - devices cannot be listed without a corresponding VENDOR line.
3138 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3139 * another TAB, then the device name.
3140 */
3141
3142/*
3143 * Assuming (ptr) points to the beginning of a line in the database,
3144 * return the vendor or device and description of the next entry.
3145 * The value of (vendor) or (device) inappropriate for the entry type
3146 * is set to -1.  Returns nonzero at the end of the database.
3147 *
3148 * Note that this is slightly unrobust in the face of corrupt data;
3149 * we attempt to safeguard against this by spamming the end of the
3150 * database with a newline when we initialise.
3151 */
3152static int
3153pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3154{
3155	char	*cp = *ptr;
3156	int	left;
3157
3158	*device = -1;
3159	*vendor = -1;
3160	**desc = '\0';
3161	for (;;) {
3162		left = pci_vendordata_size - (cp - pci_vendordata);
3163		if (left <= 0) {
3164			*ptr = cp;
3165			return(1);
3166		}
3167
3168		/* vendor entry? */
3169		if (*cp != '\t' &&
3170		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3171			break;
3172		/* device entry? */
3173		if (*cp == '\t' &&
3174		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3175			break;
3176
3177		/* skip to next line */
3178		while (*cp != '\n' && left > 0) {
3179			cp++;
3180			left--;
3181		}
3182		if (*cp == '\n') {
3183			cp++;
3184			left--;
3185		}
3186	}
3187	/* skip to next line */
3188	while (*cp != '\n' && left > 0) {
3189		cp++;
3190		left--;
3191	}
3192	if (*cp == '\n' && left > 0)
3193		cp++;
3194	*ptr = cp;
3195	return(0);
3196}
3197
3198static char *
3199pci_describe_device(device_t dev)
3200{
3201	int	vendor, device;
3202	char	*desc, *vp, *dp, *line;
3203
3204	desc = vp = dp = NULL;
3205
3206	/*
3207	 * If we have no vendor data, we can't do anything.
3208	 */
3209	if (pci_vendordata == NULL)
3210		goto out;
3211
3212	/*
3213	 * Scan the vendor data looking for this device
3214	 */
3215	line = pci_vendordata;
3216	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3217		goto out;
3218	for (;;) {
3219		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3220			goto out;
3221		if (vendor == pci_get_vendor(dev))
3222			break;
3223	}
3224	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3225		goto out;
3226	for (;;) {
3227		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3228			*dp = 0;
3229			break;
3230		}
3231		if (vendor != -1) {
3232			*dp = 0;
3233			break;
3234		}
3235		if (device == pci_get_device(dev))
3236			break;
3237	}
3238	if (dp[0] == '\0')
3239		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3240	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3241	    NULL)
3242		sprintf(desc, "%s, %s", vp, dp);
3243 out:
3244	if (vp != NULL)
3245		free(vp, M_DEVBUF);
3246	if (dp != NULL)
3247		free(dp, M_DEVBUF);
3248	return(desc);
3249}
3250
3251int
3252pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3253{
3254	struct pci_devinfo *dinfo;
3255	pcicfgregs *cfg;
3256
3257	dinfo = device_get_ivars(child);
3258	cfg = &dinfo->cfg;
3259
3260	switch (which) {
3261	case PCI_IVAR_ETHADDR:
3262		/*
3263		 * The generic accessor doesn't deal with failure, so
3264		 * we set the return value, then return an error.
3265		 */
3266		*((uint8_t **) result) = NULL;
3267		return (EINVAL);
3268	case PCI_IVAR_SUBVENDOR:
3269		*result = cfg->subvendor;
3270		break;
3271	case PCI_IVAR_SUBDEVICE:
3272		*result = cfg->subdevice;
3273		break;
3274	case PCI_IVAR_VENDOR:
3275		*result = cfg->vendor;
3276		break;
3277	case PCI_IVAR_DEVICE:
3278		*result = cfg->device;
3279		break;
3280	case PCI_IVAR_DEVID:
3281		*result = (cfg->device << 16) | cfg->vendor;
3282		break;
3283	case PCI_IVAR_CLASS:
3284		*result = cfg->baseclass;
3285		break;
3286	case PCI_IVAR_SUBCLASS:
3287		*result = cfg->subclass;
3288		break;
3289	case PCI_IVAR_PROGIF:
3290		*result = cfg->progif;
3291		break;
3292	case PCI_IVAR_REVID:
3293		*result = cfg->revid;
3294		break;
3295	case PCI_IVAR_INTPIN:
3296		*result = cfg->intpin;
3297		break;
3298	case PCI_IVAR_IRQ:
3299		*result = cfg->intline;
3300		break;
3301	case PCI_IVAR_DOMAIN:
3302		*result = cfg->domain;
3303		break;
3304	case PCI_IVAR_BUS:
3305		*result = cfg->bus;
3306		break;
3307	case PCI_IVAR_SLOT:
3308		*result = cfg->slot;
3309		break;
3310	case PCI_IVAR_FUNCTION:
3311		*result = cfg->func;
3312		break;
3313	case PCI_IVAR_CMDREG:
3314		*result = cfg->cmdreg;
3315		break;
3316	case PCI_IVAR_CACHELNSZ:
3317		*result = cfg->cachelnsz;
3318		break;
3319	case PCI_IVAR_MINGNT:
3320		*result = cfg->mingnt;
3321		break;
3322	case PCI_IVAR_MAXLAT:
3323		*result = cfg->maxlat;
3324		break;
3325	case PCI_IVAR_LATTIMER:
3326		*result = cfg->lattimer;
3327		break;
3328	default:
3329		return (ENOENT);
3330	}
3331	return (0);
3332}
3333
3334int
3335pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3336{
3337	struct pci_devinfo *dinfo;
3338
3339	dinfo = device_get_ivars(child);
3340
3341	switch (which) {
3342	case PCI_IVAR_INTPIN:
3343		dinfo->cfg.intpin = value;
3344		return (0);
3345	case PCI_IVAR_ETHADDR:
3346	case PCI_IVAR_SUBVENDOR:
3347	case PCI_IVAR_SUBDEVICE:
3348	case PCI_IVAR_VENDOR:
3349	case PCI_IVAR_DEVICE:
3350	case PCI_IVAR_DEVID:
3351	case PCI_IVAR_CLASS:
3352	case PCI_IVAR_SUBCLASS:
3353	case PCI_IVAR_PROGIF:
3354	case PCI_IVAR_REVID:
3355	case PCI_IVAR_IRQ:
3356	case PCI_IVAR_DOMAIN:
3357	case PCI_IVAR_BUS:
3358	case PCI_IVAR_SLOT:
3359	case PCI_IVAR_FUNCTION:
3360		return (EINVAL);	/* disallow for now */
3361
3362	default:
3363		return (ENOENT);
3364	}
3365}
3366
3367
3368#include "opt_ddb.h"
3369#ifdef DDB
3370#include <ddb/ddb.h>
3371#include <sys/cons.h>
3372
3373/*
3374 * List resources based on pci map registers, used for within ddb
3375 */
3376
3377DB_SHOW_COMMAND(pciregs, db_pci_dump)
3378{
3379	struct pci_devinfo *dinfo;
3380	struct devlist *devlist_head;
3381	struct pci_conf *p;
3382	const char *name;
3383	int i, error, none_count;
3384
3385	none_count = 0;
3386	/* get the head of the device queue */
3387	devlist_head = &pci_devq;
3388
3389	/*
3390	 * Go through the list of devices and print out devices
3391	 */
3392	for (error = 0, i = 0,
3393	     dinfo = STAILQ_FIRST(devlist_head);
3394	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3395	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3396
3397		/* Populate pd_name and pd_unit */
3398		name = NULL;
3399		if (dinfo->cfg.dev)
3400			name = device_get_name(dinfo->cfg.dev);
3401
3402		p = &dinfo->conf;
3403		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3404			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3405			(name && *name) ? name : "none",
3406			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3407			none_count++,
3408			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3409			p->pc_sel.pc_func, (p->pc_class << 16) |
3410			(p->pc_subclass << 8) | p->pc_progif,
3411			(p->pc_subdevice << 16) | p->pc_subvendor,
3412			(p->pc_device << 16) | p->pc_vendor,
3413			p->pc_revid, p->pc_hdr);
3414	}
3415}
3416#endif /* DDB */
3417
3418static struct resource *
3419pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3420    u_long start, u_long end, u_long count, u_int flags)
3421{
3422	struct pci_devinfo *dinfo = device_get_ivars(child);
3423	struct resource_list *rl = &dinfo->resources;
3424	struct resource_list_entry *rle;
3425	struct resource *res;
3426	pci_addr_t map, testval;
3427	uint16_t cmd;
3428	int maprange, mapsize;
3429
3430	/*
3431	 * Weed out the bogons, and figure out how large the BAR/map
3432	 * is.  Bars that read back 0 here are bogus and unimplemented.
3433	 * Note: atapci in legacy mode are special and handled elsewhere
3434	 * in the code.  If you have a atapci device in legacy mode and
3435	 * it fails here, that other code is broken.
3436	 */
3437	res = NULL;
3438	map = pci_read_config(child, *rid, 4);
3439	maprange = pci_maprange(map);
3440	if (maprange == 64)
3441		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3442
3443	/*
3444	 * Disable decoding via the command register before
3445	 * determining the BAR's length since we will be placing it in
3446	 * a weird state.
3447	 */
3448	cmd = pci_read_config(child, PCIR_COMMAND, 2);
3449	pci_write_config(child, PCIR_COMMAND,
3450	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
3451
3452	/* Determine the BAR's length. */
3453	pci_write_config(child, *rid, 0xffffffff, 4);
3454	testval = pci_read_config(child, *rid, 4);
3455	if (maprange == 64) {
3456		pci_write_config(child, *rid + 4, 0xffffffff, 4);
3457		testval |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) <<
3458		    32;
3459	}
3460
3461	/*
3462	 * Restore the original value of the BAR.  We may have reprogrammed
3463	 * the BAR of the low-level console device and when booting verbose,
3464	 * we need the console device addressable.
3465	 */
3466	pci_write_config(child, *rid, map, 4);
3467	if (maprange == 64)
3468		pci_write_config(child, *rid + 4, map >> 32, 4);
3469	pci_write_config(child, PCIR_COMMAND, cmd, 2);
3470
3471	/* Ignore a BAR with a base of 0. */
3472	if (pci_mapbase(testval) == 0)
3473		goto out;
3474
3475	if (PCI_BAR_MEM(testval)) {
3476		if (type != SYS_RES_MEMORY) {
3477			if (bootverbose)
3478				device_printf(dev,
3479				    "child %s requested type %d for rid %#x,"
3480				    " but the BAR says it is an memio\n",
3481				    device_get_nameunit(child), type, *rid);
3482			goto out;
3483		}
3484	} else {
3485		if (type != SYS_RES_IOPORT) {
3486			if (bootverbose)
3487				device_printf(dev,
3488				    "child %s requested type %d for rid %#x,"
3489				    " but the BAR says it is an ioport\n",
3490				    device_get_nameunit(child), type, *rid);
3491			goto out;
3492		}
3493	}
3494	/*
3495	 * For real BARs, we need to override the size that
3496	 * the driver requests, because that's what the BAR
3497	 * actually uses and we would otherwise have a
3498	 * situation where we might allocate the excess to
3499	 * another driver, which won't work.
3500	 */
3501	mapsize = pci_mapsize(testval);
3502	count = 1UL << mapsize;
3503	if (RF_ALIGNMENT(flags) < mapsize)
3504		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3505	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3506		flags |= RF_PREFETCHABLE;
3507
3508	/*
3509	 * Allocate enough resource, and then write back the
3510	 * appropriate bar for that resource.
3511	 */
3512	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3513	    start, end, count, flags & ~RF_ACTIVE);
3514	if (res == NULL) {
3515		device_printf(child,
3516		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3517		    count, *rid, type, start, end);
3518		goto out;
3519	}
3520	rman_set_device(res, dev);
3521	resource_list_add(rl, type, *rid, start, end, count);
3522	rle = resource_list_find(rl, type, *rid);
3523	if (rle == NULL)
3524		panic("pci_alloc_map: unexpectedly can't find resource.");
3525	rle->res = res;
3526	rle->start = rman_get_start(res);
3527	rle->end = rman_get_end(res);
3528	rle->count = count;
3529	if (bootverbose)
3530		device_printf(child,
3531		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3532		    count, *rid, type, rman_get_start(res));
3533	map = rman_get_start(res);
3534	pci_write_config(child, *rid, map, 4);
3535	if (maprange == 64)
3536		pci_write_config(child, *rid + 4, map >> 32, 4);
3537out:;
3538	return (res);
3539}
3540
3541
3542struct resource *
3543pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3544		   u_long start, u_long end, u_long count, u_int flags)
3545{
3546	struct pci_devinfo *dinfo = device_get_ivars(child);
3547	struct resource_list *rl = &dinfo->resources;
3548	struct resource_list_entry *rle;
3549	struct resource *res;
3550	pcicfgregs *cfg = &dinfo->cfg;
3551
3552	if (device_get_parent(child) != dev)
3553		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3554		    type, rid, start, end, count, flags));
3555
3556	/*
3557	 * Perform lazy resource allocation
3558	 */
3559	switch (type) {
3560	case SYS_RES_IRQ:
3561		/*
3562		 * Can't alloc legacy interrupt once MSI messages have
3563		 * been allocated.
3564		 */
3565		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3566		    cfg->msix.msix_alloc > 0))
3567			return (NULL);
3568
3569		/*
3570		 * If the child device doesn't have an interrupt
3571		 * routed and is deserving of an interrupt, try to
3572		 * assign it one.
3573		 */
3574		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3575		    (cfg->intpin != 0))
3576			pci_assign_interrupt(dev, child, 0);
3577		break;
3578	case SYS_RES_IOPORT:
3579	case SYS_RES_MEMORY:
3580		/* Allocate resources for this BAR if needed. */
3581		rle = resource_list_find(rl, type, *rid);
3582		if (rle == NULL) {
3583			res = pci_alloc_map(dev, child, type, rid, start, end,
3584			    count, flags);
3585			if (res == NULL)
3586				return (NULL);
3587			rle = resource_list_find(rl, type, *rid);
3588		}
3589
3590		/*
3591		 * If the resource belongs to the bus, then give it to
3592		 * the child.  We need to activate it if requested
3593		 * since the bus always allocates inactive resources.
3594		 */
3595		if (rle != NULL && rle->res != NULL &&
3596		    rman_get_device(rle->res) == dev) {
3597			if (bootverbose)
3598				device_printf(child,
3599			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3600				    rman_get_size(rle->res), *rid, type,
3601				    rman_get_start(rle->res));
3602			rman_set_device(rle->res, child);
3603			if ((flags & RF_ACTIVE) &&
3604			    bus_activate_resource(child, type, *rid,
3605			    rle->res) != 0)
3606				return (NULL);
3607			return (rle->res);
3608		}
3609	}
3610	return (resource_list_alloc(rl, dev, child, type, rid,
3611	    start, end, count, flags));
3612}
3613
3614int
3615pci_release_resource(device_t dev, device_t child, int type, int rid,
3616    struct resource *r)
3617{
3618	int error;
3619
3620	if (device_get_parent(child) != dev)
3621		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3622		    type, rid, r));
3623
3624	/*
3625	 * For BARs we don't actually want to release the resource.
3626	 * Instead, we deactivate the resource if needed and then give
3627	 * ownership of the BAR back to the bus.
3628	 */
3629	switch (type) {
3630	case SYS_RES_IOPORT:
3631	case SYS_RES_MEMORY:
3632		if (rman_get_device(r) != child)
3633			return (EINVAL);
3634		if (rman_get_flags(r) & RF_ACTIVE) {
3635			error = bus_deactivate_resource(child, type, rid, r);
3636			if (error)
3637				return (error);
3638		}
3639		rman_set_device(r, dev);
3640		return (0);
3641	}
3642	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3643}
3644
3645int
3646pci_activate_resource(device_t dev, device_t child, int type, int rid,
3647    struct resource *r)
3648{
3649	int error;
3650
3651	error = bus_generic_activate_resource(dev, child, type, rid, r);
3652	if (error)
3653		return (error);
3654
3655	/* Enable decoding in the command register when activating BARs. */
3656	if (device_get_parent(child) == dev) {
3657		switch (type) {
3658		case SYS_RES_IOPORT:
3659		case SYS_RES_MEMORY:
3660			error = PCI_ENABLE_IO(dev, child, type);
3661			break;
3662		}
3663	}
3664	return (error);
3665}
3666
3667void
3668pci_delete_resource(device_t dev, device_t child, int type, int rid)
3669{
3670	struct pci_devinfo *dinfo;
3671	struct resource_list *rl;
3672	struct resource_list_entry *rle;
3673
3674	if (device_get_parent(child) != dev)
3675		return;
3676
3677	dinfo = device_get_ivars(child);
3678	rl = &dinfo->resources;
3679	rle = resource_list_find(rl, type, rid);
3680	if (rle == NULL)
3681		return;
3682
3683	if (rle->res) {
3684		if (rman_get_device(rle->res) != dev ||
3685		    rman_get_flags(rle->res) & RF_ACTIVE) {
3686			device_printf(dev, "delete_resource: "
3687			    "Resource still owned by child, oops. "
3688			    "(type=%d, rid=%d, addr=%lx)\n",
3689			    rle->type, rle->rid,
3690			    rman_get_start(rle->res));
3691			return;
3692		}
3693
3694		/*
3695		 * If this is a BAR, clear the BAR so it stops
3696		 * decoding before releasing the resource.
3697		 */
3698		switch (type) {
3699		case SYS_RES_IOPORT:
3700		case SYS_RES_MEMORY:
3701			/* XXX: 64-bit BARs? */
3702			pci_write_config(child, rid, 0, 4);
3703			break;
3704		}
3705		bus_release_resource(dev, type, rid, rle->res);
3706	}
3707	resource_list_delete(rl, type, rid);
3708}
3709
3710struct resource_list *
3711pci_get_resource_list (device_t dev, device_t child)
3712{
3713	struct pci_devinfo *dinfo = device_get_ivars(child);
3714
3715	return (&dinfo->resources);
3716}
3717
3718uint32_t
3719pci_read_config_method(device_t dev, device_t child, int reg, int width)
3720{
3721	struct pci_devinfo *dinfo = device_get_ivars(child);
3722	pcicfgregs *cfg = &dinfo->cfg;
3723
3724	return (PCIB_READ_CONFIG(device_get_parent(dev),
3725	    cfg->bus, cfg->slot, cfg->func, reg, width));
3726}
3727
3728void
3729pci_write_config_method(device_t dev, device_t child, int reg,
3730    uint32_t val, int width)
3731{
3732	struct pci_devinfo *dinfo = device_get_ivars(child);
3733	pcicfgregs *cfg = &dinfo->cfg;
3734
3735	PCIB_WRITE_CONFIG(device_get_parent(dev),
3736	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3737}
3738
3739int
3740pci_child_location_str_method(device_t dev, device_t child, char *buf,
3741    size_t buflen)
3742{
3743
3744	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3745	    pci_get_function(child));
3746	return (0);
3747}
3748
3749int
3750pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3751    size_t buflen)
3752{
3753	struct pci_devinfo *dinfo;
3754	pcicfgregs *cfg;
3755
3756	dinfo = device_get_ivars(child);
3757	cfg = &dinfo->cfg;
3758	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3759	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3760	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3761	    cfg->progif);
3762	return (0);
3763}
3764
3765int
3766pci_assign_interrupt_method(device_t dev, device_t child)
3767{
3768	struct pci_devinfo *dinfo = device_get_ivars(child);
3769	pcicfgregs *cfg = &dinfo->cfg;
3770
3771	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3772	    cfg->intpin));
3773}
3774
3775static int
3776pci_modevent(module_t mod, int what, void *arg)
3777{
3778	static struct cdev *pci_cdev;
3779
3780	switch (what) {
3781	case MOD_LOAD:
3782		STAILQ_INIT(&pci_devq);
3783		pci_generation = 0;
3784		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3785		    "pci");
3786		pci_load_vendor_data();
3787		break;
3788
3789	case MOD_UNLOAD:
3790		destroy_dev(pci_cdev);
3791		break;
3792	}
3793
3794	return (0);
3795}
3796
3797void
3798pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3799{
3800	int i;
3801
3802	/*
3803	 * Only do header type 0 devices.  Type 1 devices are bridges,
3804	 * which we know need special treatment.  Type 2 devices are
3805	 * cardbus bridges which also require special treatment.
3806	 * Other types are unknown, and we err on the side of safety
3807	 * by ignoring them.
3808	 */
3809	if (dinfo->cfg.hdrtype != 0)
3810		return;
3811
3812	/*
3813	 * Restore the device to full power mode.  We must do this
3814	 * before we restore the registers because moving from D3 to
3815	 * D0 will cause the chip's BARs and some other registers to
3816	 * be reset to some unknown power on reset values.  Cut down
3817	 * the noise on boot by doing nothing if we are already in
3818	 * state D0.
3819	 */
3820	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3821		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3822	}
3823	for (i = 0; i < dinfo->cfg.nummaps; i++)
3824		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3825	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3826	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3827	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3828	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3829	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3830	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3831	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3832	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3833	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3834	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3835
3836	/* Restore MSI and MSI-X configurations if they are present. */
3837	if (dinfo->cfg.msi.msi_location != 0)
3838		pci_resume_msi(dev);
3839	if (dinfo->cfg.msix.msix_location != 0)
3840		pci_resume_msix(dev);
3841}
3842
3843void
3844pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3845{
3846	int i;
3847	uint32_t cls;
3848	int ps;
3849
3850	/*
3851	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3852	 * we know need special treatment.  Type 2 devices are cardbus bridges
3853	 * which also require special treatment.  Other types are unknown, and
3854	 * we err on the side of safety by ignoring them.  Powering down
3855	 * bridges should not be undertaken lightly.
3856	 */
3857	if (dinfo->cfg.hdrtype != 0)
3858		return;
3859	for (i = 0; i < dinfo->cfg.nummaps; i++)
3860		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3861	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3862
3863	/*
3864	 * Some drivers apparently write to these registers w/o updating our
3865	 * cached copy.  No harm happens if we update the copy, so do so here
3866	 * so we can restore them.  The COMMAND register is modified by the
3867	 * bus w/o updating the cache.  This should represent the normally
3868	 * writable portion of the 'defined' part of type 0 headers.  In
3869	 * theory we also need to save/restore the PCI capability structures
3870	 * we know about, but apart from power we don't know any that are
3871	 * writable.
3872	 */
3873	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3874	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3875	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3876	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3877	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3878	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3879	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3880	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3881	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3882	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3883	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3884	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3885	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3886	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3887	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3888
3889	/*
3890	 * don't set the state for display devices, base peripherals and
3891	 * memory devices since bad things happen when they are powered down.
3892	 * We should (a) have drivers that can easily detach and (b) use
3893	 * generic drivers for these devices so that some device actually
3894	 * attaches.  We need to make sure that when we implement (a) we don't
3895	 * power the device down on a reattach.
3896	 */
3897	cls = pci_get_class(dev);
3898	if (!setstate)
3899		return;
3900	switch (pci_do_power_nodriver)
3901	{
3902		case 0:		/* NO powerdown at all */
3903			return;
3904		case 1:		/* Conservative about what to power down */
3905			if (cls == PCIC_STORAGE)
3906				return;
3907			/*FALLTHROUGH*/
3908		case 2:		/* Agressive about what to power down */
3909			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3910			    cls == PCIC_BASEPERIPH)
3911				return;
3912			/*FALLTHROUGH*/
3913		case 3:		/* Power down everything */
3914			break;
3915	}
3916	/*
3917	 * PCI spec says we can only go into D3 state from D0 state.
3918	 * Transition from D[12] into D0 before going to D3 state.
3919	 */
3920	ps = pci_get_powerstate(dev);
3921	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3922		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3923	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3924		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3925}
3926