pci.c revision 189611
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 189611 2009-03-10 06:21:52Z marcel $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
/*
 * Forward declarations for the helpers private to this file.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
114
/*
 * Newbus method table for the PCI bus driver: standard device lifecycle
 * methods, generic bus resource plumbing, and the PCI-specific kobj
 * interface (config-space access, power states, VPD, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
166
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Attach the pci driver beneath any host-PCI or PCI-PCI bridge (pcib). */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* In-memory copy of the vendor/device description database, once loaded. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175
176
/*
 * A quirk table entry; devid is matched against the raw 32-bit
 * PCIR_DEVVENDOR register (device id in the high half, vendor id low).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
185
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* All-zero entry terminates the table. */
	{ 0 }
};
220
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of all enumerated PCI devices */
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Latched when any PCIe / PCI-X capability is seen during bus scan. */
static int pcie_chipset, pcix_chipset;
230
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Loader tunable + sysctl: hw.pci.enable_io_modes */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* Loader tunable + sysctl: hw.pci.do_power_nodriver */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* Loader tunable + sysctl: hw.pci.do_power_resume */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* Loader tunable + sysctl: hw.pci.enable_msi */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* Loader tunable + sysctl: hw.pci.enable_msix */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Loader tunable + read-only sysctl: hw.pci.honor_msi_blacklist */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271
272/* Find a device_t by bus/slot/function in domain 0 */
273
274device_t
275pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
276{
277
278	return (pci_find_dbsf(0, bus, slot, func));
279}
280
281/* Find a device_t by domain/bus/slot/function */
282
283device_t
284pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285{
286	struct pci_devinfo *dinfo;
287
288	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289		if ((dinfo->cfg.domain == domain) &&
290		    (dinfo->cfg.bus == bus) &&
291		    (dinfo->cfg.slot == slot) &&
292		    (dinfo->cfg.func == func)) {
293			return (dinfo->cfg.dev);
294		}
295	}
296
297	return (NULL);
298}
299
300/* Find a device_t by vendor/device ID */
301
302device_t
303pci_find_device(uint16_t vendor, uint16_t device)
304{
305	struct pci_devinfo *dinfo;
306
307	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308		if ((dinfo->cfg.vendor == vendor) &&
309		    (dinfo->cfg.device == device)) {
310			return (dinfo->cfg.dev);
311		}
312	}
313
314	return (NULL);
315}
316
317/* return base address of memory or port map */
318
319static pci_addr_t
320pci_mapbase(uint64_t mapreg)
321{
322
323	if (PCI_BAR_MEM(mapreg))
324		return (mapreg & PCIM_BAR_MEM_BASE);
325	else
326		return (mapreg & PCIM_BAR_IO_BASE);
327}
328
329/* return map type of memory or port map */
330
331static const char *
332pci_maptype(uint64_t mapreg)
333{
334
335	if (PCI_BAR_IO(mapreg))
336		return ("I/O Port");
337	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338		return ("Prefetchable Memory");
339	return ("Memory");
340}
341
342/* return log2 of map size decoded for memory or port map */
343
344static int
345pci_mapsize(uint64_t testval)
346{
347	int ln2size;
348
349	testval = pci_mapbase(testval);
350	ln2size = 0;
351	if (testval != 0) {
352		while ((testval & 1) == 0)
353		{
354			ln2size++;
355			testval >>= 1;
356		}
357	}
358	return (ln2size);
359}
360
361/* return log2 of address range supported by map register */
362
363static int
364pci_maprange(uint64_t mapreg)
365{
366	int ln2range = 0;
367
368	if (PCI_BAR_IO(mapreg))
369		ln2range = 32;
370	else
371		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372		case PCIM_BAR_MEM_32:
373			ln2range = 32;
374			break;
375		case PCIM_BAR_MEM_1MB:
376			ln2range = 20;
377			break;
378		case PCIM_BAR_MEM_64:
379			ln2range = 64;
380			break;
381		}
382	return (ln2range);
383}
384
385/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386
387static void
388pci_fixancient(pcicfgregs *cfg)
389{
390	if (cfg->hdrtype != 0)
391		return;
392
393	/* PCI to PCI bridges use header type 1 */
394	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395		cfg->hdrtype = 1;
396}
397
398/* extract header type specific config data */
399
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The subvendor/subdevice register offsets and the number of BARs
	 * depend on the header type: 0 = normal device, 1 = PCI-PCI
	 * bridge, 2 = cardbus bridge.
	 */
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		/* PCI-PCI bridges carry no subvendor/subdevice registers. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
421
422/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones DEVVENDOR read means nothing responds at this b/s/f. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list when the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the header into the pciio(4) conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
496
/*
 * Walk the device's capability list and record the capabilities this
 * driver knows about (power management, HT MSI mapping, MSI, MSI-X,
 * VPD, subvendor, PCI-X, PCI-express) into *cfg.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
630
/*
 * PCI Vital Product Data
 */

#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one 4-byte-aligned dword of VPD at offset 'reg' into *data.
 * Polls the VPD address register's flag bit (0x8000) until the device
 * signals completion; returns ENXIO if it fails to do so within
 * PCI_VPD_TIMEOUT polls.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
655
#if 0
/*
 * Counterpart to pci_read_vpd_reg(): write one dword of VPD.  Compiled
 * out because nothing in this file writes VPD yet; kept for reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
677
/* Cursor state for the byte-at-a-time VPD reader (see vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched 32-bit word */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running byte sum for RV checksum */
};
686
687static int
688vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
689{
690	uint32_t reg;
691	uint8_t byte;
692
693	if (vrs->bytesinval == 0) {
694		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
695			return (ENXIO);
696		vrs->val = le32toh(reg);
697		vrs->off += 4;
698		byte = vrs->val & 0xff;
699		vrs->bytesinval = 3;
700	} else {
701		vrs->val = vrs->val >> 8;
702		byte = vrs->val & 0xff;
703		vrs->bytesinval--;
704	}
705
706	vrs->cksum += byte;
707	*data = byte;
708	return (0);
709}
710
/*
 * Parse the device's VPD into cfg->vpd.  Implemented as a state machine
 * driven one byte at a time by vpd_nextbyte():
 *   state 0  - resource tag/name byte
 *   state 1  - identifier string body
 *   state 2/3 - VPD-R keyword header / value
 *   state 4  - skip uninteresting bytes
 *   state 5/6 - VPD-W keyword header / value
 *   state -1 - normal termination, state -2 - I/O error
 * On checksum or I/O failure the partially built arrays are freed.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Bit 7 set: a 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* 3-bit length, 4-bit name packed in one byte. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				/* Grow the read-only array geometrically. */
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				/* The RV byte must bring the running sum to 0. */
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Discard bytes of a resource we do not record. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				/* Grow the writable array geometrically. */
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the VPD parsed so callers do not retry on every query. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
990
991int
992pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
993{
994	struct pci_devinfo *dinfo = device_get_ivars(child);
995	pcicfgregs *cfg = &dinfo->cfg;
996
997	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
998		pci_read_vpd(device_get_parent(dev), cfg);
999
1000	*identptr = cfg->vpd.vpd_ident;
1001
1002	if (*identptr == NULL)
1003		return (ENXIO);
1004
1005	return (0);
1006}
1007
1008int
1009pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1010	const char **vptr)
1011{
1012	struct pci_devinfo *dinfo = device_get_ivars(child);
1013	pcicfgregs *cfg = &dinfo->cfg;
1014	int i;
1015
1016	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1017		pci_read_vpd(device_get_parent(dev), cfg);
1018
1019	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1020		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1021		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1022			*vptr = cfg->vpd.vpd_ros[i].value;
1023		}
1024
1025	if (i != cfg->vpd.vpd_rocnt)
1026		return (0);
1027
1028	*vptr = NULL;
1029	return (ENXIO);
1030}
1031
1032/*
1033 * Return the offset in configuration space of the requested extended
1034 * capability entry or 0 if the specified capability was not found.
1035 */
int
pci_find_extcap_method(device_t dev, device_t child, int capability,
    int *capreg)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	u_int32_t status;
	u_int8_t ptr;

	/*
	 * Check the CAP_LIST bit of the PCI status register first.
	 */
	status = pci_read_config(child, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (ENXIO);

	/*
	 * Determine the start pointer of the capabilities list.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		/* XXX: panic? */
		return (ENXIO);		/* no extended capabilities support */
	}
	ptr = pci_read_config(child, ptr, 1);

	/*
	 * Traverse the capabilities list.  Note: ENXIO above means the
	 * device has no capability list at all; ENOENT below means it has
	 * one but the requested capability is not in it.
	 */
	while (ptr != 0) {
		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}
		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
	}

	return (ENOENT);
}
1083
1084/*
1085 * Support for MSI-X message interrupts.
1086 */
1087void
1088pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1089{
1090	struct pci_devinfo *dinfo = device_get_ivars(dev);
1091	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1092	uint32_t offset;
1093
1094	KASSERT(msix->msix_table_len > index, ("bogus index"));
1095	offset = msix->msix_table_offset + index * 16;
1096	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1097	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1098	bus_write_4(msix->msix_table_res, offset + 8, data);
1099
1100	/* Enable MSI -> HT mapping. */
1101	pci_ht_map_msi(dev, address);
1102}
1103
1104void
1105pci_mask_msix(device_t dev, u_int index)
1106{
1107	struct pci_devinfo *dinfo = device_get_ivars(dev);
1108	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1109	uint32_t offset, val;
1110
1111	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1112	offset = msix->msix_table_offset + index * 16 + 12;
1113	val = bus_read_4(msix->msix_table_res, offset);
1114	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1115		val |= PCIM_MSIX_VCTRL_MASK;
1116		bus_write_4(msix->msix_table_res, offset, val);
1117	}
1118}
1119
1120void
1121pci_unmask_msix(device_t dev, u_int index)
1122{
1123	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125	uint32_t offset, val;
1126
1127	KASSERT(msix->msix_table_len > index, ("bogus index"));
1128	offset = msix->msix_table_offset + index * 16 + 12;
1129	val = bus_read_4(msix->msix_table_res, offset);
1130	if (val & PCIM_MSIX_VCTRL_MASK) {
1131		val &= ~PCIM_MSIX_VCTRL_MASK;
1132		bus_write_4(msix->msix_table_res, offset, val);
1133	}
1134}
1135
1136int
1137pci_pending_msix(device_t dev, u_int index)
1138{
1139	struct pci_devinfo *dinfo = device_get_ivars(dev);
1140	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1141	uint32_t offset, bit;
1142
1143	KASSERT(msix->msix_table_len > index, ("bogus index"));
1144	offset = msix->msix_pba_offset + (index / 32) * 4;
1145	bit = 1 << index % 32;
1146	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1147}
1148
1149/*
1150 * Restore MSI-X registers and table during resume.  If MSI-X is
1151 * enabled then walk the virtual table to restore the actual MSI-X
1152 * table.
1153 */
1154static void
1155pci_resume_msix(device_t dev)
1156{
1157	struct pci_devinfo *dinfo = device_get_ivars(dev);
1158	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1159	struct msix_table_entry *mte;
1160	struct msix_vector *mv;
1161	int i;
1162
1163	if (msix->msix_alloc > 0) {
1164		/* First, mask all vectors. */
1165		for (i = 0; i < msix->msix_msgnum; i++)
1166			pci_mask_msix(dev, i);
1167
1168		/* Second, program any messages with at least one handler. */
1169		for (i = 0; i < msix->msix_table_len; i++) {
1170			mte = &msix->msix_table[i];
1171			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1172				continue;
1173			mv = &msix->msix_vectors[mte->mte_vector - 1];
1174			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1175			pci_unmask_msix(dev, i);
1176		}
1177	}
1178	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1179	    msix->msix_ctrl, 2);
1180}
1181
1182/*
1183 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1184 * returned in *count.  After this function returns, each message will be
1185 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1186 */
1187int
1188pci_alloc_msix_method(device_t dev, device_t child, int *count)
1189{
1190	struct pci_devinfo *dinfo = device_get_ivars(child);
1191	pcicfgregs *cfg = &dinfo->cfg;
1192	struct resource_list_entry *rle;
1193	int actual, error, i, irq, max;
1194
1195	/* Don't let count == 0 get us into trouble. */
1196	if (*count == 0)
1197		return (EINVAL);
1198
1199	/* If rid 0 is allocated, then fail. */
1200	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1201	if (rle != NULL && rle->res != NULL)
1202		return (ENXIO);
1203
1204	/* Already have allocated messages? */
1205	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1206		return (ENXIO);
1207
1208	/* If MSI is blacklisted for this system, fail. */
1209	if (pci_msi_blacklisted())
1210		return (ENXIO);
1211
1212	/* MSI-X capability present? */
1213	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1214		return (ENODEV);
1215
1216	/* Make sure the appropriate BARs are mapped. */
1217	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1218	    cfg->msix.msix_table_bar);
1219	if (rle == NULL || rle->res == NULL ||
1220	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1221		return (ENXIO);
1222	cfg->msix.msix_table_res = rle->res;
1223	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1224		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1225		    cfg->msix.msix_pba_bar);
1226		if (rle == NULL || rle->res == NULL ||
1227		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1228			return (ENXIO);
1229	}
1230	cfg->msix.msix_pba_res = rle->res;
1231
1232	if (bootverbose)
1233		device_printf(child,
1234		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1235		    *count, cfg->msix.msix_msgnum);
1236	max = min(*count, cfg->msix.msix_msgnum);
1237	for (i = 0; i < max; i++) {
1238		/* Allocate a message. */
1239		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1240		if (error)
1241			break;
1242		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1243		    irq, 1);
1244	}
1245	actual = i;
1246
1247	if (bootverbose) {
1248		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1249		if (actual == 1)
1250			device_printf(child, "using IRQ %lu for MSI-X\n",
1251			    rle->start);
1252		else {
1253			int run;
1254
1255			/*
1256			 * Be fancy and try to print contiguous runs of
1257			 * IRQ values as ranges.  'irq' is the previous IRQ.
1258			 * 'run' is true if we are in a range.
1259			 */
1260			device_printf(child, "using IRQs %lu", rle->start);
1261			irq = rle->start;
1262			run = 0;
1263			for (i = 1; i < actual; i++) {
1264				rle = resource_list_find(&dinfo->resources,
1265				    SYS_RES_IRQ, i + 1);
1266
1267				/* Still in a run? */
1268				if (rle->start == irq + 1) {
1269					run = 1;
1270					irq++;
1271					continue;
1272				}
1273
1274				/* Finish previous range. */
1275				if (run) {
1276					printf("-%d", irq);
1277					run = 0;
1278				}
1279
1280				/* Start new range. */
1281				printf(",%lu", rle->start);
1282				irq = rle->start;
1283			}
1284
1285			/* Unfinished range? */
1286			if (run)
1287				printf("-%d", irq);
1288			printf(" for MSI-X\n");
1289		}
1290	}
1291
1292	/* Mask all vectors. */
1293	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1294		pci_mask_msix(child, i);
1295
1296	/* Allocate and initialize vector data and virtual table. */
1297	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1298	    M_DEVBUF, M_WAITOK | M_ZERO);
1299	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1300	    M_DEVBUF, M_WAITOK | M_ZERO);
1301	for (i = 0; i < actual; i++) {
1302		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1303		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1304		cfg->msix.msix_table[i].mte_vector = i + 1;
1305	}
1306
1307	/* Update control register to enable MSI-X. */
1308	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1309	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1310	    cfg->msix.msix_ctrl, 2);
1311
1312	/* Update counts of alloc'd messages. */
1313	cfg->msix.msix_alloc = actual;
1314	cfg->msix.msix_table_len = actual;
1315	*count = actual;
1316	return (0);
1317}
1318
1319/*
1320 * By default, pci_alloc_msix() will assign the allocated IRQ
1321 * resources consecutively to the first N messages in the MSI-X table.
1322 * However, device drivers may want to use different layouts if they
1323 * either receive fewer messages than they asked for, or they wish to
1324 * populate the MSI-X table sparsely.  This method allows the driver
1325 * to specify what layout it wants.  It must be called after a
1326 * successful pci_alloc_msix() but before any of the associated
1327 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1328 *
1329 * The 'vectors' array contains 'count' message vectors.  The array
1330 * maps directly to the MSI-X table in that index 0 in the array
1331 * specifies the vector for the first message in the MSI-X table, etc.
1332 * The vector value in each array index can either be 0 to indicate
1333 * that no vector should be assigned to a message slot, or it can be a
1334 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1336 * vector (IRQ) to be used for the corresponding message.
1337 *
1338 * On successful return, each message with a non-zero vector will have
1339 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1340 * 1.  Additionally, if any of the IRQs allocated via the previous
1341 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1342 * will be freed back to the system automatically.
1343 *
1344 * For example, suppose a driver has a MSI-X table with 6 messages and
1345 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1346 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1347 * C.  After the call to pci_alloc_msix(), the device will be setup to
1348 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1350 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1351 * be freed back to the system.  This device will also have valid
1352 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1353 *
1354 * In any case, the SYS_RES_IRQ rid X will always map to the message
1355 * at MSI-X table index X - 1 and will only be valid if a vector is
1356 * assigned to that table entry.
1357 */
1358int
1359pci_remap_msix_method(device_t dev, device_t child, int count,
1360    const u_int *vectors)
1361{
1362	struct pci_devinfo *dinfo = device_get_ivars(child);
1363	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1364	struct resource_list_entry *rle;
1365	int i, irq, j, *used;
1366
1367	/*
1368	 * Have to have at least one message in the table but the
1369	 * table can't be bigger than the actual MSI-X table in the
1370	 * device.
1371	 */
1372	if (count == 0 || count > msix->msix_msgnum)
1373		return (EINVAL);
1374
1375	/* Sanity check the vectors. */
1376	for (i = 0; i < count; i++)
1377		if (vectors[i] > msix->msix_alloc)
1378			return (EINVAL);
1379
1380	/*
1381	 * Make sure there aren't any holes in the vectors to be used.
1382	 * It's a big pain to support it, and it doesn't really make
1383	 * sense anyway.  Also, at least one vector must be used.
1384	 */
1385	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1386	    M_ZERO);
1387	for (i = 0; i < count; i++)
1388		if (vectors[i] != 0)
1389			used[vectors[i] - 1] = 1;
1390	for (i = 0; i < msix->msix_alloc - 1; i++)
1391		if (used[i] == 0 && used[i + 1] == 1) {
1392			free(used, M_DEVBUF);
1393			return (EINVAL);
1394		}
1395	if (used[0] != 1) {
1396		free(used, M_DEVBUF);
1397		return (EINVAL);
1398	}
1399
1400	/* Make sure none of the resources are allocated. */
1401	for (i = 0; i < msix->msix_table_len; i++) {
1402		if (msix->msix_table[i].mte_vector == 0)
1403			continue;
1404		if (msix->msix_table[i].mte_handlers > 0)
1405			return (EBUSY);
1406		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1407		KASSERT(rle != NULL, ("missing resource"));
1408		if (rle->res != NULL)
1409			return (EBUSY);
1410	}
1411
1412	/* Free the existing resource list entries. */
1413	for (i = 0; i < msix->msix_table_len; i++) {
1414		if (msix->msix_table[i].mte_vector == 0)
1415			continue;
1416		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1417	}
1418
1419	/*
1420	 * Build the new virtual table keeping track of which vectors are
1421	 * used.
1422	 */
1423	free(msix->msix_table, M_DEVBUF);
1424	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1425	    M_DEVBUF, M_WAITOK | M_ZERO);
1426	for (i = 0; i < count; i++)
1427		msix->msix_table[i].mte_vector = vectors[i];
1428	msix->msix_table_len = count;
1429
1430	/* Free any unused IRQs and resize the vectors array if necessary. */
1431	j = msix->msix_alloc - 1;
1432	if (used[j] == 0) {
1433		struct msix_vector *vec;
1434
1435		while (used[j] == 0) {
1436			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1437			    msix->msix_vectors[j].mv_irq);
1438			j--;
1439		}
1440		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1441		    M_WAITOK);
1442		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1443		    (j + 1));
1444		free(msix->msix_vectors, M_DEVBUF);
1445		msix->msix_vectors = vec;
1446		msix->msix_alloc = j + 1;
1447	}
1448	free(used, M_DEVBUF);
1449
1450	/* Map the IRQs onto the rids. */
1451	for (i = 0; i < count; i++) {
1452		if (vectors[i] == 0)
1453			continue;
1454		irq = msix->msix_vectors[vectors[i]].mv_irq;
1455		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1456		    irq, 1);
1457	}
1458
1459	if (bootverbose) {
1460		device_printf(child, "Remapped MSI-X IRQs as: ");
1461		for (i = 0; i < count; i++) {
1462			if (i != 0)
1463				printf(", ");
1464			if (vectors[i] == 0)
1465				printf("---");
1466			else
1467				printf("%d",
1468				    msix->msix_vectors[vectors[i]].mv_irq);
1469		}
1470		printf("\n");
1471	}
1472
1473	return (0);
1474}
1475
1476static int
1477pci_release_msix(device_t dev, device_t child)
1478{
1479	struct pci_devinfo *dinfo = device_get_ivars(child);
1480	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1481	struct resource_list_entry *rle;
1482	int i;
1483
1484	/* Do we have any messages to release? */
1485	if (msix->msix_alloc == 0)
1486		return (ENODEV);
1487
1488	/* Make sure none of the resources are allocated. */
1489	for (i = 0; i < msix->msix_table_len; i++) {
1490		if (msix->msix_table[i].mte_vector == 0)
1491			continue;
1492		if (msix->msix_table[i].mte_handlers > 0)
1493			return (EBUSY);
1494		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1495		KASSERT(rle != NULL, ("missing resource"));
1496		if (rle->res != NULL)
1497			return (EBUSY);
1498	}
1499
1500	/* Update control register to disable MSI-X. */
1501	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1502	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1503	    msix->msix_ctrl, 2);
1504
1505	/* Free the resource list entries. */
1506	for (i = 0; i < msix->msix_table_len; i++) {
1507		if (msix->msix_table[i].mte_vector == 0)
1508			continue;
1509		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1510	}
1511	free(msix->msix_table, M_DEVBUF);
1512	msix->msix_table_len = 0;
1513
1514	/* Release the IRQs. */
1515	for (i = 0; i < msix->msix_alloc; i++)
1516		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1517		    msix->msix_vectors[i].mv_irq);
1518	free(msix->msix_vectors, M_DEVBUF);
1519	msix->msix_alloc = 0;
1520	return (0);
1521}
1522
1523/*
1524 * Return the max supported MSI-X messages this device supports.
1525 * Basically, assuming the MD code can alloc messages, this function
1526 * should return the maximum value that pci_alloc_msix() can return.
1527 * Thus, it is subject to the tunables, etc.
1528 */
1529int
1530pci_msix_count_method(device_t dev, device_t child)
1531{
1532	struct pci_devinfo *dinfo = device_get_ivars(child);
1533	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1534
1535	if (pci_do_msix && msix->msix_location != 0)
1536		return (msix->msix_msgnum);
1537	return (0);
1538}
1539
1540/*
1541 * HyperTransport MSI mapping control
1542 */
1543void
1544pci_ht_map_msi(device_t dev, uint64_t addr)
1545{
1546	struct pci_devinfo *dinfo = device_get_ivars(dev);
1547	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1548
1549	if (!ht->ht_msimap)
1550		return;
1551
1552	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1553	    ht->ht_msiaddr >> 20 == addr >> 20) {
1554		/* Enable MSI -> HT mapping. */
1555		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1556		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1557		    ht->ht_msictrl, 2);
1558	}
1559
1560	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1561		/* Disable MSI -> HT mapping. */
1562		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1563		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1564		    ht->ht_msictrl, 2);
1565	}
1566}
1567
1568/*
1569 * Support for MSI message signalled interrupts.
1570 */
1571void
1572pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1573{
1574	struct pci_devinfo *dinfo = device_get_ivars(dev);
1575	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1576
1577	/* Write data and address values. */
1578	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1579	    address & 0xffffffff, 4);
1580	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1581		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1582		    address >> 32, 4);
1583		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1584		    data, 2);
1585	} else
1586		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1587		    2);
1588
1589	/* Enable MSI in the control register. */
1590	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1591	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1592	    2);
1593
1594	/* Enable MSI -> HT mapping. */
1595	pci_ht_map_msi(dev, address);
1596}
1597
1598void
1599pci_disable_msi(device_t dev)
1600{
1601	struct pci_devinfo *dinfo = device_get_ivars(dev);
1602	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1603
1604	/* Disable MSI -> HT mapping. */
1605	pci_ht_map_msi(dev, 0);
1606
1607	/* Disable MSI in the control register. */
1608	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1609	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1610	    2);
1611}
1612
1613/*
1614 * Restore MSI registers during resume.  If MSI is enabled then
1615 * restore the data and address registers in addition to the control
1616 * register.
1617 */
1618static void
1619pci_resume_msi(device_t dev)
1620{
1621	struct pci_devinfo *dinfo = device_get_ivars(dev);
1622	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1623	uint64_t address;
1624	uint16_t data;
1625
1626	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1627		address = msi->msi_addr;
1628		data = msi->msi_data;
1629		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1630		    address & 0xffffffff, 4);
1631		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1632			pci_write_config(dev, msi->msi_location +
1633			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1634			pci_write_config(dev, msi->msi_location +
1635			    PCIR_MSI_DATA_64BIT, data, 2);
1636		} else
1637			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1638			    data, 2);
1639	}
1640	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1641	    2);
1642}
1643
/*
 * Ask the parent bridge for an updated address/data pair for the MSI
 * or MSI-X message mapped to 'irq' and reprogram the device with the
 * result (used e.g. when an interrupt is retargeted).  Returns ENOENT
 * if 'irq' does not belong to one of this device's messages.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable while the registers are rewritten. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 *
	 * NOTE(review): even a successful MSI-X update falls through
	 * to the ENOENT return below — confirm callers tolerate this.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Reprogram every active slot using this
				 * vector; mte_vector is 1-based. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1719
1720/*
1721 * Returns true if the specified device is blacklisted because MSI
1722 * doesn't work.
1723 */
1724int
1725pci_msi_device_blacklisted(device_t dev)
1726{
1727	struct pci_quirk *q;
1728
1729	if (!pci_honor_msi_blacklist)
1730		return (0);
1731
1732	for (q = &pci_quirks[0]; q->devid; q++) {
1733		if (q->devid == pci_get_devid(dev) &&
1734		    q->type == PCI_QUIRK_DISABLE_MSI)
1735			return (1);
1736	}
1737	return (0);
1738}
1739
1740/*
1741 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1742 * we just check for blacklisted chipsets as represented by the
1743 * host-PCI bridge at device 0:0:0.  In the future, it may become
1744 * necessary to check other system attributes, such as the kenv values
1745 * that give the motherboard manufacturer and model number.
1746 */
1747static int
1748pci_msi_blacklisted(void)
1749{
1750	device_t dev;
1751
1752	if (!pci_honor_msi_blacklist)
1753		return (0);
1754
1755	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1756	if (!(pcie_chipset || pcix_chipset))
1757		return (1);
1758
1759	dev = pci_find_bsf(0, 0, 0);
1760	if (dev != NULL)
1761		return (pci_msi_device_blacklisted(dev));
1762	return (0);
1763}
1764
1765/*
1766 * Attempt to allocate *count MSI messages.  The actual number allocated is
1767 * returned in *count.  After this function returns, each message will be
1768 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1769 */
1770int
1771pci_alloc_msi_method(device_t dev, device_t child, int *count)
1772{
1773	struct pci_devinfo *dinfo = device_get_ivars(child);
1774	pcicfgregs *cfg = &dinfo->cfg;
1775	struct resource_list_entry *rle;
1776	int actual, error, i, irqs[32];
1777	uint16_t ctrl;
1778
1779	/* Don't let count == 0 get us into trouble. */
1780	if (*count == 0)
1781		return (EINVAL);
1782
1783	/* If rid 0 is allocated, then fail. */
1784	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1785	if (rle != NULL && rle->res != NULL)
1786		return (ENXIO);
1787
1788	/* Already have allocated messages? */
1789	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1790		return (ENXIO);
1791
1792	/* If MSI is blacklisted for this system, fail. */
1793	if (pci_msi_blacklisted())
1794		return (ENXIO);
1795
1796	/* MSI capability present? */
1797	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1798		return (ENODEV);
1799
1800	if (bootverbose)
1801		device_printf(child,
1802		    "attempting to allocate %d MSI vectors (%d supported)\n",
1803		    *count, cfg->msi.msi_msgnum);
1804
1805	/* Don't ask for more than the device supports. */
1806	actual = min(*count, cfg->msi.msi_msgnum);
1807
1808	/* Don't ask for more than 32 messages. */
1809	actual = min(actual, 32);
1810
1811	/* MSI requires power of 2 number of messages. */
1812	if (!powerof2(actual))
1813		return (EINVAL);
1814
1815	for (;;) {
1816		/* Try to allocate N messages. */
1817		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1818		    cfg->msi.msi_msgnum, irqs);
1819		if (error == 0)
1820			break;
1821		if (actual == 1)
1822			return (error);
1823
1824		/* Try N / 2. */
1825		actual >>= 1;
1826	}
1827
1828	/*
1829	 * We now have N actual messages mapped onto SYS_RES_IRQ
1830	 * resources in the irqs[] array, so add new resources
1831	 * starting at rid 1.
1832	 */
1833	for (i = 0; i < actual; i++)
1834		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1835		    irqs[i], irqs[i], 1);
1836
1837	if (bootverbose) {
1838		if (actual == 1)
1839			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1840		else {
1841			int run;
1842
1843			/*
1844			 * Be fancy and try to print contiguous runs
1845			 * of IRQ values as ranges.  'run' is true if
1846			 * we are in a range.
1847			 */
1848			device_printf(child, "using IRQs %d", irqs[0]);
1849			run = 0;
1850			for (i = 1; i < actual; i++) {
1851
1852				/* Still in a run? */
1853				if (irqs[i] == irqs[i - 1] + 1) {
1854					run = 1;
1855					continue;
1856				}
1857
1858				/* Finish previous range. */
1859				if (run) {
1860					printf("-%d", irqs[i - 1]);
1861					run = 0;
1862				}
1863
1864				/* Start new range. */
1865				printf(",%d", irqs[i]);
1866			}
1867
1868			/* Unfinished range? */
1869			if (run)
1870				printf("-%d", irqs[actual - 1]);
1871			printf(" for MSI\n");
1872		}
1873	}
1874
1875	/* Update control register with actual count. */
1876	ctrl = cfg->msi.msi_ctrl;
1877	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1878	ctrl |= (ffs(actual) - 1) << 4;
1879	cfg->msi.msi_ctrl = ctrl;
1880	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1881
1882	/* Update counts of alloc'd messages. */
1883	cfg->msi.msi_alloc = actual;
1884	cfg->msi.msi_handlers = 0;
1885	*count = actual;
1886	return (0);
1887}
1888
/*
 * Release the MSI messages associated with this device.  Tries MSI-X
 * first (the two are mutually exclusive); returns ENODEV if neither is
 * allocated and EBUSY if any message is still in use.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ numbers for the bridge release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1937
1938/*
1939 * Return the max supported MSI messages this device supports.
1940 * Basically, assuming the MD code can alloc messages, this function
1941 * should return the maximum value that pci_alloc_msi() can return.
1942 * Thus, it is subject to the tunables, etc.
1943 */
1944int
1945pci_msi_count_method(device_t dev, device_t child)
1946{
1947	struct pci_devinfo *dinfo = device_get_ivars(child);
1948	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1949
1950	if (pci_do_msi && msi->msi_location != 0)
1951		return (msi->msi_msgnum);
1952	return (0);
1953}
1954
1955/* free pcicfgregs structure and all depending data structures */
1956
1957int
1958pci_freecfg(struct pci_devinfo *dinfo)
1959{
1960	struct devlist *devlist_head;
1961	int i;
1962
1963	devlist_head = &pci_devq;
1964
1965	if (dinfo->cfg.vpd.vpd_reg) {
1966		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1967		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1968			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1969		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1970		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1971			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1972		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1973	}
1974	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1975	free(dinfo, M_DEVBUF);
1976
1977	/* increment the generation count */
1978	pci_generation++;
1979
1980	/* we're losing one device */
1981	pci_numdevs--;
1982	return (0);
1983}
1984
1985/*
1986 * PCI power manangement
1987 */
1988int
1989pci_set_powerstate_method(device_t dev, device_t child, int state)
1990{
1991	struct pci_devinfo *dinfo = device_get_ivars(child);
1992	pcicfgregs *cfg = &dinfo->cfg;
1993	uint16_t status;
1994	int result, oldstate, highest, delay;
1995
1996	if (cfg->pp.pp_cap == 0)
1997		return (EOPNOTSUPP);
1998
1999	/*
2000	 * Optimize a no state change request away.  While it would be OK to
2001	 * write to the hardware in theory, some devices have shown odd
2002	 * behavior when going from D3 -> D3.
2003	 */
2004	oldstate = pci_get_powerstate(child);
2005	if (oldstate == state)
2006		return (0);
2007
2008	/*
2009	 * The PCI power management specification states that after a state
2010	 * transition between PCI power states, system software must
2011	 * guarantee a minimal delay before the function accesses the device.
2012	 * Compute the worst case delay that we need to guarantee before we
2013	 * access the device.  Many devices will be responsive much more
2014	 * quickly than this delay, but there are some that don't respond
2015	 * instantly to state changes.  Transitions to/from D3 state require
2016	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2017	 * is done below with DELAY rather than a sleeper function because
2018	 * this function can be called from contexts where we cannot sleep.
2019	 */
2020	highest = (oldstate > state) ? oldstate : state;
2021	if (highest == PCI_POWERSTATE_D3)
2022	    delay = 10000;
2023	else if (highest == PCI_POWERSTATE_D2)
2024	    delay = 200;
2025	else
2026	    delay = 0;
2027	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2028	    & ~PCIM_PSTAT_DMASK;
2029	result = 0;
2030	switch (state) {
2031	case PCI_POWERSTATE_D0:
2032		status |= PCIM_PSTAT_D0;
2033		break;
2034	case PCI_POWERSTATE_D1:
2035		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2036			return (EOPNOTSUPP);
2037		status |= PCIM_PSTAT_D1;
2038		break;
2039	case PCI_POWERSTATE_D2:
2040		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2041			return (EOPNOTSUPP);
2042		status |= PCIM_PSTAT_D2;
2043		break;
2044	case PCI_POWERSTATE_D3:
2045		status |= PCIM_PSTAT_D3;
2046		break;
2047	default:
2048		return (EINVAL);
2049	}
2050
2051	if (bootverbose)
2052		printf(
2053		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2054		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2055		    dinfo->cfg.func, oldstate, state);
2056
2057	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2058	if (delay)
2059		DELAY(delay);
2060	return (0);
2061}
2062
2063int
2064pci_get_powerstate_method(device_t dev, device_t child)
2065{
2066	struct pci_devinfo *dinfo = device_get_ivars(child);
2067	pcicfgregs *cfg = &dinfo->cfg;
2068	uint16_t status;
2069	int result;
2070
2071	if (cfg->pp.pp_cap != 0) {
2072		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2073		switch (status & PCIM_PSTAT_DMASK) {
2074		case PCIM_PSTAT_D0:
2075			result = PCI_POWERSTATE_D0;
2076			break;
2077		case PCIM_PSTAT_D1:
2078			result = PCI_POWERSTATE_D1;
2079			break;
2080		case PCIM_PSTAT_D2:
2081			result = PCI_POWERSTATE_D2;
2082			break;
2083		case PCIM_PSTAT_D3:
2084			result = PCI_POWERSTATE_D3;
2085			break;
2086		default:
2087			result = PCI_POWERSTATE_UNKNOWN;
2088			break;
2089		}
2090	} else {
2091		/* No support, device is always at D0 */
2092		result = PCI_POWERSTATE_D0;
2093	}
2094	return (result);
2095}
2096
2097/*
2098 * Some convenience functions for PCI device drivers.
2099 */
2100
2101static __inline void
2102pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2103{
2104	uint16_t	command;
2105
2106	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2107	command |= bit;
2108	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2109}
2110
2111static __inline void
2112pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2113{
2114	uint16_t	command;
2115
2116	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2117	command &= ~bit;
2118	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2119}
2120
/* Enable bus mastering for the child device.  Always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2127
/* Disable bus mastering for the child device.  Always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2134
2135int
2136pci_enable_io_method(device_t dev, device_t child, int space)
2137{
2138	uint16_t command;
2139	uint16_t bit;
2140	char *error;
2141
2142	bit = 0;
2143	error = NULL;
2144
2145	switch(space) {
2146	case SYS_RES_IOPORT:
2147		bit = PCIM_CMD_PORTEN;
2148		error = "port";
2149		break;
2150	case SYS_RES_MEMORY:
2151		bit = PCIM_CMD_MEMEN;
2152		error = "memory";
2153		break;
2154	default:
2155		return (EINVAL);
2156	}
2157	pci_set_command_bit(dev, child, bit);
2158	/* Some devices seem to need a brief stall here, what do to? */
2159	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2160	if (command & bit)
2161		return (0);
2162	device_printf(child, "failed to enable %s mapping!\n", error);
2163	return (ENXIO);
2164}
2165
2166int
2167pci_disable_io_method(device_t dev, device_t child, int space)
2168{
2169	uint16_t command;
2170	uint16_t bit;
2171	char *error;
2172
2173	bit = 0;
2174	error = NULL;
2175
2176	switch(space) {
2177	case SYS_RES_IOPORT:
2178		bit = PCIM_CMD_PORTEN;
2179		error = "port";
2180		break;
2181	case SYS_RES_MEMORY:
2182		bit = PCIM_CMD_MEMEN;
2183		error = "memory";
2184		break;
2185	default:
2186		return (EINVAL);
2187	}
2188	pci_clear_command_bit(dev, child, bit);
2189	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2190	if (command & bit) {
2191		device_printf(child, "failed to disable %s mapping!\n", error);
2192		return (ENXIO);
2193	}
2194	return (0);
2195}
2196
2197/*
2198 * New style pci driver.  Parent device is either a pci-host-bridge or a
2199 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2200 */
2201
/*
 * Dump a verbose summary of a device's config space: identity,
 * location, class codes, command/status, timing registers, interrupt
 * routing, and any power-management, MSI, or MSI-X capabilities that
 * were discovered.  Only produces output when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* PM capability: spec rev, D1/D2 support, and the
			 * current power state from the live status register. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/* Note whether the MSI-X table and PBA share a BAR. */
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2258
2259static int
2260pci_porten(device_t pcib, int b, int s, int f)
2261{
2262	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2263		& PCIM_CMD_PORTEN) != 0;
2264}
2265
2266static int
2267pci_memen(device_t pcib, int b, int s, int f)
2268{
2269	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2270		& PCIM_CMD_MEMEN) != 0;
2271}
2272
2273/*
2274 * Add a resource based on a pci map register. Return 1 if the map
2275 * register is a 32bit map register or 2 if it is a 64bit register.
2276 */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	pci_addr_t base, map;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	/* Read the BAR; a 64-bit BAR consumes the next dword as well. */
	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) <<
		    32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	if (ln2range == 64) {
		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, 0xffffffff, 4);
		testval |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4,
		    4) << 32;
	}

	/* Restore the BAR and command register. */
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
	if (ln2range == 64)
		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, map >> 32, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);

	/* Classify the BAR; a set prefetchable bit overrides the caller. */
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	base = pci_mapbase(map);
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	/* Reject bases that don't fit in this platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		/* Decoding disabled and we may not enable it: skip the BAR. */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else {
		start = rman_get_start(res);
		rman_set_device(res, bus);
	}
	/* Program the (possibly relocated) base back into the BAR. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2442
2443/*
2444 * For ATA devices we need to decide early what addressing mode to use.
2445 * Legacy demands that the primary and secondary ATA ports sits on the
2446 * same addresses that old ISA hardware did. This dictates that we use
2447 * those addresses and ignore the BAR's if we cannot set PCI native
2448 * addressing mode.
2449 */
2450static void
2451pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2452    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2453{
2454	struct resource *r;
2455	int rid, type, progif;
2456#if 0
2457	/* if this device supports PCI native addressing use it */
2458	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2459	if ((progif & 0x8a) == 0x8a) {
2460		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2461		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2462			printf("Trying ATA native PCI addressing mode\n");
2463			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2464		}
2465	}
2466#endif
2467	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2468	type = SYS_RES_IOPORT;
2469	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2470		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2471		    prefetchmask & (1 << 0));
2472		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2473		    prefetchmask & (1 << 1));
2474	} else {
2475		rid = PCIR_BAR(0);
2476		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2477		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2478		    8, 0);
2479		rman_set_device(r, bus);
2480		rid = PCIR_BAR(1);
2481		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2482		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2483		    1, 0);
2484		rman_set_device(r, bus);
2485	}
2486	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2487		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2488		    prefetchmask & (1 << 2));
2489		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2490		    prefetchmask & (1 << 3));
2491	} else {
2492		rid = PCIR_BAR(2);
2493		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2494		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2495		    8, 0);
2496		rman_set_device(r, bus);
2497		rid = PCIR_BAR(3);
2498		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2499		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2500		    1, 0);
2501		rman_set_device(r, bus);
2502	}
2503	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2504	    prefetchmask & (1 << 4));
2505	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2506	    prefetchmask & (1 << 5));
2507}
2508
/*
 * Decide which IRQ a device's INTx pin should use and record it as
 * the rid 0 interrupt resource.  Precedence: a user-supplied
 * hw.pci<dom>.<bus>.<slot>.INT<pin>.irq tunable, then (depending on
 * force_route) either the intline register or the parent bridge's
 * PCIB_ROUTE/ASSIGN machinery.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2556
/*
 * Populate the resource list for a newly-discovered device: walk its
 * BARs (with special handling for legacy-mode ATA controllers), add
 * any quirk-declared extra map registers, and assign its INTx IRQ.
 *
 * force/prefetchmask are passed through to pci_add_map(); see that
 * function for their meaning.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2609
/*
 * Scan every slot/function on the given bus and add a child device
 * for each function that responds.  dinfo_size lets subclassed buses
 * allocate a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand for a config read of the slot/function being scanned. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices: probe all eight functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2642
/*
 * Attach a discovered function to the bus: create the newbus child,
 * link it to its pci_devinfo, and initialize its resource list.  The
 * config space is saved then immediately restored so the saved copy
 * reflects the device's current state before resources are added.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2654
/* Generic PCI bus probe; always matches but lets subclasses win. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2664
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	/* Enumerate children, then attach drivers to them. */
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2684
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	/* Snapshot config space while the devices are still in D0. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default D3 target. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2733
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2769
2770static void
2771pci_load_vendor_data(void)
2772{
2773	caddr_t vendordata, info;
2774
2775	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2776		info = preload_search_info(vendordata, MODINFO_ADDR);
2777		pci_vendordata = *(char **)info;
2778		info = preload_search_info(vendordata, MODINFO_SIZE);
2779		pci_vendordata_size = *(size_t *)info;
2780		/* terminate the database */
2781		pci_vendordata[pci_vendordata_size] = '\n';
2782	}
2783}
2784
/*
 * Bus callback invoked when a new PCI driver is registered: give the
 * driver a chance to identify devices, then re-probe every child
 * that has no driver attached yet.  Config space is restored before
 * probing and re-saved (powering the device down) if no driver takes it.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices with no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2815
/*
 * Bus method to hook up an interrupt handler for a child.  After the
 * generic setup succeeds, do the PCI-specific bookkeeping: rid 0 is
 * a legacy INTx interrupt (re-enable INTx); any other rid is an MSI
 * or MSI-X message, which must be mapped through the parent bridge
 * and programmed into the device, with INTx disabled.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map and program MSI on first handler only. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2905
2906int
2907pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2908    void *cookie)
2909{
2910	struct msix_table_entry *mte;
2911	struct resource_list_entry *rle;
2912	struct pci_devinfo *dinfo;
2913	int error, rid;
2914
2915	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2916		return (EINVAL);
2917
2918	/* If this isn't a direct child, just bail out */
2919	if (device_get_parent(child) != dev)
2920		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2921
2922	rid = rman_get_rid(irq);
2923	if (rid == 0) {
2924		/* Mask INTx */
2925		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2926	} else {
2927		/*
2928		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2929		 * decrement the appropriate handlers count and mask the
2930		 * MSI-X message, or disable MSI messages if the count
2931		 * drops to 0.
2932		 */
2933		dinfo = device_get_ivars(child);
2934		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2935		if (rle->res != irq)
2936			return (EINVAL);
2937		if (dinfo->cfg.msi.msi_alloc > 0) {
2938			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2939			    ("MSI-X index too high"));
2940			if (dinfo->cfg.msi.msi_handlers == 0)
2941				return (EINVAL);
2942			dinfo->cfg.msi.msi_handlers--;
2943			if (dinfo->cfg.msi.msi_handlers == 0)
2944				pci_disable_msi(child);
2945		} else {
2946			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2947			    ("No MSI or MSI-X interrupts allocated"));
2948			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2949			    ("MSI-X index too high"));
2950			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2951			if (mte->mte_handlers == 0)
2952				return (EINVAL);
2953			mte->mte_handlers--;
2954			if (mte->mte_handlers == 0)
2955				pci_mask_msix(child, rid - 1);
2956		}
2957	}
2958	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2959	if (rid > 0)
2960		KASSERT(error == 0,
2961		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2962	return (error);
2963}
2964
/*
 * Bus method to print the one-line attach announcement for a child:
 * its reserved port/memory/IRQ resources, flags, and slot.function
 * location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
2990
/*
 * Class/subclass -> description table used by pci_probe_nomatch() when
 * no driver attaches and the device is not listed in the loaded vendor
 * database.  An entry with subclass -1 supplies the generic description
 * for the whole base class; entries with a specific PCIS_* value refine
 * it.  The table is terminated by a NULL desc sentinel.
 */
static struct
{
	int	class;		/* PCI base class code (PCIC_*) */
	int	subclass;	/* PCI subclass code (PCIS_*), or -1 for any */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3082
3083void
3084pci_probe_nomatch(device_t dev, device_t child)
3085{
3086	int	i;
3087	char	*cp, *scp, *device;
3088
3089	/*
3090	 * Look for a listing for this device in a loaded device database.
3091	 */
3092	if ((device = pci_describe_device(child)) != NULL) {
3093		device_printf(dev, "<%s>", device);
3094		free(device, M_DEVBUF);
3095	} else {
3096		/*
3097		 * Scan the class/subclass descriptions for a general
3098		 * description.
3099		 */
3100		cp = "unknown";
3101		scp = NULL;
3102		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3103			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3104				if (pci_nomatch_tab[i].subclass == -1) {
3105					cp = pci_nomatch_tab[i].desc;
3106				} else if (pci_nomatch_tab[i].subclass ==
3107				    pci_get_subclass(child)) {
3108					scp = pci_nomatch_tab[i].desc;
3109				}
3110			}
3111		}
3112		device_printf(dev, "<%s%s%s>",
3113		    cp ? cp : "",
3114		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3115		    scp ? scp : "");
3116	}
3117	printf(" at device %d.%d (no driver attached)\n",
3118	    pci_get_slot(child), pci_get_function(child));
3119	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3120	return;
3121}
3122
3123/*
3124 * Parse the PCI device database, if loaded, and return a pointer to a
3125 * description of the device.
3126 *
3127 * The database is flat text formatted as follows:
3128 *
3129 * Any line not in a valid format is ignored.
3130 * Lines are terminated with newline '\n' characters.
3131 *
3132 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3133 * the vendor name.
3134 *
3135 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3136 * - devices cannot be listed without a corresponding VENDOR line.
3137 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3138 * another TAB, then the device name.
3139 */
3140
3141/*
3142 * Assuming (ptr) points to the beginning of a line in the database,
3143 * return the vendor or device and description of the next entry.
3144 * The value of (vendor) or (device) inappropriate for the entry type
3145 * is set to -1.  Returns nonzero at the end of the database.
3146 *
3147 * Note that this is slightly unrobust in the face of corrupt data;
3148 * we attempt to safeguard against this by spamming the end of the
3149 * database with a newline when we initialise.
3150 */
3151static int
3152pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3153{
3154	char	*cp = *ptr;
3155	int	left;
3156
3157	*device = -1;
3158	*vendor = -1;
3159	**desc = '\0';
3160	for (;;) {
3161		left = pci_vendordata_size - (cp - pci_vendordata);
3162		if (left <= 0) {
3163			*ptr = cp;
3164			return(1);
3165		}
3166
3167		/* vendor entry? */
3168		if (*cp != '\t' &&
3169		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3170			break;
3171		/* device entry? */
3172		if (*cp == '\t' &&
3173		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3174			break;
3175
3176		/* skip to next line */
3177		while (*cp != '\n' && left > 0) {
3178			cp++;
3179			left--;
3180		}
3181		if (*cp == '\n') {
3182			cp++;
3183			left--;
3184		}
3185	}
3186	/* skip to next line */
3187	while (*cp != '\n' && left > 0) {
3188		cp++;
3189		left--;
3190	}
3191	if (*cp == '\n' && left > 0)
3192		cp++;
3193	*ptr = cp;
3194	return(0);
3195}
3196
3197static char *
3198pci_describe_device(device_t dev)
3199{
3200	int	vendor, device;
3201	char	*desc, *vp, *dp, *line;
3202
3203	desc = vp = dp = NULL;
3204
3205	/*
3206	 * If we have no vendor data, we can't do anything.
3207	 */
3208	if (pci_vendordata == NULL)
3209		goto out;
3210
3211	/*
3212	 * Scan the vendor data looking for this device
3213	 */
3214	line = pci_vendordata;
3215	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3216		goto out;
3217	for (;;) {
3218		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3219			goto out;
3220		if (vendor == pci_get_vendor(dev))
3221			break;
3222	}
3223	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3224		goto out;
3225	for (;;) {
3226		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3227			*dp = 0;
3228			break;
3229		}
3230		if (vendor != -1) {
3231			*dp = 0;
3232			break;
3233		}
3234		if (device == pci_get_device(dev))
3235			break;
3236	}
3237	if (dp[0] == '\0')
3238		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3239	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3240	    NULL)
3241		sprintf(desc, "%s, %s", vp, dp);
3242 out:
3243	if (vp != NULL)
3244		free(vp, M_DEVBUF);
3245	if (dp != NULL)
3246		free(dp, M_DEVBUF);
3247	return(desc);
3248}
3249
3250int
3251pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3252{
3253	struct pci_devinfo *dinfo;
3254	pcicfgregs *cfg;
3255
3256	dinfo = device_get_ivars(child);
3257	cfg = &dinfo->cfg;
3258
3259	switch (which) {
3260	case PCI_IVAR_ETHADDR:
3261		/*
3262		 * The generic accessor doesn't deal with failure, so
3263		 * we set the return value, then return an error.
3264		 */
3265		*((uint8_t **) result) = NULL;
3266		return (EINVAL);
3267	case PCI_IVAR_SUBVENDOR:
3268		*result = cfg->subvendor;
3269		break;
3270	case PCI_IVAR_SUBDEVICE:
3271		*result = cfg->subdevice;
3272		break;
3273	case PCI_IVAR_VENDOR:
3274		*result = cfg->vendor;
3275		break;
3276	case PCI_IVAR_DEVICE:
3277		*result = cfg->device;
3278		break;
3279	case PCI_IVAR_DEVID:
3280		*result = (cfg->device << 16) | cfg->vendor;
3281		break;
3282	case PCI_IVAR_CLASS:
3283		*result = cfg->baseclass;
3284		break;
3285	case PCI_IVAR_SUBCLASS:
3286		*result = cfg->subclass;
3287		break;
3288	case PCI_IVAR_PROGIF:
3289		*result = cfg->progif;
3290		break;
3291	case PCI_IVAR_REVID:
3292		*result = cfg->revid;
3293		break;
3294	case PCI_IVAR_INTPIN:
3295		*result = cfg->intpin;
3296		break;
3297	case PCI_IVAR_IRQ:
3298		*result = cfg->intline;
3299		break;
3300	case PCI_IVAR_DOMAIN:
3301		*result = cfg->domain;
3302		break;
3303	case PCI_IVAR_BUS:
3304		*result = cfg->bus;
3305		break;
3306	case PCI_IVAR_SLOT:
3307		*result = cfg->slot;
3308		break;
3309	case PCI_IVAR_FUNCTION:
3310		*result = cfg->func;
3311		break;
3312	case PCI_IVAR_CMDREG:
3313		*result = cfg->cmdreg;
3314		break;
3315	case PCI_IVAR_CACHELNSZ:
3316		*result = cfg->cachelnsz;
3317		break;
3318	case PCI_IVAR_MINGNT:
3319		*result = cfg->mingnt;
3320		break;
3321	case PCI_IVAR_MAXLAT:
3322		*result = cfg->maxlat;
3323		break;
3324	case PCI_IVAR_LATTIMER:
3325		*result = cfg->lattimer;
3326		break;
3327	default:
3328		return (ENOENT);
3329	}
3330	return (0);
3331}
3332
3333int
3334pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3335{
3336	struct pci_devinfo *dinfo;
3337
3338	dinfo = device_get_ivars(child);
3339
3340	switch (which) {
3341	case PCI_IVAR_INTPIN:
3342		dinfo->cfg.intpin = value;
3343		return (0);
3344	case PCI_IVAR_ETHADDR:
3345	case PCI_IVAR_SUBVENDOR:
3346	case PCI_IVAR_SUBDEVICE:
3347	case PCI_IVAR_VENDOR:
3348	case PCI_IVAR_DEVICE:
3349	case PCI_IVAR_DEVID:
3350	case PCI_IVAR_CLASS:
3351	case PCI_IVAR_SUBCLASS:
3352	case PCI_IVAR_PROGIF:
3353	case PCI_IVAR_REVID:
3354	case PCI_IVAR_IRQ:
3355	case PCI_IVAR_DOMAIN:
3356	case PCI_IVAR_BUS:
3357	case PCI_IVAR_SLOT:
3358	case PCI_IVAR_FUNCTION:
3359		return (EINVAL);	/* disallow for now */
3360
3361	default:
3362		return (ENOENT);
3363	}
3364}
3365
3366
3367#include "opt_ddb.h"
3368#ifdef DDB
3369#include <ddb/ddb.h>
3370#include <sys/cons.h>
3371
3372/*
3373 * List resources based on pci map registers, used for within ddb
3374 */
3375
/*
 * DDB "show pciregs" command: dump one line per known PCI device with
 * its selector (domain:bus:dev:func), class, card/chip IDs, revision
 * and header type, walking the global pci_devq list.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* none_count numbers devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices;
	 * stop early if the ddb pager was quit.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3415#endif /* DDB */
3416
/*
 * Lazily back a child's BAR with a real resource the first time it is
 * requested.  The BAR is sized by the standard probe sequence (disable
 * decoding, write all-ones, read back the size mask, restore), then a
 * resource of at least that size and alignment is allocated from the
 * parent bus and its start address is programmed into the BAR.  The
 * resource is recorded in the child's resource list, owned by the bus
 * (dev).  Returns NULL for unimplemented BARs, type mismatches, or
 * allocation failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	uint16_t cmd;
	int maprange, mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	/* Read the current BAR value; 64-bit BARs span two registers. */
	map = pci_read_config(child, *rid, 4);
	maprange = pci_maprange(map);
	if (maprange == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(child, PCIR_COMMAND, 2);
	pci_write_config(child, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/* Determine the BAR's length. */
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (maprange == 64) {
		pci_write_config(child, *rid + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) <<
		    32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);
	if (maprange == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	pci_write_config(child, PCIR_COMMAND, cmd, 2);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* Reject requests whose type contradicts the BAR's memory/IO bit. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs decode naturally aligned ranges; raise the alignment too. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* The bus owns the resource until the child activates it. */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the allocated base address into the BAR. */
	map = rman_get_start(res);
	pci_write_config(child, *rid, map, 4);
	if (maprange == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
out:;
	return (res);
}
3539
3540
/*
 * Bus method: allocate a resource for a child device.  Requests from
 * grandchildren pass straight up to our parent.  For direct children we
 * lazily route a legacy interrupt for rid-0 IRQ requests and lazily
 * size/allocate BARs via pci_alloc_map(), transferring bus-owned BAR
 * resources to the child on demand.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			/* pci_alloc_map() added an entry; look it up. */
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic resource-list allocation path. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3612
3613int
3614pci_release_resource(device_t dev, device_t child, int type, int rid,
3615    struct resource *r)
3616{
3617	int error;
3618
3619	if (device_get_parent(child) != dev)
3620		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3621		    type, rid, r));
3622
3623	/*
3624	 * For BARs we don't actually want to release the resource.
3625	 * Instead, we deactivate the resource if needed and then give
3626	 * ownership of the BAR back to the bus.
3627	 */
3628	switch (type) {
3629	case SYS_RES_IOPORT:
3630	case SYS_RES_MEMORY:
3631		if (rman_get_device(r) != child)
3632			return (EINVAL);
3633		if (rman_get_flags(r) & RF_ACTIVE) {
3634			error = bus_deactivate_resource(child, type, rid, r);
3635			if (error)
3636				return (error);
3637		}
3638		rman_set_device(r, dev);
3639		return (0);
3640	}
3641	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3642}
3643
3644int
3645pci_activate_resource(device_t dev, device_t child, int type, int rid,
3646    struct resource *r)
3647{
3648	int error;
3649
3650	error = bus_generic_activate_resource(dev, child, type, rid, r);
3651	if (error)
3652		return (error);
3653
3654	/* Enable decoding in the command register when activating BARs. */
3655	if (device_get_parent(child) == dev) {
3656		switch (type) {
3657		case SYS_RES_IOPORT:
3658		case SYS_RES_MEMORY:
3659			error = PCI_ENABLE_IO(dev, child, type);
3660			break;
3661		}
3662	}
3663	return (error);
3664}
3665
/*
 * Bus method: remove a resource entry from a direct child's resource
 * list, releasing the underlying bus-owned resource.  A BAR's register
 * is cleared first so the device stops decoding the range.  Refuses to
 * delete a resource the child still owns or has active.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		/* Only bus-owned, inactive resources may be deleted. */
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* XXX: 64-bit BARs? */
			pci_write_config(child, rid, 0, 4);
			break;
		}
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3708
3709struct resource_list *
3710pci_get_resource_list (device_t dev, device_t child)
3711{
3712	struct pci_devinfo *dinfo = device_get_ivars(child);
3713
3714	return (&dinfo->resources);
3715}
3716
3717uint32_t
3718pci_read_config_method(device_t dev, device_t child, int reg, int width)
3719{
3720	struct pci_devinfo *dinfo = device_get_ivars(child);
3721	pcicfgregs *cfg = &dinfo->cfg;
3722
3723	return (PCIB_READ_CONFIG(device_get_parent(dev),
3724	    cfg->bus, cfg->slot, cfg->func, reg, width));
3725}
3726
3727void
3728pci_write_config_method(device_t dev, device_t child, int reg,
3729    uint32_t val, int width)
3730{
3731	struct pci_devinfo *dinfo = device_get_ivars(child);
3732	pcicfgregs *cfg = &dinfo->cfg;
3733
3734	PCIB_WRITE_CONFIG(device_get_parent(dev),
3735	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3736}
3737
3738int
3739pci_child_location_str_method(device_t dev, device_t child, char *buf,
3740    size_t buflen)
3741{
3742
3743	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3744	    pci_get_function(child));
3745	return (0);
3746}
3747
3748int
3749pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3750    size_t buflen)
3751{
3752	struct pci_devinfo *dinfo;
3753	pcicfgregs *cfg;
3754
3755	dinfo = device_get_ivars(child);
3756	cfg = &dinfo->cfg;
3757	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3758	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3759	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3760	    cfg->progif);
3761	return (0);
3762}
3763
3764int
3765pci_assign_interrupt_method(device_t dev, device_t child)
3766{
3767	struct pci_devinfo *dinfo = device_get_ivars(child);
3768	pcicfgregs *cfg = &dinfo->cfg;
3769
3770	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3771	    cfg->intpin));
3772}
3773
3774static int
3775pci_modevent(module_t mod, int what, void *arg)
3776{
3777	static struct cdev *pci_cdev;
3778
3779	switch (what) {
3780	case MOD_LOAD:
3781		STAILQ_INIT(&pci_devq);
3782		pci_generation = 0;
3783		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3784		    "pci");
3785		pci_load_vendor_data();
3786		break;
3787
3788	case MOD_UNLOAD:
3789		destroy_dev(pci_cdev);
3790		break;
3791	}
3792
3793	return (0);
3794}
3795
/*
 * Restore a type-0 device's config registers (BARs, command register,
 * interrupt routing, timing registers) from the copies cached in its
 * pci_devinfo, e.g. after a power-state transition reset them.  Also
 * re-arms MSI/MSI-X state when present.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the BARs first, then the rest of the writable header. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3841
/*
 * Snapshot a type-0 device's writable config registers into its
 * pci_devinfo so pci_cfg_restore() can later put them back.  When
 * setstate is non-zero, optionally power the (driverless) device down
 * to D3 according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each policy level deliberately falls through to the next. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3925