/* pci.c revision 214065 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 214065 2010-10-19 17:15:22Z jkim $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72static pci_addr_t	pci_mapbase(uint64_t mapreg);
73static const char	*pci_maptype(uint64_t mapreg);
74static int		pci_mapsize(uint64_t testval);
75static int		pci_maprange(uint64_t mapreg);
76static pci_addr_t	pci_rombase(uint64_t mapreg);
77static int		pci_romsize(uint64_t testval);
78static void		pci_fixancient(pcicfgregs *cfg);
79static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80
81static int		pci_porten(device_t dev);
82static int		pci_memen(device_t dev);
83static void		pci_assign_interrupt(device_t bus, device_t dev,
84			    int force_route);
85static int		pci_add_map(device_t bus, device_t dev, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg, uint32_t *data);
99#if 0
100static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static void		pci_disable_msi(device_t dev);
105static void		pci_enable_msi(device_t dev, uint64_t address,
106			    uint16_t data);
107static void		pci_enable_msix(device_t dev, u_int index,
108			    uint64_t address, uint32_t data);
109static void		pci_mask_msix(device_t dev, u_int index);
110static void		pci_unmask_msix(device_t dev, u_int index);
111static int		pci_msi_blacklisted(void);
112static void		pci_resume_msi(device_t dev);
113static void		pci_resume_msix(device_t dev);
114static int		pci_remap_intr_method(device_t bus, device_t dev,
115			    u_int irq);
116
/*
 * Kernel object method table for the PCI bus driver.  Entries fall into
 * three groups: generic device methods, the bus interface offered to
 * child devices, and the PCI-specific interface (pci_if).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
169
/* Declare the pci driver class and attach it below pcib (PCI bridges). */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description data loaded by pci_load_vendor_data(), if any. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
178
179
/*
 * One entry in the per-device workaround table (pci_quirks[] below),
 * keyed on the combined vendor/device ID word.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific argument (e.g. map register offset) */
	int	arg2;	/* quirk-specific argument (unused by current quirks) */
};
188
/* Known-broken devices and their workarounds; scanned linearly by devid. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* table terminator */
};
223
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;		/* global list of all enumerated PCI functions */
uint32_t pci_generation;		/* bumped whenever pci_devq is modified */
uint32_t pci_numdevs = 0;		/* number of entries on pci_devq */
static int pcie_chipset, pcix_chipset;	/* set when a PCIe/PCI-X capability is seen */
233
/* sysctl vars */
/* Loader tunables and runtime knobs under hw.pci. */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only for x86, where a BIOS may own the HC. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
285
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper over pci_find_dbsf(); returns NULL if absent. */
	return (pci_find_dbsf(0, bus, slot, func));
}
294
295/* Find a device_t by domain/bus/slot/function */
296
297device_t
298pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
299{
300	struct pci_devinfo *dinfo;
301
302	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
303		if ((dinfo->cfg.domain == domain) &&
304		    (dinfo->cfg.bus == bus) &&
305		    (dinfo->cfg.slot == slot) &&
306		    (dinfo->cfg.func == func)) {
307			return (dinfo->cfg.dev);
308		}
309	}
310
311	return (NULL);
312}
313
314/* Find a device_t by vendor/device ID */
315
316device_t
317pci_find_device(uint16_t vendor, uint16_t device)
318{
319	struct pci_devinfo *dinfo;
320
321	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322		if ((dinfo->cfg.vendor == vendor) &&
323		    (dinfo->cfg.device == device)) {
324			return (dinfo->cfg.dev);
325		}
326	}
327
328	return (NULL);
329}
330
331static int
332pci_printf(pcicfgregs *cfg, const char *fmt, ...)
333{
334	va_list ap;
335	int retval;
336
337	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
338	    cfg->func);
339	va_start(ap, fmt);
340	retval += vprintf(fmt, ap);
341	va_end(ap);
342	return (retval);
343}
344
345/* return base address of memory or port map */
346
347static pci_addr_t
348pci_mapbase(uint64_t mapreg)
349{
350
351	if (PCI_BAR_MEM(mapreg))
352		return (mapreg & PCIM_BAR_MEM_BASE);
353	else
354		return (mapreg & PCIM_BAR_IO_BASE);
355}
356
357/* return map type of memory or port map */
358
359static const char *
360pci_maptype(uint64_t mapreg)
361{
362
363	if (PCI_BAR_IO(mapreg))
364		return ("I/O Port");
365	if (mapreg & PCIM_BAR_MEM_PREFETCH)
366		return ("Prefetchable Memory");
367	return ("Memory");
368}
369
370/* return log2 of map size decoded for memory or port map */
371
372static int
373pci_mapsize(uint64_t testval)
374{
375	int ln2size;
376
377	testval = pci_mapbase(testval);
378	ln2size = 0;
379	if (testval != 0) {
380		while ((testval & 1) == 0)
381		{
382			ln2size++;
383			testval >>= 1;
384		}
385	}
386	return (ln2size);
387}
388
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Strip everything but the address bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
397
398/* return log2 of map size decided for device ROM */
399
400static int
401pci_romsize(uint64_t testval)
402{
403	int ln2size;
404
405	testval = pci_rombase(testval);
406	ln2size = 0;
407	if (testval != 0) {
408		while ((testval & 1) == 0)
409		{
410			ln2size++;
411			testval >>= 1;
412		}
413	}
414	return (ln2size);
415}
416
417/* return log2 of address range supported by map register */
418
419static int
420pci_maprange(uint64_t mapreg)
421{
422	int ln2range = 0;
423
424	if (PCI_BAR_IO(mapreg))
425		ln2range = 32;
426	else
427		switch (mapreg & PCIM_BAR_MEM_TYPE) {
428		case PCIM_BAR_MEM_32:
429			ln2range = 32;
430			break;
431		case PCIM_BAR_MEM_1MB:
432			ln2range = 20;
433			break;
434		case PCIM_BAR_MEM_64:
435			ln2range = 64;
436			break;
437		}
438	return (ln2range);
439}
440
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type-0 (normal) headers are candidates for fixing. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
453
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: plain device; read subsystem IDs, 6 BARs max. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge; subsystem IDs come via capability. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge; subsystem IDs at type-2 offsets. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
477
/* read configuration header into pcicfgregs structure */
/*
 * Probe one domain/bus/slot/function, and if a device responds, allocate
 * a pci_devinfo of the caller-supplied size, fill in its config snapshot
 * and pciconf ioctl mirror, and link it onto the global device list.
 * Returns NULL when no device answers (all-ones read) or on alloc failure.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means nothing is at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record and then strip the multi-function bit. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the snapshot into the pciconf(8)-visible record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
552
/*
 * Walk the device's capability list and cache what we care about
 * (power management, HyperTransport MSI mapping, MSI, MSI-X, VPD,
 * bridge subvendor IDs) into cfg.  Also notes chipset-wide PCI-X /
 * PCI-express presence in the pcix_chipset / pcie_chipset globals.
 *
 * NOTE: the REG/WREG macros defined here are deliberately NOT #undef'd;
 * they are reused by the VPD access routines below and only #undef'd at
 * the end of pci_read_vpd().
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Supported message count is a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Remember where the capability lives; read lazily. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
688
/*
 * PCI Vital Product Data
 */

/* Maximum number of 1us polls to wait for a VPD transfer to complete. */
#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one 4-byte-aligned 32-bit word of VPD through the VPD capability
 * registers (uses the REG/WREG macros from pci_read_extcap()).  Returns 0
 * on success or ENXIO if the device never signals completion.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Writing the address with bit 15 clear starts a read cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Poll until the device sets bit 15 to flag data availability. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
713
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD.  Mirror of
 * pci_read_vpd_reg(): setting bit 15 of the address starts a write
 * cycle, and the device clears it when the write has completed.
 * Currently compiled out — no in-tree caller writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
735
/* Cursor state for the byte-at-a-time VPD reader, vpd_nextbyte(). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit VPD word */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
744
745static int
746vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
747{
748	uint32_t reg;
749	uint8_t byte;
750
751	if (vrs->bytesinval == 0) {
752		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
753			return (ENXIO);
754		vrs->val = le32toh(reg);
755		vrs->off += 4;
756		byte = vrs->val & 0xff;
757		vrs->bytesinval = 3;
758	} else {
759		vrs->val = vrs->val >> 8;
760		byte = vrs->val & 0xff;
761		vrs->bytesinval--;
762	}
763
764	vrs->cksum += byte;
765	*data = byte;
766	return (0);
767}
768
/*
 * Parse the device's Vital Product Data and cache the identifier string
 * plus the read-only (VPD-R) and read/write (VPD-W) keyword arrays in
 * cfg->vpd.  Implemented as a state machine fed by vpd_nextbyte():
 *
 *   state 0   - at a resource item header (small or large format)
 *   state 1   - copying the identifier string
 *   state 2/3 - VPD-R keyword header / keyword value bytes
 *   state 4   - skipping bytes of an unhandled item
 *   state 5/6 - VPD-W keyword header / keyword value bytes
 *   state -1  - normal termination; state <= -2 - read (I/O) error
 *
 * On checksum failure the VPD-R array is discarded; on I/O error all
 * cached VPD data is discarded.  Either way vpd_cached is set so we
 * never retry a broken device.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length must fit inside the VPD window. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the keyword array as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* The "RV" keyword byte must balance the checksum. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip bytes of an item we do not interpret. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we never reparse this device. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1048
1049int
1050pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1051{
1052	struct pci_devinfo *dinfo = device_get_ivars(child);
1053	pcicfgregs *cfg = &dinfo->cfg;
1054
1055	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1056		pci_read_vpd(device_get_parent(dev), cfg);
1057
1058	*identptr = cfg->vpd.vpd_ident;
1059
1060	if (*identptr == NULL)
1061		return (ENXIO);
1062
1063	return (0);
1064}
1065
1066int
1067pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1068	const char **vptr)
1069{
1070	struct pci_devinfo *dinfo = device_get_ivars(child);
1071	pcicfgregs *cfg = &dinfo->cfg;
1072	int i;
1073
1074	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1075		pci_read_vpd(device_get_parent(dev), cfg);
1076
1077	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1078		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1079		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1080			*vptr = cfg->vpd.vpd_ros[i].value;
1081		}
1082
1083	if (i != cfg->vpd.vpd_rocnt)
1084		return (0);
1085
1086	*vptr = NULL;
1087	return (ENXIO);
1088}
1089
1090/*
1091 * Find the requested extended capability and return the offset in
1092 * configuration space via the pointer provided. The function returns
1093 * 0 on success and error code otherwise.
1094 */
1095int
1096pci_find_extcap_method(device_t dev, device_t child, int capability,
1097    int *capreg)
1098{
1099	struct pci_devinfo *dinfo = device_get_ivars(child);
1100	pcicfgregs *cfg = &dinfo->cfg;
1101	u_int32_t status;
1102	u_int8_t ptr;
1103
1104	/*
1105	 * Check the CAP_LIST bit of the PCI status register first.
1106	 */
1107	status = pci_read_config(child, PCIR_STATUS, 2);
1108	if (!(status & PCIM_STATUS_CAPPRESENT))
1109		return (ENXIO);
1110
1111	/*
1112	 * Determine the start pointer of the capabilities list.
1113	 */
1114	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1115	case PCIM_HDRTYPE_NORMAL:
1116	case PCIM_HDRTYPE_BRIDGE:
1117		ptr = PCIR_CAP_PTR;
1118		break;
1119	case PCIM_HDRTYPE_CARDBUS:
1120		ptr = PCIR_CAP_PTR_2;
1121		break;
1122	default:
1123		/* XXX: panic? */
1124		return (ENXIO);		/* no extended capabilities support */
1125	}
1126	ptr = pci_read_config(child, ptr, 1);
1127
1128	/*
1129	 * Traverse the capabilities list.
1130	 */
1131	while (ptr != 0) {
1132		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1133			if (capreg != NULL)
1134				*capreg = ptr;
1135			return (0);
1136		}
1137		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1138	}
1139
1140	return (ENOENT);
1141}
1142
1143/*
1144 * Support for MSI-X message interrupts.
1145 */
1146void
1147pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1148{
1149	struct pci_devinfo *dinfo = device_get_ivars(dev);
1150	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1151	uint32_t offset;
1152
1153	KASSERT(msix->msix_table_len > index, ("bogus index"));
1154	offset = msix->msix_table_offset + index * 16;
1155	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1156	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1157	bus_write_4(msix->msix_table_res, offset + 8, data);
1158
1159	/* Enable MSI -> HT mapping. */
1160	pci_ht_map_msi(dev, address);
1161}
1162
1163void
1164pci_mask_msix(device_t dev, u_int index)
1165{
1166	struct pci_devinfo *dinfo = device_get_ivars(dev);
1167	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1168	uint32_t offset, val;
1169
1170	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1171	offset = msix->msix_table_offset + index * 16 + 12;
1172	val = bus_read_4(msix->msix_table_res, offset);
1173	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1174		val |= PCIM_MSIX_VCTRL_MASK;
1175		bus_write_4(msix->msix_table_res, offset, val);
1176	}
1177}
1178
1179void
1180pci_unmask_msix(device_t dev, u_int index)
1181{
1182	struct pci_devinfo *dinfo = device_get_ivars(dev);
1183	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1184	uint32_t offset, val;
1185
1186	KASSERT(msix->msix_table_len > index, ("bogus index"));
1187	offset = msix->msix_table_offset + index * 16 + 12;
1188	val = bus_read_4(msix->msix_table_res, offset);
1189	if (val & PCIM_MSIX_VCTRL_MASK) {
1190		val &= ~PCIM_MSIX_VCTRL_MASK;
1191		bus_write_4(msix->msix_table_res, offset, val);
1192	}
1193}
1194
1195int
1196pci_pending_msix(device_t dev, u_int index)
1197{
1198	struct pci_devinfo *dinfo = device_get_ivars(dev);
1199	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1200	uint32_t offset, bit;
1201
1202	KASSERT(msix->msix_table_len > index, ("bogus index"));
1203	offset = msix->msix_pba_offset + (index / 32) * 4;
1204	bit = 1 << index % 32;
1205	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1206}
1207
1208/*
1209 * Restore MSI-X registers and table during resume.  If MSI-X is
1210 * enabled then walk the virtual table to restore the actual MSI-X
1211 * table.
1212 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved control word (re-enables MSI-X if it was on). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1240
1241/*
1242 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1243 * returned in *count.  After this function returns, each message will be
1244 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1245 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still names that BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Ask the parent bridge for up to min(request, supported) IRQs. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* We may have been granted fewer vectors than requested. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector holds a 1-based index into msix_vectors. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1377
1378/*
1379 * By default, pci_alloc_msix() will assign the allocated IRQ
1380 * resources consecutively to the first N messages in the MSI-X table.
1381 * However, device drivers may want to use different layouts if they
1382 * either receive fewer messages than they asked for, or they wish to
1383 * populate the MSI-X table sparsely.  This method allows the driver
1384 * to specify what layout it wants.  It must be called after a
1385 * successful pci_alloc_msix() but before any of the associated
1386 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1387 *
1388 * The 'vectors' array contains 'count' message vectors.  The array
1389 * maps directly to the MSI-X table in that index 0 in the array
1390 * specifies the vector for the first message in the MSI-X table, etc.
1391 * The vector value in each array index can either be 0 to indicate
1392 * that no vector should be assigned to a message slot, or it can be a
1393 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1395 * vector (IRQ) to be used for the corresponding message.
1396 *
1397 * On successful return, each message with a non-zero vector will have
1398 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1399 * 1.  Additionally, if any of the IRQs allocated via the previous
1400 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1401 * will be freed back to the system automatically.
1402 *
1403 * For example, suppose a driver has a MSI-X table with 6 messages and
1404 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1405 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1406 * C.  After the call to pci_alloc_msix(), the device will be setup to
1407 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1409 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1410 * be freed back to the system.  This device will also have valid
1411 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1412 *
1413 * In any case, the SYS_RES_IRQ rid X will always map to the message
1414 * at MSI-X table index X - 1 and will only be valid if a vector is
1415 * assigned to that table entry.
1416 */
1417int
1418pci_remap_msix_method(device_t dev, device_t child, int count,
1419    const u_int *vectors)
1420{
1421	struct pci_devinfo *dinfo = device_get_ivars(child);
1422	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1423	struct resource_list_entry *rle;
1424	int i, irq, j, *used;
1425
1426	/*
1427	 * Have to have at least one message in the table but the
1428	 * table can't be bigger than the actual MSI-X table in the
1429	 * device.
1430	 */
1431	if (count == 0 || count > msix->msix_msgnum)
1432		return (EINVAL);
1433
1434	/* Sanity check the vectors. */
1435	for (i = 0; i < count; i++)
1436		if (vectors[i] > msix->msix_alloc)
1437			return (EINVAL);
1438
1439	/*
1440	 * Make sure there aren't any holes in the vectors to be used.
1441	 * It's a big pain to support it, and it doesn't really make
1442	 * sense anyway.  Also, at least one vector must be used.
1443	 */
1444	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1445	    M_ZERO);
1446	for (i = 0; i < count; i++)
1447		if (vectors[i] != 0)
1448			used[vectors[i] - 1] = 1;
1449	for (i = 0; i < msix->msix_alloc - 1; i++)
1450		if (used[i] == 0 && used[i + 1] == 1) {
1451			free(used, M_DEVBUF);
1452			return (EINVAL);
1453		}
1454	if (used[0] != 1) {
1455		free(used, M_DEVBUF);
1456		return (EINVAL);
1457	}
1458
1459	/* Make sure none of the resources are allocated. */
1460	for (i = 0; i < msix->msix_table_len; i++) {
1461		if (msix->msix_table[i].mte_vector == 0)
1462			continue;
1463		if (msix->msix_table[i].mte_handlers > 0)
1464			return (EBUSY);
1465		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1466		KASSERT(rle != NULL, ("missing resource"));
1467		if (rle->res != NULL)
1468			return (EBUSY);
1469	}
1470
1471	/* Free the existing resource list entries. */
1472	for (i = 0; i < msix->msix_table_len; i++) {
1473		if (msix->msix_table[i].mte_vector == 0)
1474			continue;
1475		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1476	}
1477
1478	/*
1479	 * Build the new virtual table keeping track of which vectors are
1480	 * used.
1481	 */
1482	free(msix->msix_table, M_DEVBUF);
1483	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1484	    M_DEVBUF, M_WAITOK | M_ZERO);
1485	for (i = 0; i < count; i++)
1486		msix->msix_table[i].mte_vector = vectors[i];
1487	msix->msix_table_len = count;
1488
1489	/* Free any unused IRQs and resize the vectors array if necessary. */
1490	j = msix->msix_alloc - 1;
1491	if (used[j] == 0) {
1492		struct msix_vector *vec;
1493
1494		while (used[j] == 0) {
1495			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1496			    msix->msix_vectors[j].mv_irq);
1497			j--;
1498		}
1499		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1500		    M_WAITOK);
1501		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1502		    (j + 1));
1503		free(msix->msix_vectors, M_DEVBUF);
1504		msix->msix_vectors = vec;
1505		msix->msix_alloc = j + 1;
1506	}
1507	free(used, M_DEVBUF);
1508
1509	/* Map the IRQs onto the rids. */
1510	for (i = 0; i < count; i++) {
1511		if (vectors[i] == 0)
1512			continue;
1513		irq = msix->msix_vectors[vectors[i]].mv_irq;
1514		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1515		    irq, 1);
1516	}
1517
1518	if (bootverbose) {
1519		device_printf(child, "Remapped MSI-X IRQs as: ");
1520		for (i = 0; i < count; i++) {
1521			if (i != 0)
1522				printf(", ");
1523			if (vectors[i] == 0)
1524				printf("---");
1525			else
1526				printf("%d",
1527				    msix->msix_vectors[vectors[i]].mv_irq);
1528		}
1529		printf("\n");
1530	}
1531
1532	return (0);
1533}
1534
/*
 * Release all MSI-X state for a device: verify nothing is still in
 * use, disable MSI-X in the control register, free the virtual table
 * and vector array, and hand the IRQs back to the parent bridge.
 * Returns ENODEV if no MSI-X messages are allocated and EBUSY if any
 * message still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1581
1582/*
1583 * Return the max supported MSI-X messages this device supports.
1584 * Basically, assuming the MD code can alloc messages, this function
1585 * should return the maximum value that pci_alloc_msix() can return.
1586 * Thus, it is subject to the tunables, etc.
1587 */
1588int
1589pci_msix_count_method(device_t dev, device_t child)
1590{
1591	struct pci_devinfo *dinfo = device_get_ivars(child);
1592	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1593
1594	if (pci_do_msix && msix->msix_location != 0)
1595		return (msix->msix_msgnum);
1596	return (0);
1597}
1598
1599/*
1600 * HyperTransport MSI mapping control
1601 */
1602void
1603pci_ht_map_msi(device_t dev, uint64_t addr)
1604{
1605	struct pci_devinfo *dinfo = device_get_ivars(dev);
1606	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1607
1608	if (!ht->ht_msimap)
1609		return;
1610
1611	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1612	    ht->ht_msiaddr >> 20 == addr >> 20) {
1613		/* Enable MSI -> HT mapping. */
1614		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1615		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1616		    ht->ht_msictrl, 2);
1617	}
1618
1619	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1620		/* Disable MSI -> HT mapping. */
1621		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1622		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1623		    ht->ht_msictrl, 2);
1624	}
1625}
1626
1627int
1628pci_get_max_read_req(device_t dev)
1629{
1630	int cap;
1631	uint16_t val;
1632
1633	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1634		return (0);
1635	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1636	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1637	val >>= 12;
1638	return (1 << (val + 7));
1639}
1640
1641int
1642pci_set_max_read_req(device_t dev, int size)
1643{
1644	int cap;
1645	uint16_t val;
1646
1647	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1648		return (0);
1649	if (size < 128)
1650		size = 128;
1651	if (size > 4096)
1652		size = 4096;
1653	size = (1 << (fls(size) - 1));
1654	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1655	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1656	val |= (fls(size) - 8) << 12;
1657	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1658	return (size);
1659}
1660
1661/*
1662 * Support for MSI message signalled interrupts.
1663 */
1664void
1665pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1666{
1667	struct pci_devinfo *dinfo = device_get_ivars(dev);
1668	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1669
1670	/* Write data and address values. */
1671	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1672	    address & 0xffffffff, 4);
1673	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1674		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1675		    address >> 32, 4);
1676		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1677		    data, 2);
1678	} else
1679		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1680		    2);
1681
1682	/* Enable MSI in the control register. */
1683	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1684	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1685	    2);
1686
1687	/* Enable MSI -> HT mapping. */
1688	pci_ht_map_msi(dev, address);
1689}
1690
1691void
1692pci_disable_msi(device_t dev)
1693{
1694	struct pci_devinfo *dinfo = device_get_ivars(dev);
1695	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1696
1697	/* Disable MSI -> HT mapping. */
1698	pci_ht_map_msi(dev, 0);
1699
1700	/* Disable MSI in the control register. */
1701	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1702	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1703	    2);
1704}
1705
1706/*
1707 * Restore MSI registers during resume.  If MSI is enabled then
1708 * restore the data and address registers in addition to the control
1709 * register.
1710 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data pair. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capable: high address dword plus data. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1736
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram the device with the new values. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): this path returns ENOENT even after a
		 * successful MSI-X remap — confirm callers ignore the
		 * return value before relying on it.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1809
1810/*
1811 * Returns true if the specified device is blacklisted because MSI
1812 * doesn't work.
1813 */
1814int
1815pci_msi_device_blacklisted(device_t dev)
1816{
1817	struct pci_quirk *q;
1818
1819	if (!pci_honor_msi_blacklist)
1820		return (0);
1821
1822	for (q = &pci_quirks[0]; q->devid; q++) {
1823		if (q->devid == pci_get_devid(dev) &&
1824		    q->type == PCI_QUIRK_DISABLE_MSI)
1825			return (1);
1826	}
1827	return (0);
1828}
1829
1830/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1832 * we just check for blacklisted chipsets as represented by the
1833 * host-PCI bridge at device 0:0:0.  In the future, it may become
1834 * necessary to check other system attributes, such as the kenv values
1835 * that give the motherboard manufacturer and model number.
1836 */
1837static int
1838pci_msi_blacklisted(void)
1839{
1840	device_t dev;
1841
1842	if (!pci_honor_msi_blacklist)
1843		return (0);
1844
1845	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1846	if (!(pcie_chipset || pcix_chipset))
1847		return (1);
1848
1849	dev = pci_find_bsf(0, 0, 0);
1850	if (dev != NULL)
1851		return (pci_msi_device_blacklisted(dev));
1852	return (0);
1853}
1854
1855/*
1856 * Attempt to allocate *count MSI messages.  The actual number allocated is
1857 * returned in *count.  After this function returns, each message will be
1858 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1859 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Retry with progressively smaller counts until one succeeds. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* The MME field encodes log2 of the enabled message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1978
1979/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ numbers so we can hand them back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2027
2028/*
2029 * Return the max supported MSI messages this device supports.
2030 * Basically, assuming the MD code can alloc messages, this function
2031 * should return the maximum value that pci_alloc_msi() can return.
2032 * Thus, it is subject to the tunables, etc.
2033 */
2034int
2035pci_msi_count_method(device_t dev, device_t child)
2036{
2037	struct pci_devinfo *dinfo = device_get_ivars(child);
2038	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2039
2040	if (pci_do_msi && msi->msi_location != 0)
2041		return (msi->msi_msgnum);
2042	return (0);
2043}
2044
2045/* free pcicfgregs structure and all depending data structures */
2046
2047int
2048pci_freecfg(struct pci_devinfo *dinfo)
2049{
2050	struct devlist *devlist_head;
2051	int i;
2052
2053	devlist_head = &pci_devq;
2054
2055	if (dinfo->cfg.vpd.vpd_reg) {
2056		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2057		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2058			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2059		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2060		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2061			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2062		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2063	}
2064	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2065	free(dinfo, M_DEVBUF);
2066
2067	/* increment the generation count */
2068	pci_generation++;
2069
2070	/* we're losing one device */
2071	pci_numdevs--;
2072	return (0);
2073}
2074
2075/*
2076 * PCI power manangement
2077 */
2078int
2079pci_set_powerstate_method(device_t dev, device_t child, int state)
2080{
2081	struct pci_devinfo *dinfo = device_get_ivars(child);
2082	pcicfgregs *cfg = &dinfo->cfg;
2083	uint16_t status;
2084	int result, oldstate, highest, delay;
2085
2086	if (cfg->pp.pp_cap == 0)
2087		return (EOPNOTSUPP);
2088
2089	/*
2090	 * Optimize a no state change request away.  While it would be OK to
2091	 * write to the hardware in theory, some devices have shown odd
2092	 * behavior when going from D3 -> D3.
2093	 */
2094	oldstate = pci_get_powerstate(child);
2095	if (oldstate == state)
2096		return (0);
2097
2098	/*
2099	 * The PCI power management specification states that after a state
2100	 * transition between PCI power states, system software must
2101	 * guarantee a minimal delay before the function accesses the device.
2102	 * Compute the worst case delay that we need to guarantee before we
2103	 * access the device.  Many devices will be responsive much more
2104	 * quickly than this delay, but there are some that don't respond
2105	 * instantly to state changes.  Transitions to/from D3 state require
2106	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2107	 * is done below with DELAY rather than a sleeper function because
2108	 * this function can be called from contexts where we cannot sleep.
2109	 */
2110	highest = (oldstate > state) ? oldstate : state;
2111	if (highest == PCI_POWERSTATE_D3)
2112	    delay = 10000;
2113	else if (highest == PCI_POWERSTATE_D2)
2114	    delay = 200;
2115	else
2116	    delay = 0;
2117	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2118	    & ~PCIM_PSTAT_DMASK;
2119	result = 0;
2120	switch (state) {
2121	case PCI_POWERSTATE_D0:
2122		status |= PCIM_PSTAT_D0;
2123		break;
2124	case PCI_POWERSTATE_D1:
2125		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2126			return (EOPNOTSUPP);
2127		status |= PCIM_PSTAT_D1;
2128		break;
2129	case PCI_POWERSTATE_D2:
2130		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2131			return (EOPNOTSUPP);
2132		status |= PCIM_PSTAT_D2;
2133		break;
2134	case PCI_POWERSTATE_D3:
2135		status |= PCIM_PSTAT_D3;
2136		break;
2137	default:
2138		return (EINVAL);
2139	}
2140
2141	if (bootverbose)
2142		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2143		    state);
2144
2145	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2146	if (delay)
2147		DELAY(delay);
2148	return (0);
2149}
2150
2151int
2152pci_get_powerstate_method(device_t dev, device_t child)
2153{
2154	struct pci_devinfo *dinfo = device_get_ivars(child);
2155	pcicfgregs *cfg = &dinfo->cfg;
2156	uint16_t status;
2157	int result;
2158
2159	if (cfg->pp.pp_cap != 0) {
2160		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2161		switch (status & PCIM_PSTAT_DMASK) {
2162		case PCIM_PSTAT_D0:
2163			result = PCI_POWERSTATE_D0;
2164			break;
2165		case PCIM_PSTAT_D1:
2166			result = PCI_POWERSTATE_D1;
2167			break;
2168		case PCIM_PSTAT_D2:
2169			result = PCI_POWERSTATE_D2;
2170			break;
2171		case PCIM_PSTAT_D3:
2172			result = PCI_POWERSTATE_D3;
2173			break;
2174		default:
2175			result = PCI_POWERSTATE_UNKNOWN;
2176			break;
2177		}
2178	} else {
2179		/* No support, device is always at D0 */
2180		result = PCI_POWERSTATE_D0;
2181	}
2182	return (result);
2183}
2184
2185/*
2186 * Some convenience functions for PCI device drivers.
2187 */
2188
2189static __inline void
2190pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2191{
2192	uint16_t	command;
2193
2194	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2195	command |= bit;
2196	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2197}
2198
2199static __inline void
2200pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2201{
2202	uint16_t	command;
2203
2204	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2205	command &= ~bit;
2206	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2207}
2208
2209int
2210pci_enable_busmaster_method(device_t dev, device_t child)
2211{
2212	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2213	return (0);
2214}
2215
2216int
2217pci_disable_busmaster_method(device_t dev, device_t child)
2218{
2219	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2220	return (0);
2221}
2222
2223int
2224pci_enable_io_method(device_t dev, device_t child, int space)
2225{
2226	uint16_t bit;
2227
2228	switch(space) {
2229	case SYS_RES_IOPORT:
2230		bit = PCIM_CMD_PORTEN;
2231		break;
2232	case SYS_RES_MEMORY:
2233		bit = PCIM_CMD_MEMEN;
2234		break;
2235	default:
2236		return (EINVAL);
2237	}
2238	pci_set_command_bit(dev, child, bit);
2239	return (0);
2240}
2241
2242int
2243pci_disable_io_method(device_t dev, device_t child, int space)
2244{
2245	uint16_t bit;
2246
2247	switch(space) {
2248	case SYS_RES_IOPORT:
2249		bit = PCIM_CMD_PORTEN;
2250		break;
2251	case SYS_RES_MEMORY:
2252		bit = PCIM_CMD_MEMEN;
2253		break;
2254	default:
2255		return (EINVAL);
2256	}
2257	pci_clear_command_bit(dev, child, bit);
2258	return (0);
2259}
2260
2261/*
2262 * New style pci driver.  Parent device is either a pci-host-bridge or a
2263 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2264 */
2265
/*
 * When booting verbose, print the salient fields of a device's cached
 * config header, followed by its power management, MSI and MSI-X
 * capabilities if present.  Only the power state is re-read from
 * config space; everything else comes from the cached pcicfgregs.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* An intpin of 0 means the function uses no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability; table and PBA may share one BAR. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2322
2323static int
2324pci_porten(device_t dev)
2325{
2326	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2327}
2328
2329static int
2330pci_memen(device_t dev)
2331{
2332	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2333}
2334
/*
 * Read the current value of the BAR at 'reg' into *mapp and its
 * size-probe value (the value read back after writing all 1's) into
 * *testvalp.  64-bit memory BARs are read as a single 64-bit value.
 * The BAR is restored to its original contents before returning, and
 * decoding is disabled around the probe so other agents don't see the
 * BAR in a transient state.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR has its upper half in the next dword. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds its real value. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2396
2397static void
2398pci_write_bar(device_t dev, int reg, pci_addr_t base)
2399{
2400	pci_addr_t map;
2401	int ln2range;
2402
2403	map = pci_read_config(dev, reg, 4);
2404
2405	/* The device ROM BAR is always 32-bits. */
2406	if (reg == PCIR_BIOS)
2407		return;
2408	ln2range = pci_maprange(map);
2409	pci_write_config(dev, reg, base, 4);
2410	if (ln2range == 64)
2411		pci_write_config(dev, reg + 4, base >> 32, 4);
2412}
2413
2414/*
2415 * Add a resource based on a pci map register. Return 1 if the map
2416 * register is a 32bit map register or 2 if it is a 64bit register.
2417 */
2418static int
2419pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2420    int force, int prefetch)
2421{
2422	pci_addr_t base, map, testval;
2423	pci_addr_t start, end, count;
2424	int barlen, basezero, maprange, mapsize, type;
2425	uint16_t cmd;
2426	struct resource *res;
2427
2428	pci_read_bar(dev, reg, &map, &testval);
2429	if (PCI_BAR_MEM(map)) {
2430		type = SYS_RES_MEMORY;
2431		if (map & PCIM_BAR_MEM_PREFETCH)
2432			prefetch = 1;
2433	} else
2434		type = SYS_RES_IOPORT;
2435	mapsize = pci_mapsize(testval);
2436	base = pci_mapbase(map);
2437#ifdef __PCI_BAR_ZERO_VALID
2438	basezero = 0;
2439#else
2440	basezero = base == 0;
2441#endif
2442	maprange = pci_maprange(map);
2443	barlen = maprange == 64 ? 2 : 1;
2444
2445	/*
2446	 * For I/O registers, if bottom bit is set, and the next bit up
2447	 * isn't clear, we know we have a BAR that doesn't conform to the
2448	 * spec, so ignore it.  Also, sanity check the size of the data
2449	 * areas to the type of memory involved.  Memory must be at least
2450	 * 16 bytes in size, while I/O ranges must be at least 4.
2451	 */
2452	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2453		return (barlen);
2454	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2455	    (type == SYS_RES_IOPORT && mapsize < 2))
2456		return (barlen);
2457
2458	if (bootverbose) {
2459		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2460		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2461		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2462			printf(", port disabled\n");
2463		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2464			printf(", memory disabled\n");
2465		else
2466			printf(", enabled\n");
2467	}
2468
2469	/*
2470	 * If base is 0, then we have problems if this architecture does
2471	 * not allow that.  It is best to ignore such entries for the
2472	 * moment.  These will be allocated later if the driver specifically
2473	 * requests them.  However, some removable busses look better when
2474	 * all resources are allocated, so allow '0' to be overriden.
2475	 *
2476	 * Similarly treat maps whose values is the same as the test value
2477	 * read back.  These maps have had all f's written to them by the
2478	 * BIOS in an attempt to disable the resources.
2479	 */
2480	if (!force && (basezero || map == testval))
2481		return (barlen);
2482	if ((u_long)base != base) {
2483		device_printf(bus,
2484		    "pci%d:%d:%d:%d bar %#x too many address bits",
2485		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2486		    pci_get_function(dev), reg);
2487		return (barlen);
2488	}
2489
2490	/*
2491	 * This code theoretically does the right thing, but has
2492	 * undesirable side effects in some cases where peripherals
2493	 * respond oddly to having these bits enabled.  Let the user
2494	 * be able to turn them off (since pci_enable_io_modes is 1 by
2495	 * default).
2496	 */
2497	if (pci_enable_io_modes) {
2498		/* Turn on resources that have been left off by a lazy BIOS */
2499		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2500			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2501			cmd |= PCIM_CMD_PORTEN;
2502			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2503		}
2504		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2505			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2506			cmd |= PCIM_CMD_MEMEN;
2507			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2508		}
2509	} else {
2510		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2511			return (barlen);
2512		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2513			return (barlen);
2514	}
2515
2516	count = 1 << mapsize;
2517	if (basezero || base == pci_mapbase(testval)) {
2518		start = 0;	/* Let the parent decide. */
2519		end = ~0ULL;
2520	} else {
2521		start = base;
2522		end = base + (1 << mapsize) - 1;
2523	}
2524	resource_list_add(rl, type, reg, start, end, count);
2525
2526	/*
2527	 * Try to allocate the resource for this BAR from our parent
2528	 * so that this resource range is already reserved.  The
2529	 * driver for this device will later inherit this resource in
2530	 * pci_alloc_resource().
2531	 */
2532	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2533	    prefetch ? RF_PREFETCHABLE : 0);
2534	if (res == NULL) {
2535		/*
2536		 * If the allocation fails, clear the BAR and delete
2537		 * the resource list entry to force
2538		 * pci_alloc_resource() to allocate resources from the
2539		 * parent.
2540		 */
2541		resource_list_delete(rl, type, reg);
2542		start = 0;
2543	} else
2544		start = rman_get_start(res);
2545	pci_write_bar(dev, reg, start);
2546	return (barlen);
2547}
2548
2549/*
2550 * For ATA devices we need to decide early what addressing mode to use.
2551 * Legacy demands that the primary and secondary ATA ports sits on the
2552 * same addresses that old ISA hardware did. This dictates that we use
2553 * those addresses and ignore the BAR's if we cannot set PCI native
2554 * addressing mode.
2555 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: in native mode use BAR(0)/BAR(1); otherwise
	 * reserve the legacy ISA ports 0x1f0-0x1f7 and 0x3f6 directly.
	 * NOTE(review): the reservation result 'r' is ignored on the
	 * legacy paths — presumably a failed reservation is tolerated;
	 * confirm before relying on it.
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	/*
	 * Secondary channel: BAR(2)/BAR(3) in native mode, else the
	 * legacy ISA ports 0x170-0x177 and 0x376.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BAR(4) and BAR(5) are always mapped normally. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2610
/*
 * Work out which IRQ this device should use and record it as the rid 0
 * SYS_RES_IRQ entry in the device's resource list.  The IRQ comes, in
 * order of preference, from a user tunable, the parent bus's routing
 * method, or the intline config register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1-254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2658
2659/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers via BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	/* OHCI_IR set means the SMM BIOS currently owns the controller. */
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 times, 1 ms apart, for SMM to let go. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* If SMM never released it, force a host controller reset. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2695
2696/* Perform early UHCI takeover from SMM. */
2697static void
2698uhci_early_takeover(device_t self)
2699{
2700	struct resource *res;
2701	int rid;
2702
2703	/*
2704	 * Set the PIRQD enable bit and switch off all the others. We don't
2705	 * want legacy support to interfere with us XXX Does this also mean
2706	 * that the BIOS won't touch the keyboard anymore if it is connected
2707	 * to the ports of the root hub?
2708	 */
2709	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2710
2711	/* Disable interrupts */
2712	rid = PCI_UHCI_BASE_REG;
2713	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2714	if (res != NULL) {
2715		bus_write_2(res, UHCI_INTR, 0);
2716		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2717	}
2718}
2719
2720/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's capability/operational registers. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the extended capability list in config space. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* BIOS semaphore clear: the BIOS doesn't own this one. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Raise the OS semaphore to request ownership. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 times, 1 ms apart, for the BIOS to yield. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2775
/*
 * Populate a new child's resource list: size and reserve its BARs
 * (with special handling for legacy ATA), add any quirked extra maps,
 * assign its INTx interrupt, and perform early takeover of USB host
 * controllers from the BIOS/SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM before a driver attaches. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2830
/*
 * Scan every slot/function on bus 'busno' in 'domain' and add a child
 * device for each function that responds.  'dinfo_size' lets subclassed
 * busses allocate a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): presumably a settle delay before the
		 * config read — confirm why 1us is sufficient. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unsupported header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions 0..PCI_FUNCMAX. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2863
/*
 * Create the newbus child for a probed PCI function and wire 'dinfo'
 * to it as ivars, then reserve the function's resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot config space first, then (re)apply it to the device. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2875
2876static int
2877pci_probe(device_t dev)
2878{
2879
2880	device_set_desc(dev, "PCI bus");
2881
2882	/* Allow other subclasses to override this driver. */
2883	return (BUS_PROBE_GENERIC);
2884}
2885
2886static int
2887pci_attach(device_t dev)
2888{
2889	int busno, domain;
2890
2891	/*
2892	 * Since there can be multiple independantly numbered PCI
2893	 * busses on systems with multiple PCI domains, we can't use
2894	 * the unit number to decide which bus we are probing. We ask
2895	 * the parent pcib what our domain and bus numbers are.
2896	 */
2897	domain = pcib_get_domain(dev);
2898	busno = pcib_get_bus(dev);
2899	if (bootverbose)
2900		device_printf(dev, "domain=%d, physical bus=%d\n",
2901		    domain, busno);
2902	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2903	return (bus_generic_attach(dev));
2904}
2905
2906static void
2907pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
2908    int state)
2909{
2910	device_t child, pcib;
2911	struct pci_devinfo *dinfo;
2912	int dstate, i;
2913
2914	/*
2915	 * Set the device to the given state.  If the firmware suggests
2916	 * a different power state, use it instead.  If power management
2917	 * is not present, the firmware is responsible for managing
2918	 * device power.  Skip children who aren't attached since they
2919	 * are handled separately.
2920	 */
2921	pcib = device_get_parent(dev);
2922	for (i = 0; i < numdevs; i++) {
2923		child = devlist[i];
2924		dinfo = device_get_ivars(child);
2925		dstate = state;
2926		if (device_is_attached(child) &&
2927		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
2928			pci_set_powerstate(child, dstate);
2929	}
2930}
2931
2932int
2933pci_suspend(device_t dev)
2934{
2935	device_t child, *devlist;
2936	struct pci_devinfo *dinfo;
2937	int error, i, numdevs;
2938
2939	/*
2940	 * Save the PCI configuration space for each child and set the
2941	 * device in the appropriate power state for this sleep state.
2942	 */
2943	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2944		return (error);
2945	for (i = 0; i < numdevs; i++) {
2946		child = devlist[i];
2947		dinfo = device_get_ivars(child);
2948		pci_cfg_save(child, dinfo, 0);
2949	}
2950
2951	/* Suspend devices before potentially powering them down. */
2952	error = bus_generic_suspend(dev);
2953	if (error) {
2954		free(devlist, M_TEMP);
2955		return (error);
2956	}
2957	pci_set_power_children(dev, devlist, numdevs, PCI_POWERSTATE_D3);
2958	free(devlist, M_TEMP);
2959	return (0);
2960}
2961
2962int
2963pci_resume(device_t dev)
2964{
2965	device_t child, *devlist;
2966	struct pci_devinfo *dinfo;
2967	int error, i, numdevs;
2968
2969	/*
2970	 * Set each child to D0 and restore its PCI configuration space.
2971	 */
2972	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2973		return (error);
2974	if (pci_do_power_resume)
2975		pci_set_power_children(dev, devlist, numdevs,
2976		    PCI_POWERSTATE_D0);
2977
2978	/* Now the device is powered up, restore its config space. */
2979	for (i = 0; i < numdevs; i++) {
2980		child = devlist[i];
2981		dinfo = device_get_ivars(child);
2982
2983		pci_cfg_restore(child, dinfo);
2984		if (!device_is_attached(child))
2985			pci_cfg_save(child, dinfo, 1);
2986	}
2987	free(devlist, M_TEMP);
2988	return (bus_generic_resume(dev));
2989}
2990
2991static void
2992pci_load_vendor_data(void)
2993{
2994	caddr_t vendordata, info;
2995
2996	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2997		info = preload_search_info(vendordata, MODINFO_ADDR);
2998		pci_vendordata = *(char **)info;
2999		info = preload_search_info(vendordata, MODINFO_SIZE);
3000		pci_vendordata_size = *(size_t *)info;
3001		/* terminate the database */
3002		pci_vendordata[pci_vendordata_size] = '\n';
3003	}
3004}
3005
3006void
3007pci_driver_added(device_t dev, driver_t *driver)
3008{
3009	int numdevs;
3010	device_t *devlist;
3011	device_t child;
3012	struct pci_devinfo *dinfo;
3013	int i;
3014
3015	if (bootverbose)
3016		device_printf(dev, "driver added\n");
3017	DEVICE_IDENTIFY(driver, dev);
3018	if (device_get_children(dev, &devlist, &numdevs) != 0)
3019		return;
3020	for (i = 0; i < numdevs; i++) {
3021		child = devlist[i];
3022		if (device_get_state(child) != DS_NOTPRESENT)
3023			continue;
3024		dinfo = device_get_ivars(child);
3025		pci_print_verbose(dinfo);
3026		if (bootverbose)
3027			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3028		pci_cfg_restore(child, dinfo);
3029		if (device_probe_and_attach(child) != 0)
3030			pci_cfg_save(child, dinfo, 1);
3031	}
3032	free(devlist, M_TEMP);
3033}
3034
/*
 * Bus method to set up an interrupt handler for a child.  After the
 * generic setup succeeds, direct children get extra PCI handling:
 * rid 0 means legacy INTx (re-enable it); any other rid is an MSI or
 * MSI-X vector, which is lazily mapped through the parent bridge and
 * programmed into the device on first use, with INTx disabled.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector the first time it is used. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Program the capability on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map this vector through the parent bridge. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* On the fall-through path 'error' is 0, so the teardown
		 * below only runs when reached via 'goto bad'. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3126
3127int
3128pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3129    void *cookie)
3130{
3131	struct msix_table_entry *mte;
3132	struct resource_list_entry *rle;
3133	struct pci_devinfo *dinfo;
3134	int error, rid;
3135
3136	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3137		return (EINVAL);
3138
3139	/* If this isn't a direct child, just bail out */
3140	if (device_get_parent(child) != dev)
3141		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3142
3143	rid = rman_get_rid(irq);
3144	if (rid == 0) {
3145		/* Mask INTx */
3146		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3147	} else {
3148		/*
3149		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3150		 * decrement the appropriate handlers count and mask the
3151		 * MSI-X message, or disable MSI messages if the count
3152		 * drops to 0.
3153		 */
3154		dinfo = device_get_ivars(child);
3155		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3156		if (rle->res != irq)
3157			return (EINVAL);
3158		if (dinfo->cfg.msi.msi_alloc > 0) {
3159			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3160			    ("MSI-X index too high"));
3161			if (dinfo->cfg.msi.msi_handlers == 0)
3162				return (EINVAL);
3163			dinfo->cfg.msi.msi_handlers--;
3164			if (dinfo->cfg.msi.msi_handlers == 0)
3165				pci_disable_msi(child);
3166		} else {
3167			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3168			    ("No MSI or MSI-X interrupts allocated"));
3169			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3170			    ("MSI-X index too high"));
3171			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3172			if (mte->mte_handlers == 0)
3173				return (EINVAL);
3174			mte->mte_handlers--;
3175			if (mte->mte_handlers == 0)
3176				pci_mask_msix(child, rid - 1);
3177		}
3178	}
3179	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3180	if (rid > 0)
3181		KASSERT(error == 0,
3182		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3183	return (error);
3184}
3185
3186int
3187pci_print_child(device_t dev, device_t child)
3188{
3189	struct pci_devinfo *dinfo;
3190	struct resource_list *rl;
3191	int retval = 0;
3192
3193	dinfo = device_get_ivars(child);
3194	rl = &dinfo->resources;
3195
3196	retval += bus_print_child_header(dev, child);
3197
3198	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3199	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3200	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3201	if (device_get_flags(dev))
3202		retval += printf(" flags %#x", device_get_flags(dev));
3203
3204	retval += printf(" at device %d.%d", pci_get_slot(child),
3205	    pci_get_function(child));
3206
3207	retval += bus_print_child_footer(dev, child);
3208
3209	return (retval);
3210}
3211
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() when
 * a device has no entry in the loaded vendor database.  An entry with
 * subclass == -1 describes the whole class; a NULL desc terminates the
 * table.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3303
3304void
3305pci_probe_nomatch(device_t dev, device_t child)
3306{
3307	int	i;
3308	char	*cp, *scp, *device;
3309
3310	/*
3311	 * Look for a listing for this device in a loaded device database.
3312	 */
3313	if ((device = pci_describe_device(child)) != NULL) {
3314		device_printf(dev, "<%s>", device);
3315		free(device, M_DEVBUF);
3316	} else {
3317		/*
3318		 * Scan the class/subclass descriptions for a general
3319		 * description.
3320		 */
3321		cp = "unknown";
3322		scp = NULL;
3323		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3324			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3325				if (pci_nomatch_tab[i].subclass == -1) {
3326					cp = pci_nomatch_tab[i].desc;
3327				} else if (pci_nomatch_tab[i].subclass ==
3328				    pci_get_subclass(child)) {
3329					scp = pci_nomatch_tab[i].desc;
3330				}
3331			}
3332		}
3333		device_printf(dev, "<%s%s%s>",
3334		    cp ? cp : "",
3335		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3336		    scp ? scp : "");
3337	}
3338	printf(" at device %d.%d (no driver attached)\n",
3339	    pci_get_slot(child), pci_get_function(child));
3340	pci_cfg_save(child, device_get_ivars(child), 1);
3341	return;
3342}
3343
3344/*
3345 * Parse the PCI device database, if loaded, and return a pointer to a
3346 * description of the device.
3347 *
3348 * The database is flat text formatted as follows:
3349 *
3350 * Any line not in a valid format is ignored.
3351 * Lines are terminated with newline '\n' characters.
3352 *
3353 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3354 * the vendor name.
3355 *
3356 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3357 * - devices cannot be listed without a corresponding VENDOR line.
3358 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3359 * another TAB, then the device name.
3360 */
3361
3362/*
3363 * Assuming (ptr) points to the beginning of a line in the database,
3364 * return the vendor or device and description of the next entry.
3365 * The value of (vendor) or (device) inappropriate for the entry type
3366 * is set to -1.  Returns nonzero at the end of the database.
3367 *
3368 * Note that this is slightly unrobust in the face of corrupt data;
3369 * we attempt to safeguard against this by spamming the end of the
3370 * database with a newline when we initialise.
3371 */
3372static int
3373pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3374{
3375	char	*cp = *ptr;
3376	int	left;
3377
3378	*device = -1;
3379	*vendor = -1;
3380	**desc = '\0';
3381	for (;;) {
3382		left = pci_vendordata_size - (cp - pci_vendordata);
3383		if (left <= 0) {
3384			*ptr = cp;
3385			return(1);
3386		}
3387
3388		/* vendor entry? */
3389		if (*cp != '\t' &&
3390		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3391			break;
3392		/* device entry? */
3393		if (*cp == '\t' &&
3394		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3395			break;
3396
3397		/* skip to next line */
3398		while (*cp != '\n' && left > 0) {
3399			cp++;
3400			left--;
3401		}
3402		if (*cp == '\n') {
3403			cp++;
3404			left--;
3405		}
3406	}
3407	/* skip to next line */
3408	while (*cp != '\n' && left > 0) {
3409		cp++;
3410		left--;
3411	}
3412	if (*cp == '\n' && left > 0)
3413		cp++;
3414	*ptr = cp;
3415	return(0);
3416}
3417
3418static char *
3419pci_describe_device(device_t dev)
3420{
3421	int	vendor, device;
3422	char	*desc, *vp, *dp, *line;
3423
3424	desc = vp = dp = NULL;
3425
3426	/*
3427	 * If we have no vendor data, we can't do anything.
3428	 */
3429	if (pci_vendordata == NULL)
3430		goto out;
3431
3432	/*
3433	 * Scan the vendor data looking for this device
3434	 */
3435	line = pci_vendordata;
3436	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3437		goto out;
3438	for (;;) {
3439		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3440			goto out;
3441		if (vendor == pci_get_vendor(dev))
3442			break;
3443	}
3444	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3445		goto out;
3446	for (;;) {
3447		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3448			*dp = 0;
3449			break;
3450		}
3451		if (vendor != -1) {
3452			*dp = 0;
3453			break;
3454		}
3455		if (device == pci_get_device(dev))
3456			break;
3457	}
3458	if (dp[0] == '\0')
3459		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3460	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3461	    NULL)
3462		sprintf(desc, "%s, %s", vp, dp);
3463 out:
3464	if (vp != NULL)
3465		free(vp, M_DEVBUF);
3466	if (dp != NULL)
3467		free(dp, M_DEVBUF);
3468	return(desc);
3469}
3470
3471int
3472pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3473{
3474	struct pci_devinfo *dinfo;
3475	pcicfgregs *cfg;
3476
3477	dinfo = device_get_ivars(child);
3478	cfg = &dinfo->cfg;
3479
3480	switch (which) {
3481	case PCI_IVAR_ETHADDR:
3482		/*
3483		 * The generic accessor doesn't deal with failure, so
3484		 * we set the return value, then return an error.
3485		 */
3486		*((uint8_t **) result) = NULL;
3487		return (EINVAL);
3488	case PCI_IVAR_SUBVENDOR:
3489		*result = cfg->subvendor;
3490		break;
3491	case PCI_IVAR_SUBDEVICE:
3492		*result = cfg->subdevice;
3493		break;
3494	case PCI_IVAR_VENDOR:
3495		*result = cfg->vendor;
3496		break;
3497	case PCI_IVAR_DEVICE:
3498		*result = cfg->device;
3499		break;
3500	case PCI_IVAR_DEVID:
3501		*result = (cfg->device << 16) | cfg->vendor;
3502		break;
3503	case PCI_IVAR_CLASS:
3504		*result = cfg->baseclass;
3505		break;
3506	case PCI_IVAR_SUBCLASS:
3507		*result = cfg->subclass;
3508		break;
3509	case PCI_IVAR_PROGIF:
3510		*result = cfg->progif;
3511		break;
3512	case PCI_IVAR_REVID:
3513		*result = cfg->revid;
3514		break;
3515	case PCI_IVAR_INTPIN:
3516		*result = cfg->intpin;
3517		break;
3518	case PCI_IVAR_IRQ:
3519		*result = cfg->intline;
3520		break;
3521	case PCI_IVAR_DOMAIN:
3522		*result = cfg->domain;
3523		break;
3524	case PCI_IVAR_BUS:
3525		*result = cfg->bus;
3526		break;
3527	case PCI_IVAR_SLOT:
3528		*result = cfg->slot;
3529		break;
3530	case PCI_IVAR_FUNCTION:
3531		*result = cfg->func;
3532		break;
3533	case PCI_IVAR_CMDREG:
3534		*result = cfg->cmdreg;
3535		break;
3536	case PCI_IVAR_CACHELNSZ:
3537		*result = cfg->cachelnsz;
3538		break;
3539	case PCI_IVAR_MINGNT:
3540		*result = cfg->mingnt;
3541		break;
3542	case PCI_IVAR_MAXLAT:
3543		*result = cfg->maxlat;
3544		break;
3545	case PCI_IVAR_LATTIMER:
3546		*result = cfg->lattimer;
3547		break;
3548	default:
3549		return (ENOENT);
3550	}
3551	return (0);
3552}
3553
3554int
3555pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3556{
3557	struct pci_devinfo *dinfo;
3558
3559	dinfo = device_get_ivars(child);
3560
3561	switch (which) {
3562	case PCI_IVAR_INTPIN:
3563		dinfo->cfg.intpin = value;
3564		return (0);
3565	case PCI_IVAR_ETHADDR:
3566	case PCI_IVAR_SUBVENDOR:
3567	case PCI_IVAR_SUBDEVICE:
3568	case PCI_IVAR_VENDOR:
3569	case PCI_IVAR_DEVICE:
3570	case PCI_IVAR_DEVID:
3571	case PCI_IVAR_CLASS:
3572	case PCI_IVAR_SUBCLASS:
3573	case PCI_IVAR_PROGIF:
3574	case PCI_IVAR_REVID:
3575	case PCI_IVAR_IRQ:
3576	case PCI_IVAR_DOMAIN:
3577	case PCI_IVAR_BUS:
3578	case PCI_IVAR_SLOT:
3579	case PCI_IVAR_FUNCTION:
3580		return (EINVAL);	/* disallow for now */
3581
3582	default:
3583		return (ENOENT);
3584	}
3585}
3586
3587
3588#include "opt_ddb.h"
3589#ifdef DDB
3590#include <ddb/ddb.h>
3591#include <sys/cons.h>
3592
3593/*
3594 * List resources based on pci map registers, used for within ddb
3595 */
3596
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/* One line per device: selector, class, subsystem, id, rev. */
		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3636#endif /* DDB */
3637
/*
 * Size a BAR and reserve a resource for it on behalf of a child
 * device.  The BAR is probed for its true size, the request is rounded
 * up to that size and alignment, the resource is allocated from our
 * parent, recorded in the child's resource list as RLE_RESERVED, and
 * the assigned base address is written back into the BAR.  Returns the
 * reserved resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* Reject requests whose type contradicts what the BAR reports. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were granted. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3733
3734
3735struct resource *
3736pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3737		   u_long start, u_long end, u_long count, u_int flags)
3738{
3739	struct pci_devinfo *dinfo = device_get_ivars(child);
3740	struct resource_list *rl = &dinfo->resources;
3741	struct resource_list_entry *rle;
3742	struct resource *res;
3743	pcicfgregs *cfg = &dinfo->cfg;
3744
3745	if (device_get_parent(child) != dev)
3746		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3747		    type, rid, start, end, count, flags));
3748
3749	/*
3750	 * Perform lazy resource allocation
3751	 */
3752	switch (type) {
3753	case SYS_RES_IRQ:
3754		/*
3755		 * Can't alloc legacy interrupt once MSI messages have
3756		 * been allocated.
3757		 */
3758		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3759		    cfg->msix.msix_alloc > 0))
3760			return (NULL);
3761
3762		/*
3763		 * If the child device doesn't have an interrupt
3764		 * routed and is deserving of an interrupt, try to
3765		 * assign it one.
3766		 */
3767		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3768		    (cfg->intpin != 0))
3769			pci_assign_interrupt(dev, child, 0);
3770		break;
3771	case SYS_RES_IOPORT:
3772	case SYS_RES_MEMORY:
3773		/* Reserve resources for this BAR if needed. */
3774		rle = resource_list_find(rl, type, *rid);
3775		if (rle == NULL) {
3776			res = pci_reserve_map(dev, child, type, rid, start, end,
3777			    count, flags);
3778			if (res == NULL)
3779				return (NULL);
3780		}
3781	}
3782	return (resource_list_alloc(rl, dev, child, type, rid,
3783	    start, end, count, flags));
3784}
3785
3786int
3787pci_activate_resource(device_t dev, device_t child, int type, int rid,
3788    struct resource *r)
3789{
3790	int error;
3791
3792	error = bus_generic_activate_resource(dev, child, type, rid, r);
3793	if (error)
3794		return (error);
3795
3796	/* Enable decoding in the command register when activating BARs. */
3797	if (device_get_parent(child) == dev) {
3798		/* Device ROMs need their decoding explicitly enabled. */
3799		if (rid == PCIR_BIOS)
3800			pci_write_config(child, rid, rman_get_start(r) |
3801			    PCIM_BIOS_ENABLE, 4);
3802		switch (type) {
3803		case SYS_RES_IOPORT:
3804		case SYS_RES_MEMORY:
3805			error = PCI_ENABLE_IO(dev, child, type);
3806			break;
3807		}
3808	}
3809	return (error);
3810}
3811
3812int
3813pci_deactivate_resource(device_t dev, device_t child, int type,
3814    int rid, struct resource *r)
3815{
3816	int error;
3817
3818	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3819	if (error)
3820		return (error);
3821
3822	/* Disable decoding for device ROMs. */
3823	if (rid == PCIR_BIOS)
3824		pci_write_config(child, rid, rman_get_start(r), 4);
3825	return (0);
3826}
3827
/*
 * Detach and destroy a child device, releasing every resource it
 * holds.  Memory and I/O decoding are disabled first so the hardware
 * stops responding at addresses we are about to free.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource still active or busy at this point
			 * was leaked by the child driver; complain and
			 * release it on the child's behalf before
			 * dropping the reservation.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3867
/*
 * Delete one resource entry from a direct child's resource list,
 * unreserving it first if it is currently reserved.  A resource that
 * is still active or busy is left alone (with a warning), since the
 * child driver still owns it.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children have entries in our resource lists. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3910
3911struct resource_list *
3912pci_get_resource_list (device_t dev, device_t child)
3913{
3914	struct pci_devinfo *dinfo = device_get_ivars(child);
3915
3916	return (&dinfo->resources);
3917}
3918
3919uint32_t
3920pci_read_config_method(device_t dev, device_t child, int reg, int width)
3921{
3922	struct pci_devinfo *dinfo = device_get_ivars(child);
3923	pcicfgregs *cfg = &dinfo->cfg;
3924
3925	return (PCIB_READ_CONFIG(device_get_parent(dev),
3926	    cfg->bus, cfg->slot, cfg->func, reg, width));
3927}
3928
3929void
3930pci_write_config_method(device_t dev, device_t child, int reg,
3931    uint32_t val, int width)
3932{
3933	struct pci_devinfo *dinfo = device_get_ivars(child);
3934	pcicfgregs *cfg = &dinfo->cfg;
3935
3936	PCIB_WRITE_CONFIG(device_get_parent(dev),
3937	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3938}
3939
3940int
3941pci_child_location_str_method(device_t dev, device_t child, char *buf,
3942    size_t buflen)
3943{
3944
3945	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3946	    pci_get_function(child));
3947	return (0);
3948}
3949
3950int
3951pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3952    size_t buflen)
3953{
3954	struct pci_devinfo *dinfo;
3955	pcicfgregs *cfg;
3956
3957	dinfo = device_get_ivars(child);
3958	cfg = &dinfo->cfg;
3959	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3960	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3961	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3962	    cfg->progif);
3963	return (0);
3964}
3965
3966int
3967pci_assign_interrupt_method(device_t dev, device_t child)
3968{
3969	struct pci_devinfo *dinfo = device_get_ivars(child);
3970	pcicfgregs *cfg = &dinfo->cfg;
3971
3972	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3973	    cfg->intpin));
3974}
3975
3976static int
3977pci_modevent(module_t mod, int what, void *arg)
3978{
3979	static struct cdev *pci_cdev;
3980
3981	switch (what) {
3982	case MOD_LOAD:
3983		STAILQ_INIT(&pci_devq);
3984		pci_generation = 0;
3985		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3986		    "pci");
3987		pci_load_vendor_data();
3988		break;
3989
3990	case MOD_UNLOAD:
3991		destroy_dev(pci_cdev);
3992		break;
3993	}
3994
3995	return (0);
3996}
3997
/*
 * Restore a device's config registers (BARs, command register, and the
 * writable header fields) from the copy cached by pci_cfg_save(), and
 * re-establish any MSI/MSI-X configuration.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4042
4043void
4044pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4045{
4046	int i;
4047	uint32_t cls;
4048	int ps;
4049
4050	/*
4051	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4052	 * we know need special treatment.  Type 2 devices are cardbus bridges
4053	 * which also require special treatment.  Other types are unknown, and
4054	 * we err on the side of safety by ignoring them.  Powering down
4055	 * bridges should not be undertaken lightly.
4056	 */
4057	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4058		return;
4059	for (i = 0; i < dinfo->cfg.nummaps; i++)
4060		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4061	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4062
4063	/*
4064	 * Some drivers apparently write to these registers w/o updating our
4065	 * cached copy.  No harm happens if we update the copy, so do so here
4066	 * so we can restore them.  The COMMAND register is modified by the
4067	 * bus w/o updating the cache.  This should represent the normally
4068	 * writable portion of the 'defined' part of type 0 headers.  In
4069	 * theory we also need to save/restore the PCI capability structures
4070	 * we know about, but apart from power we don't know any that are
4071	 * writable.
4072	 */
4073	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4074	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4075	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4076	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4077	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4078	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4079	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4080	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4081	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4082	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4083	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4084	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4085	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4086	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4087	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4088
4089	/*
4090	 * don't set the state for display devices, base peripherals and
4091	 * memory devices since bad things happen when they are powered down.
4092	 * We should (a) have drivers that can easily detach and (b) use
4093	 * generic drivers for these devices so that some device actually
4094	 * attaches.  We need to make sure that when we implement (a) we don't
4095	 * power the device down on a reattach.
4096	 */
4097	cls = pci_get_class(dev);
4098	if (!setstate)
4099		return;
4100	switch (pci_do_power_nodriver)
4101	{
4102		case 0:		/* NO powerdown at all */
4103			return;
4104		case 1:		/* Conservative about what to power down */
4105			if (cls == PCIC_STORAGE)
4106				return;
4107			/*FALLTHROUGH*/
4108		case 2:		/* Agressive about what to power down */
4109			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4110			    cls == PCIC_BASEPERIPH)
4111				return;
4112			/*FALLTHROUGH*/
4113		case 3:		/* Power down everything */
4114			break;
4115	}
4116	/*
4117	 * PCI spec says we can only go into D3 state from D0 state.
4118	 * Transition from D[12] into D0 before going to D3 state.
4119	 */
4120	ps = pci_get_powerstate(dev);
4121	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4122		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4123	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4124		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4125}
4126