/* pci.c revision 209154 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 209154 2010-06-14 07:10:37Z mav $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72#ifdef __HAVE_ACPI
73#include <contrib/dev/acpica/include/acpi.h>
74#include "acpi_if.h"
75#else
76#define	ACPI_PWR_FOR_SLEEP(x, y, z)
77#endif
78
/*
 * Forward declarations for the helpers private to this file.  BAR
 * decoding helpers first, then probe/attach support, capability and
 * VPD parsing, and finally the MSI/MSI-X plumbing.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0	/* currently unused; kept for symmetry with pci_read_vpd_reg() */
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
123
/*
 * Method dispatch table for the pci bus driver: device lifecycle hooks,
 * the newbus "bus" interface, and the PCI-specific interface declared
 * in pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
176
/* Declare the pci driver class and register it as a child of pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* In-memory copy of the vendor description data and its size. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
185
186
/*
 * Table of per-device quirks, keyed by the combined vendor/device ID as
 * read from config space (device in the high 16 bits, vendor in the low).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;		/* quirk-type-specific argument */
	int	arg2;		/* quirk-type-specific argument */
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
230
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all discovered PCI devices, plus bookkeeping counters. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped on every device list change */
uint32_t pci_numdevs = 0;
/* Set once any PCIe / PCI-X capability is seen (see pci_read_extcap()). */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
292
293/* Find a device_t by bus/slot/function in domain 0 */
294
295device_t
296pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
297{
298
299	return (pci_find_dbsf(0, bus, slot, func));
300}
301
302/* Find a device_t by domain/bus/slot/function */
303
304device_t
305pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
306{
307	struct pci_devinfo *dinfo;
308
309	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
310		if ((dinfo->cfg.domain == domain) &&
311		    (dinfo->cfg.bus == bus) &&
312		    (dinfo->cfg.slot == slot) &&
313		    (dinfo->cfg.func == func)) {
314			return (dinfo->cfg.dev);
315		}
316	}
317
318	return (NULL);
319}
320
321/* Find a device_t by vendor/device ID */
322
323device_t
324pci_find_device(uint16_t vendor, uint16_t device)
325{
326	struct pci_devinfo *dinfo;
327
328	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
329		if ((dinfo->cfg.vendor == vendor) &&
330		    (dinfo->cfg.device == device)) {
331			return (dinfo->cfg.dev);
332		}
333	}
334
335	return (NULL);
336}
337
338static int
339pci_printf(pcicfgregs *cfg, const char *fmt, ...)
340{
341	va_list ap;
342	int retval;
343
344	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
345	    cfg->func);
346	va_start(ap, fmt);
347	retval += vprintf(fmt, ap);
348	va_end(ap);
349	return (retval);
350}
351
352/* return base address of memory or port map */
353
354static pci_addr_t
355pci_mapbase(uint64_t mapreg)
356{
357
358	if (PCI_BAR_MEM(mapreg))
359		return (mapreg & PCIM_BAR_MEM_BASE);
360	else
361		return (mapreg & PCIM_BAR_IO_BASE);
362}
363
364/* return map type of memory or port map */
365
366static const char *
367pci_maptype(uint64_t mapreg)
368{
369
370	if (PCI_BAR_IO(mapreg))
371		return ("I/O Port");
372	if (mapreg & PCIM_BAR_MEM_PREFETCH)
373		return ("Prefetchable Memory");
374	return ("Memory");
375}
376
377/* return log2 of map size decoded for memory or port map */
378
379static int
380pci_mapsize(uint64_t testval)
381{
382	int ln2size;
383
384	testval = pci_mapbase(testval);
385	ln2size = 0;
386	if (testval != 0) {
387		while ((testval & 1) == 0)
388		{
389			ln2size++;
390			testval >>= 1;
391		}
392	}
393	return (ln2size);
394}
395
396/* return base address of device ROM */
397
398static pci_addr_t
399pci_rombase(uint64_t mapreg)
400{
401
402	return (mapreg & PCIM_BIOS_ADDR_MASK);
403}
404
405/* return log2 of map size decided for device ROM */
406
407static int
408pci_romsize(uint64_t testval)
409{
410	int ln2size;
411
412	testval = pci_rombase(testval);
413	ln2size = 0;
414	if (testval != 0) {
415		while ((testval & 1) == 0)
416		{
417			ln2size++;
418			testval >>= 1;
419		}
420	}
421	return (ln2size);
422}
423
424/* return log2 of address range supported by map register */
425
426static int
427pci_maprange(uint64_t mapreg)
428{
429	int ln2range = 0;
430
431	if (PCI_BAR_IO(mapreg))
432		ln2range = 32;
433	else
434		switch (mapreg & PCIM_BAR_MEM_TYPE) {
435		case PCIM_BAR_MEM_32:
436			ln2range = 32;
437			break;
438		case PCIM_BAR_MEM_1MB:
439			ln2range = 20;
440			break;
441		case PCIM_BAR_MEM_64:
442			ln2range = 64;
443			break;
444		}
445	return (ln2range);
446}
447
448/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
449
450static void
451pci_fixancient(pcicfgregs *cfg)
452{
453	if (cfg->hdrtype != 0)
454		return;
455
456	/* PCI to PCI bridges use header type 1 */
457	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
458		cfg->hdrtype = 1;
459}
460
461/* extract header type specific config data */
462
463static void
464pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
465{
466#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
467	switch (cfg->hdrtype) {
468	case 0:
469		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
470		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
471		cfg->nummaps	    = PCI_MAXMAPS_0;
472		break;
473	case 1:
474		cfg->nummaps	    = PCI_MAXMAPS_1;
475		break;
476	case 2:
477		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
478		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
479		cfg->nummaps	    = PCI_MAXMAPS_2;
480		break;
481	}
482#undef REG
483}
484
/*
 * Read the configuration header of the function at domain d, bus b,
 * slot s, function f into a freshly allocated pci_devinfo of 'size'
 * bytes and link it onto the global device list.  Returns NULL when no
 * device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones vendor/device means nothing is decoding this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the standard header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed data into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
559
/*
 * Walk the device's PCI capability list and record the capabilities we
 * care about (power management, HT MSI mapping, MSI, MSI-X, VPD,
 * subvendor IDs) into the matching sub-structures of 'cfg'.  Also sets
 * the global pcix_chipset/pcie_chipset hints as a side effect.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capabilities pointer location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* The supported message count is a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Just remember the location; parsed lazily. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG are intentionally left defined; the VPD helpers below use them */
}
693
694/*
695 * PCI Vital Product Data
696 */
697
698#define	PCI_VPD_TIMEOUT		1000000
699
700static int
701pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
702{
703	int count = PCI_VPD_TIMEOUT;
704
705	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
706
707	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
708
709	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
710		if (--count < 0)
711			return (ENXIO);
712		DELAY(1);	/* limit looping */
713	}
714	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
715
716	return (0);
717}
718
#if 0
/*
 * Write one 32-bit word to the device's VPD address space at 'reg'.
 * Mirrors pci_read_vpd_reg(); currently compiled out because nothing
 * in this file writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Bit 15 set requests a write; hardware clears it when done. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
738
739#undef PCI_VPD_TIMEOUT
740
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit read-ahead window */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum for the RV checksum */
};
749
750static int
751vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
752{
753	uint32_t reg;
754	uint8_t byte;
755
756	if (vrs->bytesinval == 0) {
757		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
758			return (ENXIO);
759		vrs->val = le32toh(reg);
760		vrs->off += 4;
761		byte = vrs->val & 0xff;
762		vrs->bytesinval = 3;
763	} else {
764		vrs->val = vrs->val >> 8;
765		byte = vrs->val & 0xff;
766		vrs->bytesinval--;
767	}
768
769	vrs->cksum += byte;
770	*data = byte;
771	return (0);
772}
773
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array, and the writable (VPD-W)
 * keyword array.  Runs a small state machine over the byte stream:
 *   state 0  - resource tag / item name
 *   state 1  - identifier string bytes
 *   state 2/3 - VPD-R keyword header / value bytes
 *   state 4  - skip bytes of an unhandled small resource
 *   state 5/6 - VPD-W keyword header / value bytes
 *   state -1 - normal termination, state -2 - I/O error
 * On checksum or I/O failure the partially built arrays are freed.
 * Sets cfg->vpd.vpd_cached in all cases so we only try once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD space is at most 0x7f 32-bit words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the checksum of
			 * everything up to and including it come out to 0.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array down to what was used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes of an unhandled resource */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this value lives in VPD space. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array down to what was used. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark as cached even on failure so we don't retry forever. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1053
1054int
1055pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1056{
1057	struct pci_devinfo *dinfo = device_get_ivars(child);
1058	pcicfgregs *cfg = &dinfo->cfg;
1059
1060	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1061		pci_read_vpd(device_get_parent(dev), cfg);
1062
1063	*identptr = cfg->vpd.vpd_ident;
1064
1065	if (*identptr == NULL)
1066		return (ENXIO);
1067
1068	return (0);
1069}
1070
1071int
1072pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1073	const char **vptr)
1074{
1075	struct pci_devinfo *dinfo = device_get_ivars(child);
1076	pcicfgregs *cfg = &dinfo->cfg;
1077	int i;
1078
1079	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1080		pci_read_vpd(device_get_parent(dev), cfg);
1081
1082	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1083		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1084		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1085			*vptr = cfg->vpd.vpd_ros[i].value;
1086		}
1087
1088	if (i != cfg->vpd.vpd_rocnt)
1089		return (0);
1090
1091	*vptr = NULL;
1092	return (ENXIO);
1093}
1094
1095/*
1096 * Find the requested extended capability and return the offset in
1097 * configuration space via the pointer provided. The function returns
1098 * 0 on success and error code otherwise.
1099 */
1100int
1101pci_find_extcap_method(device_t dev, device_t child, int capability,
1102    int *capreg)
1103{
1104	struct pci_devinfo *dinfo = device_get_ivars(child);
1105	pcicfgregs *cfg = &dinfo->cfg;
1106	u_int32_t status;
1107	u_int8_t ptr;
1108
1109	/*
1110	 * Check the CAP_LIST bit of the PCI status register first.
1111	 */
1112	status = pci_read_config(child, PCIR_STATUS, 2);
1113	if (!(status & PCIM_STATUS_CAPPRESENT))
1114		return (ENXIO);
1115
1116	/*
1117	 * Determine the start pointer of the capabilities list.
1118	 */
1119	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1120	case 0:
1121	case 1:
1122		ptr = PCIR_CAP_PTR;
1123		break;
1124	case 2:
1125		ptr = PCIR_CAP_PTR_2;
1126		break;
1127	default:
1128		/* XXX: panic? */
1129		return (ENXIO);		/* no extended capabilities support */
1130	}
1131	ptr = pci_read_config(child, ptr, 1);
1132
1133	/*
1134	 * Traverse the capabilities list.
1135	 */
1136	while (ptr != 0) {
1137		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1138			if (capreg != NULL)
1139				*capreg = ptr;
1140			return (0);
1141		}
1142		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1143	}
1144
1145	return (ENOENT);
1146}
1147
1148/*
1149 * Support for MSI-X message interrupts.
1150 */
1151void
1152pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1153{
1154	struct pci_devinfo *dinfo = device_get_ivars(dev);
1155	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1156	uint32_t offset;
1157
1158	KASSERT(msix->msix_table_len > index, ("bogus index"));
1159	offset = msix->msix_table_offset + index * 16;
1160	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1161	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1162	bus_write_4(msix->msix_table_res, offset + 8, data);
1163
1164	/* Enable MSI -> HT mapping. */
1165	pci_ht_map_msi(dev, address);
1166}
1167
1168void
1169pci_mask_msix(device_t dev, u_int index)
1170{
1171	struct pci_devinfo *dinfo = device_get_ivars(dev);
1172	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1173	uint32_t offset, val;
1174
1175	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1176	offset = msix->msix_table_offset + index * 16 + 12;
1177	val = bus_read_4(msix->msix_table_res, offset);
1178	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1179		val |= PCIM_MSIX_VCTRL_MASK;
1180		bus_write_4(msix->msix_table_res, offset, val);
1181	}
1182}
1183
1184void
1185pci_unmask_msix(device_t dev, u_int index)
1186{
1187	struct pci_devinfo *dinfo = device_get_ivars(dev);
1188	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1189	uint32_t offset, val;
1190
1191	KASSERT(msix->msix_table_len > index, ("bogus index"));
1192	offset = msix->msix_table_offset + index * 16 + 12;
1193	val = bus_read_4(msix->msix_table_res, offset);
1194	if (val & PCIM_MSIX_VCTRL_MASK) {
1195		val &= ~PCIM_MSIX_VCTRL_MASK;
1196		bus_write_4(msix->msix_table_res, offset, val);
1197	}
1198}
1199
1200int
1201pci_pending_msix(device_t dev, u_int index)
1202{
1203	struct pci_devinfo *dinfo = device_get_ivars(dev);
1204	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1205	uint32_t offset, bit;
1206
1207	KASSERT(msix->msix_table_len > index, ("bogus index"));
1208	offset = msix->msix_pba_offset + (index / 32) * 4;
1209	bit = 1 << index % 32;
1210	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1211}
1212
1213/*
1214 * Restore MSI-X registers and table during resume.  If MSI-X is
1215 * enabled then walk the virtual table to restore the actual MSI-X
1216 * table.
1217 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/*
	 * Restore the saved control word (enable/function-mask bits)
	 * even when no vectors are currently allocated.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1245
1246/*
1247 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1248 * returned in *count.  After this function returns, each message will be
1249 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1250 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already activated the memory BAR(s) holding the MSI-X
	 * table and the pending-bit array.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, 'rle' still points at it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to the smaller of the request and the hardware limit. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* A partial allocation is still a success; report what we got. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: table entry i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1382
1383/*
1384 * By default, pci_alloc_msix() will assign the allocated IRQ
1385 * resources consecutively to the first N messages in the MSI-X table.
1386 * However, device drivers may want to use different layouts if they
1387 * either receive fewer messages than they asked for, or they wish to
1388 * populate the MSI-X table sparsely.  This method allows the driver
1389 * to specify what layout it wants.  It must be called after a
1390 * successful pci_alloc_msix() but before any of the associated
1391 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1392 *
1393 * The 'vectors' array contains 'count' message vectors.  The array
1394 * maps directly to the MSI-X table in that index 0 in the array
1395 * specifies the vector for the first message in the MSI-X table, etc.
1396 * The vector value in each array index can either be 0 to indicate
1397 * that no vector should be assigned to a message slot, or it can be a
1398 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1400 * vector (IRQ) to be used for the corresponding message.
1401 *
1402 * On successful return, each message with a non-zero vector will have
1403 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1404 * 1.  Additionally, if any of the IRQs allocated via the previous
1405 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1406 * will be freed back to the system automatically.
1407 *
1408 * For example, suppose a driver has a MSI-X table with 6 messages and
1409 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1410 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1411 * C.  After the call to pci_alloc_msix(), the device will be setup to
1412 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1414 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1415 * be freed back to the system.  This device will also have valid
1416 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1417 *
1418 * In any case, the SYS_RES_IRQ rid X will always map to the message
1419 * at MSI-X table index X - 1 and will only be valid if a vector is
1420 * assigned to that table entry.
1421 */
1422int
1423pci_remap_msix_method(device_t dev, device_t child, int count,
1424    const u_int *vectors)
1425{
1426	struct pci_devinfo *dinfo = device_get_ivars(child);
1427	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1428	struct resource_list_entry *rle;
1429	int i, irq, j, *used;
1430
1431	/*
1432	 * Have to have at least one message in the table but the
1433	 * table can't be bigger than the actual MSI-X table in the
1434	 * device.
1435	 */
1436	if (count == 0 || count > msix->msix_msgnum)
1437		return (EINVAL);
1438
1439	/* Sanity check the vectors. */
1440	for (i = 0; i < count; i++)
1441		if (vectors[i] > msix->msix_alloc)
1442			return (EINVAL);
1443
1444	/*
1445	 * Make sure there aren't any holes in the vectors to be used.
1446	 * It's a big pain to support it, and it doesn't really make
1447	 * sense anyway.  Also, at least one vector must be used.
1448	 */
1449	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1450	    M_ZERO);
1451	for (i = 0; i < count; i++)
1452		if (vectors[i] != 0)
1453			used[vectors[i] - 1] = 1;
1454	for (i = 0; i < msix->msix_alloc - 1; i++)
1455		if (used[i] == 0 && used[i + 1] == 1) {
1456			free(used, M_DEVBUF);
1457			return (EINVAL);
1458		}
1459	if (used[0] != 1) {
1460		free(used, M_DEVBUF);
1461		return (EINVAL);
1462	}
1463
1464	/* Make sure none of the resources are allocated. */
1465	for (i = 0; i < msix->msix_table_len; i++) {
1466		if (msix->msix_table[i].mte_vector == 0)
1467			continue;
1468		if (msix->msix_table[i].mte_handlers > 0)
1469			return (EBUSY);
1470		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1471		KASSERT(rle != NULL, ("missing resource"));
1472		if (rle->res != NULL)
1473			return (EBUSY);
1474	}
1475
1476	/* Free the existing resource list entries. */
1477	for (i = 0; i < msix->msix_table_len; i++) {
1478		if (msix->msix_table[i].mte_vector == 0)
1479			continue;
1480		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1481	}
1482
1483	/*
1484	 * Build the new virtual table keeping track of which vectors are
1485	 * used.
1486	 */
1487	free(msix->msix_table, M_DEVBUF);
1488	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1489	    M_DEVBUF, M_WAITOK | M_ZERO);
1490	for (i = 0; i < count; i++)
1491		msix->msix_table[i].mte_vector = vectors[i];
1492	msix->msix_table_len = count;
1493
1494	/* Free any unused IRQs and resize the vectors array if necessary. */
1495	j = msix->msix_alloc - 1;
1496	if (used[j] == 0) {
1497		struct msix_vector *vec;
1498
1499		while (used[j] == 0) {
1500			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1501			    msix->msix_vectors[j].mv_irq);
1502			j--;
1503		}
1504		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1505		    M_WAITOK);
1506		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1507		    (j + 1));
1508		free(msix->msix_vectors, M_DEVBUF);
1509		msix->msix_vectors = vec;
1510		msix->msix_alloc = j + 1;
1511	}
1512	free(used, M_DEVBUF);
1513
1514	/* Map the IRQs onto the rids. */
1515	for (i = 0; i < count; i++) {
1516		if (vectors[i] == 0)
1517			continue;
1518		irq = msix->msix_vectors[vectors[i]].mv_irq;
1519		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1520		    irq, 1);
1521	}
1522
1523	if (bootverbose) {
1524		device_printf(child, "Remapped MSI-X IRQs as: ");
1525		for (i = 0; i < count; i++) {
1526			if (i != 0)
1527				printf(", ");
1528			if (vectors[i] == 0)
1529				printf("---");
1530			else
1531				printf("%d",
1532				    msix->msix_vectors[vectors[i]].mv_irq);
1533		}
1534		printf("\n");
1535	}
1536
1537	return (0);
1538}
1539
/*
 * Release all MSI-X state for 'child': disable MSI-X in the control
 * register, delete the SYS_RES_IRQ resource list entries, hand the
 * IRQs back to the parent bridge, and free the bookkeeping tables.
 * Returns ENODEV if nothing is allocated and EBUSY if any message
 * still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1586
1587/*
1588 * Return the max supported MSI-X messages this device supports.
1589 * Basically, assuming the MD code can alloc messages, this function
1590 * should return the maximum value that pci_alloc_msix() can return.
1591 * Thus, it is subject to the tunables, etc.
1592 */
1593int
1594pci_msix_count_method(device_t dev, device_t child)
1595{
1596	struct pci_devinfo *dinfo = device_get_ivars(child);
1597	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1598
1599	if (pci_do_msix && msix->msix_location != 0)
1600		return (msix->msix_msgnum);
1601	return (0);
1602}
1603
1604/*
1605 * HyperTransport MSI mapping control
1606 */
1607void
1608pci_ht_map_msi(device_t dev, uint64_t addr)
1609{
1610	struct pci_devinfo *dinfo = device_get_ivars(dev);
1611	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1612
1613	if (!ht->ht_msimap)
1614		return;
1615
1616	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1617	    ht->ht_msiaddr >> 20 == addr >> 20) {
1618		/* Enable MSI -> HT mapping. */
1619		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1620		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1621		    ht->ht_msictrl, 2);
1622	}
1623
1624	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1625		/* Disable MSI -> HT mapping. */
1626		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1627		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1628		    ht->ht_msictrl, 2);
1629	}
1630}
1631
1632int
1633pci_get_max_read_req(device_t dev)
1634{
1635	int cap;
1636	uint16_t val;
1637
1638	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1639		return (0);
1640	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1641	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1642	val >>= 12;
1643	return (1 << (val + 7));
1644}
1645
1646int
1647pci_set_max_read_req(device_t dev, int size)
1648{
1649	int cap;
1650	uint16_t val;
1651
1652	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1653		return (0);
1654	if (size < 128)
1655		size = 128;
1656	if (size > 4096)
1657		size = 4096;
1658	size = (1 << (fls(size) - 1));
1659	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1660	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1661	val |= (fls(size) - 8) << 12;
1662	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1663	return (size);
1664}
1665
1666/*
1667 * Support for MSI message signalled interrupts.
1668 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/*
	 * Write data and address values before setting the enable bit
	 * so the device never sees a half-programmed message.
	 */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable: extra address register shifts data offset. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1695
1696void
1697pci_disable_msi(device_t dev)
1698{
1699	struct pci_devinfo *dinfo = device_get_ivars(dev);
1700	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1701
1702	/* Disable MSI -> HT mapping. */
1703	pci_ht_map_msi(dev, 0);
1704
1705	/* Disable MSI in the control register. */
1706	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1707	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1708	    2);
1709}
1710
1711/*
1712 * Restore MSI registers during resume.  If MSI is enabled then
1713 * restore the data and address registers in addition to the control
1714 * register.
1715 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Re-program the saved message from the softc copy. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capable: extra address register shifts data. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the control word, even when MSI is disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1741
1742static int
1743pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1744{
1745	struct pci_devinfo *dinfo = device_get_ivars(dev);
1746	pcicfgregs *cfg = &dinfo->cfg;
1747	struct resource_list_entry *rle;
1748	struct msix_table_entry *mte;
1749	struct msix_vector *mv;
1750	uint64_t addr;
1751	uint32_t data;
1752	int error, i, j;
1753
1754	/*
1755	 * Handle MSI first.  We try to find this IRQ among our list
1756	 * of MSI IRQs.  If we find it, we request updated address and
1757	 * data registers and apply the results.
1758	 */
1759	if (cfg->msi.msi_alloc > 0) {
1760
1761		/* If we don't have any active handlers, nothing to do. */
1762		if (cfg->msi.msi_handlers == 0)
1763			return (0);
1764		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1765			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1766			    i + 1);
1767			if (rle->start == irq) {
1768				error = PCIB_MAP_MSI(device_get_parent(bus),
1769				    dev, irq, &addr, &data);
1770				if (error)
1771					return (error);
1772				pci_disable_msi(dev);
1773				dinfo->cfg.msi.msi_addr = addr;
1774				dinfo->cfg.msi.msi_data = data;
1775				pci_enable_msi(dev, addr, data);
1776				return (0);
1777			}
1778		}
1779		return (ENOENT);
1780	}
1781
1782	/*
1783	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1784	 * we request the updated mapping info.  If that works, we go
1785	 * through all the slots that use this IRQ and update them.
1786	 */
1787	if (cfg->msix.msix_alloc > 0) {
1788		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1789			mv = &cfg->msix.msix_vectors[i];
1790			if (mv->mv_irq == irq) {
1791				error = PCIB_MAP_MSI(device_get_parent(bus),
1792				    dev, irq, &addr, &data);
1793				if (error)
1794					return (error);
1795				mv->mv_address = addr;
1796				mv->mv_data = data;
1797				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1798					mte = &cfg->msix.msix_table[j];
1799					if (mte->mte_vector != i + 1)
1800						continue;
1801					if (mte->mte_handlers == 0)
1802						continue;
1803					pci_mask_msix(dev, j);
1804					pci_enable_msix(dev, j, addr, data);
1805					pci_unmask_msix(dev, j);
1806				}
1807			}
1808		}
1809		return (ENOENT);
1810	}
1811
1812	return (ENOENT);
1813}
1814
1815/*
1816 * Returns true if the specified device is blacklisted because MSI
1817 * doesn't work.
1818 */
1819int
1820pci_msi_device_blacklisted(device_t dev)
1821{
1822	struct pci_quirk *q;
1823
1824	if (!pci_honor_msi_blacklist)
1825		return (0);
1826
1827	for (q = &pci_quirks[0]; q->devid; q++) {
1828		if (q->devid == pci_get_devid(dev) &&
1829		    q->type == PCI_QUIRK_DISABLE_MSI)
1830			return (1);
1831	}
1832	return (0);
1833}
1834
1835/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1837 * we just check for blacklisted chipsets as represented by the
1838 * host-PCI bridge at device 0:0:0.  In the future, it may become
1839 * necessary to check other system attributes, such as the kenv values
1840 * that give the motherboard manufacturer and model number.
1841 */
1842static int
1843pci_msi_blacklisted(void)
1844{
1845	device_t dev;
1846
1847	if (!pci_honor_msi_blacklist)
1848		return (0);
1849
1850	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1851	if (!(pcie_chipset || pcix_chipset))
1852		return (1);
1853
1854	dev = pci_find_bsf(0, 0, 0);
1855	if (dev != NULL)
1856		return (pci_msi_device_blacklisted(dev));
1857	return (0);
1858}
1859
1860/*
1861 * Attempt to allocate *count MSI messages.  The actual number allocated is
1862 * returned in *count.  After this function returns, each message will be
1863 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1864 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages (the size of irqs[]). */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request until the parent bridge can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * (bits 6:4) holds log2 of the number of enabled messages.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1983
1984/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means no MSI-X was allocated. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the bulk release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2032
2033/*
2034 * Return the max supported MSI messages this device supports.
2035 * Basically, assuming the MD code can alloc messages, this function
2036 * should return the maximum value that pci_alloc_msi() can return.
2037 * Thus, it is subject to the tunables, etc.
2038 */
2039int
2040pci_msi_count_method(device_t dev, device_t child)
2041{
2042	struct pci_devinfo *dinfo = device_get_ivars(child);
2043	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2044
2045	if (pci_do_msi && msi->msi_location != 0)
2046		return (msi->msi_msgnum);
2047	return (0);
2048}
2049
2050/* free pcicfgregs structure and all depending data structures */
2051
2052int
2053pci_freecfg(struct pci_devinfo *dinfo)
2054{
2055	struct devlist *devlist_head;
2056	int i;
2057
2058	devlist_head = &pci_devq;
2059
2060	if (dinfo->cfg.vpd.vpd_reg) {
2061		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2062		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2063			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2064		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2065		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2066			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2067		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2068	}
2069	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2070	free(dinfo, M_DEVBUF);
2071
2072	/* increment the generation count */
2073	pci_generation++;
2074
2075	/* we're losing one device */
2076	pci_numdevs--;
2077	return (0);
2078}
2079
2080/*
2081 * PCI power manangement
2082 */
2083int
2084pci_set_powerstate_method(device_t dev, device_t child, int state)
2085{
2086	struct pci_devinfo *dinfo = device_get_ivars(child);
2087	pcicfgregs *cfg = &dinfo->cfg;
2088	uint16_t status;
2089	int result, oldstate, highest, delay;
2090
2091	if (cfg->pp.pp_cap == 0)
2092		return (EOPNOTSUPP);
2093
2094	/*
2095	 * Optimize a no state change request away.  While it would be OK to
2096	 * write to the hardware in theory, some devices have shown odd
2097	 * behavior when going from D3 -> D3.
2098	 */
2099	oldstate = pci_get_powerstate(child);
2100	if (oldstate == state)
2101		return (0);
2102
2103	/*
2104	 * The PCI power management specification states that after a state
2105	 * transition between PCI power states, system software must
2106	 * guarantee a minimal delay before the function accesses the device.
2107	 * Compute the worst case delay that we need to guarantee before we
2108	 * access the device.  Many devices will be responsive much more
2109	 * quickly than this delay, but there are some that don't respond
2110	 * instantly to state changes.  Transitions to/from D3 state require
2111	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2112	 * is done below with DELAY rather than a sleeper function because
2113	 * this function can be called from contexts where we cannot sleep.
2114	 */
2115	highest = (oldstate > state) ? oldstate : state;
2116	if (highest == PCI_POWERSTATE_D3)
2117	    delay = 10000;
2118	else if (highest == PCI_POWERSTATE_D2)
2119	    delay = 200;
2120	else
2121	    delay = 0;
2122	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2123	    & ~PCIM_PSTAT_DMASK;
2124	result = 0;
2125	switch (state) {
2126	case PCI_POWERSTATE_D0:
2127		status |= PCIM_PSTAT_D0;
2128		break;
2129	case PCI_POWERSTATE_D1:
2130		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2131			return (EOPNOTSUPP);
2132		status |= PCIM_PSTAT_D1;
2133		break;
2134	case PCI_POWERSTATE_D2:
2135		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2136			return (EOPNOTSUPP);
2137		status |= PCIM_PSTAT_D2;
2138		break;
2139	case PCI_POWERSTATE_D3:
2140		status |= PCIM_PSTAT_D3;
2141		break;
2142	default:
2143		return (EINVAL);
2144	}
2145
2146	if (bootverbose)
2147		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2148		    state);
2149
2150	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2151	if (delay)
2152		DELAY(delay);
2153	return (0);
2154}
2155
2156int
2157pci_get_powerstate_method(device_t dev, device_t child)
2158{
2159	struct pci_devinfo *dinfo = device_get_ivars(child);
2160	pcicfgregs *cfg = &dinfo->cfg;
2161	uint16_t status;
2162	int result;
2163
2164	if (cfg->pp.pp_cap != 0) {
2165		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2166		switch (status & PCIM_PSTAT_DMASK) {
2167		case PCIM_PSTAT_D0:
2168			result = PCI_POWERSTATE_D0;
2169			break;
2170		case PCIM_PSTAT_D1:
2171			result = PCI_POWERSTATE_D1;
2172			break;
2173		case PCIM_PSTAT_D2:
2174			result = PCI_POWERSTATE_D2;
2175			break;
2176		case PCIM_PSTAT_D3:
2177			result = PCI_POWERSTATE_D3;
2178			break;
2179		default:
2180			result = PCI_POWERSTATE_UNKNOWN;
2181			break;
2182		}
2183	} else {
2184		/* No support, device is always at D0 */
2185		result = PCI_POWERSTATE_D0;
2186	}
2187	return (result);
2188}
2189
2190/*
2191 * Some convenience functions for PCI device drivers.
2192 */
2193
2194static __inline void
2195pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2196{
2197	uint16_t	command;
2198
2199	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2200	command |= bit;
2201	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2202}
2203
2204static __inline void
2205pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2206{
2207	uint16_t	command;
2208
2209	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2210	command &= ~bit;
2211	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2212}
2213
/*
 * Enable DMA (bus mastering) for the child device by setting
 * PCIM_CMD_BUSMASTEREN in its command register.  Always returns 0.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2220
/*
 * Disable DMA (bus mastering) for the child device by clearing
 * PCIM_CMD_BUSMASTEREN in its command register.  Always returns 0.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2227
2228int
2229pci_enable_io_method(device_t dev, device_t child, int space)
2230{
2231	uint16_t bit;
2232
2233	switch(space) {
2234	case SYS_RES_IOPORT:
2235		bit = PCIM_CMD_PORTEN;
2236		break;
2237	case SYS_RES_MEMORY:
2238		bit = PCIM_CMD_MEMEN;
2239		break;
2240	default:
2241		return (EINVAL);
2242	}
2243	pci_set_command_bit(dev, child, bit);
2244	return (0);
2245}
2246
2247int
2248pci_disable_io_method(device_t dev, device_t child, int space)
2249{
2250	uint16_t bit;
2251
2252	switch(space) {
2253	case SYS_RES_IOPORT:
2254		bit = PCIM_CMD_PORTEN;
2255		break;
2256	case SYS_RES_MEMORY:
2257		bit = PCIM_CMD_MEMEN;
2258		break;
2259	default:
2260		return (EINVAL);
2261	}
2262	pci_clear_command_bit(dev, child, bit);
2263	return (0);
2264}
2265
2266/*
2267 * New style pci driver.  Parent device is either a pci-host-bridge or a
2268 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2269 */
2270
/*
 * Dump the interesting parts of a device's config header when booting
 * verbose: IDs, bus location, class, latency/interrupt data, plus any
 * power-management, MSI and MSI-X capabilities parsed earlier.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Supported states and the current D-state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/* Table and PBA may live in one BAR or two. */
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2327
2328static int
2329pci_porten(device_t dev)
2330{
2331	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2332}
2333
2334static int
2335pci_memen(device_t dev)
2336{
2337	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2338}
2339
/*
 * Probe one BAR: return its current contents in *mapp and the sizing
 * mask (the value read back after writing all 1's) in *testvalp.  The
 * original BAR contents and the command register are restored before
 * returning, so decoding is only briefly disturbed.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	/* For a 64-bit BAR the upper half lives in the next register. */
	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2401
2402static void
2403pci_write_bar(device_t dev, int reg, pci_addr_t base)
2404{
2405	pci_addr_t map;
2406	int ln2range;
2407
2408	map = pci_read_config(dev, reg, 4);
2409
2410	/* The device ROM BAR is always 32-bits. */
2411	if (reg == PCIR_BIOS)
2412		return;
2413	ln2range = pci_maprange(map);
2414	pci_write_config(dev, reg, base, 4);
2415	if (ln2range == 64)
2416		pci_write_config(dev, reg + 4, base >> 32, 4);
2417}
2418
2419/*
2420 * Add a resource based on a pci map register. Return 1 if the map
2421 * register is a 32bit map register or 2 if it is a 64bit register.
2422 */
2423static int
2424pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2425    int force, int prefetch)
2426{
2427	pci_addr_t base, map, testval;
2428	pci_addr_t start, end, count;
2429	int barlen, basezero, maprange, mapsize, type;
2430	uint16_t cmd;
2431	struct resource *res;
2432
2433	pci_read_bar(dev, reg, &map, &testval);
2434	if (PCI_BAR_MEM(map)) {
2435		type = SYS_RES_MEMORY;
2436		if (map & PCIM_BAR_MEM_PREFETCH)
2437			prefetch = 1;
2438	} else
2439		type = SYS_RES_IOPORT;
2440	mapsize = pci_mapsize(testval);
2441	base = pci_mapbase(map);
2442#ifdef __PCI_BAR_ZERO_VALID
2443	basezero = 0;
2444#else
2445	basezero = base == 0;
2446#endif
2447	maprange = pci_maprange(map);
2448	barlen = maprange == 64 ? 2 : 1;
2449
2450	/*
2451	 * For I/O registers, if bottom bit is set, and the next bit up
2452	 * isn't clear, we know we have a BAR that doesn't conform to the
2453	 * spec, so ignore it.  Also, sanity check the size of the data
2454	 * areas to the type of memory involved.  Memory must be at least
2455	 * 16 bytes in size, while I/O ranges must be at least 4.
2456	 */
2457	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2458		return (barlen);
2459	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2460	    (type == SYS_RES_IOPORT && mapsize < 2))
2461		return (barlen);
2462
2463	if (bootverbose) {
2464		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2465		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2466		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2467			printf(", port disabled\n");
2468		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2469			printf(", memory disabled\n");
2470		else
2471			printf(", enabled\n");
2472	}
2473
2474	/*
2475	 * If base is 0, then we have problems if this architecture does
2476	 * not allow that.  It is best to ignore such entries for the
2477	 * moment.  These will be allocated later if the driver specifically
2478	 * requests them.  However, some removable busses look better when
2479	 * all resources are allocated, so allow '0' to be overriden.
2480	 *
2481	 * Similarly treat maps whose values is the same as the test value
2482	 * read back.  These maps have had all f's written to them by the
2483	 * BIOS in an attempt to disable the resources.
2484	 */
2485	if (!force && (basezero || map == testval))
2486		return (barlen);
2487	if ((u_long)base != base) {
2488		device_printf(bus,
2489		    "pci%d:%d:%d:%d bar %#x too many address bits",
2490		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2491		    pci_get_function(dev), reg);
2492		return (barlen);
2493	}
2494
2495	/*
2496	 * This code theoretically does the right thing, but has
2497	 * undesirable side effects in some cases where peripherals
2498	 * respond oddly to having these bits enabled.  Let the user
2499	 * be able to turn them off (since pci_enable_io_modes is 1 by
2500	 * default).
2501	 */
2502	if (pci_enable_io_modes) {
2503		/* Turn on resources that have been left off by a lazy BIOS */
2504		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2505			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2506			cmd |= PCIM_CMD_PORTEN;
2507			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2508		}
2509		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2510			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2511			cmd |= PCIM_CMD_MEMEN;
2512			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2513		}
2514	} else {
2515		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2516			return (barlen);
2517		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2518			return (barlen);
2519	}
2520
2521	count = 1 << mapsize;
2522	if (basezero || base == pci_mapbase(testval)) {
2523		start = 0;	/* Let the parent decide. */
2524		end = ~0ULL;
2525	} else {
2526		start = base;
2527		end = base + (1 << mapsize) - 1;
2528	}
2529	resource_list_add(rl, type, reg, start, end, count);
2530
2531	/*
2532	 * Try to allocate the resource for this BAR from our parent
2533	 * so that this resource range is already reserved.  The
2534	 * driver for this device will later inherit this resource in
2535	 * pci_alloc_resource().
2536	 */
2537	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2538	    prefetch ? RF_PREFETCHABLE : 0);
2539	if (res == NULL) {
2540		/*
2541		 * If the allocation fails, clear the BAR and delete
2542		 * the resource list entry to force
2543		 * pci_alloc_resource() to allocate resources from the
2544		 * parent.
2545		 */
2546		resource_list_delete(rl, type, reg);
2547		start = 0;
2548	} else
2549		start = rman_get_start(res);
2550	pci_write_bar(dev, reg, start);
2551	return (barlen);
2552}
2553
2554/*
2555 * For ATA devices we need to decide early what addressing mode to use.
2556 * Legacy demands that the primary and secondary ATA ports sits on the
2557 * same addresses that old ISA hardware did. This dictates that we use
2558 * those addresses and ignore the BAR's if we cannot set PCI native
2559 * addressing mode.
2560 */
2561static void
2562pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2563    uint32_t prefetchmask)
2564{
2565	struct resource *r;
2566	int rid, type, progif;
2567#if 0
2568	/* if this device supports PCI native addressing use it */
2569	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2570	if ((progif & 0x8a) == 0x8a) {
2571		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2572		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2573			printf("Trying ATA native PCI addressing mode\n");
2574			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2575		}
2576	}
2577#endif
2578	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2579	type = SYS_RES_IOPORT;
2580	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2581		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2582		    prefetchmask & (1 << 0));
2583		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2584		    prefetchmask & (1 << 1));
2585	} else {
2586		rid = PCIR_BAR(0);
2587		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2588		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2589		    0x1f7, 8, 0);
2590		rid = PCIR_BAR(1);
2591		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2592		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2593		    0x3f6, 1, 0);
2594	}
2595	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2596		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2597		    prefetchmask & (1 << 2));
2598		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2599		    prefetchmask & (1 << 3));
2600	} else {
2601		rid = PCIR_BAR(2);
2602		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2603		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2604		    0x177, 8, 0);
2605		rid = PCIR_BAR(3);
2606		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2607		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2608		    0x376, 1, 0);
2609	}
2610	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2611	    prefetchmask & (1 << 4));
2612	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2613	    prefetchmask & (1 << 5));
2614}
2615
/*
 * Determine the INTx IRQ for a device and record it as rid 0 in the
 * device's resource list.  Precedence: a hw.pci<dom>.<bus>.<slot>.INT<pin>.irq
 * tunable override wins; otherwise either the intline register value is
 * used or the bus is asked to route an interrupt (routing is attempted
 * first when force_route is set or intline is invalid, with intline as
 * the fallback).  If no valid IRQ results, nothing is added.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only 1..254 are accepted as overrides. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2663
2664/* Perform early OHCI takeover from SMM. */
/*
 * If the BIOS/SMM currently owns the OHCI controller (OHCI_IR set in
 * the control register), request an ownership change and poll for up
 * to ~100ms.  If SMM never releases the controller, reset it; either
 * way, controller interrupts are masked before returning.  Requires
 * BAR(0) to be mappable; silently does nothing otherwise.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Ask SMM to hand over the controller... */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* ...and poll up to 100 x 1ms for it to comply. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2700
2701/* Perform early UHCI takeover from SMM. */
2702static void
2703uhci_early_takeover(device_t self)
2704{
2705	struct resource *res;
2706	int rid;
2707
2708	/*
2709	 * Set the PIRQD enable bit and switch off all the others. We don't
2710	 * want legacy support to interfere with us XXX Does this also mean
2711	 * that the BIOS won't touch the keyboard anymore if it is connected
2712	 * to the ports of the root hub?
2713	 */
2714	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2715
2716	/* Disable interrupts */
2717	rid = PCI_UHCI_BASE_REG;
2718	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2719	if (res != NULL) {
2720		bus_write_2(res, UHCI_INTR, 0);
2721		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2722	}
2723}
2724
2725/* Perform early EHCI takeover from SMM. */
/*
 * Walk the EHCI extended capability list looking for the legacy
 * support (LEGSUP) capability.  If the BIOS semaphore is held, set
 * the OS semaphore and poll for up to ~100ms for the BIOS to release
 * ownership, then mask the controller's interrupts.  Requires BAR(0)
 * to be mappable; silently does nothing otherwise.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own the controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2780
/*
 * Populate a device's resource list: walk its BARs (with special
 * handling for legacy-mode ATA controllers), add any quirked map
 * registers, assign an INTx interrupt resource, and perform early
 * BIOS/SMM handoff for USB host controllers.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 to step over 64-bit BARs. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2835
/*
 * Enumerate the given bus: scan every slot up to PCIB_MAXSLOTS through
 * the parent bridge's config accessors and add a child for each
 * function found.  Functions above 0 are only scanned when the header
 * type marks the device as multi-function.  dinfo_size lets bus
 * subclasses embed struct pci_devinfo in a larger structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Function 0's header type decides how far to scan. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2868
/*
 * Create a new-bus child for a discovered PCI function: attach the
 * devinfo as ivars, initialize its resource list, snapshot and then
 * re-write its config registers, print it when verbose, and reserve
 * resources for its BARs.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save current config state, then push it back to the device. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2880
/*
 * Generic PCI bus probe.  Returns BUS_PROBE_GENERIC so that more
 * specific pci bus subclasses can override this driver.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2890
2891static int
2892pci_attach(device_t dev)
2893{
2894	int busno, domain;
2895
2896	/*
2897	 * Since there can be multiple independantly numbered PCI
2898	 * busses on systems with multiple PCI domains, we can't use
2899	 * the unit number to decide which bus we are probing. We ask
2900	 * the parent pcib what our domain and bus numbers are.
2901	 */
2902	domain = pcib_get_domain(dev);
2903	busno = pcib_get_bus(dev);
2904	if (bootverbose)
2905		device_printf(dev, "domain=%d, physical bus=%d\n",
2906		    domain, busno);
2907	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2908	return (bus_generic_attach(dev));
2909}
2910
/*
 * Suspend the bus: save each child's config space, suspend the
 * children, and then — only when ACPI is present and power-state
 * management on resume is enabled — place attached type-0 functions
 * into D3 (or whatever state ACPI suggests for this sleep state).
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2959
/*
 * Resume the bus: power each attached type-0 child back to D0 (when
 * ACPI is driving power policy), restore every child's saved config
 * space, and then resume the children generically.  Unattached
 * children have their config state re-saved so they stay powered down.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2997
2998static void
2999pci_load_vendor_data(void)
3000{
3001	caddr_t vendordata, info;
3002
3003	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3004		info = preload_search_info(vendordata, MODINFO_ADDR);
3005		pci_vendordata = *(char **)info;
3006		info = preload_search_info(vendordata, MODINFO_SIZE);
3007		pci_vendordata_size = *(size_t *)info;
3008		/* terminate the database */
3009		pci_vendordata[pci_vendordata_size] = '\n';
3010	}
3011}
3012
/*
 * Bus callback invoked when a new driver is registered: give the
 * driver a chance to identify new children, then retry probe/attach
 * on every child that does not yet have a driver, restoring its
 * config space first and re-saving (powering down) it if the attach
 * fails.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3041
/*
 * Establish an interrupt handler.  Beyond bus_generic_setup_intr(), for
 * direct children this keeps the PCI command register and the per-message
 * MSI/MSI-X bookkeeping consistent: INTx is enabled for a legacy
 * interrupt (rid 0) and disabled when an MSI/MSI-X handler is installed,
 * and the first handler on a message triggers mapping and enabling of
 * that message via the parent bridge.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI message on its first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* First handler: program and enable MSI. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the message vector on its first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler: program the table entry, then
			 * unmask it. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* error is 0 here on the success path through the label. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3133
3134int
3135pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3136    void *cookie)
3137{
3138	struct msix_table_entry *mte;
3139	struct resource_list_entry *rle;
3140	struct pci_devinfo *dinfo;
3141	int error, rid;
3142
3143	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3144		return (EINVAL);
3145
3146	/* If this isn't a direct child, just bail out */
3147	if (device_get_parent(child) != dev)
3148		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3149
3150	rid = rman_get_rid(irq);
3151	if (rid == 0) {
3152		/* Mask INTx */
3153		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3154	} else {
3155		/*
3156		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3157		 * decrement the appropriate handlers count and mask the
3158		 * MSI-X message, or disable MSI messages if the count
3159		 * drops to 0.
3160		 */
3161		dinfo = device_get_ivars(child);
3162		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3163		if (rle->res != irq)
3164			return (EINVAL);
3165		if (dinfo->cfg.msi.msi_alloc > 0) {
3166			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3167			    ("MSI-X index too high"));
3168			if (dinfo->cfg.msi.msi_handlers == 0)
3169				return (EINVAL);
3170			dinfo->cfg.msi.msi_handlers--;
3171			if (dinfo->cfg.msi.msi_handlers == 0)
3172				pci_disable_msi(child);
3173		} else {
3174			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3175			    ("No MSI or MSI-X interrupts allocated"));
3176			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3177			    ("MSI-X index too high"));
3178			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3179			if (mte->mte_handlers == 0)
3180				return (EINVAL);
3181			mte->mte_handlers--;
3182			if (mte->mte_handlers == 0)
3183				pci_mask_msix(child, rid - 1);
3184		}
3185	}
3186	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3187	if (rid > 0)
3188		KASSERT(error == 0,
3189		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3190	return (error);
3191}
3192
3193int
3194pci_print_child(device_t dev, device_t child)
3195{
3196	struct pci_devinfo *dinfo;
3197	struct resource_list *rl;
3198	int retval = 0;
3199
3200	dinfo = device_get_ivars(child);
3201	rl = &dinfo->resources;
3202
3203	retval += bus_print_child_header(dev, child);
3204
3205	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3206	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3207	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3208	if (device_get_flags(dev))
3209		retval += printf(" flags %#x", device_get_flags(dev));
3210
3211	retval += printf(" at device %d.%d", pci_get_slot(child),
3212	    pci_get_function(child));
3213
3214	retval += bus_print_child_footer(dev, child);
3215
3216	return (retval);
3217}
3218
/*
 * Class/subclass -> description table consulted by pci_probe_nomatch()
 * when no driver attaches and the vendor database has no entry for the
 * device.  A subclass of -1 supplies the generic description for the
 * whole class; the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3310
3311void
3312pci_probe_nomatch(device_t dev, device_t child)
3313{
3314	int	i;
3315	char	*cp, *scp, *device;
3316
3317	/*
3318	 * Look for a listing for this device in a loaded device database.
3319	 */
3320	if ((device = pci_describe_device(child)) != NULL) {
3321		device_printf(dev, "<%s>", device);
3322		free(device, M_DEVBUF);
3323	} else {
3324		/*
3325		 * Scan the class/subclass descriptions for a general
3326		 * description.
3327		 */
3328		cp = "unknown";
3329		scp = NULL;
3330		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3331			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3332				if (pci_nomatch_tab[i].subclass == -1) {
3333					cp = pci_nomatch_tab[i].desc;
3334				} else if (pci_nomatch_tab[i].subclass ==
3335				    pci_get_subclass(child)) {
3336					scp = pci_nomatch_tab[i].desc;
3337				}
3338			}
3339		}
3340		device_printf(dev, "<%s%s%s>",
3341		    cp ? cp : "",
3342		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3343		    scp ? scp : "");
3344	}
3345	printf(" at device %d.%d (no driver attached)\n",
3346	    pci_get_slot(child), pci_get_function(child));
3347	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3348	return;
3349}
3350
3351/*
3352 * Parse the PCI device database, if loaded, and return a pointer to a
3353 * description of the device.
3354 *
3355 * The database is flat text formatted as follows:
3356 *
3357 * Any line not in a valid format is ignored.
3358 * Lines are terminated with newline '\n' characters.
3359 *
3360 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3361 * the vendor name.
3362 *
3363 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3364 * - devices cannot be listed without a corresponding VENDOR line.
3365 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3366 * another TAB, then the device name.
3367 */
3368
3369/*
3370 * Assuming (ptr) points to the beginning of a line in the database,
3371 * return the vendor or device and description of the next entry.
3372 * The value of (vendor) or (device) inappropriate for the entry type
3373 * is set to -1.  Returns nonzero at the end of the database.
3374 *
3375 * Note that this is slightly unrobust in the face of corrupt data;
3376 * we attempt to safeguard against this by spamming the end of the
3377 * database with a newline when we initialise.
3378 */
3379static int
3380pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3381{
3382	char	*cp = *ptr;
3383	int	left;
3384
3385	*device = -1;
3386	*vendor = -1;
3387	**desc = '\0';
3388	for (;;) {
3389		left = pci_vendordata_size - (cp - pci_vendordata);
3390		if (left <= 0) {
3391			*ptr = cp;
3392			return(1);
3393		}
3394
3395		/* vendor entry? */
3396		if (*cp != '\t' &&
3397		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3398			break;
3399		/* device entry? */
3400		if (*cp == '\t' &&
3401		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3402			break;
3403
3404		/* skip to next line */
3405		while (*cp != '\n' && left > 0) {
3406			cp++;
3407			left--;
3408		}
3409		if (*cp == '\n') {
3410			cp++;
3411			left--;
3412		}
3413	}
3414	/* skip to next line */
3415	while (*cp != '\n' && left > 0) {
3416		cp++;
3417		left--;
3418	}
3419	if (*cp == '\n' && left > 0)
3420		cp++;
3421	*ptr = cp;
3422	return(0);
3423}
3424
/*
 * Look up (dev)'s vendor and device IDs in the loaded vendor database
 * and return a malloc'ed "vendor, device" string, or NULL if the
 * database is absent, allocation fails, or the vendor is not listed.
 * The caller must free the result with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/* Scan this vendor's device lines until a match or the next vendor. */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			/* End of database: no device match. */
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Ran into the next vendor entry: no device match. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device ID when no name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3477
/*
 * Read a bus instance variable (ivar) for a PCI child.  Most values are
 * served from the cached config-space registers in the child's
 * pci_devinfo.  Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR
 * (not supported by the generic bus), and ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined ID: device in the high 16 bits, vendor low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3560
/*
 * Write a bus instance variable for a PCI child.  Only the interrupt
 * pin is writable; identification ivars are read-only and yield EINVAL,
 * while unknown ivars yield ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		/* The cached interrupt pin is the only writable ivar. */
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3593
3594
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * List resources based on pci map registers, used for within ddb
 */

/*
 * "show pciregs" DDB command: walk the global pci_devq and print one
 * summary line per device in a pciconf(8)-like format.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices are labeled "none<N>" with a running
		 * counter in place of a unit number. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
3644
/*
 * Lazily reserve the resource backing a BAR: size the BAR by probing
 * it, allocate a suitably sized and aligned range from the parent,
 * record it in the child's resource list as RLE_RESERVED, and program
 * the BAR with the assigned base.  Returns the (inactive) resource or
 * NULL if the BAR is unimplemented or allocation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* Sanity-check the requested type against what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the base address we were assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3740
3741
/*
 * Allocate a resource for a child.  Indirect children are passed
 * straight up to the parent.  For direct children, an interrupt may be
 * lazily routed, and I/O port / memory ranges are lazily reserved from
 * the BAR via pci_reserve_map() before the resource-list allocation.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All types fall through to the common resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3792
3793int
3794pci_activate_resource(device_t dev, device_t child, int type, int rid,
3795    struct resource *r)
3796{
3797	int error;
3798
3799	error = bus_generic_activate_resource(dev, child, type, rid, r);
3800	if (error)
3801		return (error);
3802
3803	/* Enable decoding in the command register when activating BARs. */
3804	if (device_get_parent(child) == dev) {
3805		/* Device ROMs need their decoding explicitly enabled. */
3806		if (rid == PCIR_BIOS)
3807			pci_write_config(child, rid, rman_get_start(r) |
3808			    PCIM_BIOS_ENABLE, 4);
3809		switch (type) {
3810		case SYS_RES_IOPORT:
3811		case SYS_RES_MEMORY:
3812			error = PCI_ENABLE_IO(dev, child, type);
3813			break;
3814		}
3815	}
3816	return (error);
3817}
3818
3819int
3820pci_deactivate_resource(device_t dev, device_t child, int type,
3821    int rid, struct resource *r)
3822{
3823	int error;
3824
3825	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3826	if (error)
3827		return (error);
3828
3829	/* Disable decoding for device ROMs. */
3830	if (rid == PCIR_BIOS)
3831		pci_write_config(child, rid, rman_get_start(r), 4);
3832	return (0);
3833}
3834
/*
 * Detach and destroy a child device, releasing all of its reserved
 * resources.  Decode is turned off in the command register before the
 * resources are freed so the device stops responding to them.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/* A still-active or still-busy entry indicates a
			 * bookkeeping bug; complain, then force release. */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3874
/*
 * Remove a resource-list entry for a direct child, unreserving the
 * backing resource if one exists.  Refuses (with a diagnostic) if the
 * resource is still active or busy.  BAR-backed ranges are cleared in
 * config space first so the device stops decoding them.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3917
3918struct resource_list *
3919pci_get_resource_list (device_t dev, device_t child)
3920{
3921	struct pci_devinfo *dinfo = device_get_ivars(child);
3922
3923	return (&dinfo->resources);
3924}
3925
3926uint32_t
3927pci_read_config_method(device_t dev, device_t child, int reg, int width)
3928{
3929	struct pci_devinfo *dinfo = device_get_ivars(child);
3930	pcicfgregs *cfg = &dinfo->cfg;
3931
3932	return (PCIB_READ_CONFIG(device_get_parent(dev),
3933	    cfg->bus, cfg->slot, cfg->func, reg, width));
3934}
3935
3936void
3937pci_write_config_method(device_t dev, device_t child, int reg,
3938    uint32_t val, int width)
3939{
3940	struct pci_devinfo *dinfo = device_get_ivars(child);
3941	pcicfgregs *cfg = &dinfo->cfg;
3942
3943	PCIB_WRITE_CONFIG(device_get_parent(dev),
3944	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3945}
3946
3947int
3948pci_child_location_str_method(device_t dev, device_t child, char *buf,
3949    size_t buflen)
3950{
3951
3952	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3953	    pci_get_function(child));
3954	return (0);
3955}
3956
3957int
3958pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3959    size_t buflen)
3960{
3961	struct pci_devinfo *dinfo;
3962	pcicfgregs *cfg;
3963
3964	dinfo = device_get_ivars(child);
3965	cfg = &dinfo->cfg;
3966	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3967	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3968	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3969	    cfg->progif);
3970	return (0);
3971}
3972
3973int
3974pci_assign_interrupt_method(device_t dev, device_t child)
3975{
3976	struct pci_devinfo *dinfo = device_get_ivars(child);
3977	pcicfgregs *cfg = &dinfo->cfg;
3978
3979	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3980	    cfg->intpin));
3981}
3982
/*
 * Module event handler: on load, initialize the global device queue,
 * create the /dev/pci control node, and pull in the preloaded vendor
 * database; on unload, destroy the control node.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
4004
/*
 * Restore a device's saved config-space state (BARs, command register,
 * interrupt routing and assorted byte registers), after first forcing
 * the device back to full power.  Only plain (header type 0) devices
 * are handled; bridges and cardbus controllers need special treatment
 * elsewhere.  MSI/MSI-X configuration is reprogrammed if present.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write back the saved BARs and standard header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4050
4051void
4052pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4053{
4054	int i;
4055	uint32_t cls;
4056	int ps;
4057
4058	/*
4059	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4060	 * we know need special treatment.  Type 2 devices are cardbus bridges
4061	 * which also require special treatment.  Other types are unknown, and
4062	 * we err on the side of safety by ignoring them.  Powering down
4063	 * bridges should not be undertaken lightly.
4064	 */
4065	if (dinfo->cfg.hdrtype != 0)
4066		return;
4067	for (i = 0; i < dinfo->cfg.nummaps; i++)
4068		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4069	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4070
4071	/*
4072	 * Some drivers apparently write to these registers w/o updating our
4073	 * cached copy.  No harm happens if we update the copy, so do so here
4074	 * so we can restore them.  The COMMAND register is modified by the
4075	 * bus w/o updating the cache.  This should represent the normally
4076	 * writable portion of the 'defined' part of type 0 headers.  In
4077	 * theory we also need to save/restore the PCI capability structures
4078	 * we know about, but apart from power we don't know any that are
4079	 * writable.
4080	 */
4081	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4082	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4083	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4084	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4085	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4086	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4087	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4088	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4089	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4090	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4091	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4092	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4093	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4094	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4095	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4096
4097	/*
4098	 * don't set the state for display devices, base peripherals and
4099	 * memory devices since bad things happen when they are powered down.
4100	 * We should (a) have drivers that can easily detach and (b) use
4101	 * generic drivers for these devices so that some device actually
4102	 * attaches.  We need to make sure that when we implement (a) we don't
4103	 * power the device down on a reattach.
4104	 */
4105	cls = pci_get_class(dev);
4106	if (!setstate)
4107		return;
4108	switch (pci_do_power_nodriver)
4109	{
4110		case 0:		/* NO powerdown at all */
4111			return;
4112		case 1:		/* Conservative about what to power down */
4113			if (cls == PCIC_STORAGE)
4114				return;
4115			/*FALLTHROUGH*/
4116		case 2:		/* Agressive about what to power down */
4117			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4118			    cls == PCIC_BASEPERIPH)
4119				return;
4120			/*FALLTHROUGH*/
4121		case 3:		/* Power down everything */
4122			break;
4123	}
4124	/*
4125	 * PCI spec says we can only go into D3 state from D0 state.
4126	 * Transition from D[12] into D0 before going to D3 state.
4127	 */
4128	ps = pci_get_powerstate(dev);
4129	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4130		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4131	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4132		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4133}
4134