pci.c revision 201609
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 201609 2010-01-05 20:42:25Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72#ifdef __HAVE_ACPI
73#include <contrib/dev/acpica/include/acpi.h>
74#include "acpi_if.h"
75#else
76#define	ACPI_PWR_FOR_SLEEP(x, y, z)
77#endif
78
/* Prototypes for functions private to this file. */
/* BAR / expansion-ROM map register decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
/* Probe/attach, resource and device-description plumbing. */
static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
/* Config-header, capability and VPD parsing. */
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI / MSI-X support. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
121
/*
 * Kernel-object dispatch table for the PCI bus driver: maps the device,
 * bus and PCI kobj interfaces onto this file's implementations, falling
 * back to the bus_generic_* routines where no PCI-specific handling is
 * required.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
173
/* Register the "pci" driver class and attach it below pcib(4) bridges. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor-description data loaded by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
182
183
/*
 * Per-device workaround description.  'type' selects the quirk and
 * arg1/arg2 carry quirk-specific parameters (e.g. the extra map
 * register offset for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
192
/* Zero-terminated table of known-broken devices, keyed by devid. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
227
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* list of all enumerated PCI functions */
uint32_t pci_generation;	/* bumped whenever a device is added */
uint32_t pci_numdevs = 0;	/* count of entries on pci_devq */
/* Set when any PCIe / PCI-X capability is seen during enumeration. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
289
/* Find a device_t by bus/slot/function in domain 0 */

/*
 * Convenience wrapper around pci_find_dbsf() for the common
 * single-domain (domain 0) case.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
298
299/* Find a device_t by domain/bus/slot/function */
300
301device_t
302pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
303{
304	struct pci_devinfo *dinfo;
305
306	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
307		if ((dinfo->cfg.domain == domain) &&
308		    (dinfo->cfg.bus == bus) &&
309		    (dinfo->cfg.slot == slot) &&
310		    (dinfo->cfg.func == func)) {
311			return (dinfo->cfg.dev);
312		}
313	}
314
315	return (NULL);
316}
317
318/* Find a device_t by vendor/device ID */
319
320device_t
321pci_find_device(uint16_t vendor, uint16_t device)
322{
323	struct pci_devinfo *dinfo;
324
325	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
326		if ((dinfo->cfg.vendor == vendor) &&
327		    (dinfo->cfg.device == device)) {
328			return (dinfo->cfg.dev);
329		}
330	}
331
332	return (NULL);
333}
334
335static int
336pci_printf(pcicfgregs *cfg, const char *fmt, ...)
337{
338	va_list ap;
339	int retval;
340
341	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
342	    cfg->func);
343	va_start(ap, fmt);
344	retval += vprintf(fmt, ap);
345	va_end(ap);
346	return (retval);
347}
348
349/* return base address of memory or port map */
350
351static pci_addr_t
352pci_mapbase(uint64_t mapreg)
353{
354
355	if (PCI_BAR_MEM(mapreg))
356		return (mapreg & PCIM_BAR_MEM_BASE);
357	else
358		return (mapreg & PCIM_BAR_IO_BASE);
359}
360
361/* return map type of memory or port map */
362
363static const char *
364pci_maptype(uint64_t mapreg)
365{
366
367	if (PCI_BAR_IO(mapreg))
368		return ("I/O Port");
369	if (mapreg & PCIM_BAR_MEM_PREFETCH)
370		return ("Prefetchable Memory");
371	return ("Memory");
372}
373
374/* return log2 of map size decoded for memory or port map */
375
376static int
377pci_mapsize(uint64_t testval)
378{
379	int ln2size;
380
381	testval = pci_mapbase(testval);
382	ln2size = 0;
383	if (testval != 0) {
384		while ((testval & 1) == 0)
385		{
386			ln2size++;
387			testval >>= 1;
388		}
389	}
390	return (ln2size);
391}
392
/* return base address of device ROM */

/*
 * The expansion-ROM BAR keeps its address in the high bits; mask off
 * the enable bit and reserved low bits.
 */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
401
402/* return log2 of map size decided for device ROM */
403
404static int
405pci_romsize(uint64_t testval)
406{
407	int ln2size;
408
409	testval = pci_rombase(testval);
410	ln2size = 0;
411	if (testval != 0) {
412		while ((testval & 1) == 0)
413		{
414			ln2size++;
415			testval >>= 1;
416		}
417	}
418	return (ln2size);
419}
420
421/* return log2 of address range supported by map register */
422
423static int
424pci_maprange(uint64_t mapreg)
425{
426	int ln2range = 0;
427
428	if (PCI_BAR_IO(mapreg))
429		ln2range = 32;
430	else
431		switch (mapreg & PCIM_BAR_MEM_TYPE) {
432		case PCIM_BAR_MEM_32:
433			ln2range = 32;
434			break;
435		case PCIM_BAR_MEM_1MB:
436			ln2range = 20;
437			break;
438		case PCIM_BAR_MEM_64:
439			ln2range = 64;
440			break;
441		}
442	return (ln2range);
443}
444
445/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
446
447static void
448pci_fixancient(pcicfgregs *cfg)
449{
450	if (cfg->hdrtype != 0)
451		return;
452
453	/* PCI to PCI bridges use header type 1 */
454	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
455		cfg->hdrtype = 1;
456}
457
458/* extract header type specific config data */
459
460static void
461pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
462{
463#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
464	switch (cfg->hdrtype) {
465	case 0:
466		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
467		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
468		cfg->nummaps	    = PCI_MAXMAPS_0;
469		break;
470	case 1:
471		cfg->nummaps	    = PCI_MAXMAPS_1;
472		break;
473	case 2:
474		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
475		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
476		cfg->nummaps	    = PCI_MAXMAPS_2;
477		break;
478	}
479#undef REG
480}
481
/* read configuration header into pcicfgregs structure */

/*
 * Probe one config-space address (domain/bus/slot/function).  If a
 * device responds, allocate a pci_devinfo of 'size' bytes (callers may
 * request a larger, derived structure), fill in its pcicfgregs from the
 * header, parse any capability list, link it onto pci_devq and return
 * it.  Returns NULL when no device is present (or allocation fails).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device read means nothing answered. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed header into the exported pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
556
/*
 * Walk the device's capability list and record the location and
 * contents of each capability we care about (power management,
 * HyperTransport MSI mapping, MSI, MSI-X, VPD, subvendor, PCI-X,
 * PCI-express) into 'cfg'.
 *
 * Note: the REG/WREG macros defined here intentionally remain defined
 * for the VPD functions below and are #undef'ed at the end of
 * pci_read_vpd().
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of supported messages. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Low bits select the BAR; the rest is the offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
690
/*
 * PCI Vital Product Data
 */

/* Max number of 1us polls of the VPD address register before giving up. */
#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one 4-byte-aligned 32-bit word of VPD at offset 'reg' into
 * *data.  Returns 0 on success or ENXIO if the device never sets the
 * completion flag (bit 15 of the VPD address register).
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Writing the address with bit 15 clear starts a read cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Poll until the device sets bit 15 to signal completion. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
715
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD at offset 'reg'
 * (currently unused, kept for reference).  Setting bit 15 of the
 * address register starts a write cycle; the device clears the bit
 * when the write completes.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
737
/* Cursor state for streaming bytes out of a device's VPD. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* offset of next word to fetch */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
746
747static int
748vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
749{
750	uint32_t reg;
751	uint8_t byte;
752
753	if (vrs->bytesinval == 0) {
754		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
755			return (ENXIO);
756		vrs->val = le32toh(reg);
757		vrs->off += 4;
758		byte = vrs->val & 0xff;
759		vrs->bytesinval = 3;
760	} else {
761		vrs->val = vrs->val >> 8;
762		byte = vrs->val & 0xff;
763		vrs->bytesinval--;
764	}
765
766	vrs->cksum += byte;
767	*data = byte;
768	return (0);
769}
770
/*
 * Parse the device's entire VPD image into cfg->vpd.  A state machine
 * walks the resource list: state 0 decodes an item header, state 1
 * copies the identifier string, states 2/3 parse read-only (VPD-R)
 * keyword headers and values, and states 5/6 do the same for the
 * writable (VPD-W) section.  state -1 ends parsing normally; state -2
 * marks an I/O error.  The "RV" keyword's first byte completes a
 * checksum over everything read so far, which must sum to zero.
 * On any failure the partially built arrays are freed; vpd_cached is
 * always set so the parse is attempted only once per device.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian len. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD can't exceed the 0x7f-word window. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit len, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* The 3-byte keyword header counts against remain. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The first "RV" byte completes the checksum of
			 * everything consumed so far; it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Section done: trim the array to size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * NOTE(review): nothing in this function visibly
			 * transitions to state 4 — looks like a dead
			 * skip-bytes state; confirm before removing.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this value lives for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the parse attempted even on failure — never retry. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1050
1051int
1052pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1053{
1054	struct pci_devinfo *dinfo = device_get_ivars(child);
1055	pcicfgregs *cfg = &dinfo->cfg;
1056
1057	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1058		pci_read_vpd(device_get_parent(dev), cfg);
1059
1060	*identptr = cfg->vpd.vpd_ident;
1061
1062	if (*identptr == NULL)
1063		return (ENXIO);
1064
1065	return (0);
1066}
1067
1068int
1069pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1070	const char **vptr)
1071{
1072	struct pci_devinfo *dinfo = device_get_ivars(child);
1073	pcicfgregs *cfg = &dinfo->cfg;
1074	int i;
1075
1076	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1077		pci_read_vpd(device_get_parent(dev), cfg);
1078
1079	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1080		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1081		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1082			*vptr = cfg->vpd.vpd_ros[i].value;
1083		}
1084
1085	if (i != cfg->vpd.vpd_rocnt)
1086		return (0);
1087
1088	*vptr = NULL;
1089	return (ENXIO);
1090}
1091
1092/*
1093 * Find the requested extended capability and return the offset in
1094 * configuration space via the pointer provided. The function returns
1095 * 0 on success and error code otherwise.
1096 */
1097int
1098pci_find_extcap_method(device_t dev, device_t child, int capability,
1099    int *capreg)
1100{
1101	struct pci_devinfo *dinfo = device_get_ivars(child);
1102	pcicfgregs *cfg = &dinfo->cfg;
1103	u_int32_t status;
1104	u_int8_t ptr;
1105
1106	/*
1107	 * Check the CAP_LIST bit of the PCI status register first.
1108	 */
1109	status = pci_read_config(child, PCIR_STATUS, 2);
1110	if (!(status & PCIM_STATUS_CAPPRESENT))
1111		return (ENXIO);
1112
1113	/*
1114	 * Determine the start pointer of the capabilities list.
1115	 */
1116	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1117	case 0:
1118	case 1:
1119		ptr = PCIR_CAP_PTR;
1120		break;
1121	case 2:
1122		ptr = PCIR_CAP_PTR_2;
1123		break;
1124	default:
1125		/* XXX: panic? */
1126		return (ENXIO);		/* no extended capabilities support */
1127	}
1128	ptr = pci_read_config(child, ptr, 1);
1129
1130	/*
1131	 * Traverse the capabilities list.
1132	 */
1133	while (ptr != 0) {
1134		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1135			if (capreg != NULL)
1136				*capreg = ptr;
1137			return (0);
1138		}
1139		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1140	}
1141
1142	return (ENOENT);
1143}
1144
1145/*
1146 * Support for MSI-X message interrupts.
1147 */
/*
 * Program the address/data pair for MSI-X message 'index' into the
 * device's MSI-X table.  Each table entry is 16 bytes: address low,
 * address high, data, and vector control (the last written by the
 * mask/unmask routines, not here).
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1164
1165void
1166pci_mask_msix(device_t dev, u_int index)
1167{
1168	struct pci_devinfo *dinfo = device_get_ivars(dev);
1169	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1170	uint32_t offset, val;
1171
1172	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1173	offset = msix->msix_table_offset + index * 16 + 12;
1174	val = bus_read_4(msix->msix_table_res, offset);
1175	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1176		val |= PCIM_MSIX_VCTRL_MASK;
1177		bus_write_4(msix->msix_table_res, offset, val);
1178	}
1179}
1180
1181void
1182pci_unmask_msix(device_t dev, u_int index)
1183{
1184	struct pci_devinfo *dinfo = device_get_ivars(dev);
1185	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1186	uint32_t offset, val;
1187
1188	KASSERT(msix->msix_table_len > index, ("bogus index"));
1189	offset = msix->msix_table_offset + index * 16 + 12;
1190	val = bus_read_4(msix->msix_table_res, offset);
1191	if (val & PCIM_MSIX_VCTRL_MASK) {
1192		val &= ~PCIM_MSIX_VCTRL_MASK;
1193		bus_write_4(msix->msix_table_res, offset, val);
1194	}
1195}
1196
1197int
1198pci_pending_msix(device_t dev, u_int index)
1199{
1200	struct pci_devinfo *dinfo = device_get_ivars(dev);
1201	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1202	uint32_t offset, bit;
1203
1204	KASSERT(msix->msix_table_len > index, ("bogus index"));
1205	offset = msix->msix_pba_offset + (index / 32) * 4;
1206	bit = 1 << index % 32;
1207	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1208}
1209
1210/*
1211 * Restore MSI-X registers and table during resume.  If MSI-X is
1212 * enabled then walk the virtual table to restore the actual MSI-X
1213 * table.
1214 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved MSI-X control register in all cases. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1242
1243/*
1244 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1245 * returned in *count.  After this function returns, each message will be
1246 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1247 */
1248int
1249pci_alloc_msix_method(device_t dev, device_t child, int *count)
1250{
1251	struct pci_devinfo *dinfo = device_get_ivars(child);
1252	pcicfgregs *cfg = &dinfo->cfg;
1253	struct resource_list_entry *rle;
1254	int actual, error, i, irq, max;
1255
1256	/* Don't let count == 0 get us into trouble. */
1257	if (*count == 0)
1258		return (EINVAL);
1259
1260	/* If rid 0 is allocated, then fail. */
1261	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1262	if (rle != NULL && rle->res != NULL)
1263		return (ENXIO);
1264
1265	/* Already have allocated messages? */
1266	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1267		return (ENXIO);
1268
1269	/* If MSI is blacklisted for this system, fail. */
1270	if (pci_msi_blacklisted())
1271		return (ENXIO);
1272
1273	/* MSI-X capability present? */
1274	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1275		return (ENODEV);
1276
1277	/* Make sure the appropriate BARs are mapped. */
1278	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1279	    cfg->msix.msix_table_bar);
1280	if (rle == NULL || rle->res == NULL ||
1281	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1282		return (ENXIO);
1283	cfg->msix.msix_table_res = rle->res;
1284	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1285		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1286		    cfg->msix.msix_pba_bar);
1287		if (rle == NULL || rle->res == NULL ||
1288		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1289			return (ENXIO);
1290	}
1291	cfg->msix.msix_pba_res = rle->res;
1292
1293	if (bootverbose)
1294		device_printf(child,
1295		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1296		    *count, cfg->msix.msix_msgnum);
1297	max = min(*count, cfg->msix.msix_msgnum);
1298	for (i = 0; i < max; i++) {
1299		/* Allocate a message. */
1300		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1301		if (error)
1302			break;
1303		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1304		    irq, 1);
1305	}
1306	actual = i;
1307
1308	if (bootverbose) {
1309		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1310		if (actual == 1)
1311			device_printf(child, "using IRQ %lu for MSI-X\n",
1312			    rle->start);
1313		else {
1314			int run;
1315
1316			/*
1317			 * Be fancy and try to print contiguous runs of
1318			 * IRQ values as ranges.  'irq' is the previous IRQ.
1319			 * 'run' is true if we are in a range.
1320			 */
1321			device_printf(child, "using IRQs %lu", rle->start);
1322			irq = rle->start;
1323			run = 0;
1324			for (i = 1; i < actual; i++) {
1325				rle = resource_list_find(&dinfo->resources,
1326				    SYS_RES_IRQ, i + 1);
1327
1328				/* Still in a run? */
1329				if (rle->start == irq + 1) {
1330					run = 1;
1331					irq++;
1332					continue;
1333				}
1334
1335				/* Finish previous range. */
1336				if (run) {
1337					printf("-%d", irq);
1338					run = 0;
1339				}
1340
1341				/* Start new range. */
1342				printf(",%lu", rle->start);
1343				irq = rle->start;
1344			}
1345
1346			/* Unfinished range? */
1347			if (run)
1348				printf("-%d", irq);
1349			printf(" for MSI-X\n");
1350		}
1351	}
1352
1353	/* Mask all vectors. */
1354	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1355		pci_mask_msix(child, i);
1356
1357	/* Allocate and initialize vector data and virtual table. */
1358	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1359	    M_DEVBUF, M_WAITOK | M_ZERO);
1360	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1361	    M_DEVBUF, M_WAITOK | M_ZERO);
1362	for (i = 0; i < actual; i++) {
1363		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1364		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1365		cfg->msix.msix_table[i].mte_vector = i + 1;
1366	}
1367
1368	/* Update control register to enable MSI-X. */
1369	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1370	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1371	    cfg->msix.msix_ctrl, 2);
1372
1373	/* Update counts of alloc'd messages. */
1374	cfg->msix.msix_alloc = actual;
1375	cfg->msix.msix_table_len = actual;
1376	*count = actual;
1377	return (0);
1378}
1379
1380/*
1381 * By default, pci_alloc_msix() will assign the allocated IRQ
1382 * resources consecutively to the first N messages in the MSI-X table.
1383 * However, device drivers may want to use different layouts if they
1384 * either receive fewer messages than they asked for, or they wish to
1385 * populate the MSI-X table sparsely.  This method allows the driver
1386 * to specify what layout it wants.  It must be called after a
1387 * successful pci_alloc_msix() but before any of the associated
1388 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1389 *
1390 * The 'vectors' array contains 'count' message vectors.  The array
1391 * maps directly to the MSI-X table in that index 0 in the array
1392 * specifies the vector for the first message in the MSI-X table, etc.
1393 * The vector value in each array index can either be 0 to indicate
1394 * that no vector should be assigned to a message slot, or it can be a
1395 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1397 * vector (IRQ) to be used for the corresponding message.
1398 *
1399 * On successful return, each message with a non-zero vector will have
1400 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1401 * 1.  Additionally, if any of the IRQs allocated via the previous
1402 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1403 * will be freed back to the system automatically.
1404 *
1405 * For example, suppose a driver has a MSI-X table with 6 messages and
1406 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1407 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1408 * C.  After the call to pci_alloc_msix(), the device will be setup to
1409 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1411 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1412 * be freed back to the system.  This device will also have valid
1413 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1414 *
1415 * In any case, the SYS_RES_IRQ rid X will always map to the message
1416 * at MSI-X table index X - 1 and will only be valid if a vector is
1417 * assigned to that table entry.
1418 */
1419int
1420pci_remap_msix_method(device_t dev, device_t child, int count,
1421    const u_int *vectors)
1422{
1423	struct pci_devinfo *dinfo = device_get_ivars(child);
1424	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1425	struct resource_list_entry *rle;
1426	int i, irq, j, *used;
1427
1428	/*
1429	 * Have to have at least one message in the table but the
1430	 * table can't be bigger than the actual MSI-X table in the
1431	 * device.
1432	 */
1433	if (count == 0 || count > msix->msix_msgnum)
1434		return (EINVAL);
1435
1436	/* Sanity check the vectors. */
1437	for (i = 0; i < count; i++)
1438		if (vectors[i] > msix->msix_alloc)
1439			return (EINVAL);
1440
1441	/*
1442	 * Make sure there aren't any holes in the vectors to be used.
1443	 * It's a big pain to support it, and it doesn't really make
1444	 * sense anyway.  Also, at least one vector must be used.
1445	 */
1446	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1447	    M_ZERO);
1448	for (i = 0; i < count; i++)
1449		if (vectors[i] != 0)
1450			used[vectors[i] - 1] = 1;
1451	for (i = 0; i < msix->msix_alloc - 1; i++)
1452		if (used[i] == 0 && used[i + 1] == 1) {
1453			free(used, M_DEVBUF);
1454			return (EINVAL);
1455		}
1456	if (used[0] != 1) {
1457		free(used, M_DEVBUF);
1458		return (EINVAL);
1459	}
1460
1461	/* Make sure none of the resources are allocated. */
1462	for (i = 0; i < msix->msix_table_len; i++) {
1463		if (msix->msix_table[i].mte_vector == 0)
1464			continue;
1465		if (msix->msix_table[i].mte_handlers > 0)
1466			return (EBUSY);
1467		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1468		KASSERT(rle != NULL, ("missing resource"));
1469		if (rle->res != NULL)
1470			return (EBUSY);
1471	}
1472
1473	/* Free the existing resource list entries. */
1474	for (i = 0; i < msix->msix_table_len; i++) {
1475		if (msix->msix_table[i].mte_vector == 0)
1476			continue;
1477		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1478	}
1479
1480	/*
1481	 * Build the new virtual table keeping track of which vectors are
1482	 * used.
1483	 */
1484	free(msix->msix_table, M_DEVBUF);
1485	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1486	    M_DEVBUF, M_WAITOK | M_ZERO);
1487	for (i = 0; i < count; i++)
1488		msix->msix_table[i].mte_vector = vectors[i];
1489	msix->msix_table_len = count;
1490
1491	/* Free any unused IRQs and resize the vectors array if necessary. */
1492	j = msix->msix_alloc - 1;
1493	if (used[j] == 0) {
1494		struct msix_vector *vec;
1495
1496		while (used[j] == 0) {
1497			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1498			    msix->msix_vectors[j].mv_irq);
1499			j--;
1500		}
1501		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1502		    M_WAITOK);
1503		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1504		    (j + 1));
1505		free(msix->msix_vectors, M_DEVBUF);
1506		msix->msix_vectors = vec;
1507		msix->msix_alloc = j + 1;
1508	}
1509	free(used, M_DEVBUF);
1510
1511	/* Map the IRQs onto the rids. */
1512	for (i = 0; i < count; i++) {
1513		if (vectors[i] == 0)
1514			continue;
1515		irq = msix->msix_vectors[vectors[i]].mv_irq;
1516		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1517		    irq, 1);
1518	}
1519
1520	if (bootverbose) {
1521		device_printf(child, "Remapped MSI-X IRQs as: ");
1522		for (i = 0; i < count; i++) {
1523			if (i != 0)
1524				printf(", ");
1525			if (vectors[i] == 0)
1526				printf("---");
1527			else
1528				printf("%d",
1529				    msix->msix_vectors[vectors[i]].mv_irq);
1530		}
1531		printf("\n");
1532	}
1533
1534	return (0);
1535}
1536
/*
 * Disable MSI-X and release all messages and bookkeeping previously
 * set up by pci_alloc_msix().  Returns ENODEV if nothing is allocated
 * and EBUSY if any message still has a handler or an allocated
 * SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1583
1584/*
1585 * Return the max supported MSI-X messages this device supports.
1586 * Basically, assuming the MD code can alloc messages, this function
1587 * should return the maximum value that pci_alloc_msix() can return.
1588 * Thus, it is subject to the tunables, etc.
1589 */
1590int
1591pci_msix_count_method(device_t dev, device_t child)
1592{
1593	struct pci_devinfo *dinfo = device_get_ivars(child);
1594	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1595
1596	if (pci_do_msix && msix->msix_location != 0)
1597		return (msix->msix_msgnum);
1598	return (0);
1599}
1600
1601/*
1602 * HyperTransport MSI mapping control
1603 */
/*
 * Enable or disable the MSI -> HyperTransport interrupt mapping window
 * on devices that have an HT MSI mapping capability.  A non-zero 'addr'
 * enables the window when the MSI address falls inside it; addr == 0
 * disables the window.
 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do if the device has no HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Enable the mapping if it is currently disabled and the MSI
	 * address matches the mapping window (compared at 1MB
	 * granularity via the >> 20 shifts).
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1628
1629/*
1630 * Support for MSI message signalled interrupts.
1631 */
/*
 * Program the MSI address/data pair into the device's MSI capability
 * registers and set the MSI enable bit.  On 64-bit capable devices
 * the data register lives at a different offset to make room for the
 * upper half of the address.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1658
1659void
1660pci_disable_msi(device_t dev)
1661{
1662	struct pci_devinfo *dinfo = device_get_ivars(dev);
1663	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1664
1665	/* Disable MSI -> HT mapping. */
1666	pci_ht_map_msi(dev, 0);
1667
1668	/* Disable MSI in the control register. */
1669	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1670	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1671	    2);
1672}
1673
1674/*
1675 * Restore MSI registers during resume.  If MSI is enabled then
1676 * restore the data and address registers in addition to the control
1677 * register.
1678 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The saved control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1704
1705int
1706pci_remap_msi_irq(device_t dev, u_int irq)
1707{
1708	struct pci_devinfo *dinfo = device_get_ivars(dev);
1709	pcicfgregs *cfg = &dinfo->cfg;
1710	struct resource_list_entry *rle;
1711	struct msix_table_entry *mte;
1712	struct msix_vector *mv;
1713	device_t bus;
1714	uint64_t addr;
1715	uint32_t data;
1716	int error, i, j;
1717
1718	bus = device_get_parent(dev);
1719
1720	/*
1721	 * Handle MSI first.  We try to find this IRQ among our list
1722	 * of MSI IRQs.  If we find it, we request updated address and
1723	 * data registers and apply the results.
1724	 */
1725	if (cfg->msi.msi_alloc > 0) {
1726
1727		/* If we don't have any active handlers, nothing to do. */
1728		if (cfg->msi.msi_handlers == 0)
1729			return (0);
1730		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1731			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1732			    i + 1);
1733			if (rle->start == irq) {
1734				error = PCIB_MAP_MSI(device_get_parent(bus),
1735				    dev, irq, &addr, &data);
1736				if (error)
1737					return (error);
1738				pci_disable_msi(dev);
1739				dinfo->cfg.msi.msi_addr = addr;
1740				dinfo->cfg.msi.msi_data = data;
1741				pci_enable_msi(dev, addr, data);
1742				return (0);
1743			}
1744		}
1745		return (ENOENT);
1746	}
1747
1748	/*
1749	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1750	 * we request the updated mapping info.  If that works, we go
1751	 * through all the slots that use this IRQ and update them.
1752	 */
1753	if (cfg->msix.msix_alloc > 0) {
1754		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1755			mv = &cfg->msix.msix_vectors[i];
1756			if (mv->mv_irq == irq) {
1757				error = PCIB_MAP_MSI(device_get_parent(bus),
1758				    dev, irq, &addr, &data);
1759				if (error)
1760					return (error);
1761				mv->mv_address = addr;
1762				mv->mv_data = data;
1763				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1764					mte = &cfg->msix.msix_table[j];
1765					if (mte->mte_vector != i + 1)
1766						continue;
1767					if (mte->mte_handlers == 0)
1768						continue;
1769					pci_mask_msix(dev, j);
1770					pci_enable_msix(dev, j, addr, data);
1771					pci_unmask_msix(dev, j);
1772				}
1773			}
1774		}
1775		return (ENOENT);
1776	}
1777
1778	return (ENOENT);
1779}
1780
1781/*
1782 * Returns true if the specified device is blacklisted because MSI
1783 * doesn't work.
1784 */
1785int
1786pci_msi_device_blacklisted(device_t dev)
1787{
1788	struct pci_quirk *q;
1789
1790	if (!pci_honor_msi_blacklist)
1791		return (0);
1792
1793	for (q = &pci_quirks[0]; q->devid; q++) {
1794		if (q->devid == pci_get_devid(dev) &&
1795		    q->type == PCI_QUIRK_DISABLE_MSI)
1796			return (1);
1797	}
1798	return (0);
1799}
1800
1801/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1803 * we just check for blacklisted chipsets as represented by the
1804 * host-PCI bridge at device 0:0:0.  In the future, it may become
1805 * necessary to check other system attributes, such as the kenv values
1806 * that give the motherboard manufacturer and model number.
1807 */
1808static int
1809pci_msi_blacklisted(void)
1810{
1811	device_t dev;
1812
1813	if (!pci_honor_msi_blacklist)
1814		return (0);
1815
1816	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1817	if (!(pcie_chipset || pcix_chipset))
1818		return (1);
1819
1820	dev = pci_find_bsf(0, 0, 0);
1821	if (dev != NULL)
1822		return (pci_msi_device_blacklisted(dev));
1823	return (0);
1824}
1825
1826/*
1827 * Attempt to allocate *count MSI messages.  The actual number allocated is
1828 * returned in *count.  After this function returns, each message will be
1829 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1830 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request on each failure until a single message. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes log2 of the number of enabled messages.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1949
1950/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; anything other than ENODEV is its final answer. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the bridge below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1998
1999/*
2000 * Return the max supported MSI messages this device supports.
2001 * Basically, assuming the MD code can alloc messages, this function
2002 * should return the maximum value that pci_alloc_msi() can return.
2003 * Thus, it is subject to the tunables, etc.
2004 */
2005int
2006pci_msi_count_method(device_t dev, device_t child)
2007{
2008	struct pci_devinfo *dinfo = device_get_ivars(child);
2009	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2010
2011	if (pci_do_msi && msi->msi_location != 0)
2012		return (msi->msi_msgnum);
2013	return (0);
2014}
2015
2016/* free pcicfgregs structure and all depending data structures */
2017
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Free any cached VPD data (only present when vpd_reg is set). */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2045
2046/*
 * PCI power management
2048 */
2049int
2050pci_set_powerstate_method(device_t dev, device_t child, int state)
2051{
2052	struct pci_devinfo *dinfo = device_get_ivars(child);
2053	pcicfgregs *cfg = &dinfo->cfg;
2054	uint16_t status;
2055	int result, oldstate, highest, delay;
2056
2057	if (cfg->pp.pp_cap == 0)
2058		return (EOPNOTSUPP);
2059
2060	/*
2061	 * Optimize a no state change request away.  While it would be OK to
2062	 * write to the hardware in theory, some devices have shown odd
2063	 * behavior when going from D3 -> D3.
2064	 */
2065	oldstate = pci_get_powerstate(child);
2066	if (oldstate == state)
2067		return (0);
2068
2069	/*
2070	 * The PCI power management specification states that after a state
2071	 * transition between PCI power states, system software must
2072	 * guarantee a minimal delay before the function accesses the device.
2073	 * Compute the worst case delay that we need to guarantee before we
2074	 * access the device.  Many devices will be responsive much more
2075	 * quickly than this delay, but there are some that don't respond
2076	 * instantly to state changes.  Transitions to/from D3 state require
2077	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2078	 * is done below with DELAY rather than a sleeper function because
2079	 * this function can be called from contexts where we cannot sleep.
2080	 */
2081	highest = (oldstate > state) ? oldstate : state;
2082	if (highest == PCI_POWERSTATE_D3)
2083	    delay = 10000;
2084	else if (highest == PCI_POWERSTATE_D2)
2085	    delay = 200;
2086	else
2087	    delay = 0;
2088	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2089	    & ~PCIM_PSTAT_DMASK;
2090	result = 0;
2091	switch (state) {
2092	case PCI_POWERSTATE_D0:
2093		status |= PCIM_PSTAT_D0;
2094		break;
2095	case PCI_POWERSTATE_D1:
2096		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2097			return (EOPNOTSUPP);
2098		status |= PCIM_PSTAT_D1;
2099		break;
2100	case PCI_POWERSTATE_D2:
2101		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2102			return (EOPNOTSUPP);
2103		status |= PCIM_PSTAT_D2;
2104		break;
2105	case PCI_POWERSTATE_D3:
2106		status |= PCIM_PSTAT_D3;
2107		break;
2108	default:
2109		return (EINVAL);
2110	}
2111
2112	if (bootverbose)
2113		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2114		    state);
2115
2116	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2117	if (delay)
2118		DELAY(delay);
2119	return (0);
2120}
2121
2122int
2123pci_get_powerstate_method(device_t dev, device_t child)
2124{
2125	struct pci_devinfo *dinfo = device_get_ivars(child);
2126	pcicfgregs *cfg = &dinfo->cfg;
2127	uint16_t status;
2128	int result;
2129
2130	if (cfg->pp.pp_cap != 0) {
2131		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2132		switch (status & PCIM_PSTAT_DMASK) {
2133		case PCIM_PSTAT_D0:
2134			result = PCI_POWERSTATE_D0;
2135			break;
2136		case PCIM_PSTAT_D1:
2137			result = PCI_POWERSTATE_D1;
2138			break;
2139		case PCIM_PSTAT_D2:
2140			result = PCI_POWERSTATE_D2;
2141			break;
2142		case PCIM_PSTAT_D3:
2143			result = PCI_POWERSTATE_D3;
2144			break;
2145		default:
2146			result = PCI_POWERSTATE_UNKNOWN;
2147			break;
2148		}
2149	} else {
2150		/* No support, device is always at D0 */
2151		result = PCI_POWERSTATE_D0;
2152	}
2153	return (result);
2154}
2155
2156/*
2157 * Some convenience functions for PCI device drivers.
2158 */
2159
2160static __inline void
2161pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2162{
2163	uint16_t	command;
2164
2165	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2166	command |= bit;
2167	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2168}
2169
2170static __inline void
2171pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2172{
2173	uint16_t	command;
2174
2175	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2176	command &= ~bit;
2177	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2178}
2179
/* Enable bus mastering so the device may initiate DMA.  Always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2186
/* Disable bus mastering for the device.  Always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2193
2194int
2195pci_enable_io_method(device_t dev, device_t child, int space)
2196{
2197	uint16_t bit;
2198
2199	switch(space) {
2200	case SYS_RES_IOPORT:
2201		bit = PCIM_CMD_PORTEN;
2202		break;
2203	case SYS_RES_MEMORY:
2204		bit = PCIM_CMD_MEMEN;
2205		break;
2206	default:
2207		return (EINVAL);
2208	}
2209	pci_set_command_bit(dev, child, bit);
2210	return (0);
2211}
2212
2213int
2214pci_disable_io_method(device_t dev, device_t child, int space)
2215{
2216	uint16_t bit;
2217
2218	switch(space) {
2219	case SYS_RES_IOPORT:
2220		bit = PCIM_CMD_PORTEN;
2221		break;
2222	case SYS_RES_MEMORY:
2223		bit = PCIM_CMD_MEMEN;
2224		break;
2225	default:
2226		return (EINVAL);
2227	}
2228	pci_clear_command_bit(dev, child, bit);
2229	return (0);
2230}
2231
2232/*
2233 * New style pci driver.  Parent device is either a pci-host-bridge or a
2234 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2235 */
2236
/*
 * Dump the interesting parts of a device's config header and parsed
 * capabilities to the console.  Only prints when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* INTx routing info, if the device uses a legacy pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability: supported and current states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability: message count and optional features. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/*
		 * MSI-X capability: message count and which BAR(s) hold
		 * the vector table and pending-bit array.
		 */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2293
2294static int
2295pci_porten(device_t dev)
2296{
2297	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2298}
2299
2300static int
2301pci_memen(device_t dev)
2302{
2303	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2304}
2305
/*
 * Read the current value of BAR 'reg' and size it by the standard
 * write-all-ones probe.  Returns the (possibly 64-bit) BAR value in
 * *mapp and the sizing read-back in *testvalp.  The BAR is restored
 * to its original value before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* 64-bit memory BARs occupy two consecutive dwords. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only now that the BAR is sane again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2367
/*
 * Program the address in BAR 'reg', writing the upper dword as well
 * for 64-bit memory BARs.
 */
static void
pci_write_bar(device_t dev, int reg, pci_addr_t base)
{
	pci_addr_t map;
	int ln2range;

	map = pci_read_config(dev, reg, 4);

	/* The device ROM BAR is always 32-bits. */
	/*
	 * NOTE(review): for PCIR_BIOS this returns without writing
	 * 'base', leaving the ROM BAR untouched -- confirm that is
	 * intentional for this revision.
	 */
	if (reg == PCIR_BIOS)
		return;
	ln2range = pci_maprange(map);
	pci_write_config(dev, reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, base >> 32, 4);
}
2384
2385/*
2386 * Add a resource based on a pci map register. Return 1 if the map
2387 * register is a 32bit map register or 2 if it is a 64bit register.
2388 */
2389static int
2390pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2391    int force, int prefetch)
2392{
2393	pci_addr_t base, map, testval;
2394	pci_addr_t start, end, count;
2395	int barlen, basezero, maprange, mapsize, type;
2396	uint16_t cmd;
2397	struct resource *res;
2398
2399	pci_read_bar(dev, reg, &map, &testval);
2400	if (PCI_BAR_MEM(map)) {
2401		type = SYS_RES_MEMORY;
2402		if (map & PCIM_BAR_MEM_PREFETCH)
2403			prefetch = 1;
2404	} else
2405		type = SYS_RES_IOPORT;
2406	mapsize = pci_mapsize(testval);
2407	base = pci_mapbase(map);
2408#ifdef __PCI_BAR_ZERO_VALID
2409	basezero = 0;
2410#else
2411	basezero = base == 0;
2412#endif
2413	maprange = pci_maprange(map);
2414	barlen = maprange == 64 ? 2 : 1;
2415
2416	/*
2417	 * For I/O registers, if bottom bit is set, and the next bit up
2418	 * isn't clear, we know we have a BAR that doesn't conform to the
2419	 * spec, so ignore it.  Also, sanity check the size of the data
2420	 * areas to the type of memory involved.  Memory must be at least
2421	 * 16 bytes in size, while I/O ranges must be at least 4.
2422	 */
2423	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2424		return (barlen);
2425	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2426	    (type == SYS_RES_IOPORT && mapsize < 2))
2427		return (barlen);
2428
2429	if (bootverbose) {
2430		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2431		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2432		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2433			printf(", port disabled\n");
2434		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2435			printf(", memory disabled\n");
2436		else
2437			printf(", enabled\n");
2438	}
2439
2440	/*
2441	 * If base is 0, then we have problems if this architecture does
2442	 * not allow that.  It is best to ignore such entries for the
2443	 * moment.  These will be allocated later if the driver specifically
2444	 * requests them.  However, some removable busses look better when
2445	 * all resources are allocated, so allow '0' to be overriden.
2446	 *
2447	 * Similarly treat maps whose values is the same as the test value
2448	 * read back.  These maps have had all f's written to them by the
2449	 * BIOS in an attempt to disable the resources.
2450	 */
2451	if (!force && (basezero || map == testval))
2452		return (barlen);
2453	if ((u_long)base != base) {
2454		device_printf(bus,
2455		    "pci%d:%d:%d:%d bar %#x too many address bits",
2456		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2457		    pci_get_function(dev), reg);
2458		return (barlen);
2459	}
2460
2461	/*
2462	 * This code theoretically does the right thing, but has
2463	 * undesirable side effects in some cases where peripherals
2464	 * respond oddly to having these bits enabled.  Let the user
2465	 * be able to turn them off (since pci_enable_io_modes is 1 by
2466	 * default).
2467	 */
2468	if (pci_enable_io_modes) {
2469		/* Turn on resources that have been left off by a lazy BIOS */
2470		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2471			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2472			cmd |= PCIM_CMD_PORTEN;
2473			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2474		}
2475		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2476			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2477			cmd |= PCIM_CMD_MEMEN;
2478			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2479		}
2480	} else {
2481		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2482			return (barlen);
2483		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2484			return (barlen);
2485	}
2486
2487	count = 1 << mapsize;
2488	if (basezero || base == pci_mapbase(testval)) {
2489		start = 0;	/* Let the parent decide. */
2490		end = ~0ULL;
2491	} else {
2492		start = base;
2493		end = base + (1 << mapsize) - 1;
2494	}
2495	resource_list_add(rl, type, reg, start, end, count);
2496
2497	/*
2498	 * Try to allocate the resource for this BAR from our parent
2499	 * so that this resource range is already reserved.  The
2500	 * driver for this device will later inherit this resource in
2501	 * pci_alloc_resource().
2502	 */
2503	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2504	    prefetch ? RF_PREFETCHABLE : 0);
2505	if (res == NULL) {
2506		/*
2507		 * If the allocation fails, clear the BAR and delete
2508		 * the resource list entry to force
2509		 * pci_alloc_resource() to allocate resources from the
2510		 * parent.
2511		 */
2512		resource_list_delete(rl, type, reg);
2513		start = 0;
2514	} else
2515		start = rman_get_start(res);
2516	pci_write_bar(dev, reg, start);
2517	return (barlen);
2518}
2519
2520/*
2521 * For ATA devices we need to decide early what addressing mode to use.
2522 * Legacy demands that the primary and secondary ATA ports sits on the
2523 * same addresses that old ISA hardware did. This dictates that we use
2524 * those addresses and ignore the BAR's if we cannot set PCI native
2525 * addressing mode.
2526 */
2527static void
2528pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2529    uint32_t prefetchmask)
2530{
2531	struct resource *r;
2532	int rid, type, progif;
2533#if 0
2534	/* if this device supports PCI native addressing use it */
2535	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2536	if ((progif & 0x8a) == 0x8a) {
2537		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2538		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2539			printf("Trying ATA native PCI addressing mode\n");
2540			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2541		}
2542	}
2543#endif
2544	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2545	type = SYS_RES_IOPORT;
2546	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2547		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2548		    prefetchmask & (1 << 0));
2549		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2550		    prefetchmask & (1 << 1));
2551	} else {
2552		rid = PCIR_BAR(0);
2553		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2554		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2555		    0x1f7, 8, 0);
2556		rid = PCIR_BAR(1);
2557		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2558		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2559		    0x3f6, 1, 0);
2560	}
2561	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2562		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2563		    prefetchmask & (1 << 2));
2564		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2565		    prefetchmask & (1 << 3));
2566	} else {
2567		rid = PCIR_BAR(2);
2568		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2569		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2570		    0x177, 8, 0);
2571		rid = PCIR_BAR(3);
2572		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2573		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2574		    0x376, 1, 0);
2575	}
2576	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2577	    prefetchmask & (1 << 4));
2578	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2579	    prefetchmask & (1 << 5));
2580}
2581
/*
 * Work out the INTx IRQ for 'dev' (tunable override first, then bus
 * routing and/or the intline register) and add it as rid 0 in the
 * device's resource list.  No-op for devices without an intpin.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2629
2630/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's register BAR; bail silently if we can't. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request ownership and poll up to ~100ms for SMM to let go. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			/* SMM never released the controller: force a reset. */
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2666
2667/* Perform early UHCI takeover from SMM. */
2668static void
2669uhci_early_takeover(device_t self)
2670{
2671	struct resource *res;
2672	int rid;
2673
2674	/*
2675	 * Set the PIRQD enable bit and switch off all the others. We don't
2676	 * want legacy support to interfere with us XXX Does this also mean
2677	 * that the BIOS won't touch the keyboard anymore if it is connected
2678	 * to the ports of the root hub?
2679	 */
2680	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2681
2682	/* Disable interrupts */
2683	rid = PCI_UHCI_BASE_REG;
2684	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2685	if (res != NULL) {
2686		bus_write_2(res, UHCI_INTR, 0);
2687		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2688	}
2689}
2690
2691/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register BAR; bail silently if we can't. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the extended capability list looking for LEGSUP entries. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore, then poll up to ~100ms. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2746
/*
 * Populate the device's resource list: BARs (with ATA legacy-mode
 * special casing), quirked extra map registers, the INTx IRQ, and
 * perform early takeover of USB controllers from SMM/BIOS.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB host controllers away from SMM before attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2801
/*
 * Scan every slot/function on bus 'busno' in 'domain' and add a child
 * device for each function that responds with a valid header type.
 * 'dinfo_size' lets subclassed busses allocate a larger devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Probe function 0 first; skip slots with a bogus header. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function bit tells us whether to scan funcs 1-7. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2834
/*
 * Create the new-bus child for a probed PCI function, save its config
 * snapshot, restore it (powering the device to D0 if needed), and
 * reserve its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save first so a later restore has a baseline to work from. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2846
2847static int
2848pci_probe(device_t dev)
2849{
2850
2851	device_set_desc(dev, "PCI bus");
2852
2853	/* Allow other subclasses to override this driver. */
2854	return (BUS_PROBE_GENERIC);
2855}
2856
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	/* Enumerate the bus, then attach drivers to all children. */
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2876
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	/* Snapshot config space before drivers quiesce the hardware. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2925
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2961
/*
 * Locate a preloaded "pci_vendor_data" module (the vendor/device name
 * database) and publish its address and size via the pci_vendordata
 * globals.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t vendordata, info;

	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
		/*
		 * NOTE(review): preload_search_info() results are
		 * dereferenced unchecked -- presumably ADDR/SIZE metadata
		 * always accompanies a preloaded module; confirm.
		 */
		info = preload_search_info(vendordata, MODINFO_ADDR);
		pci_vendordata = *(char **)info;
		info = preload_search_info(vendordata, MODINFO_SIZE);
		pci_vendordata_size = *(size_t *)info;
		/* terminate the database */
		pci_vendordata[pci_vendordata_size] = '\n';
	}
}
2976
/*
 * Bus callback invoked when a new PCI driver is loaded: give every
 * unclaimed child another chance to probe/attach against it, restoring
 * config space first and re-saving it (with powerdown) on failure.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reconsider children no driver has claimed yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3005
/*
 * Bus method to hook up an interrupt handler for a child.  For direct
 * children, rid 0 means legacy INTx (re-enable it); rid N > 0 is the
 * Nth MSI or MSI-X message, which is mapped through the parent bridge
 * and enabled on first use, with INTx masked while MSI/MSI-X is live.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the message address/data lazily on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid is 1-based; table/vector arrays are 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* On mapping failure, undo the generic setup done above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3097
/*
 * Tear down an interrupt handler previously set up via pci_setup_intr().
 * For direct children using MSI/MSI-X this drops the per-message handler
 * count and masks the MSI-X message or disables MSI when the count
 * reaches zero; a legacy INTx interrupt is masked in the command
 * register instead.  Returns 0 on success or an errno value.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	/* Only an active IRQ resource can have a handler to tear down. */
	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/* The resource must be the one recorded for this rid. */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last MSI handler gone: disable MSI entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler for this message: mask it. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3156
3157int
3158pci_print_child(device_t dev, device_t child)
3159{
3160	struct pci_devinfo *dinfo;
3161	struct resource_list *rl;
3162	int retval = 0;
3163
3164	dinfo = device_get_ivars(child);
3165	rl = &dinfo->resources;
3166
3167	retval += bus_print_child_header(dev, child);
3168
3169	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3170	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3171	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3172	if (device_get_flags(dev))
3173		retval += printf(" flags %#x", device_get_flags(dev));
3174
3175	retval += printf(" at device %d.%d", pci_get_slot(child),
3176	    pci_get_function(child));
3177
3178	retval += bus_print_child_footer(dev, child);
3179
3180	return (retval);
3181}
3182
/*
 * Class/subclass -> human-readable description table, used by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry for the device.  A subclass of -1 provides the generic
 * description for the entire class; a specific subclass entry, if
 * present, is appended to it.  The table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3274
3275void
3276pci_probe_nomatch(device_t dev, device_t child)
3277{
3278	int	i;
3279	char	*cp, *scp, *device;
3280
3281	/*
3282	 * Look for a listing for this device in a loaded device database.
3283	 */
3284	if ((device = pci_describe_device(child)) != NULL) {
3285		device_printf(dev, "<%s>", device);
3286		free(device, M_DEVBUF);
3287	} else {
3288		/*
3289		 * Scan the class/subclass descriptions for a general
3290		 * description.
3291		 */
3292		cp = "unknown";
3293		scp = NULL;
3294		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3295			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3296				if (pci_nomatch_tab[i].subclass == -1) {
3297					cp = pci_nomatch_tab[i].desc;
3298				} else if (pci_nomatch_tab[i].subclass ==
3299				    pci_get_subclass(child)) {
3300					scp = pci_nomatch_tab[i].desc;
3301				}
3302			}
3303		}
3304		device_printf(dev, "<%s%s%s>",
3305		    cp ? cp : "",
3306		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3307		    scp ? scp : "");
3308	}
3309	printf(" at device %d.%d (no driver attached)\n",
3310	    pci_get_slot(child), pci_get_function(child));
3311	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3312	return;
3313}
3314
3315/*
3316 * Parse the PCI device database, if loaded, and return a pointer to a
3317 * description of the device.
3318 *
3319 * The database is flat text formatted as follows:
3320 *
3321 * Any line not in a valid format is ignored.
3322 * Lines are terminated with newline '\n' characters.
3323 *
3324 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3325 * the vendor name.
3326 *
3327 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3328 * - devices cannot be listed without a corresponding VENDOR line.
3329 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3330 * another TAB, then the device name.
3331 */
3332
3333/*
3334 * Assuming (ptr) points to the beginning of a line in the database,
3335 * return the vendor or device and description of the next entry.
3336 * The value of (vendor) or (device) inappropriate for the entry type
3337 * is set to -1.  Returns nonzero at the end of the database.
3338 *
3339 * Note that this is slightly unrobust in the face of corrupt data;
3340 * we attempt to safeguard against this by spamming the end of the
3341 * database with a newline when we initialise.
3342 */
3343static int
3344pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3345{
3346	char	*cp = *ptr;
3347	int	left;
3348
3349	*device = -1;
3350	*vendor = -1;
3351	**desc = '\0';
3352	for (;;) {
3353		left = pci_vendordata_size - (cp - pci_vendordata);
3354		if (left <= 0) {
3355			*ptr = cp;
3356			return(1);
3357		}
3358
3359		/* vendor entry? */
3360		if (*cp != '\t' &&
3361		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3362			break;
3363		/* device entry? */
3364		if (*cp == '\t' &&
3365		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3366			break;
3367
3368		/* skip to next line */
3369		while (*cp != '\n' && left > 0) {
3370			cp++;
3371			left--;
3372		}
3373		if (*cp == '\n') {
3374			cp++;
3375			left--;
3376		}
3377	}
3378	/* skip to next line */
3379	while (*cp != '\n' && left > 0) {
3380		cp++;
3381		left--;
3382	}
3383	if (*cp == '\n' && left > 0)
3384		cp++;
3385	*ptr = cp;
3386	return(0);
3387}
3388
/*
 * Build a malloc'd "vendor, device" description string for (dev) from
 * the loaded vendor database.  Returns NULL if no database is loaded,
 * the vendor is not listed, or allocation fails.  The caller must free
 * the returned string with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/* Find the vendor entry; give up at end of database. */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Scan the device entries below the matched vendor line.  Stop
	 * at end of database or at the next vendor entry (vendor != -1),
	 * in which case the device is unknown and dp is cleared.
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: fall back to the numeric device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers ", " and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3441
/*
 * Bus interface: return the value of a PCI instance variable for a
 * child device.  Most values come straight from the cached config
 * registers in dinfo->cfg.  Returns ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined 32-bit device-in-high/vendor-in-low ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3524
/*
 * Bus interface: set a PCI instance variable for a child device.  Only
 * the interrupt pin is currently writable; identification and location
 * ivars are read-only and return EINVAL, and unknown ivars return
 * ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	/* Read-only ivars. */
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3557
3558
3559#include "opt_ddb.h"
3560#ifdef DDB
3561#include <ddb/ddb.h>
3562#include <sys/cons.h>
3563
3564/*
3565 * List resources based on pci map registers, used for within ddb
3566 */
3567
/*
 * DDB "show pciregs" command: walk the global PCI device list and print
 * a pciconf(8)-style line for each device (driver name/unit, selector,
 * class, subsystem IDs, device/vendor IDs, revision and header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Devices without an attached driver print as "none<N>",
		 * numbered by a running counter rather than a unit.
		 */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3607#endif /* DDB */
3608
/*
 * Lazily reserve the resource backing a BAR for a child device: size
 * the BAR, allocate a matching (inactive) resource from our parent,
 * record it in the child's resource list as RLE_RESERVED, and program
 * the BAR with the assigned base address.  Returns the reserved
 * resource, or NULL if the BAR is unimplemented, the type mismatches
 * the BAR, or allocation fails.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
	    pci_mapbase(testval) == 0)
		goto out;

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3704
3705
/*
 * Bus interface: allocate a resource for a child device.  For direct
 * children this performs lazy allocation: legacy interrupts may be
 * routed on first use, and BAR-backed port/memory ranges are reserved
 * via pci_reserve_map() if not already present in the resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for grandchildren pass straight through to our parent. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (a sub-range of) the reserved resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3756
3757int
3758pci_activate_resource(device_t dev, device_t child, int type, int rid,
3759    struct resource *r)
3760{
3761	int error;
3762
3763	error = bus_generic_activate_resource(dev, child, type, rid, r);
3764	if (error)
3765		return (error);
3766
3767	/* Enable decoding in the command register when activating BARs. */
3768	if (device_get_parent(child) == dev) {
3769		/* Device ROMs need their decoding explicitly enabled. */
3770		if (rid == PCIR_BIOS)
3771			pci_write_config(child, rid, rman_get_start(r) |
3772			    PCIM_BIOS_ENABLE, 4);
3773		switch (type) {
3774		case SYS_RES_IOPORT:
3775		case SYS_RES_MEMORY:
3776			error = PCI_ENABLE_IO(dev, child, type);
3777			break;
3778		}
3779	}
3780	return (error);
3781}
3782
3783int
3784pci_deactivate_resource(device_t dev, device_t child, int type,
3785    int rid, struct resource *r)
3786{
3787	int error;
3788
3789	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3790	if (error)
3791		return (error);
3792
3793	/* Disable decoding for device ROMs. */
3794	if (rid == PCIR_BIOS)
3795		pci_write_config(child, rid, rman_get_start(r), 4);
3796	return (0);
3797}
3798
/*
 * Detach and destroy a child device: detach its driver, disable its
 * port/memory decoding, release and unreserve all of its resources,
 * then delete the device and free its config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource still active or busy at this point
			 * was leaked by the child's driver; complain and
			 * release it on the child's behalf before
			 * unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3838
/*
 * Bus interface: delete a resource list entry for a direct child.  A
 * resource still owned (active or busy) by the child is left alone with
 * a warning; otherwise the backing BAR is cleared so the device stops
 * decoding, and the reservation and list entry are removed.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3881
3882struct resource_list *
3883pci_get_resource_list (device_t dev, device_t child)
3884{
3885	struct pci_devinfo *dinfo = device_get_ivars(child);
3886
3887	return (&dinfo->resources);
3888}
3889
3890uint32_t
3891pci_read_config_method(device_t dev, device_t child, int reg, int width)
3892{
3893	struct pci_devinfo *dinfo = device_get_ivars(child);
3894	pcicfgregs *cfg = &dinfo->cfg;
3895
3896	return (PCIB_READ_CONFIG(device_get_parent(dev),
3897	    cfg->bus, cfg->slot, cfg->func, reg, width));
3898}
3899
3900void
3901pci_write_config_method(device_t dev, device_t child, int reg,
3902    uint32_t val, int width)
3903{
3904	struct pci_devinfo *dinfo = device_get_ivars(child);
3905	pcicfgregs *cfg = &dinfo->cfg;
3906
3907	PCIB_WRITE_CONFIG(device_get_parent(dev),
3908	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3909}
3910
3911int
3912pci_child_location_str_method(device_t dev, device_t child, char *buf,
3913    size_t buflen)
3914{
3915
3916	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3917	    pci_get_function(child));
3918	return (0);
3919}
3920
3921int
3922pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3923    size_t buflen)
3924{
3925	struct pci_devinfo *dinfo;
3926	pcicfgregs *cfg;
3927
3928	dinfo = device_get_ivars(child);
3929	cfg = &dinfo->cfg;
3930	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3931	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3932	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3933	    cfg->progif);
3934	return (0);
3935}
3936
3937int
3938pci_assign_interrupt_method(device_t dev, device_t child)
3939{
3940	struct pci_devinfo *dinfo = device_get_ivars(child);
3941	pcicfgregs *cfg = &dinfo->cfg;
3942
3943	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3944	    cfg->intpin));
3945}
3946
3947static int
3948pci_modevent(module_t mod, int what, void *arg)
3949{
3950	static struct cdev *pci_cdev;
3951
3952	switch (what) {
3953	case MOD_LOAD:
3954		STAILQ_INIT(&pci_devq);
3955		pci_generation = 0;
3956		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3957		    "pci");
3958		pci_load_vendor_data();
3959		break;
3960
3961	case MOD_UNLOAD:
3962		destroy_dev(pci_cdev);
3963		break;
3964	}
3965
3966	return (0);
3967}
3968
/*
 * Restore a device's saved config registers (BARs, command register,
 * interrupt routing, timing registers) after a power transition or
 * resume, and re-program any MSI/MSI-X state.  Only plain (header type
 * 0) devices are handled.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4014
/*
 * Save a device's writable config registers into dinfo so they can be
 * restored later by pci_cfg_restore(), and optionally (setstate != 0)
 * power the device down to D3 according to the pci_do_power_nodriver
 * policy.  Only plain (header type 0) devices are handled.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each policy level powers down strictly more device classes. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4098