pci.c revision 232464
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 232464 2012-03-03 14:24:39Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
73/*
74 * XXX: Due to a limitation of the bus_dma_tag_create() API, we cannot
75 * specify a 4GB boundary on 32-bit targets.  Usually this does not
76 * matter as it is ok to use a boundary of 0 on these systems.
77 * However, in the case of PAE, DMA addresses can cross a 4GB
78 * boundary, so as a workaround use a 2GB boundary.
79 */
80#ifdef PAE
81#define	PCI_DMA_BOUNDARY	(1u << 31)
82#else
83#define	PCI_DMA_BOUNDARY	((bus_size_t)((uint64_t)1 << 32))
84#endif
85
86#define	PCIR_IS_BIOS(cfg, reg)						\
87	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
88	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
89
90static pci_addr_t	pci_mapbase(uint64_t mapreg);
91static const char	*pci_maptype(uint64_t mapreg);
92static int		pci_mapsize(uint64_t testval);
93static int		pci_maprange(uint64_t mapreg);
94static pci_addr_t	pci_rombase(uint64_t mapreg);
95static int		pci_romsize(uint64_t testval);
96static void		pci_fixancient(pcicfgregs *cfg);
97static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
98
99static int		pci_porten(device_t dev);
100static int		pci_memen(device_t dev);
101static void		pci_assign_interrupt(device_t bus, device_t dev,
102			    int force_route);
103static int		pci_add_map(device_t bus, device_t dev, int reg,
104			    struct resource_list *rl, int force, int prefetch);
105static int		pci_probe(device_t dev);
106static int		pci_attach(device_t dev);
107static void		pci_load_vendor_data(void);
108static int		pci_describe_parse_line(char **ptr, int *vendor,
109			    int *device, char **desc);
110static char		*pci_describe_device(device_t dev);
111static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
112static int		pci_modevent(module_t mod, int what, void *arg);
113static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
114			    pcicfgregs *cfg);
115static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
116static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
117			    int reg, uint32_t *data);
118#if 0
119static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
120			    int reg, uint32_t data);
121#endif
122static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
123static void		pci_disable_msi(device_t dev);
124static void		pci_enable_msi(device_t dev, uint64_t address,
125			    uint16_t data);
126static void		pci_enable_msix(device_t dev, u_int index,
127			    uint64_t address, uint32_t data);
128static void		pci_mask_msix(device_t dev, u_int index);
129static void		pci_unmask_msix(device_t dev, u_int index);
130static int		pci_msi_blacklisted(void);
131static void		pci_resume_msi(device_t dev);
132static void		pci_resume_msix(device_t dev);
133static int		pci_remap_intr_method(device_t bus, device_t dev,
134			    u_int irq);
135
/*
 * Method table connecting the generic device, bus, and PCI kobj
 * interfaces to this driver's implementations.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
190
191DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
192
193static devclass_t pci_devclass;
194DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
195MODULE_VERSION(pci, 1);
196
197static char	*pci_vendordata;
198static size_t	pci_vendordata_size;
199
200struct pci_quirk {
201	uint32_t devid;	/* Vendor/device of the card */
202	int	type;
203#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
204#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
205#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
206	int	arg1;
207	int	arg2;
208};
209
210static const struct pci_quirk const pci_quirks[] = {
211	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
212	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
213	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
214	/* As does the Serverworks OSB4 (the SMBus mapping register) */
215	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
216
217	/*
218	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
219	 * or the CMIC-SL (AKA ServerWorks GC_LE).
220	 */
221	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
222	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
223
224	/*
225	 * MSI doesn't work on earlier Intel chipsets including
226	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
227	 */
228	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
229	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
230	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
231	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
232	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
233	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235
236	/*
237	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
238	 * bridge.
239	 */
240	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241
242	/*
243	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
244	 * VMware.
245	 */
246	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247
248	/*
249	 * Some virtualization environments emulate an older chipset
250	 * but support MSI just fine.  QEMU uses the Intel 82440.
251	 */
252	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
253
254	{ 0 }
255};
256
257/* map register information */
258#define	PCI_MAPMEM	0x01	/* memory map */
259#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
260#define	PCI_MAPPORT	0x04	/* port map */
261
262struct devlist pci_devq;
263uint32_t pci_generation;
264uint32_t pci_numdevs = 0;
265static int pcie_chipset, pcix_chipset;
266
267/* sysctl vars */
268SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
269
270static int pci_enable_io_modes = 1;
271TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
272SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
273    &pci_enable_io_modes, 1,
274    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
275enable these bits correctly.  We'd like to do this all the time, but there\n\
276are some peripherals that this causes problems with.");
277
278static int pci_do_power_nodriver = 0;
279TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
280SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
281    &pci_do_power_nodriver, 0,
282  "Place a function into D3 state when no driver attaches to it.  0 means\n\
283disable.  1 means conservatively place devices into D3 state.  2 means\n\
284agressively place devices into D3 state.  3 means put absolutely everything\n\
285in D3 state.");
286
287int pci_do_power_resume = 1;
288TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
289SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
290    &pci_do_power_resume, 1,
291  "Transition from D3 -> D0 on resume.");
292
293int pci_do_power_suspend = 1;
294TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
295SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
296    &pci_do_power_suspend, 1,
297  "Transition from D0 -> D3 on suspend.");
298
299static int pci_do_msi = 1;
300TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
301SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
302    "Enable support for MSI interrupts");
303
304static int pci_do_msix = 1;
305TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
306SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
307    "Enable support for MSI-X interrupts");
308
309static int pci_honor_msi_blacklist = 1;
310TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
311SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
312    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
313
314#if defined(__i386__) || defined(__amd64__)
315static int pci_usb_takeover = 1;
316#else
317static int pci_usb_takeover = 0;
318#endif
319TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
320SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
321    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
322Disable this if you depend on BIOS emulation of USB devices, that is\n\
323you use USB devices (like keyboard or mouse) but do not load USB drivers");
324
/*
 * Find the device_t for a PCI device by bus/slot/function, assuming
 * PCI domain (segment) 0.  Returns NULL if the device is not known.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
333
334/* Find a device_t by domain/bus/slot/function */
335
336device_t
337pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
338{
339	struct pci_devinfo *dinfo;
340
341	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
342		if ((dinfo->cfg.domain == domain) &&
343		    (dinfo->cfg.bus == bus) &&
344		    (dinfo->cfg.slot == slot) &&
345		    (dinfo->cfg.func == func)) {
346			return (dinfo->cfg.dev);
347		}
348	}
349
350	return (NULL);
351}
352
353/* Find a device_t by vendor/device ID */
354
355device_t
356pci_find_device(uint16_t vendor, uint16_t device)
357{
358	struct pci_devinfo *dinfo;
359
360	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
361		if ((dinfo->cfg.vendor == vendor) &&
362		    (dinfo->cfg.device == device)) {
363			return (dinfo->cfg.dev);
364		}
365	}
366
367	return (NULL);
368}
369
370device_t
371pci_find_class(uint8_t class, uint8_t subclass)
372{
373	struct pci_devinfo *dinfo;
374
375	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
376		if (dinfo->cfg.baseclass == class &&
377		    dinfo->cfg.subclass == subclass) {
378			return (dinfo->cfg.dev);
379		}
380	}
381
382	return (NULL);
383}
384
385static int
386pci_printf(pcicfgregs *cfg, const char *fmt, ...)
387{
388	va_list ap;
389	int retval;
390
391	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
392	    cfg->func);
393	va_start(ap, fmt);
394	retval += vprintf(fmt, ap);
395	va_end(ap);
396	return (retval);
397}
398
399/* return base address of memory or port map */
400
401static pci_addr_t
402pci_mapbase(uint64_t mapreg)
403{
404
405	if (PCI_BAR_MEM(mapreg))
406		return (mapreg & PCIM_BAR_MEM_BASE);
407	else
408		return (mapreg & PCIM_BAR_IO_BASE);
409}
410
411/* return map type of memory or port map */
412
413static const char *
414pci_maptype(uint64_t mapreg)
415{
416
417	if (PCI_BAR_IO(mapreg))
418		return ("I/O Port");
419	if (mapreg & PCIM_BAR_MEM_PREFETCH)
420		return ("Prefetchable Memory");
421	return ("Memory");
422}
423
/* Return log2 of the map size decoded for a memory or port BAR. */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t bits;
	int ln2size;

	/* Count trailing zero bits of the decoded base; 0 yields 0. */
	ln2size = 0;
	for (bits = pci_mapbase(testval); bits != 0 && (bits & 1) == 0;
	    bits >>= 1)
		ln2size++;
	return (ln2size);
}
442
/* Return the base address encoded in an expansion ROM BAR value. */
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
451
/* Return log2 of the map size decoded for a device ROM BAR. */
static int
pci_romsize(uint64_t testval)
{
	uint64_t bits;
	int ln2size;

	/* Count trailing zero bits of the decoded ROM base; 0 yields 0. */
	ln2size = 0;
	for (bits = pci_rombase(testval); bits != 0 && (bits & 1) == 0;
	    bits >>= 1)
		ln2size++;
	return (ln2size);
}
470
471/* return log2 of address range supported by map register */
472
473static int
474pci_maprange(uint64_t mapreg)
475{
476	int ln2range = 0;
477
478	if (PCI_BAR_IO(mapreg))
479		ln2range = 32;
480	else
481		switch (mapreg & PCIM_BAR_MEM_TYPE) {
482		case PCIM_BAR_MEM_32:
483			ln2range = 32;
484			break;
485		case PCIM_BAR_MEM_1MB:
486			ln2range = 20;
487			break;
488		case PCIM_BAR_MEM_64:
489			ln2range = 64;
490			break;
491		}
492	return (ln2range);
493}
494
495/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
496
497static void
498pci_fixancient(pcicfgregs *cfg)
499{
500	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
501		return;
502
503	/* PCI to PCI bridges use header type 1 */
504	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
505		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
506}
507
/*
 * Extract header-type specific config data: the subsystem IDs (where
 * the header layout defines them) and the number of BARs.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/*
		 * Bridge subsystem IDs come from the PCIY_SUBVENDOR
		 * capability, read in pci_read_cap().
		 */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
531
/*
 * Read the configuration header of the function at d:b:s:f into a
 * freshly allocated pci_devinfo ('size' bytes, so callers may embed it
 * in a larger structure).  Returns NULL if no device responds at that
 * address.  The new entry is appended to the global pci_devq list.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* Reading all-ones means no device responds at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* Defensive: M_WAITOK allocations do not return NULL. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* The multi-function flag is kept out of the cached hdrtype. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the cached config data into the exported conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
607
/*
 * Walk the PCI capability list and record the location and contents of
 * the capabilities this driver cares about: power management,
 * HyperTransport, MSI, MSI-X, VPD, bridge subvendor IDs, PCI-X and
 * PCI-express.  Also enables the MSI mapping window on HT slaves.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The offset of the capability pointer depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG() and WREG() are deliberately left defined for the VPD code below. */
}
764
765/*
766 * PCI Vital Product Data
767 */
768
769#define	PCI_VPD_TIMEOUT		1000000
770
771static int
772pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
773{
774	int count = PCI_VPD_TIMEOUT;
775
776	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
777
778	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
779
780	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
781		if (--count < 0)
782			return (ENXIO);
783		DELAY(1);	/* limit looping */
784	}
785	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
786
787	return (0);
788}
789
#if 0
/*
 * Write a 32-bit word of VPD data at the given VPD address.  Returns
 * ENXIO if the device does not signal write completion within the
 * polling budget, 0 on success.  Currently unused and compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Hardware clears bit 15 of the VPD address register when done. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
809
810#undef PCI_VPD_TIMEOUT
811
/* Cursor state for the byte-at-a-time VPD reader (see vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read from VPD */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD address to read */
	uint8_t		cksum;		/* running sum of all bytes consumed */
};
820
821static int
822vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
823{
824	uint32_t reg;
825	uint8_t byte;
826
827	if (vrs->bytesinval == 0) {
828		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
829			return (ENXIO);
830		vrs->val = le32toh(reg);
831		vrs->off += 4;
832		byte = vrs->val & 0xff;
833		vrs->bytesinval = 3;
834	} else {
835		vrs->val = vrs->val >> 8;
836		byte = vrs->val & 0xff;
837		vrs->bytesinval--;
838	}
839
840	vrs->cksum += byte;
841	*data = byte;
842	return (0);
843}
844
845static void
846pci_read_vpd(device_t pcib, pcicfgregs *cfg)
847{
848	struct vpd_readstate vrs;
849	int state;
850	int name;
851	int remain;
852	int i;
853	int alloc, off;		/* alloc/off for RO/W arrays */
854	int cksumvalid;
855	int dflen;
856	uint8_t byte;
857	uint8_t byte2;
858
859	/* init vpd reader */
860	vrs.bytesinval = 0;
861	vrs.off = 0;
862	vrs.pcib = pcib;
863	vrs.cfg = cfg;
864	vrs.cksum = 0;
865
866	state = 0;
867	name = remain = i = 0;	/* shut up stupid gcc */
868	alloc = off = 0;	/* shut up stupid gcc */
869	dflen = 0;		/* shut up stupid gcc */
870	cksumvalid = -1;
871	while (state >= 0) {
872		if (vpd_nextbyte(&vrs, &byte)) {
873			state = -2;
874			break;
875		}
876#if 0
877		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
878		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
879		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
880#endif
881		switch (state) {
882		case 0:		/* item name */
883			if (byte & 0x80) {
884				if (vpd_nextbyte(&vrs, &byte2)) {
885					state = -2;
886					break;
887				}
888				remain = byte2;
889				if (vpd_nextbyte(&vrs, &byte2)) {
890					state = -2;
891					break;
892				}
893				remain |= byte2 << 8;
894				if (remain > (0x7f*4 - vrs.off)) {
895					state = -1;
896					pci_printf(cfg,
897					    "invalid VPD data, remain %#x\n",
898					    remain);
899				}
900				name = byte & 0x7f;
901			} else {
902				remain = byte & 0x7;
903				name = (byte >> 3) & 0xf;
904			}
905			switch (name) {
906			case 0x2:	/* String */
907				cfg->vpd.vpd_ident = malloc(remain + 1,
908				    M_DEVBUF, M_WAITOK);
909				i = 0;
910				state = 1;
911				break;
912			case 0xf:	/* End */
913				state = -1;
914				break;
915			case 0x10:	/* VPD-R */
916				alloc = 8;
917				off = 0;
918				cfg->vpd.vpd_ros = malloc(alloc *
919				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
920				    M_WAITOK | M_ZERO);
921				state = 2;
922				break;
923			case 0x11:	/* VPD-W */
924				alloc = 8;
925				off = 0;
926				cfg->vpd.vpd_w = malloc(alloc *
927				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
928				    M_WAITOK | M_ZERO);
929				state = 5;
930				break;
931			default:	/* Invalid data, abort */
932				state = -1;
933				break;
934			}
935			break;
936
937		case 1:	/* Identifier String */
938			cfg->vpd.vpd_ident[i++] = byte;
939			remain--;
940			if (remain == 0)  {
941				cfg->vpd.vpd_ident[i] = '\0';
942				state = 0;
943			}
944			break;
945
946		case 2:	/* VPD-R Keyword Header */
947			if (off == alloc) {
948				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
949				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
950				    M_DEVBUF, M_WAITOK | M_ZERO);
951			}
952			cfg->vpd.vpd_ros[off].keyword[0] = byte;
953			if (vpd_nextbyte(&vrs, &byte2)) {
954				state = -2;
955				break;
956			}
957			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
958			if (vpd_nextbyte(&vrs, &byte2)) {
959				state = -2;
960				break;
961			}
962			dflen = byte2;
963			if (dflen == 0 &&
964			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
965			    2) == 0) {
966				/*
967				 * if this happens, we can't trust the rest
968				 * of the VPD.
969				 */
970				pci_printf(cfg, "bad keyword length: %d\n",
971				    dflen);
972				cksumvalid = 0;
973				state = -1;
974				break;
975			} else if (dflen == 0) {
976				cfg->vpd.vpd_ros[off].value = malloc(1 *
977				    sizeof(*cfg->vpd.vpd_ros[off].value),
978				    M_DEVBUF, M_WAITOK);
979				cfg->vpd.vpd_ros[off].value[0] = '\x00';
980			} else
981				cfg->vpd.vpd_ros[off].value = malloc(
982				    (dflen + 1) *
983				    sizeof(*cfg->vpd.vpd_ros[off].value),
984				    M_DEVBUF, M_WAITOK);
985			remain -= 3;
986			i = 0;
987			/* keep in sync w/ state 3's transistions */
988			if (dflen == 0 && remain == 0)
989				state = 0;
990			else if (dflen == 0)
991				state = 2;
992			else
993				state = 3;
994			break;
995
996		case 3:	/* VPD-R Keyword Value */
997			cfg->vpd.vpd_ros[off].value[i++] = byte;
998			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
999			    "RV", 2) == 0 && cksumvalid == -1) {
1000				if (vrs.cksum == 0)
1001					cksumvalid = 1;
1002				else {
1003					if (bootverbose)
1004						pci_printf(cfg,
1005					    "bad VPD cksum, remain %hhu\n",
1006						    vrs.cksum);
1007					cksumvalid = 0;
1008					state = -1;
1009					break;
1010				}
1011			}
1012			dflen--;
1013			remain--;
1014			/* keep in sync w/ state 2's transistions */
1015			if (dflen == 0)
1016				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1017			if (dflen == 0 && remain == 0) {
1018				cfg->vpd.vpd_rocnt = off;
1019				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1020				    off * sizeof(*cfg->vpd.vpd_ros),
1021				    M_DEVBUF, M_WAITOK | M_ZERO);
1022				state = 0;
1023			} else if (dflen == 0)
1024				state = 2;
1025			break;
1026
1027		case 4:
1028			remain--;
1029			if (remain == 0)
1030				state = 0;
1031			break;
1032
1033		case 5:	/* VPD-W Keyword Header */
1034			if (off == alloc) {
1035				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1036				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1037				    M_DEVBUF, M_WAITOK | M_ZERO);
1038			}
1039			cfg->vpd.vpd_w[off].keyword[0] = byte;
1040			if (vpd_nextbyte(&vrs, &byte2)) {
1041				state = -2;
1042				break;
1043			}
1044			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1045			if (vpd_nextbyte(&vrs, &byte2)) {
1046				state = -2;
1047				break;
1048			}
1049			cfg->vpd.vpd_w[off].len = dflen = byte2;
1050			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1051			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1052			    sizeof(*cfg->vpd.vpd_w[off].value),
1053			    M_DEVBUF, M_WAITOK);
1054			remain -= 3;
1055			i = 0;
			/* keep in sync w/ state 6's transitions */
1057			if (dflen == 0 && remain == 0)
1058				state = 0;
1059			else if (dflen == 0)
1060				state = 5;
1061			else
1062				state = 6;
1063			break;
1064
1065		case 6:	/* VPD-W Keyword Value */
1066			cfg->vpd.vpd_w[off].value[i++] = byte;
1067			dflen--;
1068			remain--;
			/* keep in sync w/ state 5's transitions */
1070			if (dflen == 0)
1071				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1072			if (dflen == 0 && remain == 0) {
1073				cfg->vpd.vpd_wcnt = off;
1074				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1075				    off * sizeof(*cfg->vpd.vpd_w),
1076				    M_DEVBUF, M_WAITOK | M_ZERO);
1077				state = 0;
1078			} else if (dflen == 0)
1079				state = 5;
1080			break;
1081
1082		default:
1083			pci_printf(cfg, "invalid state: %d\n", state);
1084			state = -1;
1085			break;
1086		}
1087	}
1088
1089	if (cksumvalid == 0 || state < -1) {
1090		/* read-only data bad, clean up */
1091		if (cfg->vpd.vpd_ros != NULL) {
1092			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1093				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1094			free(cfg->vpd.vpd_ros, M_DEVBUF);
1095			cfg->vpd.vpd_ros = NULL;
1096		}
1097	}
1098	if (state < -1) {
1099		/* I/O error, clean up */
1100		pci_printf(cfg, "failed to read VPD data.\n");
1101		if (cfg->vpd.vpd_ident != NULL) {
1102			free(cfg->vpd.vpd_ident, M_DEVBUF);
1103			cfg->vpd.vpd_ident = NULL;
1104		}
1105		if (cfg->vpd.vpd_w != NULL) {
1106			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1107				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1108			free(cfg->vpd.vpd_w, M_DEVBUF);
1109			cfg->vpd.vpd_w = NULL;
1110		}
1111	}
1112	cfg->vpd.vpd_cached = 1;
1113#undef REG
1114#undef WREG
1115}
1116
1117int
1118pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1119{
1120	struct pci_devinfo *dinfo = device_get_ivars(child);
1121	pcicfgregs *cfg = &dinfo->cfg;
1122
1123	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1124		pci_read_vpd(device_get_parent(dev), cfg);
1125
1126	*identptr = cfg->vpd.vpd_ident;
1127
1128	if (*identptr == NULL)
1129		return (ENXIO);
1130
1131	return (0);
1132}
1133
1134int
1135pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1136	const char **vptr)
1137{
1138	struct pci_devinfo *dinfo = device_get_ivars(child);
1139	pcicfgregs *cfg = &dinfo->cfg;
1140	int i;
1141
1142	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1143		pci_read_vpd(device_get_parent(dev), cfg);
1144
1145	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1146		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1147		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1148			*vptr = cfg->vpd.vpd_ros[i].value;
1149			return (0);
1150		}
1151
1152	*vptr = NULL;
1153	return (ENXIO);
1154}
1155
1156/*
1157 * Find the requested extended capability and return the offset in
1158 * configuration space via the pointer provided. The function returns
1159 * 0 on success and error code otherwise.
1160 */
1161int
1162pci_find_extcap_method(device_t dev, device_t child, int capability,
1163    int *capreg)
1164{
1165	struct pci_devinfo *dinfo = device_get_ivars(child);
1166	pcicfgregs *cfg = &dinfo->cfg;
1167	u_int32_t status;
1168	u_int8_t ptr;
1169
1170	/*
1171	 * Check the CAP_LIST bit of the PCI status register first.
1172	 */
1173	status = pci_read_config(child, PCIR_STATUS, 2);
1174	if (!(status & PCIM_STATUS_CAPPRESENT))
1175		return (ENXIO);
1176
1177	/*
1178	 * Determine the start pointer of the capabilities list.
1179	 */
1180	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1181	case PCIM_HDRTYPE_NORMAL:
1182	case PCIM_HDRTYPE_BRIDGE:
1183		ptr = PCIR_CAP_PTR;
1184		break;
1185	case PCIM_HDRTYPE_CARDBUS:
1186		ptr = PCIR_CAP_PTR_2;
1187		break;
1188	default:
1189		/* XXX: panic? */
1190		return (ENXIO);		/* no extended capabilities support */
1191	}
1192	ptr = pci_read_config(child, ptr, 1);
1193
1194	/*
1195	 * Traverse the capabilities list.
1196	 */
1197	while (ptr != 0) {
1198		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1199			if (capreg != NULL)
1200				*capreg = ptr;
1201			return (0);
1202		}
1203		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1204	}
1205
1206	return (ENOENT);
1207}
1208
1209/*
1210 * Support for MSI-X message interrupts.
1211 */
1212void
1213pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1214{
1215	struct pci_devinfo *dinfo = device_get_ivars(dev);
1216	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1217	uint32_t offset;
1218
1219	KASSERT(msix->msix_table_len > index, ("bogus index"));
1220	offset = msix->msix_table_offset + index * 16;
1221	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1222	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1223	bus_write_4(msix->msix_table_res, offset + 8, data);
1224
1225	/* Enable MSI -> HT mapping. */
1226	pci_ht_map_msi(dev, address);
1227}
1228
1229void
1230pci_mask_msix(device_t dev, u_int index)
1231{
1232	struct pci_devinfo *dinfo = device_get_ivars(dev);
1233	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1234	uint32_t offset, val;
1235
1236	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1237	offset = msix->msix_table_offset + index * 16 + 12;
1238	val = bus_read_4(msix->msix_table_res, offset);
1239	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1240		val |= PCIM_MSIX_VCTRL_MASK;
1241		bus_write_4(msix->msix_table_res, offset, val);
1242	}
1243}
1244
1245void
1246pci_unmask_msix(device_t dev, u_int index)
1247{
1248	struct pci_devinfo *dinfo = device_get_ivars(dev);
1249	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1250	uint32_t offset, val;
1251
1252	KASSERT(msix->msix_table_len > index, ("bogus index"));
1253	offset = msix->msix_table_offset + index * 16 + 12;
1254	val = bus_read_4(msix->msix_table_res, offset);
1255	if (val & PCIM_MSIX_VCTRL_MASK) {
1256		val &= ~PCIM_MSIX_VCTRL_MASK;
1257		bus_write_4(msix->msix_table_res, offset, val);
1258	}
1259}
1260
1261int
1262pci_pending_msix(device_t dev, u_int index)
1263{
1264	struct pci_devinfo *dinfo = device_get_ivars(dev);
1265	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1266	uint32_t offset, bit;
1267
1268	KASSERT(msix->msix_table_len > index, ("bogus index"));
1269	offset = msix->msix_pba_offset + (index / 32) * 4;
1270	bit = 1 << index % 32;
1271	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1272}
1273
1274/*
1275 * Restore MSI-X registers and table during resume.  If MSI-X is
1276 * enabled then walk the virtual table to restore the actual MSI-X
1277 * table.
1278 */
1279static void
1280pci_resume_msix(device_t dev)
1281{
1282	struct pci_devinfo *dinfo = device_get_ivars(dev);
1283	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1284	struct msix_table_entry *mte;
1285	struct msix_vector *mv;
1286	int i;
1287
1288	if (msix->msix_alloc > 0) {
1289		/* First, mask all vectors. */
1290		for (i = 0; i < msix->msix_msgnum; i++)
1291			pci_mask_msix(dev, i);
1292
1293		/* Second, program any messages with at least one handler. */
1294		for (i = 0; i < msix->msix_table_len; i++) {
1295			mte = &msix->msix_table[i];
1296			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1297				continue;
1298			mv = &msix->msix_vectors[mte->mte_vector - 1];
1299			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1300			pci_unmask_msix(dev, i);
1301		}
1302	}
1303	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1304	    msix->msix_ctrl, 2);
1305}
1306
1307/*
1308 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1309 * returned in *count.  After this function returns, each message will be
1310 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1311 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped: the driver must
	 * have activated the memory BAR(s) containing the MSI-X table
	 * and PBA before requesting vectors.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When the PBA shares the table BAR, 'rle' still holds that BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never request more vectors than the device advertises. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is the number of messages successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors before enabling MSI-X in the control register. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: table slot i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1446
1447/*
1448 * By default, pci_alloc_msix() will assign the allocated IRQ
1449 * resources consecutively to the first N messages in the MSI-X table.
1450 * However, device drivers may want to use different layouts if they
1451 * either receive fewer messages than they asked for, or they wish to
1452 * populate the MSI-X table sparsely.  This method allows the driver
1453 * to specify what layout it wants.  It must be called after a
1454 * successful pci_alloc_msix() but before any of the associated
1455 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1456 *
1457 * The 'vectors' array contains 'count' message vectors.  The array
1458 * maps directly to the MSI-X table in that index 0 in the array
1459 * specifies the vector for the first message in the MSI-X table, etc.
1460 * The vector value in each array index can either be 0 to indicate
1461 * that no vector should be assigned to a message slot, or it can be a
1462 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1464 * vector (IRQ) to be used for the corresponding message.
1465 *
1466 * On successful return, each message with a non-zero vector will have
1467 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1468 * 1.  Additionally, if any of the IRQs allocated via the previous
1469 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1470 * will be freed back to the system automatically.
1471 *
1472 * For example, suppose a driver has a MSI-X table with 6 messages and
1473 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1474 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1475 * C.  After the call to pci_alloc_msix(), the device will be setup to
1476 * have an MSI-X table of ABC--- (where - means no vector assigned).
1477 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1478 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1479 * be freed back to the system.  This device will also have valid
1480 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1481 *
1482 * In any case, the SYS_RES_IRQ rid X will always map to the message
1483 * at MSI-X table index X - 1 and will only be valid if a vector is
1484 * assigned to that table entry.
1485 */
1486int
1487pci_remap_msix_method(device_t dev, device_t child, int count,
1488    const u_int *vectors)
1489{
1490	struct pci_devinfo *dinfo = device_get_ivars(child);
1491	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1492	struct resource_list_entry *rle;
1493	int i, irq, j, *used;
1494
1495	/*
1496	 * Have to have at least one message in the table but the
1497	 * table can't be bigger than the actual MSI-X table in the
1498	 * device.
1499	 */
1500	if (count == 0 || count > msix->msix_msgnum)
1501		return (EINVAL);
1502
1503	/* Sanity check the vectors. */
1504	for (i = 0; i < count; i++)
1505		if (vectors[i] > msix->msix_alloc)
1506			return (EINVAL);
1507
1508	/*
1509	 * Make sure there aren't any holes in the vectors to be used.
1510	 * It's a big pain to support it, and it doesn't really make
1511	 * sense anyway.  Also, at least one vector must be used.
1512	 */
1513	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1514	    M_ZERO);
1515	for (i = 0; i < count; i++)
1516		if (vectors[i] != 0)
1517			used[vectors[i] - 1] = 1;
1518	for (i = 0; i < msix->msix_alloc - 1; i++)
1519		if (used[i] == 0 && used[i + 1] == 1) {
1520			free(used, M_DEVBUF);
1521			return (EINVAL);
1522		}
1523	if (used[0] != 1) {
1524		free(used, M_DEVBUF);
1525		return (EINVAL);
1526	}
1527
1528	/* Make sure none of the resources are allocated. */
1529	for (i = 0; i < msix->msix_table_len; i++) {
1530		if (msix->msix_table[i].mte_vector == 0)
1531			continue;
1532		if (msix->msix_table[i].mte_handlers > 0)
1533			return (EBUSY);
1534		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1535		KASSERT(rle != NULL, ("missing resource"));
1536		if (rle->res != NULL)
1537			return (EBUSY);
1538	}
1539
1540	/* Free the existing resource list entries. */
1541	for (i = 0; i < msix->msix_table_len; i++) {
1542		if (msix->msix_table[i].mte_vector == 0)
1543			continue;
1544		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1545	}
1546
1547	/*
1548	 * Build the new virtual table keeping track of which vectors are
1549	 * used.
1550	 */
1551	free(msix->msix_table, M_DEVBUF);
1552	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1553	    M_DEVBUF, M_WAITOK | M_ZERO);
1554	for (i = 0; i < count; i++)
1555		msix->msix_table[i].mte_vector = vectors[i];
1556	msix->msix_table_len = count;
1557
1558	/* Free any unused IRQs and resize the vectors array if necessary. */
1559	j = msix->msix_alloc - 1;
1560	if (used[j] == 0) {
1561		struct msix_vector *vec;
1562
1563		while (used[j] == 0) {
1564			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1565			    msix->msix_vectors[j].mv_irq);
1566			j--;
1567		}
1568		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1569		    M_WAITOK);
1570		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1571		    (j + 1));
1572		free(msix->msix_vectors, M_DEVBUF);
1573		msix->msix_vectors = vec;
1574		msix->msix_alloc = j + 1;
1575	}
1576	free(used, M_DEVBUF);
1577
1578	/* Map the IRQs onto the rids. */
1579	for (i = 0; i < count; i++) {
1580		if (vectors[i] == 0)
1581			continue;
1582		irq = msix->msix_vectors[vectors[i]].mv_irq;
1583		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1584		    irq, 1);
1585	}
1586
1587	if (bootverbose) {
1588		device_printf(child, "Remapped MSI-X IRQs as: ");
1589		for (i = 0; i < count; i++) {
1590			if (i != 0)
1591				printf(", ");
1592			if (vectors[i] == 0)
1593				printf("---");
1594			else
1595				printf("%d",
1596				    msix->msix_vectors[vectors[i]].mv_irq);
1597		}
1598		printf("\n");
1599	}
1600
1601	return (0);
1602}
1603
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with ENODEV if
 * no MSI-X messages are allocated and EBUSY if any message is still
 * in use (has a handler or an allocated resource).
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1650
1651/*
1652 * Return the max supported MSI-X messages this device supports.
1653 * Basically, assuming the MD code can alloc messages, this function
1654 * should return the maximum value that pci_alloc_msix() can return.
1655 * Thus, it is subject to the tunables, etc.
1656 */
1657int
1658pci_msix_count_method(device_t dev, device_t child)
1659{
1660	struct pci_devinfo *dinfo = device_get_ivars(child);
1661	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1662
1663	if (pci_do_msix && msix->msix_location != 0)
1664		return (msix->msix_msgnum);
1665	return (0);
1666}
1667
1668/*
1669 * HyperTransport MSI mapping control
1670 */
1671void
1672pci_ht_map_msi(device_t dev, uint64_t addr)
1673{
1674	struct pci_devinfo *dinfo = device_get_ivars(dev);
1675	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1676
1677	if (!ht->ht_msimap)
1678		return;
1679
1680	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1681	    ht->ht_msiaddr >> 20 == addr >> 20) {
1682		/* Enable MSI -> HT mapping. */
1683		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1684		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1685		    ht->ht_msictrl, 2);
1686	}
1687
1688	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1689		/* Disable MSI -> HT mapping. */
1690		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1691		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1692		    ht->ht_msictrl, 2);
1693	}
1694}
1695
1696int
1697pci_get_max_read_req(device_t dev)
1698{
1699	int cap;
1700	uint16_t val;
1701
1702	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1703		return (0);
1704	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1705	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1706	val >>= 12;
1707	return (1 << (val + 7));
1708}
1709
1710int
1711pci_set_max_read_req(device_t dev, int size)
1712{
1713	int cap;
1714	uint16_t val;
1715
1716	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1717		return (0);
1718	if (size < 128)
1719		size = 128;
1720	if (size > 4096)
1721		size = 4096;
1722	size = (1 << (fls(size) - 1));
1723	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1724	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1725	val |= (fls(size) - 8) << 12;
1726	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1727	return (size);
1728}
1729
1730/*
1731 * Support for MSI message signalled interrupts.
1732 */
1733void
1734pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1735{
1736	struct pci_devinfo *dinfo = device_get_ivars(dev);
1737	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1738
1739	/* Write data and address values. */
1740	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1741	    address & 0xffffffff, 4);
1742	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1743		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1744		    address >> 32, 4);
1745		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1746		    data, 2);
1747	} else
1748		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1749		    2);
1750
1751	/* Enable MSI in the control register. */
1752	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1753	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1754	    2);
1755
1756	/* Enable MSI -> HT mapping. */
1757	pci_ht_map_msi(dev, address);
1758}
1759
1760void
1761pci_disable_msi(device_t dev)
1762{
1763	struct pci_devinfo *dinfo = device_get_ivars(dev);
1764	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1765
1766	/* Disable MSI -> HT mapping. */
1767	pci_ht_map_msi(dev, 0);
1768
1769	/* Disable MSI in the control register. */
1770	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1771	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1772	    2);
1773}
1774
1775/*
1776 * Restore MSI registers during resume.  If MSI is enabled then
1777 * restore the data and address registers in addition to the control
1778 * register.
1779 */
1780static void
1781pci_resume_msi(device_t dev)
1782{
1783	struct pci_devinfo *dinfo = device_get_ivars(dev);
1784	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1785	uint64_t address;
1786	uint16_t data;
1787
1788	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1789		address = msi->msi_addr;
1790		data = msi->msi_data;
1791		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1792		    address & 0xffffffff, 4);
1793		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1794			pci_write_config(dev, msi->msi_location +
1795			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1796			pci_write_config(dev, msi->msi_location +
1797			    PCIR_MSI_DATA_64BIT, data, 2);
1798		} else
1799			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1800			    data, 2);
1801	}
1802	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1803	    2);
1804}
1805
1806static int
1807pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1808{
1809	struct pci_devinfo *dinfo = device_get_ivars(dev);
1810	pcicfgregs *cfg = &dinfo->cfg;
1811	struct resource_list_entry *rle;
1812	struct msix_table_entry *mte;
1813	struct msix_vector *mv;
1814	uint64_t addr;
1815	uint32_t data;
1816	int error, i, j;
1817
1818	/*
1819	 * Handle MSI first.  We try to find this IRQ among our list
1820	 * of MSI IRQs.  If we find it, we request updated address and
1821	 * data registers and apply the results.
1822	 */
1823	if (cfg->msi.msi_alloc > 0) {
1824
1825		/* If we don't have any active handlers, nothing to do. */
1826		if (cfg->msi.msi_handlers == 0)
1827			return (0);
1828		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1829			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1830			    i + 1);
1831			if (rle->start == irq) {
1832				error = PCIB_MAP_MSI(device_get_parent(bus),
1833				    dev, irq, &addr, &data);
1834				if (error)
1835					return (error);
1836				pci_disable_msi(dev);
1837				dinfo->cfg.msi.msi_addr = addr;
1838				dinfo->cfg.msi.msi_data = data;
1839				pci_enable_msi(dev, addr, data);
1840				return (0);
1841			}
1842		}
1843		return (ENOENT);
1844	}
1845
1846	/*
1847	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1848	 * we request the updated mapping info.  If that works, we go
1849	 * through all the slots that use this IRQ and update them.
1850	 */
1851	if (cfg->msix.msix_alloc > 0) {
1852		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1853			mv = &cfg->msix.msix_vectors[i];
1854			if (mv->mv_irq == irq) {
1855				error = PCIB_MAP_MSI(device_get_parent(bus),
1856				    dev, irq, &addr, &data);
1857				if (error)
1858					return (error);
1859				mv->mv_address = addr;
1860				mv->mv_data = data;
1861				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1862					mte = &cfg->msix.msix_table[j];
1863					if (mte->mte_vector != i + 1)
1864						continue;
1865					if (mte->mte_handlers == 0)
1866						continue;
1867					pci_mask_msix(dev, j);
1868					pci_enable_msix(dev, j, addr, data);
1869					pci_unmask_msix(dev, j);
1870				}
1871			}
1872		}
1873		return (ENOENT);
1874	}
1875
1876	return (ENOENT);
1877}
1878
1879/*
1880 * Returns true if the specified device is blacklisted because MSI
1881 * doesn't work.
1882 */
1883int
1884pci_msi_device_blacklisted(device_t dev)
1885{
1886	const struct pci_quirk *q;
1887
1888	if (!pci_honor_msi_blacklist)
1889		return (0);
1890
1891	for (q = &pci_quirks[0]; q->devid; q++) {
1892		if (q->devid == pci_get_devid(dev) &&
1893		    q->type == PCI_QUIRK_DISABLE_MSI)
1894			return (1);
1895	}
1896	return (0);
1897}
1898
1899/*
1900 * Returns true if a specified chipset supports MSI when it is
1901 * emulated hardware in a virtual machine.
1902 */
1903static int
1904pci_msi_vm_chipset(device_t dev)
1905{
1906	const struct pci_quirk *q;
1907
1908	for (q = &pci_quirks[0]; q->devid; q++) {
1909		if (q->devid == pci_get_devid(dev) &&
1910		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1911			return (1);
1912	}
1913	return (0);
1914}
1915
1916/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1918 * we just check for blacklisted chipsets as represented by the
1919 * host-PCI bridge at device 0:0:0.  In the future, it may become
1920 * necessary to check other system attributes, such as the kenv values
1921 * that give the motherboard manufacturer and model number.
1922 */
1923static int
1924pci_msi_blacklisted(void)
1925{
1926	device_t dev;
1927
1928	if (!pci_honor_msi_blacklist)
1929		return (0);
1930
1931	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1932	if (!(pcie_chipset || pcix_chipset)) {
1933		if (vm_guest != VM_GUEST_NO) {
1934			dev = pci_find_bsf(0, 0, 0);
1935			if (dev != NULL)
1936				return (pci_msi_vm_chipset(dev) == 0);
1937		}
1938		return (1);
1939	}
1940
1941	dev = pci_find_bsf(0, 0, 0);
1942	if (dev != NULL)
1943		return (pci_msi_device_blacklisted(dev));
1944	return (0);
1945}
1946
1947/*
1948 * Attempt to allocate *count MSI messages.  The actual number allocated is
1949 * returned in *count.  After this function returns, each message will be
1950 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1951 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Keep halving the request until the parent bridge can satisfy
	 * it; each attempt remains a power of 2.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2070
/*
 * Release the MSI messages associated with this device.
 *
 * MSI-X release is attempted first; if the device was using MSI-X that
 * result is returned directly.  Otherwise returns ENODEV when no MSI
 * messages are allocated, EBUSY if any IRQ resource is still allocated
 * or has a handler attached, and 0 on success.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* MSI IRQ resources live at rids 1..msi_alloc on the child. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2119
2120/*
2121 * Return the max supported MSI messages this device supports.
2122 * Basically, assuming the MD code can alloc messages, this function
2123 * should return the maximum value that pci_alloc_msi() can return.
2124 * Thus, it is subject to the tunables, etc.
2125 */
2126int
2127pci_msi_count_method(device_t dev, device_t child)
2128{
2129	struct pci_devinfo *dinfo = device_get_ivars(child);
2130	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2131
2132	if (pci_do_msi && msi->msi_location != 0)
2133		return (msi->msi_msgnum);
2134	return (0);
2135}
2136
2137/* free pcicfgregs structure and all depending data structures */
2138
2139int
2140pci_freecfg(struct pci_devinfo *dinfo)
2141{
2142	struct devlist *devlist_head;
2143	struct pci_map *pm, *next;
2144	int i;
2145
2146	devlist_head = &pci_devq;
2147
2148	if (dinfo->cfg.vpd.vpd_reg) {
2149		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2150		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2151			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2152		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2153		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2154			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2155		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2156	}
2157	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2158		free(pm, M_DEVBUF);
2159	}
2160	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2161	free(dinfo, M_DEVBUF);
2162
2163	/* increment the generation count */
2164	pci_generation++;
2165
2166	/* we're losing one device */
2167	pci_numdevs--;
2168	return (0);
2169}
2170
2171/*
2172 * PCI power manangement
2173 */
2174int
2175pci_set_powerstate_method(device_t dev, device_t child, int state)
2176{
2177	struct pci_devinfo *dinfo = device_get_ivars(child);
2178	pcicfgregs *cfg = &dinfo->cfg;
2179	uint16_t status;
2180	int result, oldstate, highest, delay;
2181
2182	if (cfg->pp.pp_cap == 0)
2183		return (EOPNOTSUPP);
2184
2185	/*
2186	 * Optimize a no state change request away.  While it would be OK to
2187	 * write to the hardware in theory, some devices have shown odd
2188	 * behavior when going from D3 -> D3.
2189	 */
2190	oldstate = pci_get_powerstate(child);
2191	if (oldstate == state)
2192		return (0);
2193
2194	/*
2195	 * The PCI power management specification states that after a state
2196	 * transition between PCI power states, system software must
2197	 * guarantee a minimal delay before the function accesses the device.
2198	 * Compute the worst case delay that we need to guarantee before we
2199	 * access the device.  Many devices will be responsive much more
2200	 * quickly than this delay, but there are some that don't respond
2201	 * instantly to state changes.  Transitions to/from D3 state require
2202	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2203	 * is done below with DELAY rather than a sleeper function because
2204	 * this function can be called from contexts where we cannot sleep.
2205	 */
2206	highest = (oldstate > state) ? oldstate : state;
2207	if (highest == PCI_POWERSTATE_D3)
2208	    delay = 10000;
2209	else if (highest == PCI_POWERSTATE_D2)
2210	    delay = 200;
2211	else
2212	    delay = 0;
2213	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2214	    & ~PCIM_PSTAT_DMASK;
2215	result = 0;
2216	switch (state) {
2217	case PCI_POWERSTATE_D0:
2218		status |= PCIM_PSTAT_D0;
2219		break;
2220	case PCI_POWERSTATE_D1:
2221		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2222			return (EOPNOTSUPP);
2223		status |= PCIM_PSTAT_D1;
2224		break;
2225	case PCI_POWERSTATE_D2:
2226		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2227			return (EOPNOTSUPP);
2228		status |= PCIM_PSTAT_D2;
2229		break;
2230	case PCI_POWERSTATE_D3:
2231		status |= PCIM_PSTAT_D3;
2232		break;
2233	default:
2234		return (EINVAL);
2235	}
2236
2237	if (bootverbose)
2238		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2239		    state);
2240
2241	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2242	if (delay)
2243		DELAY(delay);
2244	return (0);
2245}
2246
2247int
2248pci_get_powerstate_method(device_t dev, device_t child)
2249{
2250	struct pci_devinfo *dinfo = device_get_ivars(child);
2251	pcicfgregs *cfg = &dinfo->cfg;
2252	uint16_t status;
2253	int result;
2254
2255	if (cfg->pp.pp_cap != 0) {
2256		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2257		switch (status & PCIM_PSTAT_DMASK) {
2258		case PCIM_PSTAT_D0:
2259			result = PCI_POWERSTATE_D0;
2260			break;
2261		case PCIM_PSTAT_D1:
2262			result = PCI_POWERSTATE_D1;
2263			break;
2264		case PCIM_PSTAT_D2:
2265			result = PCI_POWERSTATE_D2;
2266			break;
2267		case PCIM_PSTAT_D3:
2268			result = PCI_POWERSTATE_D3;
2269			break;
2270		default:
2271			result = PCI_POWERSTATE_UNKNOWN;
2272			break;
2273		}
2274	} else {
2275		/* No support, device is always at D0 */
2276		result = PCI_POWERSTATE_D0;
2277	}
2278	return (result);
2279}
2280
2281/*
2282 * Some convenience functions for PCI device drivers.
2283 */
2284
2285static __inline void
2286pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2287{
2288	uint16_t	command;
2289
2290	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2291	command |= bit;
2292	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2293}
2294
2295static __inline void
2296pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2297{
2298	uint16_t	command;
2299
2300	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2301	command &= ~bit;
2302	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2303}
2304
/* Enable bus mastering (PCIM_CMD_BUSMASTEREN) for a child device. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2311
/* Disable bus mastering (PCIM_CMD_BUSMASTEREN) for a child device. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2318
2319int
2320pci_enable_io_method(device_t dev, device_t child, int space)
2321{
2322	uint16_t bit;
2323
2324	switch(space) {
2325	case SYS_RES_IOPORT:
2326		bit = PCIM_CMD_PORTEN;
2327		break;
2328	case SYS_RES_MEMORY:
2329		bit = PCIM_CMD_MEMEN;
2330		break;
2331	default:
2332		return (EINVAL);
2333	}
2334	pci_set_command_bit(dev, child, bit);
2335	return (0);
2336}
2337
2338int
2339pci_disable_io_method(device_t dev, device_t child, int space)
2340{
2341	uint16_t bit;
2342
2343	switch(space) {
2344	case SYS_RES_IOPORT:
2345		bit = PCIM_CMD_PORTEN;
2346		break;
2347	case SYS_RES_MEMORY:
2348		bit = PCIM_CMD_MEMEN;
2349		break;
2350	default:
2351		return (EINVAL);
2352	}
2353	pci_clear_command_bit(dev, child, bit);
2354	return (0);
2355}
2356
2357/*
2358 * New style pci driver.  Parent device is either a pci-host-bridge or a
2359 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2360 */
2361
/*
 * When booting verbose, dump the interesting fields of a newly found
 * device's config header: IDs, location, class, command/status,
 * timing, interrupt routing, and the power-management, MSI, and MSI-X
 * capabilities if present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Show supported D-states and the current one. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* The table and PBA may share a BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2418
2419static int
2420pci_porten(device_t dev)
2421{
2422	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2423}
2424
2425static int
2426pci_memen(device_t dev)
2427{
2428	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2429}
2430
/*
 * Read the current value and the sizing mask of BAR 'reg'.  On return
 * *mapp holds the BAR value (both dwords for a 64-bit BAR) and
 * *testvalp holds the value read back after writing all 1's, from
 * which the BAR size can be derived.  The original BAR contents and
 * the command register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2494
/*
 * Program BAR 'pm' with 'base' (both dwords for a 64-bit BAR), then
 * re-read the register(s) so that the cached pm_value reflects what
 * the hardware actually accepted.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Refresh the cached value from the hardware. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2515
2516struct pci_map *
2517pci_find_bar(device_t dev, int reg)
2518{
2519	struct pci_devinfo *dinfo;
2520	struct pci_map *pm;
2521
2522	dinfo = device_get_ivars(dev);
2523	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2524		if (pm->pm_reg == reg)
2525			return (pm);
2526	}
2527	return (NULL);
2528}
2529
2530int
2531pci_bar_enabled(device_t dev, struct pci_map *pm)
2532{
2533	struct pci_devinfo *dinfo;
2534	uint16_t cmd;
2535
2536	dinfo = device_get_ivars(dev);
2537	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2538	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2539		return (0);
2540	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2541	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2542		return ((cmd & PCIM_CMD_MEMEN) != 0);
2543	else
2544		return ((cmd & PCIM_CMD_PORTEN) != 0);
2545}
2546
/*
 * Allocate a new pci_map record for BAR register 'reg' with raw value
 * 'value' and log_2 length 'size', and insert it into the device's BAR
 * list, which is kept sorted by register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Walk to the last entry whose successor has a higher offset. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev is NULL only when the list was empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2571
2572static void
2573pci_restore_bars(device_t dev)
2574{
2575	struct pci_devinfo *dinfo;
2576	struct pci_map *pm;
2577	int ln2range;
2578
2579	dinfo = device_get_ivars(dev);
2580	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2581		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2582			ln2range = 32;
2583		else
2584			ln2range = pci_maprange(pm->pm_value);
2585		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2586		if (ln2range == 64)
2587			pci_write_config(dev, pm->pm_reg + 4,
2588			    pm->pm_value >> 32, 4);
2589	}
2590}
2591
2592/*
2593 * Add a resource based on a pci map register. Return 1 if the map
2594 * register is a 32bit map register or 2 if it is a 64bit register.
2595 */
2596static int
2597pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2598    int force, int prefetch)
2599{
2600	struct pci_map *pm;
2601	pci_addr_t base, map, testval;
2602	pci_addr_t start, end, count;
2603	int barlen, basezero, maprange, mapsize, type;
2604	uint16_t cmd;
2605	struct resource *res;
2606
2607	/*
2608	 * The BAR may already exist if the device is a CardBus card
2609	 * whose CIS is stored in this BAR.
2610	 */
2611	pm = pci_find_bar(dev, reg);
2612	if (pm != NULL) {
2613		maprange = pci_maprange(pm->pm_value);
2614		barlen = maprange == 64 ? 2 : 1;
2615		return (barlen);
2616	}
2617
2618	pci_read_bar(dev, reg, &map, &testval);
2619	if (PCI_BAR_MEM(map)) {
2620		type = SYS_RES_MEMORY;
2621		if (map & PCIM_BAR_MEM_PREFETCH)
2622			prefetch = 1;
2623	} else
2624		type = SYS_RES_IOPORT;
2625	mapsize = pci_mapsize(testval);
2626	base = pci_mapbase(map);
2627#ifdef __PCI_BAR_ZERO_VALID
2628	basezero = 0;
2629#else
2630	basezero = base == 0;
2631#endif
2632	maprange = pci_maprange(map);
2633	barlen = maprange == 64 ? 2 : 1;
2634
2635	/*
2636	 * For I/O registers, if bottom bit is set, and the next bit up
2637	 * isn't clear, we know we have a BAR that doesn't conform to the
2638	 * spec, so ignore it.  Also, sanity check the size of the data
2639	 * areas to the type of memory involved.  Memory must be at least
2640	 * 16 bytes in size, while I/O ranges must be at least 4.
2641	 */
2642	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2643		return (barlen);
2644	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2645	    (type == SYS_RES_IOPORT && mapsize < 2))
2646		return (barlen);
2647
2648	/* Save a record of this BAR. */
2649	pm = pci_add_bar(dev, reg, map, mapsize);
2650	if (bootverbose) {
2651		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2652		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2653		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2654			printf(", port disabled\n");
2655		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2656			printf(", memory disabled\n");
2657		else
2658			printf(", enabled\n");
2659	}
2660
2661	/*
2662	 * If base is 0, then we have problems if this architecture does
2663	 * not allow that.  It is best to ignore such entries for the
2664	 * moment.  These will be allocated later if the driver specifically
2665	 * requests them.  However, some removable busses look better when
2666	 * all resources are allocated, so allow '0' to be overriden.
2667	 *
2668	 * Similarly treat maps whose values is the same as the test value
2669	 * read back.  These maps have had all f's written to them by the
2670	 * BIOS in an attempt to disable the resources.
2671	 */
2672	if (!force && (basezero || map == testval))
2673		return (barlen);
2674	if ((u_long)base != base) {
2675		device_printf(bus,
2676		    "pci%d:%d:%d:%d bar %#x too many address bits",
2677		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2678		    pci_get_function(dev), reg);
2679		return (barlen);
2680	}
2681
2682	/*
2683	 * This code theoretically does the right thing, but has
2684	 * undesirable side effects in some cases where peripherals
2685	 * respond oddly to having these bits enabled.  Let the user
2686	 * be able to turn them off (since pci_enable_io_modes is 1 by
2687	 * default).
2688	 */
2689	if (pci_enable_io_modes) {
2690		/* Turn on resources that have been left off by a lazy BIOS */
2691		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2692			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2693			cmd |= PCIM_CMD_PORTEN;
2694			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2695		}
2696		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2697			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2698			cmd |= PCIM_CMD_MEMEN;
2699			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2700		}
2701	} else {
2702		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2703			return (barlen);
2704		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2705			return (barlen);
2706	}
2707
2708	count = (pci_addr_t)1 << mapsize;
2709	if (basezero || base == pci_mapbase(testval)) {
2710		start = 0;	/* Let the parent decide. */
2711		end = ~0ul;
2712	} else {
2713		start = base;
2714		end = base + count - 1;
2715	}
2716	resource_list_add(rl, type, reg, start, end, count);
2717
2718	/*
2719	 * Try to allocate the resource for this BAR from our parent
2720	 * so that this resource range is already reserved.  The
2721	 * driver for this device will later inherit this resource in
2722	 * pci_alloc_resource().
2723	 */
2724	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2725	    prefetch ? RF_PREFETCHABLE : 0);
2726	if (res == NULL) {
2727		/*
2728		 * If the allocation fails, clear the BAR and delete
2729		 * the resource list entry to force
2730		 * pci_alloc_resource() to allocate resources from the
2731		 * parent.
2732		 */
2733		resource_list_delete(rl, type, reg);
2734		start = 0;
2735	} else
2736		start = rman_get_start(res);
2737	pci_write_bar(dev, pm, start);
2738	return (barlen);
2739}
2740
2741/*
2742 * For ATA devices we need to decide early what addressing mode to use.
2743 * Legacy demands that the primary and secondary ATA ports sits on the
2744 * same addresses that old ISA hardware did. This dictates that we use
2745 * those addresses and ignore the BAR's if we cannot set PCI native
2746 * addressing mode.
2747 */
2748static void
2749pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2750    uint32_t prefetchmask)
2751{
2752	struct resource *r;
2753	int rid, type, progif;
2754#if 0
2755	/* if this device supports PCI native addressing use it */
2756	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2757	if ((progif & 0x8a) == 0x8a) {
2758		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2759		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2760			printf("Trying ATA native PCI addressing mode\n");
2761			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2762		}
2763	}
2764#endif
2765	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2766	type = SYS_RES_IOPORT;
2767	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2768		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2769		    prefetchmask & (1 << 0));
2770		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2771		    prefetchmask & (1 << 1));
2772	} else {
2773		rid = PCIR_BAR(0);
2774		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2775		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2776		    0x1f7, 8, 0);
2777		rid = PCIR_BAR(1);
2778		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2779		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2780		    0x3f6, 1, 0);
2781	}
2782	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2783		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2784		    prefetchmask & (1 << 2));
2785		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2786		    prefetchmask & (1 << 3));
2787	} else {
2788		rid = PCIR_BAR(2);
2789		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2790		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2791		    0x177, 8, 0);
2792		rid = PCIR_BAR(3);
2793		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2794		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2795		    0x376, 1, 0);
2796	}
2797	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2798	    prefetchmask & (1 << 4));
2799	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2800	    prefetchmask & (1 << 5));
2801}
2802
/*
 * Assign an interrupt line (rid 0 SYS_RES_IRQ resource) to a device.
 * The IRQ may come from a hw.pci<d>.<b>.<s>.INT<p>.irq tunable, from
 * the intline config register, or be routed by the parent bus.  With
 * force_route set, bus routing is attempted even when the intline
 * register already holds a valid value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only accept tunable values in the range 1..254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2850
/*
 * Perform early OHCI takeover from SMM.  If the BIOS (SMM) owns the
 * controller, request an ownership change and poll for up to ~100ms;
 * if the BIOS does not respond, reset the controller.  Interrupts are
 * disabled before releasing the BAR.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for the BIOS to hand over. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2887
2888/* Perform early UHCI takeover from SMM. */
2889static void
2890uhci_early_takeover(device_t self)
2891{
2892	struct resource *res;
2893	int rid;
2894
2895	/*
2896	 * Set the PIRQD enable bit and switch off all the others. We don't
2897	 * want legacy support to interfere with us XXX Does this also mean
2898	 * that the BIOS won't touch the keyboard anymore if it is connected
2899	 * to the ports of the root hub?
2900	 */
2901	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2902
2903	/* Disable interrupts */
2904	rid = PCI_UHCI_BASE_REG;
2905	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2906	if (res != NULL) {
2907		bus_write_2(res, UHCI_INTR, 0);
2908		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2909	}
2910}
2911
/*
 * Perform early EHCI takeover from SMM.  Walk the extended capability
 * list looking for the legacy-support capability; if the BIOS owns
 * the controller, set the OS semaphore and poll for up to ~100ms for
 * the BIOS semaphore to clear, then disable interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS semaphore to clear. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2967
/*
 * Perform early XHCI takeover from SMM.  Walk the extended capability
 * list looking for the USB legacy-support capability; if the BIOS
 * owns the controller, set the OS semaphore and poll for up to ~5s for
 * the BIOS semaphore to clear, then stop the controller by clearing
 * USBCMD.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Start with all-ones so the first XHCI_XECP_NEXT() check passes. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds (5000 x 1ms) */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read status to flush the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3029
/*
 * Populate a device's resource list: its BARs (with special handling
 * for legacy-mode ATA controllers), any quirk-specified extra map
 * registers, and its interrupt line.  USB host controllers may also
 * be taken over from the BIOS/SMM here.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	const struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3086
/*
 * Enumerate every slot and function on the given bus and add a child
 * device for each PCI function found.  'dinfo_size' lets subclassed
 * bus drivers embed struct pci_devinfo at the start of a larger
 * per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots whose header type is invalid (no device). */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Only scan functions > 0 on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3119
/*
 * Create a new child device for the given PCI function, attach its
 * ivars and resource list, save and restore its configuration space
 * so it starts from a known state, and then add its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3131
3132static int
3133pci_probe(device_t dev)
3134{
3135
3136	device_set_desc(dev, "PCI bus");
3137
3138	/* Allow other subclasses to override this driver. */
3139	return (BUS_PROBE_GENERIC);
3140}
3141
/*
 * Common attach logic shared by the stock PCI bus driver and its
 * subclasses: report our location and, for the topmost PCI bus in a
 * hierarchy, create the DMA tag handed out to child devices.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain, error;

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	/*
	 * Only create a DMA tag if our grandparent is not itself a PCI
	 * bus, i.e. only the top of a PCI hierarchy gets its own tag.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			sc->sc_dma_tag_valid = 1;
	}
	return (0);
}
3168
/* Attach method: common setup, then enumerate and attach children. */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3189
3190static void
3191pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3192    int state)
3193{
3194	device_t child, pcib;
3195	struct pci_devinfo *dinfo;
3196	int dstate, i;
3197
3198	/*
3199	 * Set the device to the given state.  If the firmware suggests
3200	 * a different power state, use it instead.  If power management
3201	 * is not present, the firmware is responsible for managing
3202	 * device power.  Skip children who aren't attached since they
3203	 * are handled separately.
3204	 */
3205	pcib = device_get_parent(dev);
3206	for (i = 0; i < numdevs; i++) {
3207		child = devlist[i];
3208		dinfo = device_get_ivars(child);
3209		dstate = state;
3210		if (device_is_attached(child) &&
3211		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3212			pci_set_powerstate(child, dstate);
3213	}
3214}
3215
/*
 * Suspend the PCI bus: save each child's configuration space, suspend
 * the children, and then (if pci_do_power_suspend is set) place them
 * in the D3 power state.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3247
/*
 * Resume the PCI bus: power children back up to D0 (if
 * pci_do_power_resume is set), restore their configuration space and
 * resume them, bringing up critical device classes first.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save state for children without an attached driver. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not handled by the first pass. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3303
/*
 * Locate the PCI vendor database preloaded by the boot loader (if
 * any) and record its address and size for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database with a newline so the
			 * parser always finds an end-of-line.  NOTE(review):
			 * this writes one byte past the reported size and
			 * assumes the preload area has room — confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3323
/*
 * Called when a new driver is added to this bus: let the driver
 * identify new children, then reprobe any child that does not yet
 * have a driver attached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Restore the saved config space before reprobing. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3352
/*
 * Set up an interrupt handler for a child device.  For the legacy
 * INTx interrupt (rid 0) this just ensures INTx decoding is enabled.
 * For MSI/MSI-X rids the message is mapped by the parent bridge and
 * programmed into the device on first use, and INTx is disabled.
 * On any failure after the generic setup, the handler is torn down
 * again before returning the error.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the message lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the message lazily on first handler setup. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3444
3445int
3446pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3447    void *cookie)
3448{
3449	struct msix_table_entry *mte;
3450	struct resource_list_entry *rle;
3451	struct pci_devinfo *dinfo;
3452	int error, rid;
3453
3454	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3455		return (EINVAL);
3456
3457	/* If this isn't a direct child, just bail out */
3458	if (device_get_parent(child) != dev)
3459		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3460
3461	rid = rman_get_rid(irq);
3462	if (rid == 0) {
3463		/* Mask INTx */
3464		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3465	} else {
3466		/*
3467		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3468		 * decrement the appropriate handlers count and mask the
3469		 * MSI-X message, or disable MSI messages if the count
3470		 * drops to 0.
3471		 */
3472		dinfo = device_get_ivars(child);
3473		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3474		if (rle->res != irq)
3475			return (EINVAL);
3476		if (dinfo->cfg.msi.msi_alloc > 0) {
3477			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3478			    ("MSI-X index too high"));
3479			if (dinfo->cfg.msi.msi_handlers == 0)
3480				return (EINVAL);
3481			dinfo->cfg.msi.msi_handlers--;
3482			if (dinfo->cfg.msi.msi_handlers == 0)
3483				pci_disable_msi(child);
3484		} else {
3485			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3486			    ("No MSI or MSI-X interrupts allocated"));
3487			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3488			    ("MSI-X index too high"));
3489			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3490			if (mte->mte_handlers == 0)
3491				return (EINVAL);
3492			mte->mte_handlers--;
3493			if (mte->mte_handlers == 0)
3494				pci_mask_msix(child, rid - 1);
3495		}
3496	}
3497	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3498	if (rid > 0)
3499		KASSERT(error == 0,
3500		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3501	return (error);
3502}
3503
3504int
3505pci_print_child(device_t dev, device_t child)
3506{
3507	struct pci_devinfo *dinfo;
3508	struct resource_list *rl;
3509	int retval = 0;
3510
3511	dinfo = device_get_ivars(child);
3512	rl = &dinfo->resources;
3513
3514	retval += bus_print_child_header(dev, child);
3515
3516	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3517	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3518	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3519	if (device_get_flags(dev))
3520		retval += printf(" flags %#x", device_get_flags(dev));
3521
3522	retval += printf(" at device %d.%d", pci_get_slot(child),
3523	    pci_get_function(child));
3524
3525	retval += bus_print_child_footer(dev, child);
3526
3527	return (retval);
3528}
3529
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch()
 * when no driver attaches and the device is not in the vendor
 * database.  A subclass of -1 is the default description for the
 * whole class; the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3621
3622void
3623pci_probe_nomatch(device_t dev, device_t child)
3624{
3625	int	i;
3626	char	*cp, *scp, *device;
3627
3628	/*
3629	 * Look for a listing for this device in a loaded device database.
3630	 */
3631	if ((device = pci_describe_device(child)) != NULL) {
3632		device_printf(dev, "<%s>", device);
3633		free(device, M_DEVBUF);
3634	} else {
3635		/*
3636		 * Scan the class/subclass descriptions for a general
3637		 * description.
3638		 */
3639		cp = "unknown";
3640		scp = NULL;
3641		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3642			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3643				if (pci_nomatch_tab[i].subclass == -1) {
3644					cp = pci_nomatch_tab[i].desc;
3645				} else if (pci_nomatch_tab[i].subclass ==
3646				    pci_get_subclass(child)) {
3647					scp = pci_nomatch_tab[i].desc;
3648				}
3649			}
3650		}
3651		device_printf(dev, "<%s%s%s>",
3652		    cp ? cp : "",
3653		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3654		    scp ? scp : "");
3655	}
3656	printf(" at device %d.%d (no driver attached)\n",
3657	    pci_get_slot(child), pci_get_function(child));
3658	pci_cfg_save(child, device_get_ivars(child), 1);
3659	return;
3660}
3661
3662/*
3663 * Parse the PCI device database, if loaded, and return a pointer to a
3664 * description of the device.
3665 *
3666 * The database is flat text formatted as follows:
3667 *
3668 * Any line not in a valid format is ignored.
3669 * Lines are terminated with newline '\n' characters.
3670 *
3671 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3672 * the vendor name.
3673 *
3674 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3675 * - devices cannot be listed without a corresponding VENDOR line.
3676 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3677 * another TAB, then the device name.
3678 */
3679
3680/*
3681 * Assuming (ptr) points to the beginning of a line in the database,
3682 * return the vendor or device and description of the next entry.
3683 * The value of (vendor) or (device) inappropriate for the entry type
3684 * is set to -1.  Returns nonzero at the end of the database.
3685 *
3686 * Note that this is slightly unrobust in the face of corrupt data;
3687 * we attempt to safeguard against this by spamming the end of the
3688 * database with a newline when we initialise.
3689 */
3690static int
3691pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3692{
3693	char	*cp = *ptr;
3694	int	left;
3695
3696	*device = -1;
3697	*vendor = -1;
3698	**desc = '\0';
3699	for (;;) {
3700		left = pci_vendordata_size - (cp - pci_vendordata);
3701		if (left <= 0) {
3702			*ptr = cp;
3703			return(1);
3704		}
3705
3706		/* vendor entry? */
3707		if (*cp != '\t' &&
3708		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3709			break;
3710		/* device entry? */
3711		if (*cp == '\t' &&
3712		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3713			break;
3714
3715		/* skip to next line */
3716		while (*cp != '\n' && left > 0) {
3717			cp++;
3718			left--;
3719		}
3720		if (*cp == '\n') {
3721			cp++;
3722			left--;
3723		}
3724	}
3725	/* skip to next line */
3726	while (*cp != '\n' && left > 0) {
3727		cp++;
3728		left--;
3729	}
3730	if (*cp == '\n' && left > 0)
3731		cp++;
3732	*ptr = cp;
3733	return(0);
3734}
3735
/*
 * Look up the given device in the loaded vendor database and return a
 * malloc'ed (M_DEVBUF) "vendor, device" description string, or NULL
 * if no database is loaded, allocation fails or the vendor is not
 * listed.  The caller is responsible for freeing the result.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/* Scan the device lines under this vendor until the next vendor. */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: fall back to the numeric device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3788
3789int
3790pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3791{
3792	struct pci_devinfo *dinfo;
3793	pcicfgregs *cfg;
3794
3795	dinfo = device_get_ivars(child);
3796	cfg = &dinfo->cfg;
3797
3798	switch (which) {
3799	case PCI_IVAR_ETHADDR:
3800		/*
3801		 * The generic accessor doesn't deal with failure, so
3802		 * we set the return value, then return an error.
3803		 */
3804		*((uint8_t **) result) = NULL;
3805		return (EINVAL);
3806	case PCI_IVAR_SUBVENDOR:
3807		*result = cfg->subvendor;
3808		break;
3809	case PCI_IVAR_SUBDEVICE:
3810		*result = cfg->subdevice;
3811		break;
3812	case PCI_IVAR_VENDOR:
3813		*result = cfg->vendor;
3814		break;
3815	case PCI_IVAR_DEVICE:
3816		*result = cfg->device;
3817		break;
3818	case PCI_IVAR_DEVID:
3819		*result = (cfg->device << 16) | cfg->vendor;
3820		break;
3821	case PCI_IVAR_CLASS:
3822		*result = cfg->baseclass;
3823		break;
3824	case PCI_IVAR_SUBCLASS:
3825		*result = cfg->subclass;
3826		break;
3827	case PCI_IVAR_PROGIF:
3828		*result = cfg->progif;
3829		break;
3830	case PCI_IVAR_REVID:
3831		*result = cfg->revid;
3832		break;
3833	case PCI_IVAR_INTPIN:
3834		*result = cfg->intpin;
3835		break;
3836	case PCI_IVAR_IRQ:
3837		*result = cfg->intline;
3838		break;
3839	case PCI_IVAR_DOMAIN:
3840		*result = cfg->domain;
3841		break;
3842	case PCI_IVAR_BUS:
3843		*result = cfg->bus;
3844		break;
3845	case PCI_IVAR_SLOT:
3846		*result = cfg->slot;
3847		break;
3848	case PCI_IVAR_FUNCTION:
3849		*result = cfg->func;
3850		break;
3851	case PCI_IVAR_CMDREG:
3852		*result = cfg->cmdreg;
3853		break;
3854	case PCI_IVAR_CACHELNSZ:
3855		*result = cfg->cachelnsz;
3856		break;
3857	case PCI_IVAR_MINGNT:
3858		*result = cfg->mingnt;
3859		break;
3860	case PCI_IVAR_MAXLAT:
3861		*result = cfg->maxlat;
3862		break;
3863	case PCI_IVAR_LATTIMER:
3864		*result = cfg->lattimer;
3865		break;
3866	default:
3867		return (ENOENT);
3868	}
3869	return (0);
3870}
3871
3872int
3873pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3874{
3875	struct pci_devinfo *dinfo;
3876
3877	dinfo = device_get_ivars(child);
3878
3879	switch (which) {
3880	case PCI_IVAR_INTPIN:
3881		dinfo->cfg.intpin = value;
3882		return (0);
3883	case PCI_IVAR_ETHADDR:
3884	case PCI_IVAR_SUBVENDOR:
3885	case PCI_IVAR_SUBDEVICE:
3886	case PCI_IVAR_VENDOR:
3887	case PCI_IVAR_DEVICE:
3888	case PCI_IVAR_DEVID:
3889	case PCI_IVAR_CLASS:
3890	case PCI_IVAR_SUBCLASS:
3891	case PCI_IVAR_PROGIF:
3892	case PCI_IVAR_REVID:
3893	case PCI_IVAR_IRQ:
3894	case PCI_IVAR_DOMAIN:
3895	case PCI_IVAR_BUS:
3896	case PCI_IVAR_SLOT:
3897	case PCI_IVAR_FUNCTION:
3898		return (EINVAL);	/* disallow for now */
3899
3900	default:
3901		return (ENOENT);
3902	}
3903}
3904
3905#include "opt_ddb.h"
3906#ifdef DDB
3907#include <ddb/ddb.h>
3908#include <sys/cons.h>
3909
3910/*
3911 * List resources based on pci map registers, used for within ddb
3912 */
3913
/*
 * "show pciregs" DDB command: walk the global PCI device queue and
 * print a one-line summary (name/unit, selector, class, subsystem,
 * chip id, revision, header type) for every device found.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without a driver are printed as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3953#endif /* DDB */
3954
/*
 * Lazily reserve the backing resource for one of a child's BARs the
 * first time it is requested: size the BAR, check that the requested
 * resource type matches what the BAR decodes, allocate a suitably
 * sized and aligned range from our parent, record it in the child's
 * resource list and program the BAR with the result.  Returns the
 * reserved resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually obtained. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
4060
/*
 * Allocate a resource on behalf of a child device.  Requests from
 * grandchildren are passed straight up the tree; for our own children
 * we lazily route legacy interrupts and reserve BAR ranges (via
 * pci_reserve_map()) before satisfying the request from the child's
 * resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for descendants of our children go straight up. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4131
4132int
4133pci_activate_resource(device_t dev, device_t child, int type, int rid,
4134    struct resource *r)
4135{
4136	struct pci_devinfo *dinfo;
4137	int error;
4138
4139	error = bus_generic_activate_resource(dev, child, type, rid, r);
4140	if (error)
4141		return (error);
4142
4143	/* Enable decoding in the command register when activating BARs. */
4144	if (device_get_parent(child) == dev) {
4145		/* Device ROMs need their decoding explicitly enabled. */
4146		dinfo = device_get_ivars(child);
4147		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4148			pci_write_bar(child, pci_find_bar(child, rid),
4149			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4150		switch (type) {
4151		case SYS_RES_IOPORT:
4152		case SYS_RES_MEMORY:
4153			error = PCI_ENABLE_IO(dev, child, type);
4154			break;
4155		}
4156	}
4157	return (error);
4158}
4159
4160int
4161pci_deactivate_resource(device_t dev, device_t child, int type,
4162    int rid, struct resource *r)
4163{
4164	struct pci_devinfo *dinfo;
4165	int error;
4166
4167	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4168	if (error)
4169		return (error);
4170
4171	/* Disable decoding for device ROMs. */
4172	if (device_get_parent(child) == dev) {
4173		dinfo = device_get_ivars(child);
4174		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4175			pci_write_bar(child, pci_find_bar(child, rid),
4176			    rman_get_start(r));
4177	}
4178	return (0);
4179}
4180
/*
 * Detach and destroy a child device, releasing every resource still
 * recorded in its resource list on the way out.  Decoding is disabled
 * in the command register first so the device stops claiming the
 * address ranges being freed.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A detached driver should have released all of
			 * its resources; complain and force the release
			 * on the child's behalf if one is still active
			 * or busy, then drop our reservation of it.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4220
/*
 * Remove a single entry from a child's resource list.  The entry is
 * only deleted if the resource is not currently active or busy; a
 * reserved BAR is cleared in config space first so the device stops
 * decoding the range before it is unreserved.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* We only manage resource lists for our own children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4263
4264struct resource_list *
4265pci_get_resource_list (device_t dev, device_t child)
4266{
4267	struct pci_devinfo *dinfo = device_get_ivars(child);
4268
4269	return (&dinfo->resources);
4270}
4271
4272bus_dma_tag_t
4273pci_get_dma_tag(device_t bus, device_t dev)
4274{
4275	struct pci_softc *sc = device_get_softc(bus);
4276
4277	if (sc->sc_dma_tag_valid)
4278		return (sc->sc_dma_tag);
4279	return (bus_generic_get_dma_tag(bus, dev));
4280}
4281
4282uint32_t
4283pci_read_config_method(device_t dev, device_t child, int reg, int width)
4284{
4285	struct pci_devinfo *dinfo = device_get_ivars(child);
4286	pcicfgregs *cfg = &dinfo->cfg;
4287
4288	return (PCIB_READ_CONFIG(device_get_parent(dev),
4289	    cfg->bus, cfg->slot, cfg->func, reg, width));
4290}
4291
4292void
4293pci_write_config_method(device_t dev, device_t child, int reg,
4294    uint32_t val, int width)
4295{
4296	struct pci_devinfo *dinfo = device_get_ivars(child);
4297	pcicfgregs *cfg = &dinfo->cfg;
4298
4299	PCIB_WRITE_CONFIG(device_get_parent(dev),
4300	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4301}
4302
4303int
4304pci_child_location_str_method(device_t dev, device_t child, char *buf,
4305    size_t buflen)
4306{
4307
4308	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4309	    pci_get_function(child));
4310	return (0);
4311}
4312
4313int
4314pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4315    size_t buflen)
4316{
4317	struct pci_devinfo *dinfo;
4318	pcicfgregs *cfg;
4319
4320	dinfo = device_get_ivars(child);
4321	cfg = &dinfo->cfg;
4322	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4323	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4324	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4325	    cfg->progif);
4326	return (0);
4327}
4328
4329int
4330pci_assign_interrupt_method(device_t dev, device_t child)
4331{
4332	struct pci_devinfo *dinfo = device_get_ivars(child);
4333	pcicfgregs *cfg = &dinfo->cfg;
4334
4335	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4336	    cfg->intpin));
4337}
4338
4339static int
4340pci_modevent(module_t mod, int what, void *arg)
4341{
4342	static struct cdev *pci_cdev;
4343
4344	switch (what) {
4345	case MOD_LOAD:
4346		STAILQ_INIT(&pci_devq);
4347		pci_generation = 0;
4348		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4349		    "pci");
4350		pci_load_vendor_data();
4351		break;
4352
4353	case MOD_UNLOAD:
4354		destroy_dev(pci_cdev);
4355		break;
4356	}
4357
4358	return (0);
4359}
4360
/*
 * Restore the saved copy of a type 0 device's config registers, e.g.
 * after a suspend/resume or power state transition has reset them.
 * The power state is raised to D0 first since that reset clobbers the
 * BARs and other registers being restored.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4402
/*
 * Snapshot the writable config registers of a type 0 device into its
 * cached pci_devinfo copy so pci_cfg_restore() can put them back, and
 * optionally (setstate != 0) power the device down to D3 subject to
 * the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Note: the cases below deliberately fall through. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4482
4483/* Wrapper APIs suitable for device driver use. */
4484void
4485pci_save_state(device_t dev)
4486{
4487	struct pci_devinfo *dinfo;
4488
4489	dinfo = device_get_ivars(dev);
4490	pci_cfg_save(dev, dinfo, 0);
4491}
4492
4493void
4494pci_restore_state(device_t dev)
4495{
4496	struct pci_devinfo *dinfo;
4497
4498	dinfo = device_get_ivars(dev);
4499	pci_cfg_restore(dev, dinfo);
4500}
4501